chgksuite 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chgksuite/_html2md.py +90 -0
- chgksuite/cli.py +38 -8
- chgksuite/common.py +16 -12
- chgksuite/composer/__init__.py +9 -7
- chgksuite/composer/chgksuite_parser.py +20 -9
- chgksuite/composer/composer_common.py +30 -3
- chgksuite/composer/db.py +1 -2
- chgksuite/composer/docx.py +542 -292
- chgksuite/composer/latex.py +3 -4
- chgksuite/composer/lj.py +1 -2
- chgksuite/composer/{reddit.py → markdown.py} +35 -25
- chgksuite/composer/openquiz.py +2 -3
- chgksuite/composer/pptx.py +18 -6
- chgksuite/composer/telegram.py +22 -10
- chgksuite/handouter/gen.py +11 -7
- chgksuite/handouter/installer.py +0 -0
- chgksuite/handouter/runner.py +237 -10
- chgksuite/handouter/tex_internals.py +12 -13
- chgksuite/handouter/utils.py +22 -1
- chgksuite/lastdir +1 -0
- chgksuite/parser.py +218 -37
- chgksuite/parser_db.py +4 -6
- chgksuite/resources/labels_az.toml +22 -0
- chgksuite/resources/labels_by.toml +1 -2
- chgksuite/resources/labels_by_tar.toml +1 -2
- chgksuite/resources/labels_en.toml +1 -2
- chgksuite/resources/labels_kz_cyr.toml +1 -2
- chgksuite/resources/labels_ru.toml +1 -2
- chgksuite/resources/labels_sr.toml +1 -2
- chgksuite/resources/labels_ua.toml +1 -2
- chgksuite/resources/labels_uz.toml +0 -3
- chgksuite/resources/labels_uz_cyr.toml +1 -2
- chgksuite/resources/regexes_az.json +17 -0
- chgksuite/resources/regexes_by.json +3 -2
- chgksuite/resources/regexes_by_tar.json +17 -0
- chgksuite/resources/regexes_en.json +3 -2
- chgksuite/resources/regexes_kz_cyr.json +3 -2
- chgksuite/resources/regexes_ru.json +3 -2
- chgksuite/resources/regexes_sr.json +3 -2
- chgksuite/resources/regexes_ua.json +3 -2
- chgksuite/resources/regexes_uz.json +16 -0
- chgksuite/resources/regexes_uz_cyr.json +3 -2
- chgksuite/trello.py +8 -9
- chgksuite/typotools.py +9 -8
- chgksuite/version.py +1 -1
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/METADATA +10 -19
- chgksuite-0.27.0.dist-info/RECORD +63 -0
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/WHEEL +1 -2
- chgksuite/composer/telegram_parser.py +0 -230
- chgksuite-0.26.0b11.dist-info/RECORD +0 -59
- chgksuite-0.26.0b11.dist-info/top_level.txt +0 -1
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/entry_points.txt +0 -0
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -9,7 +9,6 @@ sources = "Манбалар"
|
|
|
9
9
|
author = "Муаллиф"
|
|
10
10
|
authors = "Муаллифлар"
|
|
11
11
|
handout = "Тарқатма материал"
|
|
12
|
-
handout_short = "Тарқат"
|
|
13
12
|
|
|
14
13
|
[general]
|
|
15
14
|
section = "Тур"
|
|
@@ -19,4 +18,4 @@ questions_in_comments = "Саволлар изоҳларда."
|
|
|
19
18
|
handout_for_question = "{} саволга тарқатма материал."
|
|
20
19
|
general_impressions_caption = "Умумий таасуротлар"
|
|
21
20
|
general_impressions_text = "Тўпламдан умумий таасуротлар — ушбу постнинг изоҳларида."
|
|
22
|
-
right_answers_for_stats = "Тўғри жавоблар фоизи"
|
|
21
|
+
right_answers_for_stats = "Тўғри жавоблар фоизи"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"tour": "^(#\\s+)?T[Uu][Rr]\\s?№?\\s?([0-9IVXLCDM]*)([\\.:])?$",
|
|
3
|
+
"tourrev": "^([0-9IVXLCDM]+)\\s[Tt][Uu][Rr]([\\.:])?$",
|
|
4
|
+
"question": "S[Uu][Aa][Ll]\\s?[№N]?(?P<number>[0-9\\s]*)\\s*([\\.:]|\\n|\\r\\n|$)",
|
|
5
|
+
"handout": "^P[Aa][Yy][Ll][Aa][Mm][Aa]\\s+[Mm][Aa][Tt][Ee][Rr][İi][Aa][Ll][\\.:]",
|
|
6
|
+
"answer": "C[Aa][Vv][Aa][Bb]?\\s?[№N]?([0-9]+)?\\s*[\\.:]?",
|
|
7
|
+
"zachet": "S[Aa][Yy][ıI][Ll][Mm][Aa]\\s+[Mm][Ee][Yy][Aa][Rr][ıI]\\s*[\\.:]?",
|
|
8
|
+
"nezachet": "S[Aa][Yy][ıI][Ll][Mm][ıI][Rr]\\s*[\\.:]?",
|
|
9
|
+
"comment": "Ş[Əə][Rr][Hh]\\s?[№N]?([0-9]+)?\\s*[\\.:]?",
|
|
10
|
+
"author": "M[Üü][Əə][Ll][Ll][İiİı][Ff]\\s*[\\.:]?",
|
|
11
|
+
"source": "M[Əə][Nn][Bb][Əə]\\s*[\\.:]?",
|
|
12
|
+
"editor": "R[Ee][Dd][Aa]([Kk][Tt][Oo][Rr]?\\s*(\\s*[\\.:]|\\s*[\\-–—]+\\s)?)",
|
|
13
|
+
"date": "TT[Aa][Rr][İiİı][Xx]\\s*[\\.:]?",
|
|
14
|
+
"date2": "(^|\\s)[Yy][Aa][Nn][Vv][Aa][Rr]|[Ff][Ee][Vv][Rr][Aa][Ll]|[Mm][Aa][Rr][Tt]|[Aa][Pp][Rr][Ee][Ll]|[Mm][Aa][Yy]|[İi][Yy][Uu][Nn]|[İi][Yy][Uu][Ll]|[Aa][Vv][Qq][Uu][Ss][Tt]|[Ss][Ee][Nn][Tt][Yy][Aa][Bb][Rr]|[Oo][Kk][Tt][Yy][Aa][Bb][Rr]|[Nn][Oo][Yy][Aa][Bb][Rr]|[Dd][Ee][Kk][Aa][Bb][Rr](\\s|$)",
|
|
15
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
16
|
+
"handout_short": "(P[Aa][Yy][Ll]|R[Ee][Kk][Vv][Ii][Zz][Ii][Tt])"
|
|
17
|
+
}
|
|
@@ -12,5 +12,6 @@
|
|
|
12
12
|
"editor": "[Рр][Ээ][Дд][Аа][Кк][Тт][Аа][Рр]([Ыы])?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
13
13
|
"date": "Д[Аа][Тт][Аа]\\s?[\\.:]",
|
|
14
14
|
"date2": "(^|\\s)студзеня|лютага|сакавіка|красавіка|мая|чэрвеня|ліпеня|жніўня|верасня|кастрычніка|лістапада|снежня(\\s|$)",
|
|
15
|
-
"number": "^[0-9]+[\\.\\)]\\s*"
|
|
16
|
-
|
|
15
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
16
|
+
"handout_short": "Р[Аа][Зз][Дд][Аа][Тт]"
|
|
17
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"tour": "^(#\\s+)?Т[Уу][Рр]\\s?№?\\s?([0-9IVXLCDM]*)([\\.:])?$",
|
|
3
|
+
"tourrev": "^([0-9IVXLCDM]+)\\s[Тт][Уу][Рр]([\\.:])?$",
|
|
4
|
+
"question": "П[Ыы][Тт][Аа][Нн][Ьь][Нн][Ее]\\s?[№N]?(?P<number>[0-9\\s]*)\\s?([\\.:]|\n|\r\n|$)",
|
|
5
|
+
"handout": "Р[Аа][Зз][Дд][Аа][Тт][Кк][Аа][Вв][Ыы][\\s\\s][Мм][Аа][Тт][Ээ][Рр][Ыы][Яя][Лл][\\.:]",
|
|
6
|
+
"answer": "А[Дд][Кк][Аа][Зз][Ыы]?\\s?[№N]?([0-9]+)?\\s?[\\.:]",
|
|
7
|
+
"zachet": "З[Аа][Лл][Іі][Кк]\\s?[\\.:]",
|
|
8
|
+
"nezachet": "Н[Ее][Зз][Аа][Лл][Іі][Кк]\\s?[\\.:]",
|
|
9
|
+
"comment": "К[Аа][Мм][Ее][Нн][Тт][Аа][Рр][Ыы][Йй]\\s?[№N]?([0-9]+)?\\s?[\\.:]",
|
|
10
|
+
"author": "А[Ўў][Тт][Аа][Рр](\\(?[Ыы]?\\)?|[Кк][АаИи])?\\s?[\\.:]",
|
|
11
|
+
"source": "К[Рр][Ыы][Нн][Іі][Цц][АаЫы]\\s?[\\.:]",
|
|
12
|
+
"editor": "[Рр][Ээ][Дд][Аа][Кк][Тт][Аа][Рр]([Ыы])?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
13
|
+
"date": "Д[Аа][Тт][Аа]\\s?[\\.:]",
|
|
14
|
+
"date2": "(^|\\s)студзеня|лютага|сакавіка|красавіка|мая|траўня|чэрвеня|ліпеня|жніўня|верасьня|кастрычніка|лістапада|сьнежня(\\s|$)",
|
|
15
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
16
|
+
"handout_short": "Р[Аа][Зз][Дд][Аа][Тт]"
|
|
17
|
+
}
|
|
@@ -12,5 +12,6 @@
|
|
|
12
12
|
"editor": "E[Dd][Ii][Tt]([Oo][Rr][Ss]?|[Ee][Dd]\\s[Bb][Yy])\\s?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
13
13
|
"date": "D[Aa][Tt][Ee]\\s?[\\.:]",
|
|
14
14
|
"date2": "(^|\\s)Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?(\\s|$)",
|
|
15
|
-
"number": "^[0-9]+[\\.\\)]\\s*"
|
|
16
|
-
|
|
15
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
16
|
+
"handout_short": "H[Aa][Nn][Dd][Oo][Uu][Tt]"
|
|
17
|
+
}
|
|
@@ -11,5 +11,6 @@
|
|
|
11
11
|
"editor": "Р[Ее][Дд][Аа][Кк][Тт][Оо][Рр]([Лл][Аа][Рр])?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
12
12
|
"date": "Д[Аа][Тт][Аа]\\s?[\\.:]",
|
|
13
13
|
"date2": "(^|\\s)[Яя][Нн][Вв][Аа][Рр][ЬьЯя]|[Фф][Ее][Вв][Рр][Аа][Лл][ЬьЯя]|[Мм][Аа][Рр][Тт][Аа]?|[Аа][Пп][Рр][Ее][Лл][ЬьЯя]|[Мм][Аа][ЙйЯя]|[Ии][Юю][Нн][ЬьЯя]|[Ии][Юю][Лл][ЬьЯя]|[Аа][Вв][Гг][Уу][Сс][Тт][Аа]?|[Сс][Ее][Нн][Тт][Яя][Бб][Рр][ЬьЯя]|[Оо][Кк][Тт][Яя][Бб][Рр][ЬьЯя]|[Нн][Оо][Яя][Бб][Рр][ЬьЯя]|[Дд][Ее][Кк][Аа][Бб][Рр][ЬьЯя](\\s|$)",
|
|
14
|
-
"number": "^[0-9]+[\\.\\)]\\s*"
|
|
15
|
-
|
|
14
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
15
|
+
"handout_short": "М[Аа][Тт][Ее][Рр][Ии][Аа][Лл]"
|
|
16
|
+
}
|
|
@@ -12,5 +12,6 @@
|
|
|
12
12
|
"editor": "[Рр][Ее][Дд][Аа][Кк][Тт][Оо][Рр]([Ыы]|[Сс][Кк][Аа][Яя]\\s[Гг][Рр][Уу][Пп][Пп][Аа])?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
13
13
|
"date": "Д[Аа][Тт][Аа]\\s?[\\.:]",
|
|
14
14
|
"date2": "(^|\\s)[Яя][Нн][Вв][Аа][Рр][ЬьЯя]|[Фф][Ее][Вв][Рр][Аа][Лл][ЬьЯя]|[Мм][Аа][Рр][Тт][Аа]?|[Аа][Пп][Рр][Ее][Лл][ЬьЯя]|[Мм][Аа][ЙйЯя]|[Ии][Юю][Нн][ЬьЯя]|[Ии][Юю][Лл][ЬьЯя]|[Аа][Вв][Гг][Уу][Сс][Тт][Аа]?|[Сс][Ее][Нн][Тт][Яя][Бб][Рр][ЬьЯя]|[Оо][Кк][Тт][Яя][Бб][Рр][ЬьЯя]|[Нн][Оо][Яя][Бб][Рр][ЬьЯя]|[Дд][Ее][Кк][Аа][Бб][Рр][ЬьЯя](\\s|$)",
|
|
15
|
-
"number": "^[0-9]+[\\.\\)]\\s*"
|
|
16
|
-
|
|
15
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
16
|
+
"handout_short": "Р[Аа][Зз][Дд][Аа][Тт]"
|
|
17
|
+
}
|
|
@@ -12,5 +12,6 @@
|
|
|
12
12
|
"editor": "U[Rr][Ee][Dd][Nn][Ii][Kk][Ii]?\\s?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
13
13
|
"date": "D[Aa][Tt][Uu][Mm]\\s?[\\.:]",
|
|
14
14
|
"date2": "WILL_FILL_LATER",
|
|
15
|
-
"number": "^[0-9]+[\\.\\)]\\s*"
|
|
16
|
-
|
|
15
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
16
|
+
"handout_short": "P[Oo][Dd][Ee][Ll][Jj][Ee][Nn][Oo]"
|
|
17
|
+
}
|
|
@@ -12,5 +12,6 @@
|
|
|
12
12
|
"editor": "[Рр][Ее][Дд][Аа][Кк][Тт][Оо][Рр]([Ии]|[Сс][Ьь][Кк][Аа]\\s[Гг][Рр][Уу][Пп][Аа])?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
13
13
|
"date": "Д[Аа][Тт][Аа]\\s?[\\.:]",
|
|
14
14
|
"date2": "(^|\\s)cічня|лютого|березня|квітня|травня|червня|липня|серпня|вересня|жовтня|листопада|грудня(\\s|$)",
|
|
15
|
-
"number": "^[0-9]+[\\.\\)]\\s*"
|
|
16
|
-
|
|
15
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
16
|
+
"handout_short": "Р[Оо][Зз][Дд][Аа][Тт]"
|
|
17
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"tour": "^(#\\s+)?([0-9]+)\\s?[-–—]\\s?[Tt][Uu][Rr]\\s*$",
|
|
3
|
+
"question": "(?P<number>[0-9]+)\\s?[-–—]\\s?[Ss][Aa][Vv][Oo][Ll]([\\.:]|\n|\r\n|$)",
|
|
4
|
+
"answer": "[Jj][Aa][Vv][Oo][Bb]\\s?[№N]?([0-9]+)?\\s?[\\.:]",
|
|
5
|
+
"handout": "[Tt][Aa][Rr][Qq][Aa][Tt][Mm][Aa][\\s\\s][Mm][Aa][Tt][Ee][Rr][Ii][Aa][Ll][\\.:]",
|
|
6
|
+
"zachet": "[Qq][Aa][Bb][Uu][Ll]\\s?[\\.:]",
|
|
7
|
+
"nezachet": "[Qq][Aa][Bb][Uu][Ll]\\s[Ee][Mm][Aa][Ss]?[\\.:]",
|
|
8
|
+
"comment": "[Ii][Zz][Oo][Hh]\\s?[№N]?([0-9]+)?\\s?[\\.:]",
|
|
9
|
+
"author": "[Mm][Uu][Aa][Ll][Ll][Ii][Ff]([Ll][Aa][Rr])?\\s?[\\.:]",
|
|
10
|
+
"source": "[Mm][Aa][Nn][Bb][Aa]([Ll][Aa][Rr])?\\s?[\\.:]",
|
|
11
|
+
"editor": "(M|Tur\\sm)[Uu][Hh][Aa][Rr][Rr][Ii][Rr]([Ll][Aa][Rr][Ii]?)?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
12
|
+
"date": "[Ss][Aa][Nn][Aa]\\s?[\\.:]",
|
|
13
|
+
"date2": "(^|\\s)[Yy]anvar|[Ff]evral|[Mm]art|[Aa]prel|[Mm]ay|[Ii]yun|[Ii]yul|[Aa]vgust|[Ss]entabr|[Oo]ktabr|[Nn]oyabr|[Dd]ekabr(\\s|$)",
|
|
14
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
15
|
+
"handout_short": "T[Aa][Rr][Qq][Aa][Tt]"
|
|
16
|
+
}
|
|
@@ -11,5 +11,6 @@
|
|
|
11
11
|
"editor": "(М|Тур\\sм)[Уу][Ҳҳ][Аа][Рр][Рр][Ии][Рр]([Лл][Аа][Рр][Ии]?)?(\\s?[\\.:]|\\s[\\-–—]+\\s)",
|
|
12
12
|
"date": "С[Аа][Нн][Аа]\\s?[\\.:]",
|
|
13
13
|
"date2": "(^|\\s)январь|февраль|март|апрель|май|июнь|июль|август|сентябрь|октябрь|ноябрь|декабрь(\\s|$)",
|
|
14
|
-
"number": "^[0-9]+[\\.\\)]\\s*"
|
|
15
|
-
|
|
14
|
+
"number": "^[0-9]+[\\.\\)]\\s*",
|
|
15
|
+
"handout_short": "Т[Аа][Рр][Ққ][Аа][Тт]"
|
|
16
|
+
}
|
chgksuite/trello.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
-
import codecs
|
|
4
3
|
import json
|
|
5
4
|
import os
|
|
6
5
|
import pdb
|
|
@@ -52,7 +51,7 @@ def upload_file(filepath, trello, list_name=None):
|
|
|
52
51
|
assert lid is not None
|
|
53
52
|
print(f"uploading to list '{list_['name']}'")
|
|
54
53
|
content = ""
|
|
55
|
-
with
|
|
54
|
+
with open(filepath, "r", encoding="utf-8") as f:
|
|
56
55
|
content = f.read()
|
|
57
56
|
cards = re.split(r"(\r?\n){2,}", content)
|
|
58
57
|
cards = [x for x in cards if x != "" and x != "\n" and x != "\r\n"]
|
|
@@ -255,7 +254,7 @@ def gui_trello_download(args):
|
|
|
255
254
|
|
|
256
255
|
board_id_path = os.path.join(args.folder, ".board_id")
|
|
257
256
|
if os.path.isfile(board_id_path):
|
|
258
|
-
with
|
|
257
|
+
with open(board_id_path, "r", encoding="utf-8") as f:
|
|
259
258
|
board_id = f.read().rstrip()
|
|
260
259
|
else:
|
|
261
260
|
board_id = get_board_id(path=args.folder)
|
|
@@ -401,14 +400,14 @@ def gui_trello_download(args):
|
|
|
401
400
|
result.extend(_lists[_list["name"]])
|
|
402
401
|
filename = "singlefile.4s"
|
|
403
402
|
print("outputting {}".format(filename))
|
|
404
|
-
with
|
|
403
|
+
with open(filename, "w", encoding="utf-8") as f:
|
|
405
404
|
for item in result:
|
|
406
405
|
f.write("\n" + item + "\n")
|
|
407
406
|
else:
|
|
408
407
|
for _list in _lists:
|
|
409
408
|
filename = "{}.4s".format(_list)
|
|
410
409
|
print("outputting {}".format(filename))
|
|
411
|
-
with
|
|
410
|
+
with open(filename, "w", encoding="utf-8") as f:
|
|
412
411
|
for item in _lists[_list]:
|
|
413
412
|
f.write("\n" + item + "\n")
|
|
414
413
|
|
|
@@ -421,12 +420,12 @@ def get_board_id(path=None):
|
|
|
421
420
|
print(" board_id")
|
|
422
421
|
print()
|
|
423
422
|
board_id = input(
|
|
424
|
-
"Please paste your board_id
|
|
423
|
+
"Please paste your board_id (or the board link, we'll parse it): "
|
|
425
424
|
).rstrip()
|
|
426
425
|
if "trello.com" in board_id:
|
|
427
426
|
board_id = re_bi.search(board_id).group(1)
|
|
428
427
|
if path:
|
|
429
|
-
with
|
|
428
|
+
with open(os.path.join(path, ".board_id"), "w", encoding="utf-8") as f:
|
|
430
429
|
f.write(board_id)
|
|
431
430
|
return board_id
|
|
432
431
|
|
|
@@ -437,7 +436,7 @@ def get_token(tokenpath, args):
|
|
|
437
436
|
else:
|
|
438
437
|
webbrowser.open(TRELLO_URL)
|
|
439
438
|
token = input("Please paste the obtained token: ").rstrip()
|
|
440
|
-
with
|
|
439
|
+
with open(tokenpath, "w", encoding="utf-8") as f:
|
|
441
440
|
f.write(token)
|
|
442
441
|
return token
|
|
443
442
|
|
|
@@ -452,7 +451,7 @@ def gui_trello(args):
|
|
|
452
451
|
if not os.path.isfile(tokenpath):
|
|
453
452
|
token = get_token(tokenpath, args)
|
|
454
453
|
else:
|
|
455
|
-
with
|
|
454
|
+
with open(tokenpath, "r", encoding="utf-8") as f:
|
|
456
455
|
token = f.read().rstrip()
|
|
457
456
|
|
|
458
457
|
with open(os.path.join(resourcedir, "trello.json")) as f:
|
chgksuite/typotools.py
CHANGED
|
@@ -92,13 +92,13 @@ def uni_normalize(k):
|
|
|
92
92
|
|
|
93
93
|
|
|
94
94
|
def cyr_lat_check_char(i, char, word):
|
|
95
|
-
if char in CYRILLIC_CHARS:
|
|
95
|
+
if char.lower() in CYRILLIC_CHARS:
|
|
96
96
|
return
|
|
97
97
|
if not (
|
|
98
|
-
(i == 0 or word[i - 1] in CYRILLIC_CHARS or not word[i - 1].isalpha())
|
|
98
|
+
(i == 0 or word[i - 1].lower() in CYRILLIC_CHARS or not word[i - 1].isalpha())
|
|
99
99
|
and (
|
|
100
100
|
i == len(word) - 1
|
|
101
|
-
or word[i + 1] in CYRILLIC_CHARS
|
|
101
|
+
or word[i + 1].lower() in CYRILLIC_CHARS
|
|
102
102
|
or not word[i + 1].isalpha()
|
|
103
103
|
)
|
|
104
104
|
):
|
|
@@ -121,7 +121,7 @@ def cyr_lat_check_word(word):
|
|
|
121
121
|
if check_result:
|
|
122
122
|
replacements[char] = check_result
|
|
123
123
|
elif (
|
|
124
|
-
char in CYRILLIC_CHARS
|
|
124
|
+
char.lower() in CYRILLIC_CHARS
|
|
125
125
|
and i < len(word) - 1
|
|
126
126
|
and word[i + 1] in ACCENTS_TO_FIX
|
|
127
127
|
):
|
|
@@ -258,7 +258,9 @@ def replace_no_break(s, spaces=True, hyphens=True):
|
|
|
258
258
|
r_from = "(^|[ \u00a0]){sp} ".format(sp=sp)
|
|
259
259
|
r_to = "\\g<1>{sp}\u00a0".format(sp=sp)
|
|
260
260
|
s = re.sub(r_from, r_to, s)
|
|
261
|
-
for sp in NO_BREAK_SEQUENCES_LEFT + [
|
|
261
|
+
for sp in NO_BREAK_SEQUENCES_LEFT + [
|
|
262
|
+
x.title() for x in NO_BREAK_SEQUENCES_LEFT
|
|
263
|
+
]:
|
|
262
264
|
r_from = " {sp}([ \u00a0]|$)".format(sp=sp)
|
|
263
265
|
r_to = "\u00a0{sp}\\g<1>".format(sp=sp)
|
|
264
266
|
s = re.sub(r_from, r_to, s)
|
|
@@ -298,8 +300,7 @@ def detect_accent(s):
|
|
|
298
300
|
s = s[: s.index(word)] + word_new + s[s.index(word) + len(word) :]
|
|
299
301
|
except Exception as e:
|
|
300
302
|
sys.stderr.write(
|
|
301
|
-
f"exception {type(e)} {e} "
|
|
302
|
-
f"while trying to process word {repr(word)}"
|
|
303
|
+
f"exception {type(e)} {e} while trying to process word {repr(word)}"
|
|
303
304
|
)
|
|
304
305
|
return s
|
|
305
306
|
|
|
@@ -313,7 +314,7 @@ def percent_decode(s):
|
|
|
313
314
|
s = s.replace(gr, unquote(gr.encode("utf8")).decode("utf8"))
|
|
314
315
|
except Exception as e:
|
|
315
316
|
sys.stderr.write(
|
|
316
|
-
f"exception {type(e)} {e}
|
|
317
|
+
f"exception {type(e)} {e} while trying to replace percents in {gr}"
|
|
317
318
|
)
|
|
318
319
|
return s
|
|
319
320
|
|
chgksuite/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.27.0"
|
|
@@ -1,23 +1,21 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chgksuite
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.27.0
|
|
4
4
|
Summary: A package for chgk automation
|
|
5
|
-
|
|
6
|
-
Author: Alexander Pecheny
|
|
7
|
-
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Description-Content-Type: text/markdown
|
|
5
|
+
Project-URL: Homepage, https://gitlab.com/peczony/chgksuite
|
|
6
|
+
Author-email: Alexander Pecheny <ap@pecheny.me>
|
|
7
|
+
License-Expression: MIT
|
|
12
8
|
License-File: LICENSE
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Requires-Python: >=3.9
|
|
13
12
|
Requires-Dist: beautifulsoup4
|
|
14
13
|
Requires-Dist: chardet
|
|
15
|
-
Requires-Dist: dashtable
|
|
16
14
|
Requires-Dist: dateparser
|
|
17
15
|
Requires-Dist: mammoth
|
|
18
16
|
Requires-Dist: openpyxl
|
|
19
17
|
Requires-Dist: parse
|
|
20
|
-
Requires-Dist:
|
|
18
|
+
Requires-Dist: pillow
|
|
21
19
|
Requires-Dist: ply
|
|
22
20
|
Requires-Dist: pypandoc
|
|
23
21
|
Requires-Dist: pypdf
|
|
@@ -27,16 +25,9 @@ Requires-Dist: python-pptx
|
|
|
27
25
|
Requires-Dist: python-telegram-bot
|
|
28
26
|
Requires-Dist: requests
|
|
29
27
|
Requires-Dist: toml
|
|
28
|
+
Requires-Dist: urllib3>=2.6.2
|
|
30
29
|
Requires-Dist: watchdog
|
|
31
|
-
|
|
32
|
-
Dynamic: author-email
|
|
33
|
-
Dynamic: classifier
|
|
34
|
-
Dynamic: description
|
|
35
|
-
Dynamic: description-content-type
|
|
36
|
-
Dynamic: home-page
|
|
37
|
-
Dynamic: license-file
|
|
38
|
-
Dynamic: requires-dist
|
|
39
|
-
Dynamic: summary
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
40
31
|
|
|
41
32
|
**chgksuite** is an utility that helps chgk editors.
|
|
42
33
|
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
chgksuite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
chgksuite/__main__.py,sha256=0-_jfloveTW3SZYW5XEagbyaHKGCiDhGNgcLxsT_dMs,140
|
|
3
|
+
chgksuite/_html2md.py,sha256=_FP2tXangkFubnByXNoe4sDfR_njUy19MCD4OWX-BP0,2414
|
|
4
|
+
chgksuite/cli.py,sha256=fHa7HNJeQeUNXpbqnMnSXHOMpbxpaph6d3KdVNx9uNg,41257
|
|
5
|
+
chgksuite/common.py,sha256=TGXXWjdCS8Fy5Gkxuw7ZN3xh9pTS84ZtvRQj7IYU1E4,11341
|
|
6
|
+
chgksuite/lastdir,sha256=BbZVRYZnXBQzJUrl7a2e4xuUcfmq6asNI705pTxBfD4,49
|
|
7
|
+
chgksuite/parser.py,sha256=6T6o3F_oJVe9sl96gCaACCwtuF7lcn2V-24qEMN27zw,45847
|
|
8
|
+
chgksuite/parser_db.py,sha256=Ngh2ZYhAyetb6Sa-5xC9aX8quX9Ar1WheSfhSy-JADw,11105
|
|
9
|
+
chgksuite/trello.py,sha256=UYWemtf6z0jKW3tY-aof7_w2hWqcljYNd891vXFZ8mw,14731
|
|
10
|
+
chgksuite/typotools.py,sha256=J2AEQbfcR0HHSu5WCALpj4Ya_ngOMXRh7esJgcybWQM,12797
|
|
11
|
+
chgksuite/version.py,sha256=Hjsc9XQ3AkqXm-5cdlX-H8iKjURzi1FQeWNi2xx34Uo,23
|
|
12
|
+
chgksuite/vulture_whitelist.py,sha256=P__p_X0zt10ivddIf81uyxsobV14vFg8uS2lt4foYpc,3582
|
|
13
|
+
chgksuite/composer/__init__.py,sha256=kTQsxgn_ED18DgZjdvh7QuwC36FVbaiv_eYTjj3zdIk,6596
|
|
14
|
+
chgksuite/composer/chgksuite_parser.py,sha256=ItlTenviFDuqP-f1960nzD-gRPFDQy4RdOL39PswTvg,9044
|
|
15
|
+
chgksuite/composer/composer_common.py,sha256=kc-_Tc9NjevfXGj4fXoa9fye9AO0EuMSnEPJnS0n-aQ,16281
|
|
16
|
+
chgksuite/composer/db.py,sha256=DI-goR4V69S8bufNfW5smTFG9puiyjcxC1u1TFuhfYs,8162
|
|
17
|
+
chgksuite/composer/docx.py,sha256=cxPgjykAlU3XxKCtM3K13Wm6IcKqAHo0ngLqa8-2opM,23716
|
|
18
|
+
chgksuite/composer/latex.py,sha256=Ouq3OkUA0oS_zhCOiPZ2tY2sMe3OVg84Cead2GgJi2c,9231
|
|
19
|
+
chgksuite/composer/lj.py,sha256=7zrLrvbgYEoiF-NC7zyDaJTvPMhwt8tQT_KWgsAeG4A,15202
|
|
20
|
+
chgksuite/composer/markdown.py,sha256=hWajBAvop_BTyBI6rzeIwAaj7p_6p2kQre4WDMalD90,4343
|
|
21
|
+
chgksuite/composer/openquiz.py,sha256=BF506tH6b1IoocC61l5xBU969l38S1IjqMi6mqhG_HI,6990
|
|
22
|
+
chgksuite/composer/pptx.py,sha256=9O0tfx2xpyx9Y4ceatVIXdiwvslnj8gliZIlxsDe5Ow,23971
|
|
23
|
+
chgksuite/composer/stats.py,sha256=GbraSrjaZ8Mc2URs5aGAsI4ekboAKzlJJOqsbe96ELA,3995
|
|
24
|
+
chgksuite/composer/telegram.py,sha256=KM9Bnkf2bxdJNMrhjCCw2xx-sWcIQioBc1FdSY4OX-g,47431
|
|
25
|
+
chgksuite/composer/telegram_bot.py,sha256=xT5D39m4zGmIbHV_ZfyQ9Rc8PAmG2V5FGUeDKpkgyTw,3767
|
|
26
|
+
chgksuite/handouter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
+
chgksuite/handouter/gen.py,sha256=CJfGl8R2vnElnsjBo0n4u-y8KD-l2qxrIuUHj5WZBuI,5241
|
|
28
|
+
chgksuite/handouter/installer.py,sha256=u4jQKeCn0VjOnaDFezx35g8oRjji5edvYGj5xSHCEW4,7574
|
|
29
|
+
chgksuite/handouter/pack.py,sha256=H-Ln1JqKK2u3jFI5wwsh7pQdJBpQJ-s8gV9iECQ3kgU,2504
|
|
30
|
+
chgksuite/handouter/runner.py,sha256=AOJ90ncqMotib_FCPL7K50oQh6tH1EuhnKf3FGSUTxk,17331
|
|
31
|
+
chgksuite/handouter/tex_internals.py,sha256=j9zvai6zmaEg8osXnJ4yaXAx-89ufcpypySjyoxGFM0,1544
|
|
32
|
+
chgksuite/handouter/utils.py,sha256=0RoECvHzfmwWnRL2jj4WKh22oTCPh2MXid_a9ShplDA,2243
|
|
33
|
+
chgksuite/resources/cheader.tex,sha256=Jfe3LESk0VIV0HCObbajSQpEMljaIDAIEGSs6YY9rTk,3454
|
|
34
|
+
chgksuite/resources/fix-unnumbered-sections.sty,sha256=FN6ZSWC6MvoRoThPm5AxCF98DdgcxbxyBYG6YImM05s,1409
|
|
35
|
+
chgksuite/resources/labels_az.toml,sha256=hiXt-54nMQ4Ie0RUVbQhXxlj47NEc2qNwe7O1NIMe-8,662
|
|
36
|
+
chgksuite/resources/labels_by.toml,sha256=nZv6fwUYEPPVooyZi8U3LOkn8PZajKxm0fzATWOBVDE,794
|
|
37
|
+
chgksuite/resources/labels_by_tar.toml,sha256=Ba4HGg4MbPTV7GEcfVScpiRL3X8sIpVGi1-dFXm2dms,812
|
|
38
|
+
chgksuite/resources/labels_en.toml,sha256=XEQdFXHXZSl1X7pxeddWoFVwkHaxxxSmvo9IJLHcmus,591
|
|
39
|
+
chgksuite/resources/labels_kz_cyr.toml,sha256=giUX-SDSI_p4Z2G6y4zHc_ZX8uf63OBwvmmKGInD1y8,912
|
|
40
|
+
chgksuite/resources/labels_ru.toml,sha256=nEc-eQdtkMj6Ibcu4QZaLYq-OvLqjxjZzyEGsWZ4UAQ,829
|
|
41
|
+
chgksuite/resources/labels_sr.toml,sha256=xYnZuvvg2f_A314deiqpTI6PcqRfl3hqCp51HR3DmgY,601
|
|
42
|
+
chgksuite/resources/labels_ua.toml,sha256=1xmySvbVXUMltRNQq5Vxjyf0ipBIsvbd--UtJbTBq4M,793
|
|
43
|
+
chgksuite/resources/labels_uz.toml,sha256=YAVut-eYv-xmCfd81hTIYyK01HCR0xMkmA_lzKzOYek,582
|
|
44
|
+
chgksuite/resources/labels_uz_cyr.toml,sha256=vUH4oUzKzjPFQjFE_nACLIiRL5_bV4tcttzpbPKkztQ,784
|
|
45
|
+
chgksuite/resources/pptx_config.toml,sha256=pbdQpZTBZajfXxf9Ej6jFQv8luA9_h2Lsihx9hs2jkI,570
|
|
46
|
+
chgksuite/resources/regexes_az.json,sha256=TOJIo6o1Fd1PMkCPdN9de81kv9txlyOSbz-zTVsbyts,1252
|
|
47
|
+
chgksuite/resources/regexes_by.json,sha256=tWwL03dSuV9SF31fFC26zER6w2Va41Ko5D0gp9fgEYQ,1364
|
|
48
|
+
chgksuite/resources/regexes_by_tar.json,sha256=k5eCNOUinOadqfSVpx6nHj2KW2oleIZYcAACBe_iDc4,1344
|
|
49
|
+
chgksuite/resources/regexes_en.json,sha256=Faj3KAwLaPXOwJEK7d3tnRuSB0NYwAeSG-Vcmy9D06g,1064
|
|
50
|
+
chgksuite/resources/regexes_kz_cyr.json,sha256=Ggu6owq5sLatlDoRgSRhtFNvIeDRvr9tDyezw68IaC4,1759
|
|
51
|
+
chgksuite/resources/regexes_ru.json,sha256=BFZw2FW7Vf49C_DD-nXi5yf9DQYAVpMJUWjDVz3HrGg,2245
|
|
52
|
+
chgksuite/resources/regexes_sr.json,sha256=kyQQ2Ht3ucZRIgEKyqiGtEVUoUiTrmE1JSVasU8FSD8,994
|
|
53
|
+
chgksuite/resources/regexes_ua.json,sha256=SSLjLDKfKzbEIvxJAXwSGCgsmnyKgXnkhUNByjN2gVs,1398
|
|
54
|
+
chgksuite/resources/regexes_uz.json,sha256=07HrBZd9XymcEpHYW9Kf41-hZtvFJQdeJbge-TE-QMM,1015
|
|
55
|
+
chgksuite/resources/regexes_uz_cyr.json,sha256=D4AyaEPEY753I47Ky2Fwol_4kxQsl-Yu96HwbIiyDwI,1174
|
|
56
|
+
chgksuite/resources/template.docx,sha256=Do29TAsg3YbH0rRSaXhVzKEoh4pwXkklW_idWA34HVE,11189
|
|
57
|
+
chgksuite/resources/template.pptx,sha256=hEFWqE-yYpwZ8ejrMCJIPEyoMT3eDqaqtiEeQ7I4fyk,29777
|
|
58
|
+
chgksuite/resources/trello.json,sha256=M5Q9JR-AAJF1u16YtNAxDX-7c7VoVTXuq4POTqYvq8o,555
|
|
59
|
+
chgksuite-0.27.0.dist-info/METADATA,sha256=Ln_5OzfvOLqioOMK-mPmYiqZI1bPcpguRByxzfIvBFs,1152
|
|
60
|
+
chgksuite-0.27.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
61
|
+
chgksuite-0.27.0.dist-info/entry_points.txt,sha256=lqjX6ULQZGDt0rgouTXBuwEPiwKkDQkSiNsT877A_Jg,54
|
|
62
|
+
chgksuite-0.27.0.dist-info/licenses/LICENSE,sha256=_a1yfntuPmctLsuiE_08xMSORuCfGS8X5hQph2U_PUw,1081
|
|
63
|
+
chgksuite-0.27.0.dist-info/RECORD,,
|
|
@@ -1,230 +0,0 @@
|
|
|
1
|
-
# from https://gist.github.com/YouKnow-sys/3d571bdd4857f175d91db8146ec065bf
|
|
2
|
-
|
|
3
|
-
from collections import deque
|
|
4
|
-
from html import escape
|
|
5
|
-
from html.parser import HTMLParser
|
|
6
|
-
from typing import Iterable, List, Tuple
|
|
7
|
-
|
|
8
|
-
from telethon.helpers import add_surrogate, del_surrogate, strip_text, within_surrogate
|
|
9
|
-
from telethon.tl import TLObject
|
|
10
|
-
from telethon.types import (
|
|
11
|
-
MessageEntityBlockquote,
|
|
12
|
-
MessageEntityBold,
|
|
13
|
-
MessageEntityCode,
|
|
14
|
-
MessageEntityCustomEmoji,
|
|
15
|
-
MessageEntityEmail,
|
|
16
|
-
MessageEntityItalic,
|
|
17
|
-
MessageEntityMentionName,
|
|
18
|
-
MessageEntityPre,
|
|
19
|
-
MessageEntitySpoiler,
|
|
20
|
-
MessageEntityStrike,
|
|
21
|
-
MessageEntityTextUrl,
|
|
22
|
-
MessageEntityUnderline,
|
|
23
|
-
MessageEntityUrl,
|
|
24
|
-
TypeMessageEntity,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class HTMLToTelegramParser(HTMLParser):
|
|
29
|
-
def __init__(self):
|
|
30
|
-
super().__init__()
|
|
31
|
-
self.text = ""
|
|
32
|
-
self.entities = []
|
|
33
|
-
self._building_entities = {}
|
|
34
|
-
self._open_tags = deque()
|
|
35
|
-
self._open_tags_meta = deque()
|
|
36
|
-
|
|
37
|
-
def handle_starttag(self, tag, attrs):
|
|
38
|
-
self._open_tags.appendleft(tag)
|
|
39
|
-
self._open_tags_meta.appendleft(None)
|
|
40
|
-
|
|
41
|
-
attrs = dict(attrs)
|
|
42
|
-
EntityType = None
|
|
43
|
-
args = {}
|
|
44
|
-
match tag:
|
|
45
|
-
case "strong" | "b":
|
|
46
|
-
EntityType = MessageEntityBold
|
|
47
|
-
case "em" | "i":
|
|
48
|
-
EntityType = MessageEntityItalic
|
|
49
|
-
case "u":
|
|
50
|
-
EntityType = MessageEntityUnderline
|
|
51
|
-
case "del" | "s":
|
|
52
|
-
EntityType = MessageEntityStrike
|
|
53
|
-
case "blockquote":
|
|
54
|
-
EntityType = MessageEntityBlockquote
|
|
55
|
-
case "spoiler":
|
|
56
|
-
EntityType = MessageEntitySpoiler
|
|
57
|
-
case "code":
|
|
58
|
-
try:
|
|
59
|
-
# If we're in the middle of a <pre> tag, this <code> tag is
|
|
60
|
-
# probably intended for syntax highlighting.
|
|
61
|
-
#
|
|
62
|
-
# Syntax highlighting is set with
|
|
63
|
-
# <code class='language-...'>codeblock</code>
|
|
64
|
-
# inside <pre> tags
|
|
65
|
-
pre = self._building_entities["pre"]
|
|
66
|
-
try:
|
|
67
|
-
pre.language = attrs["class"][len("language-") :] # type: ignore
|
|
68
|
-
except KeyError:
|
|
69
|
-
pass
|
|
70
|
-
except KeyError:
|
|
71
|
-
EntityType = MessageEntityCode
|
|
72
|
-
case "pre":
|
|
73
|
-
EntityType = MessageEntityPre
|
|
74
|
-
args["language"] = ""
|
|
75
|
-
case "a":
|
|
76
|
-
try:
|
|
77
|
-
url = attrs["href"]
|
|
78
|
-
if not url:
|
|
79
|
-
raise KeyError
|
|
80
|
-
except KeyError:
|
|
81
|
-
return
|
|
82
|
-
if url.startswith("mailto:"):
|
|
83
|
-
url = url[len("mailto:") :]
|
|
84
|
-
EntityType = MessageEntityEmail
|
|
85
|
-
else:
|
|
86
|
-
if self.get_starttag_text() == url:
|
|
87
|
-
EntityType = MessageEntityUrl
|
|
88
|
-
else:
|
|
89
|
-
EntityType = MessageEntityTextUrl
|
|
90
|
-
args["url"] = del_surrogate(url)
|
|
91
|
-
url = None
|
|
92
|
-
self._open_tags_meta.popleft()
|
|
93
|
-
self._open_tags_meta.appendleft(url)
|
|
94
|
-
case "tg-emoji":
|
|
95
|
-
try:
|
|
96
|
-
emoji_id = attrs["emoji-id"]
|
|
97
|
-
if not emoji_id:
|
|
98
|
-
raise ValueError
|
|
99
|
-
emoji_id = int(emoji_id)
|
|
100
|
-
except (KeyError, ValueError):
|
|
101
|
-
return
|
|
102
|
-
EntityType = MessageEntityCustomEmoji
|
|
103
|
-
args["document_id"] = emoji_id
|
|
104
|
-
|
|
105
|
-
if EntityType and tag not in self._building_entities:
|
|
106
|
-
self._building_entities[tag] = EntityType(
|
|
107
|
-
offset=len(self.text),
|
|
108
|
-
# The length will be determined when closing the tag.
|
|
109
|
-
length=0,
|
|
110
|
-
**args,
|
|
111
|
-
)
|
|
112
|
-
|
|
113
|
-
def handle_data(self, data):
|
|
114
|
-
previous_tag = self._open_tags[0] if len(self._open_tags) > 0 else ""
|
|
115
|
-
if previous_tag == "a":
|
|
116
|
-
url = self._open_tags_meta[0]
|
|
117
|
-
if url:
|
|
118
|
-
data = url
|
|
119
|
-
|
|
120
|
-
for tag, entity in self._building_entities.items():
|
|
121
|
-
entity.length += len(data)
|
|
122
|
-
|
|
123
|
-
self.text += data
|
|
124
|
-
|
|
125
|
-
def handle_endtag(self, tag):
|
|
126
|
-
try:
|
|
127
|
-
self._open_tags.popleft()
|
|
128
|
-
self._open_tags_meta.popleft()
|
|
129
|
-
except IndexError:
|
|
130
|
-
pass
|
|
131
|
-
entity = self._building_entities.pop(tag, None)
|
|
132
|
-
if entity:
|
|
133
|
-
self.entities.append(entity)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
ENTITY_TO_FORMATTER = {
|
|
137
|
-
MessageEntityBold: ("<strong>", "</strong>"),
|
|
138
|
-
MessageEntityItalic: ("<em>", "</em>"),
|
|
139
|
-
MessageEntityCode: ("<code>", "</code>"),
|
|
140
|
-
MessageEntityUnderline: ("<u>", "</u>"),
|
|
141
|
-
MessageEntityStrike: ("<del>", "</del>"),
|
|
142
|
-
MessageEntityBlockquote: ("<blockquote>", "</blockquote>"),
|
|
143
|
-
MessageEntitySpoiler: ("<spoiler>", "</spoiler>"),
|
|
144
|
-
MessageEntityPre: lambda e, _: (
|
|
145
|
-
"<pre>\n <code class='language-{}'>\n ".format(e.language),
|
|
146
|
-
"{}\n </code>\n</pre>",
|
|
147
|
-
),
|
|
148
|
-
MessageEntityEmail: lambda _, t: ('<a href="mailto:{}">'.format(t), "</a>"),
|
|
149
|
-
MessageEntityUrl: lambda _, t: ('<a href="{}">'.format(t), "</a>"),
|
|
150
|
-
MessageEntityTextUrl: lambda e, _: ('<a href="{}">'.format(escape(e.url)), "</a>"),
|
|
151
|
-
MessageEntityMentionName: lambda e, _: (
|
|
152
|
-
'<a href="tg://user?id={}">'.format(e.user_id),
|
|
153
|
-
"</a>",
|
|
154
|
-
),
|
|
155
|
-
MessageEntityCustomEmoji: lambda e, _: (
|
|
156
|
-
'<tg-emoji emoji-id="{}">'.format(e.document_id),
|
|
157
|
-
"</tg-emoji>",
|
|
158
|
-
),
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
class CustomHtmlParser:
|
|
163
|
-
@staticmethod
|
|
164
|
-
def parse(html: str) -> Tuple[str, List[TypeMessageEntity]]:
|
|
165
|
-
"""
|
|
166
|
-
Parses the given HTML message and returns its stripped representation
|
|
167
|
-
plus a list of the MessageEntity's that were found.
|
|
168
|
-
|
|
169
|
-
:param html: the message with HTML to be parsed.
|
|
170
|
-
:return: a tuple consisting of (clean message, [message entities]).
|
|
171
|
-
"""
|
|
172
|
-
if not html:
|
|
173
|
-
return html, []
|
|
174
|
-
|
|
175
|
-
parser = HTMLToTelegramParser()
|
|
176
|
-
parser.feed(add_surrogate(html))
|
|
177
|
-
text = strip_text(parser.text, parser.entities)
|
|
178
|
-
parser.entities.reverse()
|
|
179
|
-
parser.entities.sort(key=lambda entity: entity.offset)
|
|
180
|
-
return del_surrogate(text), parser.entities
|
|
181
|
-
|
|
182
|
-
@staticmethod
|
|
183
|
-
def unparse(text: str, entities: Iterable[TypeMessageEntity]) -> str:
|
|
184
|
-
"""
|
|
185
|
-
Performs the reverse operation to .parse(), effectively returning HTML
|
|
186
|
-
given a normal text and its MessageEntity's.
|
|
187
|
-
|
|
188
|
-
:param text: the text to be reconverted into HTML.
|
|
189
|
-
:param entities: the MessageEntity's applied to the text.
|
|
190
|
-
:return: a HTML representation of the combination of both inputs.
|
|
191
|
-
"""
|
|
192
|
-
if not text:
|
|
193
|
-
return text
|
|
194
|
-
elif not entities:
|
|
195
|
-
return escape(text)
|
|
196
|
-
|
|
197
|
-
if isinstance(entities, TLObject):
|
|
198
|
-
entities = (entities,) # type: ignore
|
|
199
|
-
|
|
200
|
-
text = add_surrogate(text)
|
|
201
|
-
insert_at = []
|
|
202
|
-
for i, entity in enumerate(entities):
|
|
203
|
-
s = entity.offset
|
|
204
|
-
e = entity.offset + entity.length
|
|
205
|
-
delimiter = ENTITY_TO_FORMATTER.get(type(entity), None) # type: ignore
|
|
206
|
-
if delimiter:
|
|
207
|
-
if callable(delimiter):
|
|
208
|
-
delimiter = delimiter(entity, text[s:e])
|
|
209
|
-
insert_at.append((s, i, delimiter[0]))
|
|
210
|
-
insert_at.append((e, -i, delimiter[1]))
|
|
211
|
-
|
|
212
|
-
insert_at.sort(key=lambda t: (t[0], t[1]))
|
|
213
|
-
next_escape_bound = len(text)
|
|
214
|
-
while insert_at:
|
|
215
|
-
# Same logic as markdown.py
|
|
216
|
-
at, _, what = insert_at.pop()
|
|
217
|
-
while within_surrogate(text, at):
|
|
218
|
-
at += 1
|
|
219
|
-
|
|
220
|
-
text = (
|
|
221
|
-
text[:at]
|
|
222
|
-
+ what
|
|
223
|
-
+ escape(text[at:next_escape_bound])
|
|
224
|
-
+ text[next_escape_bound:]
|
|
225
|
-
)
|
|
226
|
-
next_escape_bound = at
|
|
227
|
-
|
|
228
|
-
text = escape(text[:next_escape_bound]) + text[next_escape_bound:]
|
|
229
|
-
|
|
230
|
-
return del_surrogate(text)
|