md2bbcode 1.0.9__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/PKG-INFO +30 -30
- md2bbcode-1.1.0/README.md +122 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/bb_codes.xml +90 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/pyproject.toml +2 -1
- md2bbcode-1.1.0/src/md2bbcode/html2bbcode.py +491 -0
- md2bbcode-1.1.0/src/md2bbcode/image_rewrite.py +67 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/src/md2bbcode/main.py +1 -1
- md2bbcode-1.1.0/src/md2bbcode/plugins/merge_lists.py +83 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/src/md2bbcode/renderers/bbcode.py +19 -1
- md2bbcode-1.0.9/README.md +0 -122
- md2bbcode-1.0.9/src/md2bbcode/html2bbcode.py +0 -132
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/.gitignore +0 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/LICENSE.txt +0 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/src/md2bbcode/__init__.py +0 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/src/md2bbcode/md2ast.py +0 -0
- {md2bbcode-1.0.9 → md2bbcode-1.1.0}/src/md2bbcode/renderers/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: md2bbcode
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Convert Markdown to BBCode using a custom Mistune renderer.
|
|
5
5
|
Project-URL: Repository, https://github.com/RedGuides/md2bbcode.git
|
|
6
6
|
Project-URL: Issues, https://github.com/RedGuides/md2bbcode/issues
|
|
@@ -17,23 +17,21 @@ Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
|
17
17
|
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
18
18
|
Classifier: Topic :: Utilities
|
|
19
19
|
Requires-Dist: beautifulsoup4
|
|
20
|
-
Requires-Dist: mistune>=3.0
|
|
20
|
+
Requires-Dist: mistune>=3.2.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
|
|
23
23
|
[](https://github.com/RedGuides/md2bbcode/actions/workflows/publish.yml)
|
|
24
24
|
|
|
25
|
-

|
|
25
|
+

|
|
26
26
|
|
|
27
27
|
# md2bbcode
|
|
28
|
-
**A wrapper and plugin for [Mistune](https://github.com/lepture/mistune).** It converts GitHub-flavored Markdown to Xenforo-flavored BBCode.
|
|
28
|
+
**A wrapper and plugin for [Mistune](https://github.com/lepture/mistune).** It converts most GitHub-flavored Markdown to Xenforo-flavored BBCode.
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
> [!TIP]
|
|
31
|
+
> Custom BBCodes made for RedGuides are included in `bb_codes.xml`; import the ones you want into your XenForo installation at `admin.php?bb-codes`. Some custom BBCodes include CSS, which you can move to your `extra.css` template for efficiency.
|
|
31
32
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
```bash
|
|
35
|
-
pip install md2bbcode
|
|
36
|
-
```
|
|
33
|
+
> [!NOTE]
|
|
34
|
+
> This project is made with LLM assistance.
|
|
37
35
|
|
|
38
36
|
## Usage
|
|
39
37
|
|
|
@@ -84,17 +82,12 @@ If you want to contribute to md2bbcode or set up a development environment, foll
|
|
|
84
82
|
cd md2bbcode
|
|
85
83
|
```
|
|
86
84
|
|
|
87
|
-
2.
|
|
88
|
-
```bash
|
|
89
|
-
pip install hatch
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
3. Create a development environment and install dependencies:
|
|
85
|
+
2. Create a development environment and install dependencies:
|
|
93
86
|
```bash
|
|
94
87
|
hatch env create
|
|
95
88
|
```
|
|
96
89
|
|
|
97
|
-
|
|
90
|
+
3. Activate the development environment:
|
|
98
91
|
```bash
|
|
99
92
|
hatch shell
|
|
100
93
|
```
|
|
@@ -109,9 +102,9 @@ The custom plugin for Mistune, which converts AST to bbcode.[^1]
|
|
|
109
102
|
|
|
110
103
|
### html2bbcode
|
|
111
104
|
|
|
112
|
-
Converts
|
|
105
|
+
Converts most HTML tags typically allowed in GitHub Flavored Markdown to BBCode.[^2]
|
|
113
106
|
|
|
114
|
-
[^2]: Currently used for post-processing mistune output
|
|
107
|
+
[^2]: Currently used for post-processing mistune output. Reference: https://github.github.com/gfm/#raw-html
|
|
115
108
|
|
|
116
109
|
```bash
|
|
117
110
|
html2bbcode input_file.html
|
|
@@ -127,18 +120,25 @@ md2ast input.md output.json
|
|
|
127
120
|
|
|
128
121
|
## Features Test
|
|
129
122
|
|
|
130
|
-
Here are a few GitHub-flavored Markdown features so you can use this README.md for testing:
|
|
123
|
+
Here are a few GitHub-flavored Markdown features so you can use this README.md for testing, including the table:
|
|
124
|
+
|
|
125
|
+
| Feature | Markdown | Rendered |
|
|
126
|
+
| :------------ | :-------------: | ---------------:|
|
|
127
|
+
| Bold | `**text**` | **bold** |
|
|
128
|
+
| Italic | `*text*` | *italic* |
|
|
129
|
+
| Strikethrough| `~~text~~` | ~~struck~~ |
|
|
130
|
+
| Code | `` `code` `` | `inline` |
|
|
131
|
+
| Link | `[text](url)` | [example](https://example.com) |
|
|
132
|
+
| Superscript | `<sup>2</sup>` | E=mc<sup>2</sup> |
|
|
133
|
+
| Subscript | `<sub>2</sub>` | H<sub>2</sub>O |
|
|
131
134
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
- **Table:**
|
|
135
|
+
<details>
|
|
136
|
+
<summary>HTML spoiler (details/summary)</summary>
|
|
135
137
|
|
|
136
|
-
|
|
137
|
-
| ----------- | ----------- |
|
|
138
|
-
| Header | Title |
|
|
139
|
-
| Paragraph | Text |
|
|
138
|
+
<b>html2bbcode</b> test. This is hidden content. Water is H<sub>2</sub>O.
|
|
140
139
|
|
|
141
|
-
|
|
140
|
+
<font color="red" size="3" face="Arial">Font tag inside details size 3 Arial red</font>
|
|
142
141
|
|
|
143
|
-
-
|
|
144
|
-
-
|
|
142
|
+
<span style="color: #27F573; font-size: 12px; font-family: Times New Roman; font-weight: bold; font-style: italic; text-decoration: underline line-through;">Inline style inside details green times new roman strikethrough italic bold underline</span>
|
|
143
|
+
<blockquote data-author="John Doe">This is a quote by John Doe</blockquote>
|
|
144
|
+
</details>
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
[](https://github.com/RedGuides/md2bbcode/actions/workflows/publish.yml)
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
# md2bbcode
|
|
6
|
+
**A wrapper and plugin for [Mistune](https://github.com/lepture/mistune).** It converts most GitHub-flavored Markdown to Xenforo-flavored BBCode.
|
|
7
|
+
|
|
8
|
+
> [!TIP]
|
|
9
|
+
> Custom BBCodes made for RedGuides are included in `bb_codes.xml`; import the ones you want into your XenForo installation at `admin.php?bb-codes`. Some custom BBCodes include CSS, which you can move to your `extra.css` template for efficiency.
|
|
10
|
+
|
|
11
|
+
> [!NOTE]
|
|
12
|
+
> This project is made with LLM assistance.
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
After installation, you can use md2bbcode from the command line:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
md2bbcode README.md
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
If the markdown includes relative images or other assets, you can use the --domain flag to prepend a domain to the relative URLs:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
md2bbcode README.md --domain https://raw.githubusercontent.com/RedGuides/md2bbcode/main/
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
You can also use the package in your Python project:
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from md2bbcode.main import process_readme
|
|
32
|
+
|
|
33
|
+
# Your Markdown content
|
|
34
|
+
markdown_text = "# Hello World"
|
|
35
|
+
|
|
36
|
+
# Optional domain to prepend to relative URLs
|
|
37
|
+
domain = 'https://raw.githubusercontent.com/yourusername/yourrepo/main/'
|
|
38
|
+
|
|
39
|
+
# Convert Markdown to BBCode
|
|
40
|
+
bbcode_output = process_readme(markdown_text, domain=domain)
|
|
41
|
+
|
|
42
|
+
# Output the BBCode
|
|
43
|
+
print(bbcode_output)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Debug Mode
|
|
47
|
+
|
|
48
|
+
You can use the `--debug` flag to save intermediate results to files for debugging:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
md2bbcode README.md --debug
|
|
52
|
+
```
|
|
53
|
+
## Development
|
|
54
|
+
|
|
55
|
+
If you want to contribute to md2bbcode or set up a development environment, follow these steps:
|
|
56
|
+
|
|
57
|
+
1. Clone the repository:
|
|
58
|
+
```bash
|
|
59
|
+
git clone https://github.com/RedGuides/md2bbcode.git
|
|
60
|
+
cd md2bbcode
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
2. Create a development environment and install dependencies:
|
|
64
|
+
```bash
|
|
65
|
+
hatch env create
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
3. Activate the development environment:
|
|
69
|
+
```bash
|
|
70
|
+
hatch shell
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### renderers/bbcode.py
|
|
74
|
+
|
|
75
|
+
The custom plugin for Mistune, which converts AST to bbcode.[^1]
|
|
76
|
+
|
|
77
|
+
[^1]: Mistune does not convert Markdown HTML to AST, hence the need for `html2bbcode`.
|
|
78
|
+
|
|
79
|
+
## Additional Tools
|
|
80
|
+
|
|
81
|
+
### html2bbcode
|
|
82
|
+
|
|
83
|
+
Converts most HTML tags typically allowed in GitHub Flavored Markdown to BBCode.[^2]
|
|
84
|
+
|
|
85
|
+
[^2]: Currently used for post-processing mistune output. Reference: https://github.github.com/gfm/#raw-html
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
html2bbcode input_file.html
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### md2ast
|
|
92
|
+
|
|
93
|
+
For debugging Mistune's renderer, converts a Markdown file to AST (JSON format).
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
md2ast input.md output.json
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Features Test
|
|
100
|
+
|
|
101
|
+
Here are a few GitHub-flavored Markdown features so you can use this README.md for testing, including the table:
|
|
102
|
+
|
|
103
|
+
| Feature | Markdown | Rendered |
|
|
104
|
+
| :------------ | :-------------: | ---------------:|
|
|
105
|
+
| Bold | `**text**` | **bold** |
|
|
106
|
+
| Italic | `*text*` | *italic* |
|
|
107
|
+
| Strikethrough| `~~text~~` | ~~struck~~ |
|
|
108
|
+
| Code | `` `code` `` | `inline` |
|
|
109
|
+
| Link | `[text](url)` | [example](https://example.com) |
|
|
110
|
+
| Superscript | `<sup>2</sup>` | E=mc<sup>2</sup> |
|
|
111
|
+
| Subscript | `<sub>2</sub>` | H<sub>2</sub>O |
|
|
112
|
+
|
|
113
|
+
<details>
|
|
114
|
+
<summary>HTML spoiler (details/summary)</summary>
|
|
115
|
+
|
|
116
|
+
<b>html2bbcode</b> test. This is hidden content. Water is H<sub>2</sub>O.
|
|
117
|
+
|
|
118
|
+
<font color="red" size="3" face="Arial">Font tag inside details size 3 Arial red</font>
|
|
119
|
+
|
|
120
|
+
<span style="color: #27F573; font-size: 12px; font-family: Times New Roman; font-weight: bold; font-style: italic; text-decoration: underline line-through;">Inline style inside details green times new roman strikethrough italic bold underline</span>
|
|
121
|
+
<blockquote data-author="John Doe">This is a quote by John Doe</blockquote>
|
|
122
|
+
</details>
|
|
@@ -1,5 +1,79 @@
|
|
|
1
1
|
<?xml version="1.0" encoding="utf-8"?>
|
|
2
2
|
<bb_codes>
|
|
3
|
+
<bb_code bb_code_id="admonition" bb_code_mode="replace" has_option="yes" option_regex="/^(note|tip|important|warning|caution)$/i" trim_lines_after="1" plain_children="0" disable_smilies="0" disable_nl2br="0" disable_autolink="0" allow_empty="0" allow_signature="1" editor_icon_type="fa" editor_icon_value="bell" title="Admonition (GitHub alerts)">
|
|
4
|
+
<desc><![CDATA[GitHub-style alerts/admonitions: NOTE, TIP, IMPORTANT, WARNING, CAUTION.]]></desc>
|
|
5
|
+
<example><![CDATA[[admonition=warning]
|
|
6
|
+
Be careful with this step.
|
|
7
|
+
[/admonition]]]></example>
|
|
8
|
+
<output><![CDATA[]]></output>
|
|
9
|
+
<replace_html><![CDATA[<style>
|
|
10
|
+
/* you can cut this style out and place it in your extra.css template so it's not repeated for each admonition. */
|
|
11
|
+
.bbWrapper .rgAdmonition{
|
|
12
|
+
--rgAdmonition-accent: #0969da; /* note */
|
|
13
|
+
--rgAdmonition-bg: rgba(9,105,218,.10);
|
|
14
|
+
margin: 1em 0;
|
|
15
|
+
padding: .75em 1em;
|
|
16
|
+
border-left: 4px solid var(--rgAdmonition-accent);
|
|
17
|
+
background: var(--rgAdmonition-bg);
|
|
18
|
+
border-radius: 6px;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
.bbWrapper .rgAdmonition--tip{
|
|
22
|
+
--rgAdmonition-accent: #1a7f37;
|
|
23
|
+
--rgAdmonition-bg: rgba(26,127,55,.12);
|
|
24
|
+
}
|
|
25
|
+
.bbWrapper .rgAdmonition--important{
|
|
26
|
+
--rgAdmonition-accent: #8250df;
|
|
27
|
+
--rgAdmonition-bg: rgba(130,80,223,.12);
|
|
28
|
+
}
|
|
29
|
+
.bbWrapper .rgAdmonition--warning{
|
|
30
|
+
--rgAdmonition-accent: #9a6700;
|
|
31
|
+
--rgAdmonition-bg: rgba(154,103,0,.12);
|
|
32
|
+
}
|
|
33
|
+
.bbWrapper .rgAdmonition--caution{
|
|
34
|
+
--rgAdmonition-accent: #d1242f;
|
|
35
|
+
--rgAdmonition-bg: rgba(209,36,47,.12);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
.bbWrapper .rgAdmonition-header{
|
|
39
|
+
display: flex;
|
|
40
|
+
align-items: center;
|
|
41
|
+
gap: .5em;
|
|
42
|
+
font-weight: 600;
|
|
43
|
+
margin: 0 0 .5em 0;
|
|
44
|
+
color: var(--rgAdmonition-accent);
|
|
45
|
+
}
|
|
46
|
+
.bbWrapper .rgAdmonition-label{ text-transform: capitalize; }
|
|
47
|
+
.bbWrapper .rgAdmonition-label:empty::before{ content: "Note"; }
|
|
48
|
+
|
|
49
|
+
/* Hide the leading and trailing newline. */
|
|
50
|
+
.bbWrapper .rgAdmonition-body > br:first-child{ display: none; }
|
|
51
|
+
.bbWrapper .rgAdmonition-body > br:last-child{ display: none; }
|
|
52
|
+
|
|
53
|
+
.bbWrapper .rgAdmonition-icon{
|
|
54
|
+
flex: 0 0 auto;
|
|
55
|
+
display: none;
|
|
56
|
+
}
|
|
57
|
+
.bbWrapper .rgAdmonition--note .rgAdmonition-icon.fa-info-circle,
|
|
58
|
+
.bbWrapper .rgAdmonition--tip .rgAdmonition-icon.fa-lightbulb,
|
|
59
|
+
.bbWrapper .rgAdmonition--important .rgAdmonition-icon.fa-exclamation-circle,
|
|
60
|
+
.bbWrapper .rgAdmonition--warning .rgAdmonition-icon.fa-exclamation-triangle,
|
|
61
|
+
.bbWrapper .rgAdmonition--caution .rgAdmonition-icon.fa-stop-circle{ display: inline-block; }
|
|
62
|
+
</style>
|
|
63
|
+
<aside class="rgAdmonition rgAdmonition--{option}">
|
|
64
|
+
<div class="rgAdmonition-header">
|
|
65
|
+
<i data-xf-init="icon" class="fas fa-info-circle rgAdmonition-icon"></i>
|
|
66
|
+
<i data-xf-init="icon" class="fas fa-lightbulb rgAdmonition-icon"></i>
|
|
67
|
+
<i data-xf-init="icon" class="fas fa-exclamation-circle rgAdmonition-icon"></i>
|
|
68
|
+
<i data-xf-init="icon" class="fas fa-exclamation-triangle rgAdmonition-icon"></i>
|
|
69
|
+
<i data-xf-init="icon" class="fas fa-stop-circle rgAdmonition-icon"></i>
|
|
70
|
+
<span class="rgAdmonition-label">{option}</span>
|
|
71
|
+
</div>
|
|
72
|
+
<div class="rgAdmonition-body">{text}</div>
|
|
73
|
+
</aside>]]></replace_html>
|
|
74
|
+
<replace_html_email><![CDATA[]]></replace_html_email>
|
|
75
|
+
<replace_text><![CDATA[]]></replace_text>
|
|
76
|
+
</bb_code>
|
|
3
77
|
<bb_code bb_code_id="abbr" bb_code_mode="replace" has_option="yes" trim_lines_after="0" plain_children="0" disable_smilies="0" disable_nl2br="0" disable_autolink="0" allow_empty="0" allow_signature="1" editor_icon_type="fa" editor_icon_value="acorn" title="Abbreviation">
|
|
4
78
|
<desc><![CDATA[defines an abbreviation or an acronym]]></desc>
|
|
5
79
|
<example><![CDATA[The [abbr=World Health Organization]WHO[/abbr] was founded in 1948.]]></example>
|
|
@@ -54,4 +128,20 @@ Normal:
|
|
|
54
128
|
<replace_html_email><![CDATA[]]></replace_html_email>
|
|
55
129
|
<replace_text><![CDATA[]]></replace_text>
|
|
56
130
|
</bb_code>
|
|
131
|
+
<bb_code bb_code_id="sub" bb_code_mode="replace" has_option="no" trim_lines_after="0" plain_children="0" disable_smilies="0" disable_nl2br="0" disable_autolink="0" allow_empty="0" allow_signature="1" editor_icon_type="fa" editor_icon_value="fa-subscript" title="Subscript">
|
|
132
|
+
<desc><![CDATA[Subscript text]]></desc>
|
|
133
|
+
<example><![CDATA[H[sub]2[/sub]O]]></example>
|
|
134
|
+
<output><![CDATA[]]></output>
|
|
135
|
+
<replace_html><![CDATA[<sub>{text}</sub>]]></replace_html>
|
|
136
|
+
<replace_html_email><![CDATA[]]></replace_html_email>
|
|
137
|
+
<replace_text><![CDATA[]]></replace_text>
|
|
138
|
+
</bb_code>
|
|
139
|
+
<bb_code bb_code_id="sup" bb_code_mode="replace" has_option="no" trim_lines_after="0" plain_children="0" disable_smilies="0" disable_nl2br="0" disable_autolink="0" allow_empty="0" allow_signature="1" editor_icon_type="fa" editor_icon_value="fa-superscript" title="Superscript">
|
|
140
|
+
<desc><![CDATA[Displays text as superscript]]></desc>
|
|
141
|
+
<example><![CDATA[e=mc[sup]2[/sup]]]></example>
|
|
142
|
+
<output><![CDATA[]]></output>
|
|
143
|
+
<replace_html><![CDATA[<sup>{text}</sup>]]></replace_html>
|
|
144
|
+
<replace_html_email><![CDATA[]]></replace_html_email>
|
|
145
|
+
<replace_text><![CDATA[]]></replace_text>
|
|
146
|
+
</bb_code>
|
|
57
147
|
</bb_codes>
|
|
@@ -18,7 +18,7 @@ classifiers = [
|
|
|
18
18
|
"Topic :: Text Processing :: Markup :: Markdown"
|
|
19
19
|
]
|
|
20
20
|
dependencies = [
|
|
21
|
-
"mistune>=3.0
|
|
21
|
+
"mistune>=3.2.0",
|
|
22
22
|
"beautifulsoup4"
|
|
23
23
|
]
|
|
24
24
|
|
|
@@ -44,6 +44,7 @@ include = [
|
|
|
44
44
|
"README.md",
|
|
45
45
|
"LICENSE",
|
|
46
46
|
"src/md2bbcode/*.py",
|
|
47
|
+
"src/md2bbcode/plugins/*.py",
|
|
47
48
|
"src/md2bbcode/renderers/*.py",
|
|
48
49
|
"bb_codes.xml"
|
|
49
50
|
]
|
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
# converts some HTML tags to BBCode
|
|
2
|
+
# pass --debug to save the output to readme.finalpass
|
|
3
|
+
import argparse
|
|
4
|
+
import re
|
|
5
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
6
|
+
from urllib.parse import urljoin, urlparse
|
|
7
|
+
|
|
8
|
+
from bs4 import BeautifulSoup, Comment, NavigableString, Tag
|
|
9
|
+
|
|
10
|
+
from md2bbcode.image_rewrite import rewrite_svg_url
|
|
11
|
+
|
|
12
|
+
# XenForo (2.3.x) built-in BBCode option validation (see XF\BbCode\RuleSet::addDefaultTags()).
|
|
13
|
+
_XF_COLOR_OPTION_RE = re.compile(
|
|
14
|
+
r"^(rgb\(\s*\d+%?\s*,\s*\d+%?\s*,\s*\d+%?\s*\)|#[a-f0-9]{6}|#[a-f0-9]{3}|[a-z]+)$",
|
|
15
|
+
re.IGNORECASE,
|
|
16
|
+
)
|
|
17
|
+
_XF_FONT_OPTION_RE = re.compile(r"^[a-z0-9 \-]+$", re.IGNORECASE)
|
|
18
|
+
_XF_SIZE_OPTION_RE = re.compile(r"^[0-9]+(px)?$", re.IGNORECASE)
|
|
19
|
+
|
|
20
|
+
_TOKEN_RE = re.compile("\x1A(\\d+)\x1A")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _strip_important(value: str) -> str:
|
|
24
|
+
# Common in inline CSS; XF doesn't accept it in BBCode options.
|
|
25
|
+
return value.replace("!important", "").strip()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _sanitize_color(value: str) -> Optional[str]:
    """Return a colour usable as a BBCode option, or ``None`` if rejected.

    Surrounding whitespace and quote characters are removed, then any
    ``!important`` flag, before the result is checked against
    ``_XF_COLOR_OPTION_RE``.
    """
    unquoted = value.strip().strip('"').strip("'")
    candidate = _strip_important(unquoted)
    if not candidate:
        return None
    if not _XF_COLOR_OPTION_RE.match(candidate):
        return None
    return candidate
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _sanitize_size(value: str) -> Optional[str]:
    """Return a size usable as a BBCode option, or ``None`` if rejected.

    Surrounding whitespace and quote characters are removed, then any
    ``!important`` flag, before the result is checked against
    ``_XF_SIZE_OPTION_RE``.
    """
    unquoted = value.strip().strip('"').strip("'")
    candidate = _strip_important(unquoted)
    if not candidate:
        return None
    if not _XF_SIZE_OPTION_RE.match(candidate):
        return None
    return candidate
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _sanitize_font(value: str) -> Optional[str]:
    """Return a font name usable as a BBCode option, or ``None`` if rejected.

    Only the first entry of a comma-separated font list is kept. Characters
    outside letters, digits, spaces and hyphens are replaced by spaces, and
    runs of whitespace are collapsed to one space.
    """
    cleaned = _strip_important(value.strip())
    # "Arial", sans-serif -> keep the first family only.
    first_family = cleaned.split(",", 1)[0]
    first_family = first_family.strip().strip('"').strip("'")
    only_allowed = re.sub(r"[^a-z0-9 \-]+", " ", first_family, flags=re.IGNORECASE)
    collapsed = re.sub(r"\s+", " ", only_allowed).strip()
    if collapsed and _XF_FONT_OPTION_RE.match(collapsed):
        return collapsed
    return None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _safe_url(url: str, domain: Optional[str]) -> str:
|
|
51
|
+
# Simple URL sanitization matching BBCodeRenderer.safe_url.
|
|
52
|
+
if url.startswith(("javascript:", "vbscript:", "data:")):
|
|
53
|
+
return "#harmful-link"
|
|
54
|
+
if domain and not urlparse(url).netloc:
|
|
55
|
+
return urljoin(domain, url)
|
|
56
|
+
return url
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _add_token(tokens: List[str], original: str) -> str:
|
|
60
|
+
token_id = len(tokens)
|
|
61
|
+
token = f"\x1A{token_id}\x1A"
|
|
62
|
+
tokens.append(original)
|
|
63
|
+
return token
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _stash_bbcode_plain_items(text: str) -> Tuple[str, List[str]]:
    """
    Swap "plain" BBCode segments for placeholders before HTML parsing.

    Each ``[tag ...]...[/tag]`` span whose tag is one of the names listed
    below is replaced by a token from ``_add_token``, so HTML inside it
    (e.g. inline code that contains <font>...) is not parsed or converted.

    Returns the rewritten text and the list of stashed originals.
    """
    stash: List[str] = []
    # Tags that XF treats as plain or that should not be HTML-parsed here.
    alternation = "|".join(
        ("code", "icode", "php", "html", "plain", "media", "img", "user", "attach")
    )
    matcher = re.compile(
        rf"\[(?P<tag>{alternation})(?:[^\]]*)\](?P<content>.*?)\[/\1\]",
        re.IGNORECASE | re.DOTALL,
    )
    rewritten = matcher.sub(lambda found: _add_token(stash, found.group(0)), text)
    return rewritten, stash
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _restore_tokens(text: str, tokens: List[str]) -> str:
    """Replace every placeholder in *text* with its stashed original.

    A placeholder whose index is outside *tokens* is replaced by an empty
    string.
    """

    def lookup(found: "re.Match") -> str:
        position = int(found.group(1))
        if position < 0 or position >= len(tokens):
            return ""
        return tokens[position]

    return _TOKEN_RE.sub(lookup, text)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class HtmlToBbCodeConverter:
|
|
98
|
+
def __init__(self, domain: Optional[str] = None) -> None:
|
|
99
|
+
self.domain = domain or ""
|
|
100
|
+
self.handlers = {
|
|
101
|
+
"details": self._handle_details,
|
|
102
|
+
"font": self._handle_font,
|
|
103
|
+
"span": self._handle_span,
|
|
104
|
+
"div": self._handle_div,
|
|
105
|
+
"sup": lambda tag: self._wrap_simple(tag, "SUP"),
|
|
106
|
+
"sub": lambda tag: self._wrap_simple(tag, "SUB"),
|
|
107
|
+
"b": lambda tag: self._wrap_simple(tag, "B"),
|
|
108
|
+
"strong": lambda tag: self._wrap_simple(tag, "B"),
|
|
109
|
+
"i": lambda tag: self._wrap_simple(tag, "I"),
|
|
110
|
+
"em": lambda tag: self._wrap_simple(tag, "I"),
|
|
111
|
+
"u": lambda tag: self._wrap_simple(tag, "U"),
|
|
112
|
+
"s": lambda tag: self._wrap_simple(tag, "S"),
|
|
113
|
+
"del": lambda tag: self._wrap_simple(tag, "S"),
|
|
114
|
+
"strike": lambda tag: self._wrap_simple(tag, "S"),
|
|
115
|
+
"ins": lambda tag: self._wrap_simple(tag, "U"),
|
|
116
|
+
"mark": lambda tag: self._wrap_simple(tag, "MARK"),
|
|
117
|
+
"kbd": self._handle_kbd,
|
|
118
|
+
"br": lambda tag: "\n",
|
|
119
|
+
"hr": lambda tag: "[HR][/HR]\n",
|
|
120
|
+
"a": self._handle_link,
|
|
121
|
+
"img": self._handle_image,
|
|
122
|
+
"p": self._handle_paragraph,
|
|
123
|
+
"blockquote": self._handle_blockquote,
|
|
124
|
+
"pre": self._handle_pre,
|
|
125
|
+
"code": self._handle_code,
|
|
126
|
+
"ul": lambda tag: self._handle_list(tag, ordered=False),
|
|
127
|
+
"ol": lambda tag: self._handle_list(tag, ordered=True),
|
|
128
|
+
"li": self._handle_list_item,
|
|
129
|
+
"table": self._handle_table,
|
|
130
|
+
"thead": self._handle_passthrough_children,
|
|
131
|
+
"tbody": self._handle_passthrough_children,
|
|
132
|
+
"tfoot": self._handle_passthrough_children,
|
|
133
|
+
"tr": self._handle_table_row,
|
|
134
|
+
"th": lambda tag: self._handle_table_cell(tag, head=True),
|
|
135
|
+
"td": lambda tag: self._handle_table_cell(tag, head=False),
|
|
136
|
+
"abbr": self._handle_abbr,
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
def convert(self, html: str) -> str:
    """Convert *html* (which may already contain BBCode) to BBCode.

    Plain BBCode segments such as [ICODE]...[/ICODE] are stashed before
    parsing and restored afterwards, so their contents are not treated
    as HTML.
    """
    protected, stash = _stash_bbcode_plain_items(html)
    document = BeautifulSoup(protected, "html.parser")
    # Start from <body> when the parser produced one, else the whole soup.
    start = document.body or document
    return _restore_tokens(self._convert_children(start), stash)
|
|
146
|
+
|
|
147
|
+
def _convert_children(self, tag: Tag) -> str:
|
|
148
|
+
return "".join(self._convert_node(child) for child in tag.contents)
|
|
149
|
+
|
|
150
|
+
def _convert_node(self, node) -> str:
    """Convert one parse-tree node to its BBCode/text form.

    Comments are re-emitted as ``<!--...-->``, other strings are returned
    as text, tags go to their registered handler (or are re-serialised
    unchanged when none is registered), and anything else yields "".
    """
    # Comment must be tested before the more general NavigableString.
    if isinstance(node, Comment):
        return f"<!--{node}-->"
    if isinstance(node, NavigableString):
        return str(node)
    if not isinstance(node, Tag):
        return ""
    handler = self.handlers.get(node.name.lower())
    return handler(node) if handler else str(node)
|
|
161
|
+
|
|
162
|
+
def _wrap(self, content: str, wrappers: List[Tuple[str, Optional[str]]]) -> str:
|
|
163
|
+
for tag, value in reversed(wrappers):
|
|
164
|
+
if value is None:
|
|
165
|
+
content = f"[{tag}]{content}[/{tag}]"
|
|
166
|
+
else:
|
|
167
|
+
content = f"[{tag}={value}]{content}[/{tag}]"
|
|
168
|
+
return content
|
|
169
|
+
|
|
170
|
+
def _wrap_simple(self, tag: Tag, bbcode_tag: str) -> str:
|
|
171
|
+
content = self._convert_children(tag)
|
|
172
|
+
content = f"[{bbcode_tag}]{content}[/{bbcode_tag}]"
|
|
173
|
+
return self._apply_style_wrappers(tag, content, skip_tags={bbcode_tag})
|
|
174
|
+
|
|
175
|
+
def _parse_style(self, style: str) -> Dict[str, str]:
|
|
176
|
+
css_properties: Dict[str, str] = {}
|
|
177
|
+
for item in style.split(";"):
|
|
178
|
+
if ":" not in item:
|
|
179
|
+
continue
|
|
180
|
+
key, value = item.split(":", 1)
|
|
181
|
+
key = key.strip().lower()
|
|
182
|
+
value = value.strip()
|
|
183
|
+
if key:
|
|
184
|
+
css_properties[key] = value
|
|
185
|
+
return css_properties
|
|
186
|
+
|
|
187
|
+
def _style_wrappers(
|
|
188
|
+
self,
|
|
189
|
+
tag: Tag,
|
|
190
|
+
skip_tags: Optional[Set[str]] = None,
|
|
191
|
+
skip_props: Optional[Set[str]] = None,
|
|
192
|
+
) -> List[Tuple[str, Optional[str]]]:
|
|
193
|
+
style = tag.attrs.get("style", "")
|
|
194
|
+
if not style:
|
|
195
|
+
return []
|
|
196
|
+
css_properties = self._parse_style(style)
|
|
197
|
+
wrappers: List[Tuple[str, Optional[str]]] = []
|
|
198
|
+
skip_tags = {t.upper() for t in (skip_tags or set())}
|
|
199
|
+
skip_props = set(skip_props or set())
|
|
200
|
+
|
|
201
|
+
if "color" in css_properties and "color" not in skip_props:
|
|
202
|
+
color = _sanitize_color(css_properties["color"])
|
|
203
|
+
if color:
|
|
204
|
+
wrappers.append(("COLOR", color))
|
|
205
|
+
if "font-size" in css_properties and "size" not in skip_props:
|
|
206
|
+
size = _sanitize_size(css_properties["font-size"])
|
|
207
|
+
if size:
|
|
208
|
+
wrappers.append(("SIZE", size))
|
|
209
|
+
if "font-family" in css_properties and "font" not in skip_props:
|
|
210
|
+
face = _sanitize_font(css_properties["font-family"])
|
|
211
|
+
if face:
|
|
212
|
+
wrappers.append(("FONT", face))
|
|
213
|
+
if "text-decoration" in css_properties:
|
|
214
|
+
decoration = css_properties["text-decoration"].lower()
|
|
215
|
+
if "line-through" in decoration and "S" not in skip_tags:
|
|
216
|
+
wrappers.append(("S", None))
|
|
217
|
+
if "underline" in decoration and "U" not in skip_tags:
|
|
218
|
+
wrappers.append(("U", None))
|
|
219
|
+
if "font-weight" in css_properties and "B" not in skip_tags:
|
|
220
|
+
weight = css_properties["font-weight"].lower()
|
|
221
|
+
if weight == "bold" or (weight.isdigit() and int(weight) >= 700):
|
|
222
|
+
wrappers.append(("B", None))
|
|
223
|
+
if "font-style" in css_properties and "I" not in skip_tags:
|
|
224
|
+
style_val = css_properties["font-style"].lower()
|
|
225
|
+
if style_val in {"italic", "oblique"}:
|
|
226
|
+
wrappers.append(("I", None))
|
|
227
|
+
|
|
228
|
+
return wrappers
|
|
229
|
+
|
|
230
|
+
def _apply_style_wrappers(
|
|
231
|
+
self,
|
|
232
|
+
tag: Tag,
|
|
233
|
+
content: str,
|
|
234
|
+
skip_tags: Optional[Set[str]] = None,
|
|
235
|
+
skip_props: Optional[Set[str]] = None,
|
|
236
|
+
) -> str:
|
|
237
|
+
wrappers = self._style_wrappers(tag, skip_tags=skip_tags, skip_props=skip_props)
|
|
238
|
+
return self._wrap(content, wrappers) if wrappers else content
|
|
239
|
+
|
|
240
|
+
def _handle_details(self, tag: Tag) -> str:
|
|
241
|
+
summary = tag.find("summary")
|
|
242
|
+
spoiler_title = ""
|
|
243
|
+
if summary:
|
|
244
|
+
spoiler_title = self._convert_children(summary).strip()
|
|
245
|
+
summary.decompose()
|
|
246
|
+
content = self._convert_children(tag)
|
|
247
|
+
if spoiler_title:
|
|
248
|
+
return f"[SPOILER={spoiler_title}]{content}[/SPOILER]"
|
|
249
|
+
return f"[SPOILER]{content}[/SPOILER]"
|
|
250
|
+
|
|
251
|
+
def _handle_font(self, tag: Tag) -> str:
|
|
252
|
+
wrappers: List[Tuple[str, Optional[str]]] = []
|
|
253
|
+
skip_props: Set[str] = set()
|
|
254
|
+
if "color" in tag.attrs:
|
|
255
|
+
color = _sanitize_color(tag["color"])
|
|
256
|
+
if color:
|
|
257
|
+
wrappers.append(("COLOR", color))
|
|
258
|
+
skip_props.add("color")
|
|
259
|
+
if "size" in tag.attrs:
|
|
260
|
+
size = _sanitize_size(tag["size"])
|
|
261
|
+
if size:
|
|
262
|
+
wrappers.append(("SIZE", size))
|
|
263
|
+
skip_props.add("size")
|
|
264
|
+
if "face" in tag.attrs:
|
|
265
|
+
face = _sanitize_font(tag["face"])
|
|
266
|
+
if face:
|
|
267
|
+
wrappers.append(("FONT", face))
|
|
268
|
+
skip_props.add("font")
|
|
269
|
+
|
|
270
|
+
content = self._convert_children(tag)
|
|
271
|
+
wrappers.extend(self._style_wrappers(tag, skip_props=skip_props))
|
|
272
|
+
return self._wrap(content, wrappers) if wrappers else content
|
|
273
|
+
|
|
274
|
+
def _handle_span(self, tag: Tag) -> str:
|
|
275
|
+
content = self._convert_children(tag)
|
|
276
|
+
if tag.attrs.get("style"):
|
|
277
|
+
content = self._apply_style_wrappers(tag, content)
|
|
278
|
+
return content
|
|
279
|
+
|
|
280
|
+
def _handle_div(self, tag: Tag) -> str:
|
|
281
|
+
content = self._convert_children(tag)
|
|
282
|
+
content = self._apply_style_wrappers(tag, content)
|
|
283
|
+
align = self._extract_alignment(tag)
|
|
284
|
+
if align:
|
|
285
|
+
content = self._wrap_alignment(content, align)
|
|
286
|
+
return content
|
|
287
|
+
|
|
288
|
+
def _handle_kbd(self, tag: Tag) -> str:
|
|
289
|
+
content = tag.get_text()
|
|
290
|
+
return f"[ICODE]{content}[/ICODE]"
|
|
291
|
+
|
|
292
|
+
def _handle_link(self, tag: Tag) -> str:
|
|
293
|
+
href = tag.attrs.get("href")
|
|
294
|
+
name = tag.attrs.get("name") or tag.attrs.get("id")
|
|
295
|
+
if href:
|
|
296
|
+
href = href.strip()
|
|
297
|
+
if href.lower().startswith("mailto:"):
|
|
298
|
+
email = href[7:].strip()
|
|
299
|
+
if "?" in email:
|
|
300
|
+
email = email.split("?", 1)[0]
|
|
301
|
+
if email:
|
|
302
|
+
return f"[EMAIL]{email}[/EMAIL]"
|
|
303
|
+
text = self._convert_children(tag)
|
|
304
|
+
if href.startswith("#"):
|
|
305
|
+
anchor = href[1:]
|
|
306
|
+
if anchor:
|
|
307
|
+
return f"[JUMPTO={anchor}]{text}[/JUMPTO]"
|
|
308
|
+
return f"[URL={_safe_url(href, self.domain)}]{text}[/URL]"
|
|
309
|
+
if name:
|
|
310
|
+
text = self._convert_children(tag)
|
|
311
|
+
return f"[ANAME={name}]{text}[/ANAME]"
|
|
312
|
+
return str(tag)
|
|
313
|
+
|
|
314
|
+
def _handle_image(self, tag: Tag) -> str:
|
|
315
|
+
src = tag.attrs.get("src")
|
|
316
|
+
if not src:
|
|
317
|
+
return str(tag)
|
|
318
|
+
alt = tag.attrs.get("alt", "")
|
|
319
|
+
safe_src = _safe_url(src, self.domain)
|
|
320
|
+
rewritten_url = rewrite_svg_url(safe_src)
|
|
321
|
+
if rewritten_url is None:
|
|
322
|
+
link_text = alt or safe_src
|
|
323
|
+
return f"[URL={safe_src}]{link_text}[/URL]"
|
|
324
|
+
alt_text = f' alt="{alt}"' if alt else ""
|
|
325
|
+
return f"[IMG{alt_text}]{rewritten_url}[/IMG]"
|
|
326
|
+
|
|
327
|
+
def _handle_paragraph(self, tag: Tag) -> str:
|
|
328
|
+
content = self._convert_children(tag)
|
|
329
|
+
content = self._apply_style_wrappers(tag, content)
|
|
330
|
+
align = self._extract_alignment(tag)
|
|
331
|
+
if align:
|
|
332
|
+
content = self._wrap_alignment(content, align)
|
|
333
|
+
return f"{content}\n\n"
|
|
334
|
+
|
|
335
|
+
def _handle_blockquote(self, tag: Tag) -> str:
    """Convert a blockquote to ``[QUOTE]``, with an attribution if present.

    Double quotes inside the attribution are replaced by single quotes so
    they cannot terminate the quoted ``[QUOTE="..."]`` option early.
    """
    body = self._apply_style_wrappers(tag, self._convert_children(tag))
    alignment = self._extract_alignment(tag)
    if alignment:
        body = self._wrap_alignment(body, alignment)

    author = self._extract_blockquote_attribution(tag)
    if not author:
        return f"[QUOTE]\n{body}[/QUOTE]\n"

    quoted_author = author.replace('"', "'")
    return f'[QUOTE="{quoted_author}"]\n{body}[/QUOTE]\n'
|
|
346
|
+
|
|
347
|
+
def _extract_code_language(self, tag: Tag) -> Optional[str]:
    """Return the language named by a ``language-*``/``lang-*`` class.

    The ``class`` attribute may be a list or a whitespace-separated string.
    The first class carrying either prefix wins; ``None`` is returned when
    no class matches.
    """
    class_names = tag.attrs.get("class", [])
    if isinstance(class_names, str):
        class_names = class_names.split()

    for class_name in class_names:
        for prefix in ("language-", "lang-"):
            if class_name.startswith(prefix):
                return class_name[len(prefix):]
    return None
|
|
357
|
+
|
|
358
|
+
def _handle_pre(self, tag: Tag) -> str:
    """Convert a preformatted block to ``[CODE]`` / ``[CODE=lang]``.

    When the block contains a ``code`` element, both the language class and
    the text are taken from that element; otherwise from the block itself.
    """
    nested_code = tag.find("code")
    source = nested_code if nested_code else tag

    language = self._extract_code_language(source)
    text = source.get_text()

    opener = f"[CODE={language}]" if language else "[CODE]"
    return f"{opener}{text}[/CODE]\n"
|
|
370
|
+
|
|
371
|
+
def _handle_code(self, tag: Tag) -> str:
    """Convert an inline code element to ``[ICODE]``.

    Returns an empty string when the direct parent is a ``pre`` tag.
    """
    parent = tag.parent
    if parent and isinstance(parent, Tag) and parent.name.lower() == "pre":
        return ""
    return f"[ICODE]{tag.get_text()}[/ICODE]"
|
|
376
|
+
|
|
377
|
+
def _handle_list(self, tag: Tag, ordered: bool) -> str:
    """Wrap the converted children in ``[LIST]`` (or ``[LIST=1]`` if ordered)."""
    items = self._convert_children(tag)
    opener = "[LIST=1]" if ordered else "[LIST]"
    return f"{opener}{items}[/LIST]\n"
|
|
381
|
+
|
|
382
|
+
def _handle_list_item(self, tag: Tag) -> str:
    """Emit one ``[*]`` list entry followed by a newline."""
    return f"[*]{self._convert_children(tag)}\n"
|
|
385
|
+
|
|
386
|
+
def _handle_table(self, tag: Tag) -> str:
    """Wrap the converted children in a ``[TABLE]`` block."""
    return f"[TABLE]\n{self._convert_children(tag)}[/TABLE]\n"
|
|
389
|
+
|
|
390
|
+
def _handle_passthrough_children(self, tag: Tag) -> str:
    """Return the converted children with no wrapper for the tag itself."""
    converted = self._convert_children(tag)
    return converted
|
|
392
|
+
|
|
393
|
+
def _handle_table_row(self, tag: Tag) -> str:
    """Wrap the converted children in a ``[TR]`` block."""
    return f"[TR]\n{self._convert_children(tag)}[/TR]\n"
|
|
396
|
+
|
|
397
|
+
def _extract_alignment(self, tag: Tag) -> Optional[str]:
    """Return the tag's alignment, stripped and lower-cased, or ``None``.

    The ``align`` attribute takes precedence; otherwise the ``text-align``
    declaration of the inline ``style`` attribute is used.
    """
    attrs = tag.attrs

    direct = attrs.get("align")
    if direct:
        return direct.strip().lower()

    inline_css = attrs.get("style", "")
    if inline_css:
        declarations = self._parse_style(inline_css)
        if "text-align" in declarations:
            return declarations["text-align"].strip().lower()
    return None
|
|
408
|
+
|
|
409
|
+
def _wrap_alignment(self, content: str, align: str) -> str:
    """Wrap *content* in ``[CENTER]``, ``[RIGHT]`` or ``[LEFT]``.

    Any other alignment value leaves the content unchanged.
    """
    if align in ("center", "right", "left"):
        bb_tag = align.upper()
        return f"[{bb_tag}]{content}[/{bb_tag}]"
    return content
|
|
417
|
+
|
|
418
|
+
def _extract_blockquote_attribution(self, tag: Tag) -> Optional[str]:
    """Return the first non-blank attribution attribute, or ``None``.

    Attributes are probed in a fixed priority order; each value is
    converted to ``str`` and stripped before being accepted.
    """
    candidate_keys = (
        "data-quote",
        "data-attribution",
        "data-author",
        "data-username",
        "data-cite",
        "cite",
    )
    for attr_name in candidate_keys:
        raw = tag.attrs.get(attr_name)
        if not raw:
            continue
        cleaned = str(raw).strip()
        if cleaned:
            return cleaned
    return None
|
|
433
|
+
|
|
434
|
+
def _handle_table_cell(self, tag: Tag, head: bool) -> str:
    """Convert a table cell to ``[TH]`` (when *head* is true) or ``[TD]``.

    Children are converted, style wrappers applied, and the cell content is
    aligned through the shared ``_wrap_alignment`` helper, the same way the
    paragraph and blockquote handlers do it, instead of repeating the
    center/right/left chain here.
    """
    tag_name = "TH" if head else "TD"
    content = self._convert_children(tag)
    content = self._apply_style_wrappers(tag, content)
    align = self._extract_alignment(tag)
    if align:
        # Unknown alignment values are left untouched by the helper.
        content = self._wrap_alignment(content, align)
    return f"[{tag_name}]{content}[/{tag_name}]\n"
|
|
446
|
+
|
|
447
|
+
def _handle_abbr(self, tag: Tag) -> str:
    """Convert an abbreviation to ``[ABBR=title]``.

    Without a ``title`` attribute the tag is returned unchanged as its
    string form.
    """
    expansion = tag.attrs.get("title")
    if expansion:
        return f"[ABBR={expansion}]{self._convert_children(tag)}[/ABBR]"
    return str(tag)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def html_to_bbcode(html: str, domain: Optional[str] = None) -> str:
    """Convert *html* to BBCode with a fresh ``HtmlToBbCodeConverter``.

    *domain* is forwarded unchanged to the converter.
    """
    return HtmlToBbCodeConverter(domain=domain).convert(html)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def process_html(input_html: str, debug: bool = False, output_file: Optional[str] = None, domain: Optional[str] = None) -> str:
    """Convert HTML to BBCode and, in debug mode, also save it to a file.

    With *debug* set, the result is written (UTF-8) to *output_file*, which
    defaults to ``readme.finalpass``. The converted text is returned in
    every case.
    """
    converted_bbcode = html_to_bbcode(input_html, domain=domain)
    if not debug:
        return converted_bbcode

    target = "readme.finalpass" if output_file is None else output_file
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(converted_bbcode)
    return converted_bbcode
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def main(argv=None) -> None:
    """Command-line entry point: convert an HTML file to BBCode.

    With ``--debug`` the output is written to ``readme.finalpass`` and not
    printed; without it the output is printed to standard output.
    """
    parser = argparse.ArgumentParser(description="Convert HTML to BBCode with optional debugging output.")
    parser.add_argument("input_file", type=str, help="Input HTML file path")
    parser.add_argument("--debug", action="store_true", help="Save output to readme.finalpass for debugging")
    options = parser.parse_args(argv)

    with open(options.input_file, "r", encoding="utf-8") as source:
        html_content = source.read()

    debug_target = "readme.finalpass" if options.debug else None
    converted_bbcode = process_html(html_content, debug=options.debug, output_file=debug_target)

    # In debug mode the result already went to the file.
    if options.debug:
        return
    print(converted_bbcode)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from urllib.parse import parse_qs, quote, urlencode, urlparse
|
|
3
|
+
|
|
4
|
+
_RASTER_SHIELDS_BASE = "https://raster.shields.io"
|
|
5
|
+
_WESERV_BASE = "https://images.weserv.nl/"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def rewrite_svg_url(url: str) -> Optional[str]:
    """Rewrite an SVG image URL to a raster equivalent where possible.

    * Falsy input is returned as-is.
    * GitHub Actions badge URLs are rewritten to a raster shields.io badge.
    * Other ``.svg`` URLs over http/https are wrapped with the weserv proxy.
    * ``.svg`` URLs with any other scheme yield ``None``.
    * Everything else is returned unchanged.
    """
    if not url:
        return url

    parts = urlparse(url)
    if _is_github_actions_badge(parts):
        return _rewrite_github_actions_badge(parts)

    if not _should_rasterize(parts):
        return url

    if parts.scheme in ("http", "https"):
        return _wrap_weserv(url)
    return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _is_github_actions_badge(parsed) -> bool:
    """Tell whether a parsed URL is a GitHub Actions workflow badge.

    The URL must use http/https on host ``github.com`` and have a path of
    the form ``/<owner>/<repo>/actions/workflows/<workflow>/badge.svg``
    (the final segment is compared case-insensitively).
    """
    if parsed.scheme not in ("http", "https") or parsed.netloc.lower() != "github.com":
        return False

    segments = parsed.path.strip("/").split("/")
    if len(segments) < 6:
        return False
    return segments[2:4] == ["actions", "workflows"] and segments[5].lower() == "badge.svg"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _rewrite_github_actions_badge(parsed) -> str:
    """Build the raster shields.io URL for a GitHub Actions badge.

    Owner, repository and workflow are read from path segments 0, 1 and 4.
    The first ``branch`` and ``event`` query values, when present and
    non-empty, are carried over to the new URL.
    """
    segments = parsed.path.strip("/").split("/")
    owner, repo, workflow = segments[0], segments[1], segments[4]

    query = parse_qs(parsed.query)
    forwarded = {key: query[key][0] for key in ("branch", "event") if query.get(key)}

    badge_url = f"{_RASTER_SHIELDS_BASE}/github/actions/workflow/status/{owner}/{repo}/{workflow}.png"
    if not forwarded:
        return badge_url
    return f"{badge_url}?{urlencode(forwarded)}"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _should_rasterize(parsed) -> bool:
    """Return ``True`` when the URL path ends in ``.svg`` (case-insensitive)."""
    return parsed.path.lower().endswith(".svg")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _wrap_weserv(url: str) -> str:
    """Return a weserv proxy URL that requests *url* as PNG output.

    The original URL is fully percent-encoded (no characters kept safe)
    before being placed in the ``url`` query parameter.
    """
    target = quote(url, safe="")
    return f"{_WESERV_BASE}?url={target}&output=png"
|
|
@@ -38,7 +38,7 @@ def process_readme(markdown_text, domain=None, debug=False):
|
|
|
38
38
|
file.write(bbcode_text)
|
|
39
39
|
|
|
40
40
|
# Convert BBCode formatted as HTML to final BBCode
|
|
41
|
-
final_bbcode = process_html(bbcode_text, debug, 'readme.finalpass')
|
|
41
|
+
final_bbcode = process_html(bbcode_text, debug, 'readme.finalpass', domain=domain)
|
|
42
42
|
|
|
43
43
|
return final_bbcode
|
|
44
44
|
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from typing import Dict, Any, List
|
|
2
|
+
|
|
3
|
+
def merge_ordered_lists(md):
    """
    A plugin to merge consecutive "top-level" ordered lists into one,
    and also attach any intervening code blocks or blank lines to the
    last list item so that the final BBCode appears as a single list
    with multiple steps.

    This relies on a few assumptions:
      1) The only tokens between two ordered lists that should be merged
         are code blocks or blank lines (not normal paragraphs).
      2) We want any code block(s) right after a list item to appear in
         that same bullet item.

    The hook is appended to ``md.before_render_hooks`` and *md* is returned.
    """

    def is_top_level_ordered_list(token):
        # A missing "attrs"/"depth" is treated as depth 0, and the merge code
        # below uses the same default, so such a token can no longer pass
        # this check and then fail with a KeyError.
        attrs = token.get("attrs") or {}
        return (
            token["type"] == "list"
            and bool(attrs.get("ordered", False))
            and attrs.get("depth", 0) == 0
        )

    def rewrite_tokens(md, state):
        tokens = state.tokens
        merged = []
        i = 0

        while i < len(tokens):
            token = tokens[i]

            if not is_top_level_ordered_list(token):
                # Not a top-level ordered list: keep it as-is.
                merged.append(token)
                i += 1
                continue

            # Start a new merged list from this list's items.
            list_items = list(token.get("children") or [])
            i += 1

            # Absorb following top-level ordered lists, and attach code
            # blocks / blank lines to the last item collected so far.
            while i < len(tokens):
                nxt = tokens[i]
                if is_top_level_ordered_list(nxt):
                    list_items.extend(nxt.get("children") or [])
                elif nxt["type"] in ("block_code", "blank_line") and list_items:
                    list_items[-1].setdefault("children", []).append(nxt)
                else:
                    # Anything else ends the merge. That includes a code block
                    # with no item to attach to: it stays in the stream
                    # instead of being silently dropped.
                    break
                i += 1

            merged.append(
                {
                    "type": "list",
                    "children": list_items,
                    "attrs": {
                        "ordered": True,
                        "depth": 0,
                    },
                }
            )

        # Replace the old tokens with the merged version.
        state.tokens = merged

    # Run before rendering so the renderer only ever sees the merged list.
    md.before_render_hooks.append(rewrite_tokens)
    return md
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from mistune.core import BaseRenderer
|
|
2
2
|
from mistune.util import escape as escape_text, striptags, safe_entity
|
|
3
|
+
import re
|
|
3
4
|
from urllib.parse import urljoin, urlparse
|
|
4
5
|
|
|
6
|
+
from md2bbcode.image_rewrite import rewrite_svg_url
|
|
7
|
+
|
|
5
8
|
|
|
6
9
|
class BBCodeRenderer(BaseRenderer):
|
|
7
10
|
"""A renderer for converting Markdown to BBCode."""
|
|
@@ -56,7 +59,12 @@ class BBCodeRenderer(BaseRenderer):
|
|
|
56
59
|
|
|
57
60
|
def image(self, text: str, url: str, title=None) -> str:
|
|
58
61
|
alt_text = f' alt="{text}"' if text else ''
|
|
59
|
-
|
|
62
|
+
safe_url = self.safe_url(url)
|
|
63
|
+
rewritten_url = rewrite_svg_url(safe_url)
|
|
64
|
+
if rewritten_url is None:
|
|
65
|
+
link_text = text or safe_url
|
|
66
|
+
return f"[url={safe_url}]{link_text}[/url]"
|
|
67
|
+
img_tag = f'[img{alt_text}]' + rewritten_url + '[/img]'
|
|
60
68
|
# Check if alt text starts with 'pixel' and treat it as pixel art
|
|
61
69
|
if text and text.lower().startswith('pixel'):
|
|
62
70
|
return f'[pixelate]{img_tag}[/pixelate]'
|
|
@@ -115,6 +123,16 @@ class BBCodeRenderer(BaseRenderer):
|
|
|
115
123
|
return f"[CODE]{escape_text(code)}[/CODE]\n"
|
|
116
124
|
|
|
117
125
|
def block_quote(self, text: str) -> str:
    """Render a block quote, or an admonition for GitHub-style alerts.

    Text that starts with an alert marker such as ``[!NOTE]`` (matched
    case-insensitively) is rendered as ``[admonition=<kind>]`` with the
    marker removed and the remaining body stripped; an empty body gives an
    empty admonition. Any other text becomes a plain ``[QUOTE]``.
    """
    alert = re.match(r"^\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]\s*", text, flags=re.IGNORECASE)
    if alert is None:
        return '[QUOTE]\n' + text + '[/QUOTE]\n'

    kind = alert.group(1).lower()
    body = text[alert.end():].strip()
    return f"[admonition={kind}]{body}[/admonition]\n"
|
|
119
137
|
|
|
120
138
|
def block_html(self, html: str) -> str:
|
md2bbcode-1.0.9/README.md
DELETED
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
[](https://github.com/RedGuides/md2bbcode/actions/workflows/publish.yml)
|
|
2
|
-
|
|
3
|
-

|
|
4
|
-
|
|
5
|
-
# md2bbcode
|
|
6
|
-
**A wrapper and plugin for [Mistune](https://github.com/lepture/mistune).** It converts GitHub-flavored Markdown to Xenforo-flavored BBCode. Custom BBCodes made for RedGuides are included in `bb_codes.xml`.
|
|
7
|
-
|
|
8
|
-
## Installation
|
|
9
|
-
|
|
10
|
-
You can install md2bbcode using pip:
|
|
11
|
-
|
|
12
|
-
```bash
|
|
13
|
-
pip install md2bbcode
|
|
14
|
-
```
|
|
15
|
-
|
|
16
|
-
## Usage
|
|
17
|
-
|
|
18
|
-
After installation, you can use md2bbcode from the command line:
|
|
19
|
-
|
|
20
|
-
```bash
|
|
21
|
-
md2bbcode README.md
|
|
22
|
-
```
|
|
23
|
-
|
|
24
|
-
If the markdown includes relative images or other assets, you can use the --domain flag to prepend a domain to the relative URLs:
|
|
25
|
-
|
|
26
|
-
```bash
|
|
27
|
-
md2bbcode README.md --domain https://raw.githubusercontent.com/RedGuides/md2bbcode/main/
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
You can also use the package in your Python project:
|
|
31
|
-
|
|
32
|
-
```python
|
|
33
|
-
from md2bbcode.main import process_readme
|
|
34
|
-
|
|
35
|
-
# Your Markdown content
|
|
36
|
-
markdown_text = "# Hello World"
|
|
37
|
-
|
|
38
|
-
# Optional domain to prepend to relative URLs
|
|
39
|
-
domain = 'https://raw.githubusercontent.com/yourusername/yourrepo/main/'
|
|
40
|
-
|
|
41
|
-
# Convert Markdown to BBCode
|
|
42
|
-
bbcode_output = process_readme(markdown_text, domain=domain)
|
|
43
|
-
|
|
44
|
-
# Output the BBCode
|
|
45
|
-
print(bbcode_output)
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
### Debug Mode
|
|
49
|
-
|
|
50
|
-
You can use the `--debug` flag to save intermediate results to files for debugging:
|
|
51
|
-
|
|
52
|
-
```bash
|
|
53
|
-
md2bbcode README.md --debug
|
|
54
|
-
```
|
|
55
|
-
## Development
|
|
56
|
-
|
|
57
|
-
If you want to contribute to md2bbcode or set up a development environment, follow these steps:
|
|
58
|
-
|
|
59
|
-
1. Clone the repository:
|
|
60
|
-
```bash
|
|
61
|
-
git clone https://github.com/RedGuides/md2bbcode.git
|
|
62
|
-
cd md2bbcode
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
2. Install Hatch, which is used for building and managing the project:
|
|
66
|
-
```bash
|
|
67
|
-
pip install hatch
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
3. Create a development environment and install dependencies:
|
|
71
|
-
```bash
|
|
72
|
-
hatch env create
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
4. Activate the development environment:
|
|
76
|
-
```bash
|
|
77
|
-
hatch shell
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
### renderers/bbcode.py
|
|
81
|
-
|
|
82
|
-
The custom plugin for Mistune, which converts AST to bbcode.[^1]
|
|
83
|
-
|
|
84
|
-
[^1]: Mistune does not convert Markdown HTML to AST, hence the need for `html2bbcode`.
|
|
85
|
-
|
|
86
|
-
## Additional Tools
|
|
87
|
-
|
|
88
|
-
### html2bbcode
|
|
89
|
-
|
|
90
|
-
Converts several HTML tags typically allowed in Markdown to BBCode.[^2]
|
|
91
|
-
|
|
92
|
-
[^2]: Currently used for post-processing mistune output, but there's a better way. See inside the file for a suggestion.
|
|
93
|
-
|
|
94
|
-
```bash
|
|
95
|
-
html2bbcode input_file.html
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
### md2ast
|
|
99
|
-
|
|
100
|
-
For debugging Mistune's renderer, converts a Markdown file to AST (JSON format).
|
|
101
|
-
|
|
102
|
-
```bash
|
|
103
|
-
md2ast input.md output.json
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
## Features Test
|
|
107
|
-
|
|
108
|
-
Here are a few GitHub-flavored Markdown features so you can use this README.md for testing:
|
|
109
|
-
|
|
110
|
-
- **Strikethrough:** ~~This text is struck through.~~
|
|
111
|
-
- **Superscript:** This text is normal and this is <sup>superscript</sup>.
|
|
112
|
-
- **Table:**
|
|
113
|
-
|
|
114
|
-
| Syntax | Description |
|
|
115
|
-
| ----------- | ----------- |
|
|
116
|
-
| Header | Title |
|
|
117
|
-
| Paragraph | Text |
|
|
118
|
-
|
|
119
|
-
## Todo
|
|
120
|
-
|
|
121
|
-
- refactor html2bbcode
|
|
122
|
-
- update for new Xenforo 2.3 and 2.4 BBCode
|
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
# converts some HTML tags to BBCode
|
|
2
|
-
# pass --debug to save the output to readme.finalpass
|
|
3
|
-
# may be better off replacing this with html to markdown (and then to bbcode). Lepture recommends a JS html to markdown converter: sundown
|
|
4
|
-
from bs4 import BeautifulSoup, NavigableString
|
|
5
|
-
import argparse
|
|
6
|
-
|
|
7
|
-
def handle_font_tag(tag, replacements):
    """Handles the conversion of <font> tag with attributes like color and size.

    The ``color``, ``size`` and ``face`` attributes map to nested ``COLOR``,
    ``SIZE`` and ``FONT`` tags (outermost first), e.g.
    ``[COLOR=red][SIZE=5]content[/SIZE][/COLOR]``.

    *replacements* is accepted for interface compatibility but not used:
    ``recursive_html_to_bbcode`` takes a single argument, so forwarding it
    raised ``TypeError``.
    """
    attributes = []
    for html_attr, bb_tag in (("color", "COLOR"), ("size", "SIZE"), ("face", "FONT")):
        if html_attr in tag.attrs:
            attributes.append((bb_tag, tag[html_attr]))

    inner_content = ''.join(recursive_html_to_bbcode(child) for child in tag.children)

    # Wrap from the innermost attribute outwards.
    for bb_tag, value in reversed(attributes):
        inner_content = f"[{bb_tag}={value}]{inner_content}[/{bb_tag}]"
    return inner_content
|
|
23
|
-
|
|
24
|
-
def handle_style_tag(tag, replacements):
    """Handles the conversion of tags with style attributes like color, size, and font.

    Recognised declarations: ``color``, ``font-size``, ``font-family``,
    ``text-decoration`` (``line-through`` -> ``S``, ``underline`` -> ``U``)
    and ``font-weight`` (``bold`` or a number >= 700 -> ``B``).

    *replacements* is accepted for interface compatibility but not used:
    ``recursive_html_to_bbcode`` takes a single argument, so forwarding it
    raised ``TypeError``.
    """
    style = tag.attrs.get('style', '')

    # Split each declaration on the FIRST colon only, so values that contain
    # a colon themselves (e.g. URLs) are kept whole.
    css_properties = {}
    for declaration in style.split(';'):
        prop, separator, value = declaration.partition(':')
        if separator:
            css_properties[prop.strip()] = value.strip()

    attributes = []
    if 'color' in css_properties:
        attributes.append(f"COLOR={css_properties['color']}")
    if 'font-size' in css_properties:
        attributes.append(f"SIZE={css_properties['font-size']}")
    if 'font-family' in css_properties:
        attributes.append(f"FONT={css_properties['font-family']}")

    decoration = css_properties.get('text-decoration', '')
    if 'line-through' in decoration:
        attributes.append("S")
    if 'underline' in decoration:
        attributes.append("U")

    weight = css_properties.get('font-weight')
    if weight is not None and (weight.lower() == 'bold' or (weight.isdigit() and int(weight) >= 700)):
        attributes.append("B")

    inner_content = ''.join(recursive_html_to_bbcode(child) for child in tag.children)

    # Nest all attributes; the closing tag never carries the "=value" part.
    for attr in reversed(attributes):
        closing = attr.split('=', 1)[0]
        inner_content = f"[{attr}]{inner_content}[/{closing}]"
    return inner_content
|
|
56
|
-
|
|
57
|
-
def recursive_html_to_bbcode(element):
    """Recursively convert HTML elements to BBCode.

    Text nodes are returned as-is. A ``details`` element becomes a
    ``[SPOILER]`` whose title is taken from its ``summary`` child (which is
    removed from the tree in the process). A stray ``summary`` yields an
    empty string. Every other element is replaced by its converted children.
    """
    if isinstance(element, NavigableString):
        return str(element)

    if element.name == 'summary':
        # Handled as part of the enclosing <details>.
        return ''

    if element.name != 'details':
        return ''.join([recursive_html_to_bbcode(child) for child in element.contents])

    title = ''
    summary = element.find('summary')
    if summary:
        title = '=' + ''.join([recursive_html_to_bbcode(child) for child in summary.contents])
        # Remove the summary so it is not repeated in the spoiler body.
        summary.decompose()

    body = ''.join([recursive_html_to_bbcode(child) for child in element.contents])
    return f'[SPOILER{title}]{body}[/SPOILER]'
|
|
84
|
-
|
|
85
|
-
def html_to_bbcode(html):
    """Parse *html* with BeautifulSoup's ``html.parser`` and convert it to BBCode.

    The tag-replacement table that used to be built here was never passed on
    or read, so it has been dropped; the conversion is done entirely by
    ``recursive_html_to_bbcode``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    return recursive_html_to_bbcode(soup)
|
|
109
|
-
|
|
110
|
-
def process_html(input_html, debug=False, output_file=None):
    """Convert HTML to BBCode and, in debug mode, also save it to a file.

    With *debug* set, the result is written (UTF-8) to *output_file*, which
    defaults to ``readme.finalpass`` when not given. The converted text is
    returned in every case, so callers get a result in debug mode too
    instead of ``None``.
    """
    converted_bbcode = html_to_bbcode(input_html)

    if debug:
        if output_file is None:
            # open(None) would raise TypeError; fall back to the debug file name.
            output_file = 'readme.finalpass'
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(converted_bbcode)
    return converted_bbcode
|
|
118
|
-
|
|
119
|
-
if __name__ == "__main__":
    # Command-line entry point: convert an HTML file to BBCode.
    parser = argparse.ArgumentParser(description="Convert HTML to BBCode with optional debugging output.")
    parser.add_argument('input_file', type=str, help='Input HTML file path')
    parser.add_argument('--debug', action='store_true', help='Save output to readme.finalpass for debugging')

    args = parser.parse_args()
    input_file = args.input_file
    output_file = 'readme.finalpass' if args.debug else None

    with open(input_file, 'r', encoding='utf-8') as file:
        html_content = file.read()

    # Call the processing function
    converted_bbcode = process_html(html_content, debug=args.debug, output_file=output_file)

    # Without --debug nothing is written to disk, so print the result;
    # previously it was computed and then discarded. process_html may return
    # None in debug mode, hence the extra guard.
    if not args.debug and converted_bbcode is not None:
        print(converted_bbcode)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|