edwh-editorjs 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ Metadata-Version: 2.3
2
+ Name: edwh-editorjs
3
+ Version: 1.0.0
4
+ Summary: pyEditorJS
5
+ Project-URL: Homepage, https://github.com/educationwarehouse/edwh-EditorJS
6
+ Author-email: SKevo <skevo.cw@gmail.com>, Robin van der Noord <robin.vdn@educationwarehouse.nl>
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Keywords: bleach,clean,editor,editor.js,html,javascript,json,parser,wysiwyg
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.10
18
+ Requires-Dist: bleach
19
+ Provides-Extra: dev
20
+ Requires-Dist: edwh; extra == 'dev'
21
+ Requires-Dist: hatch; extra == 'dev'
22
+ Requires-Dist: su6[all]; extra == 'dev'
23
+ Requires-Dist: types-bleach; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # pyEditorJS
27
+
28
+ A minimal, fast, Python 3.10+ package for parsing [Editor.js](https://editorjs.io) content.
29
+
30
+ ## Features
31
+
32
+ - Handles all out-of-the-box Editor.js elements;
33
+ - Optional sanitization via the `bleach` library;
34
+ - Checks whether the data is valid (e. g.: a header can't have more than 6 levels), and raises `EditorJsParseError` if data is malformed;
35
+ - Uses Editor.js's class names for styles, so the output will be consistent with WYSIWYG (see [Editor.js's example style](https://github.com/codex-team/editor.js/blob/8ae8823dcd6877d63241fcb94694a8a18744485d/example/assets/demo.css) and [styles documentation](https://editorjs.io/styles))
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install edwh-editorjs
41
+ ```
42
+
43
+ **Note:** [bleach](https://pypi.org/project/bleach) is a required dependency and is normally installed automatically; it is what produces the clean HTML. To install it manually:
44
+
45
+ ```bash
46
+ pip install bleach
47
+ ```
48
+
49
+ ## Usage
50
+
51
+ ### Quickstart
52
+
53
+ ```python
54
+ from pyeditorjs import EditorJsParser
55
+
56
+ editor_js_data = ... # your Editor.js JSON data
57
+ parser = EditorJsParser(editor_js_data) # initialize the parser
58
+
59
+ html = parser.html(sanitize=True) # `sanitize=True` requires `bleach` to be installed
60
+ print(html) # your clean HTML
61
+ ```
62
+
63
+ ### Obtain texts only (for creating audio-only version, for example)
64
+
65
+ > **WARNING:** This does not sanitize the texts! Please, call `bleach.clean(...)` directly. This also doesn't obtain text from bold texts, markers, etc... - you should use [BeautifulSoup](https://pypi.org/project/beautifulsoup4/) for this.
66
+
67
+ ```python
68
+ #import bleach
69
+ from pyeditorjs import EditorJsParser
70
+
71
+ editor_js_data = ... # your Editor.js JSON data
72
+ parser = EditorJsParser(editor_js_data) # initialize the parser
73
+
74
+ all_texts = []
75
+
76
+ for block in parser:
77
+ text = getattr(block, 'text', None)
78
+
79
+ if text:
80
+ all_texts.append(text) # all_texts.append(bleach.clean(text))
81
+
82
+ print(all_texts)
83
+ ```
84
+
85
+ ## Disclaimer
86
+
87
+ This is a community-provided project, and is not affiliated with the Editor.js team.
88
+ It was created in my spare time. I cannot make sure that it will receive consistent updates.
89
+
90
+ Because of this, PRs, bug reports and suggestions are welcome!
91
+
92
+ <a href="https://www.buymeacoffee.com/skevo"><img src="https://img.buymeacoffee.com/button-api/?text=Support me&emoji=🐣&slug=skevo&button_colour=ffa200&font_colour=000000&font_family=Poppins&outline_colour=000000&coffee_colour=FFDD00" /></a>
@@ -0,0 +1,9 @@
1
+ pyeditorjs/__about__.py,sha256=J-j-u0itpEFT6irdmWmixQqYMadNl1X91TxUmoiLHMI,22
2
+ pyeditorjs/__init__.py,sha256=Z0jH2OV2NkieVFU_URtWWhVsIMihIc4IGbUvCW_2Tm0,705
3
+ pyeditorjs/blocks.py,sha256=qksJTNfKShzxinWVIk9DiCGRCJ5C7lp5jLG4Hbl1enk,8214
4
+ pyeditorjs/exceptions.py,sha256=Uni8r3FwJ-6xQIdSmBsHLs_htWLHD0Arp1KJEvjGU1U,439
5
+ pyeditorjs/parser.py,sha256=6DCqqi-FuXDFxn9xb-dgQ19alvVu7Pjx6x3rTAx9IsI,2154
6
+ edwh_editorjs-1.0.0.dist-info/METADATA,sha256=CIRh9bLb2wwbKecMPayxr8TyY4z3GkYlr2L7FJIjCxI,3269
7
+ edwh_editorjs-1.0.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
8
+ edwh_editorjs-1.0.0.dist-info/licenses/LICENSE,sha256=bY9MhHLeuW8w1aAl-i1O1uSNP5IMOGaL6AWvHcdnt0k,1062
9
+ edwh_editorjs-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.25.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 SKevo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
pyeditorjs/__init__.py ADDED
@@ -0,0 +1,28 @@
1
+ from pathlib import Path
2
+
3
+ from .blocks import BLOCKS_MAP, EditorJsBlock, block
4
+ from .exceptions import EditorJsParseError
5
+ from .parser import EditorJsParser
6
+
7
+ __all__ = [
8
+ "EditorJsParser",
9
+ "EditorJsParseError",
10
+ "EditorJsBlock",
11
+ "block",
12
+ "BLOCKS_MAP",
13
+ ]
14
+
15
+
16
# Overwrite __doc__ with README, so that pdoc can render it.
# The README is looked up one directory above the package; it may be absent there
# (for example in an installed distribution), in which case the fallback text is used.
README_PATH = Path(__file__).parent.parent.absolute() / Path("README.md")
try:
    __readme__ = README_PATH.read_text(encoding="UTF-8")
except (OSError, ValueError):
    # Best effort only: a missing, unreadable or undecodable README (decode errors are
    # `ValueError` subclasses) must never break importing the package.
    __readme__ = "Failed to read README.md!"  # fallback message, for example when there's no README

__doc__ = __readme__


if __name__ == "__main__":
    # No-op reference to the re-exported parser when this file is executed directly.
    _ = [EditorJsParser]
pyeditorjs/blocks.py ADDED
@@ -0,0 +1,309 @@
1
+ import abc
2
+ import typing as t
3
+ from dataclasses import dataclass
4
+
5
+ import bleach
6
+
7
+ from .exceptions import EditorJsParseError
8
+
9
+ __all__ = [
10
+ "block",
11
+ "BLOCKS_MAP",
12
+ "EditorJsBlock",
13
+ ]
14
+
15
+
16
def _sanitize(html: str) -> str:
    """
    Cleans an HTML fragment with `bleach.clean`.

    The tags `b`, `i`, `u`, `a`, `mark`, `code` and the attributes `class`,
    `data-placeholder`, `href` are passed as the allowed ones; everything else
    is handled according to bleach's own defaults.
    """

    allowed_tags = ["b", "i", "u", "a", "mark", "code"]
    allowed_attributes = ["class", "data-placeholder", "href"]

    return bleach.clean(html, tags=allowed_tags, attributes=allowed_attributes)
22
+
23
+
24
# Registry mapping a block type name to the class that renders it.
# It starts out empty; entries are added through the `block` decorator.
BLOCKS_MAP: t.Dict[str, t.Type["EditorJsBlock"]] = {}


def block(_type: str):
    """
    Creates a class decorator that registers the decorated class in `BLOCKS_MAP`.

    ### Parameters:
    - `_type` - the key to register the class under; an entry already stored
      under the same key is replaced.

    The returned decorator hands the class back unchanged.
    """

    def register(cls: t.Type["EditorJsBlock"]):
        BLOCKS_MAP.update({_type: cls})
        return cls

    return register
39
+
40
+
41
@dataclass
class EditorJsBlock(abc.ABC):
    """
    Abstract base for one parsed Editor.js block.

    It wraps the raw block dictionary, exposes its `id`, `type` and `data`
    entries, and leaves the HTML rendering to subclasses.
    """

    _data: dict
    """The raw JSON data of the entire block"""

    @property
    def id(self) -> t.Optional[str]:
        """
        The `id` entry of the raw block data, or `None` if there is none.
        """

        return self._data.get("id")

    @property
    def type(self) -> t.Optional[str]:
        """
        The `type` entry of the raw block data, or `None` if there is none.
        """

        return self._data.get("type")

    @property
    def data(self) -> dict:
        """
        The `data` entry of the raw block data; an empty dict if there is none.
        """

        payload = self._data.get("data", {})
        return payload

    @abc.abstractmethod
    def html(self, sanitize: bool = False) -> str:
        """
        Renders the block as HTML. Subclasses must override this.

        ### Parameters:
        - `sanitize` - if `True`, then the block's text/contents will be sanitized.
        """

        raise NotImplementedError()
84
+
85
+
86
@block("header")
class HeaderBlock(EditorJsBlock):
    """
    A header block, rendered as an `<h1>` - `<h6>` element.
    """

    VALID_HEADER_LEVEL_RANGE = range(1, 7)
    """Valid range for header levels. Default is `range(1, 7)` - so, `1` - `6`."""

    @property
    def text(self) -> str:
        """
        Returns the header's text (an empty string if there is none).
        """

        return self.data.get("text", "")

    @property
    def level(self) -> int:
        """
        Returns the header's level (`1` - `6`); a missing level counts as `1`.

        ### Raises:
        - `EditorJsParseError` - if the level is not an integer within `VALID_HEADER_LEVEL_RANGE`.
        """

        _level = self.data.get("level", 1)

        # `bool` is a subclass of `int`: without the explicit check `True` would pass
        # as level 1 and end up in the markup as `<hTrue>`.
        if (
            isinstance(_level, bool)
            or not isinstance(_level, int)
            or _level not in self.VALID_HEADER_LEVEL_RANGE
        ):
            raise EditorJsParseError(f"`{_level}` is not a valid header level.")

        return _level

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the header as `<hN class="cdx-block ce-header">...</hN>`.

        ### Parameters:
        - `sanitize` - if `True`, the text is cleaned before it is inserted.

        ### Raises:
        - `EditorJsParseError` - if the header level is invalid.
        """

        level = self.level  # validated once, used for both the opening and the closing tag
        text = _sanitize(self.text) if sanitize else self.text
        return rf'<h{level} class="cdx-block ce-header">{text}</h{level}>'
117
+
118
+
119
@block("paragraph")
class ParagraphBlock(EditorJsBlock):
    """
    A paragraph block, rendered as a `<p>` element.
    """

    @property
    def text(self) -> str:
        """
        The paragraph's text (an empty string if there is none).
        """

        return self.data.get("text", "")

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the paragraph; the text is cleaned first when `sanitize` is `True`.
        """

        content = self.text
        if sanitize:
            content = _sanitize(content)
        return f'<p class="cdx-block ce-paragraph">{content}</p>'
131
+
132
+
133
@block("list")
class ListBlock(EditorJsBlock):
    """
    A list block, rendered as `<ul>` or `<ol>` depending on its style.
    """

    VALID_STYLES = ("unordered", "ordered")
    """Valid list order styles."""

    @property
    def style(self) -> t.Optional[str]:
        """
        The list's `style` entry (`ordered` / `unordered`), or `None` if it is missing.
        """

        return self.data.get("style")

    @property
    def items(self) -> t.List[str]:
        """
        The list's raw items (an empty list if there are none).
        """

        return self.data.get("items", [])

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the list with one `<li>` per item.

        ### Parameters:
        - `sanitize` - if `True`, every item is cleaned before it is inserted.

        ### Raises:
        - `EditorJsParseError` - if the style is not one of `VALID_STYLES`.
        """

        style = self.style
        if style not in self.VALID_STYLES:
            raise EditorJsParseError(f"`{style}` is not a valid list style.")

        tag = "ol" if style != "unordered" else "ul"
        body = "".join(
            f"<li>{_sanitize(entry) if sanitize else entry}</li>"
            for entry in self.items
        )

        return f'<{tag} class="cdx-block cdx-list cdx-list--{style}">{body}</{tag}>'
165
+
166
+
167
@block("delimiter")
class DelimiterBlock(EditorJsBlock):
    """
    A delimiter block; it renders a fixed, empty `<div>`.
    """

    def html(self, sanitize: bool = False) -> str:
        # No block data is rendered, so `sanitize` makes no difference here.
        markup = '<div class="cdx-block ce-delimiter"></div>'
        return markup
171
+
172
+
173
@block("image")
class ImageBlock(EditorJsBlock):
    """
    An image block, rendered with the `image-tool` class names.
    """

    @property
    def file_url(self) -> str:
        """
        URL of the image file (an empty string if there is none).
        """

        return self.data.get("file", {}).get("url", "")

    @property
    def caption(self) -> str:
        """
        The image's caption (an empty string if there is none).
        """

        return self.data.get("caption", "")

    @property
    def with_border(self) -> bool:
        """
        Whether the image has a border.
        """

        return self.data.get("withBorder", False)

    @property
    def stretched(self) -> bool:
        """
        Whether the image is stretched.
        """

        return self.data.get("stretched", False)

    @property
    def with_background(self) -> bool:
        """
        Whether the image has a background.
        """

        return self.data.get("withBackground", False)

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the image block.

        ### Parameters:
        - `sanitize` - if `True`, the URL and the caption are cleaned and any double quote
          in them is replaced by `&quot;`, because both are placed inside double-quoted
          attributes. URLs starting with `data:image/` are not passed through the cleaner,
          but still get their double quotes replaced.
        """

        src = self.file_url
        caption = self.caption

        if sanitize:
            if not src.startswith("data:image/"):
                src = _sanitize(src)
            caption = _sanitize(caption)
            # The cleaner is not relied upon to escape `"`; a raw quote would end the
            # attribute early and allow extra attributes to be injected.
            src = src.replace('"', "&quot;")
            caption = caption.replace('"', "&quot;")

        # Only the modifiers that apply are listed, so the attribute has no stray blanks.
        classes = ["cdx-block", "image-tool", "image-tool--filled"]
        if self.stretched:
            classes.append("image-tool--stretched")
        if self.with_border:
            classes.append("image-tool--withBorder")
        if self.with_background:
            classes.append("image-tool--withBackground")
        class_attr = " ".join(classes)

        # Two containers are opened and exactly two are closed.
        parts = [
            f'<div class="{class_attr}">',
            '<div class="image-tool__image">',
            '<div class="image-tool__image-preloader"></div>',
            f'<img class="image-tool__image-picture" src="{src}"/>',
            "</div>",
            f'<div class="image-tool__caption" data-placeholder="{caption}"></div>',
            "</div>",
        ]

        return "".join(parts)
233
+
234
+
235
@block("quote")
class QuoteBlock(EditorJsBlock):
    """
    A quote block made of a quote text and a caption.
    """

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the quote as a `<blockquote>` with the text and the caption inside.

        ### Parameters:
        - `sanitize` - if `True`, both the text and the caption are cleaned.
        """

        quote = self.data.get("text", "")
        caption = self.data.get("caption", "")
        if sanitize:
            quote = _sanitize(quote)
            caption = _sanitize(caption)
        # TODO: the block's "alignment" entry is not rendered yet.
        return f"""
        <blockquote class="cdx-block cdx-quote">
            <div class="cdx-input cdx-quote__text">{quote}</div>
            <cite class="cdx-input cdx-quote__caption">{caption}</cite>
        </blockquote>
        """
250
+
251
+
252
@block("table")
class TableBlock(EditorJsBlock):
    """
    A table block, rendered as a `<table>` with one `<tr>` per content row.
    """

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the table from the block's `content` rows.

        ### Parameters:
        - `sanitize` - if `True`, every cell is cleaned before it is inserted.
        """

        content = self.data.get("content", [])
        # TODO: the "stretched" and "withHeadings" entries are not rendered yet.

        rows = []
        for row in content:
            cells = "".join(
                f'<td class="tc-cell">{_sanitize(cell) if sanitize else cell}</td>'
                for cell in row
            )
            rows.append(f'<tr class="tc-row">{cells}</tr>')

        body = "".join(rows)
        return f'<table class="tc-table">{body}</table>'
272
+
273
+
274
@block("code")
class CodeBlock(EditorJsBlock):
    """
    A code block, rendered as a `<code>` element.
    """

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the block's `code` entry; it is cleaned first when `sanitize` is `True`.
        """

        source = self.data.get("code", "")
        # NOTE(review): the code is inserted as markup, not as escaped text - confirm this is intended.
        code = _sanitize(source) if sanitize else source
        return f"""
        <code class="ce-code__textarea cdx-input" data-empty="false">{code}</code>
        """
283
+
284
+
285
@block("warning")
class WarningBlock(EditorJsBlock):
    """
    A warning block made of a title and a message.
    """

    def html(self, sanitize: bool = False) -> str:
        """
        Renders the warning; title and message are cleaned first when `sanitize` is `True`.
        """

        fields = self.data
        title = fields.get("title", "")
        message = fields.get("message", "")

        if sanitize:
            title, message = _sanitize(title), _sanitize(message)

        return f"""
        <div class="cdx-block cdx-warning">
            <div class="cdx-input cdx-warning__title">{title}</div>
            <div class="cdx-input cdx-warning__message">{message}</div>
        </div>
        """
301
+
302
+
303
@block("raw")
class RawBlock(EditorJsBlock):
    """
    A raw HTML block; its `html` entry is returned as the output.
    """

    def html(self, sanitize: bool = False) -> str:
        """
        Returns the block's `html` entry, cleaned first when `sanitize` is `True`.
        """

        markup = self.data.get("html", "")
        return _sanitize(markup) if sanitize else markup
@@ -0,0 +1,19 @@
1
__all__ = ["EditorJsException", "EditorJsParseError", "EditorJSUnsupportedBlock"]


class EditorJsException(Exception):
    """
    Root of the package's exception hierarchy.
    """


class EditorJsParseError(EditorJsException):
    """
    Signals a parse error, e.g. JSON data with invalid or malformed content.
    """


class EditorJSUnsupportedBlock(EditorJsException):
    """
    Signals an unknown block type when parsing with `strict=True`.
    """
pyeditorjs/parser.py ADDED
@@ -0,0 +1,75 @@
1
+ import typing as t
2
+ import warnings
3
+ from dataclasses import dataclass
4
+
5
+ from .blocks import BLOCKS_MAP, EditorJsBlock
6
+ from .exceptions import EditorJsParseError, EditorJSUnsupportedBlock
7
+
8
+
9
+ @dataclass
10
+ class EditorJsParser:
11
+ """
12
+ An Editor.js parser.
13
+ """
14
+
15
+ content: dict
16
+ """The JSON data of Editor.js content."""
17
+
18
+ def __post_init__(self) -> None:
19
+ if not isinstance(self.content, dict):
20
+ raise EditorJsParseError(
21
+ f"Content must be `dict`, not {type(self.content).__name__}"
22
+ )
23
+
24
+ @staticmethod
25
+ def _get_block(data: dict, strict: bool = False) -> t.Optional[EditorJsBlock]:
26
+ """
27
+ Obtains block instance from block data.
28
+ """
29
+
30
+ _type = data.get("type", None)
31
+
32
+ if _type not in BLOCKS_MAP:
33
+ if strict:
34
+ raise EditorJSUnsupportedBlock(_type)
35
+ else:
36
+ warnings.warn(f"Unsupported block: {_type}", category=RuntimeWarning)
37
+ return None
38
+
39
+ return BLOCKS_MAP[_type](_data=data)
40
+
41
+ def blocks(self, strict: bool = False) -> list[EditorJsBlock]:
42
+ """
43
+ Obtains a list of all available blocks from the editor's JSON data.
44
+ """
45
+
46
+ all_blocks: list[EditorJsBlock] = []
47
+ blocks = self.content.get("blocks", [])
48
+
49
+ if not isinstance(blocks, list):
50
+ raise EditorJsParseError(
51
+ f"Blocks is not `list`, but `{type(blocks).__name__}`"
52
+ )
53
+
54
+ for block_data in blocks:
55
+ if block := self._get_block(data=block_data, strict=strict):
56
+ all_blocks.append(block)
57
+
58
+ return all_blocks
59
+
60
+ def __iter__(self) -> t.Iterator[EditorJsBlock]:
61
+ """Returns `iter(self.blocks())`"""
62
+
63
+ return iter(self.blocks())
64
+
65
+ def html(self, sanitize: bool = False, strict: bool = False) -> str:
66
+ """
67
+ Renders the editor's JSON content as HTML.
68
+
69
+ ### Parameters:
70
+ - `sanitize` - whether to also sanitize the blocks' texts/contents.
71
+ """
72
+
73
+ return "\n".join(
74
+ [block.html(sanitize=sanitize) for block in self.blocks(strict=strict)]
75
+ )