edwh-editorjs 2.0.0__py3-none-any.whl → 2.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
editorjs/blocks.py DELETED
@@ -1,670 +0,0 @@
1
- """
2
- mdast to editorjs
3
- """
4
-
5
- import abc
6
- import re
7
- import typing as t
8
- from html.parser import HTMLParser
9
- from urllib.parse import urlparse
10
-
11
- import markdown2
12
-
13
- from .exceptions import TODO
14
- from .types import EditorChildData, MDChildNode
15
-
16
-
17
class EditorJSBlock(abc.ABC):
    """
    Interface implemented by every block converter in this module.

    All three conversions are classmethods, so converters are used through
    the class itself and never instantiated for conversion.
    """

    @classmethod
    @abc.abstractmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Render the EditorJS ``data`` payload of one block as Markdown."""

    @classmethod
    @abc.abstractmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """Convert one mdast node into a list of EditorJS block dicts."""

    @classmethod
    @abc.abstractmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Render one mdast node as an inline text/HTML string."""
29
-
30
-
31
# Registry mapping a block type name to the converter *class* handling it.
# Populated by the ``block`` decorator; values are classes, not instances.
BLOCKS: dict[str, type[EditorJSBlock]] = {}
32
-
33
-
34
def block(*names: str):
    """
    Build a class decorator that registers a converter in ``BLOCKS``.

    Args:
        *names: every registry key that should resolve to the decorated class.

    Returns:
        A decorator that stores the class under each name and returns it unchanged.
    """

    def wrapper(cls):
        BLOCKS.update(dict.fromkeys(names, cls))
        return cls

    return wrapper
41
-
42
-
43
def process_styled_content(item: MDChildNode, strict: bool = True) -> str:
    """
    Render an inline node (text, emphasis, link, ...) as an HTML string.

    Args:
        item: A node dictionary representing an inline element or text.
        strict: Raise if 'type' has neither a registered block converter nor an
            entry in 'html_wrappers'. The flag is applied to nested children too.

    Returns:
        A formatted HTML string based on the item type.

    Raises:
        ValueError: In strict mode, when the node type is not supported.
    """
    _type = item.get("type")
    html_wrappers = {
        "text": "{value}",
        "html": "{value}",
        "emphasis": "<i>{value}</i>",
        "strong": "<b>{value}</b>",
        "strongEmphasis": "<b><i>{value}</i></b>",
        "link": '<a href="{url}">{value}</a>',
        "inlineCode": '<code class="inline-code">{value}</code>',
        # todo: <mark>, linktool
    }

    # Types with a registered converter render themselves.
    if _type in BLOCKS:
        return BLOCKS[_type].to_text(item)

    if strict and _type not in html_wrappers:
        raise ValueError(f"Unsupported type {_type} in paragraph")

    # Process children recursively if they exist, otherwise use the direct value.
    # `strict` is forwarded so a lenient call stays lenient for nested nodes.
    if children := item.get("children"):
        value = "".join(process_styled_content(child, strict) for child in children)
    else:
        value = item.get("value", "")

    # Unknown types (only reachable when not strict) are emitted unwrapped.
    template = html_wrappers.get(_type, "{value}")
    return template.format(value=value, url=item.get("url", ""))
82
-
83
-
84
# ids of the nodes whose leaf fallback is currently being rendered.
# Used only as a re-entrancy guard by default_to_text.
_FALLBACK_IN_PROGRESS: set[int] = set()


def default_to_text(node: MDChildNode):
    """
    Render a node as text: its children joined, or the node itself as a fallback.

    Args:
        node: A node dictionary, optionally with 'children'.

    Returns:
        The concatenated rendering of all children when that is non-empty,
        otherwise the rendering of the node itself. Returns "" when the node's
        own converter delegates back here and there is nothing to render.
    """
    rendered = "".join(
        process_styled_content(child) for child in node.get("children") or []
    )
    if rendered:
        return rendered

    # A converter whose to_text() calls default_to_text() (e.g. a paragraph)
    # would otherwise bounce between the two functions until RecursionError
    # when its children render to nothing.
    marker = id(node)
    if marker in _FALLBACK_IN_PROGRESS:
        return ""

    _FALLBACK_IN_PROGRESS.add(marker)
    try:
        return process_styled_content(node)
    finally:
        _FALLBACK_IN_PROGRESS.discard(marker)
88
-
89
-
90
@block("heading", "header")
class HeadingBlock(EditorJSBlock):
    """Converter registered for the 'heading' and 'header' block names."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """
        Render a header block as an ATX heading line.

        Raises:
            ValueError: If 'level' (default 1) is outside 1..6.
        """
        level = data.get("level", 1)
        if not 1 <= level <= 6:
            raise ValueError("Header level must be between 1 and 6.")

        hashes = "#" * level
        text = data.get("text", "")
        return f"{hashes} {text}\n"

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Converts a Markdown heading node into structured block data.

        Args:
            node: A node dictionary with 'depth' and 'children'.

        Returns:
            A one-element list holding the 'header' block.

        Raises:
            ValueError: If 'depth' is missing or outside 1..6, or if the
                heading does not have exactly one child (see to_text).
        """
        depth = node.get("depth")
        depth_is_valid = depth is not None and 1 <= depth <= 6
        if not depth_is_valid:
            raise ValueError("Heading depth must be between 1 and 6.")

        header = {"data": {"level": depth, "text": cls.to_text(node)}, "type": "header"}
        return [header]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """
        Return the 'value' of the heading's single child ("" if it has none).

        Raises:
            ValueError: If the node does not have exactly one child.
        """
        children = node.get("children", [])
        if children is None or len(children) != 1:
            raise ValueError("Header block must have exactly one child element")

        only_child = children[0]
        return only_child.get("value", "")
131
-
132
-
133
@block("paragraph")
class ParagraphBlock(EditorJSBlock):
    """Converter registered for the 'paragraph' block name."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Return the paragraph text followed by a newline."""
        text = data.get("text", "")
        return f"{text}\n"

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Split a paragraph node into blocks, in document order.

        Plain inline children are accumulated into 'paragraph' blocks; images,
        ``<editorjs ...>`` custom tags and text-only tables each become their
        own block, with any text collected so far emitted before them.

        Args:
            node: A paragraph node dictionary with 'children'.

        Returns:
            The list of blocks the paragraph expands to (possibly empty).
        """
        result: list[dict] = []
        current_text = ""
        skip = 0
        nodes = node.get("children", [])

        def flush_text() -> None:
            """Emit the accumulated text as a paragraph block, if there is any."""
            nonlocal current_text
            if current_text:
                result.append({"data": {"text": current_text}, "type": "paragraph"})
                current_text = ""

        for idx, child in enumerate(nodes):
            if skip:
                # child was already consumed as part of a custom <editorjs> tag
                skip -= 1
                continue

            _type = child.get("type")

            # deal with custom types
            if _type == "html" and child.get("value", "").startswith("<editorjs"):
                # special type, e.g. <editorjs type="linkTool" href=...>...</editorjs>
                flush_text()

                if child.get("value", "").endswith("/>"):
                    # self-closing: the tag is this single child
                    custom_children = [child]
                else:
                    # <editorjs>something</editorjs> = 3 children:
                    # opening tag, body text, closing tag
                    custom_children = nodes[idx : idx + 3]
                    skip = 2

                # to_json returns a list, so extend (append would nest it)
                result.extend(EditorJSCustom.to_json({"children": custom_children}))
                continue

            if _type == "image":
                flush_text()
                result.extend(ImageBlock.to_json(child))
                continue

            child_text = cls.to_text(child)
            _child_text = child_text.strip()
            if _child_text.startswith("|") and _child_text.endswith("|"):
                # note: this just supports text-only tables.
                # tables with more complex elements break into multiple children.
                # and mdast DOES support converting into a Table/TableCell structure
                # via the GFM extension
                # but their default mdast->md converter does NOT implement these functionalities.
                flush_text()
                result.extend(TableBlock.to_json(child))
                continue

            current_text += child_text

        # final text after the last special block:
        flush_text()

        return result

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Render the node through default_to_text."""
        return default_to_text(node)
201
-
202
-
203
@block("list")
class ListBlock(EditorJSBlock):
    """Converter registered for the 'list' block name; also detects checklists."""

    # Prefixes that mark a checklist item.
    # checklists are not supported (well) by mdast so we detect them ourselves.
    _CHECKLIST_MARKERS = ("[ ]", "[x]")

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """
        Render a (possibly nested) list block as Markdown.

        Nested items are indented with one tab per depth level; ordered lists
        are numbered per nesting level starting at 1.
        """
        style = data.get("style", "unordered")
        items = data.get("items", [])

        def parse_items(subitems: list[dict[str, t.Any]], depth: int = 0) -> str:
            # computed outside the f-string: a backslash inside an f-string
            # expression is a SyntaxError before Python 3.12
            indent = "\t" * depth
            markdown_items = []
            for number, item in enumerate(subitems, start=1):
                prefix = f"{number}." if style == "ordered" else "-"
                markdown_items.append(f"{indent}{prefix} {item['content']}")

                # Recurse if there are nested items
                if item.get("items"):
                    markdown_items.append(parse_items(item["items"], depth + 1))

            return "\n".join(markdown_items)

        return "\n" + parse_items(items) + "\n\n"

    @classmethod
    def _collect_items(cls, node: MDChildNode) -> tuple[list[dict], bool]:
        """Return the list-shaped items of `node` and whether it could be a checklist."""
        items = []
        could_be_checklist = True

        for child in node["children"]:
            content = ""
            subitems = []
            # child can have content and/or items
            for grandchild in child["children"]:
                _type = grandchild.get("type", "")
                if _type == "paragraph":
                    subcontent = ParagraphBlock.to_text(grandchild)
                    if not subcontent.strip().startswith(cls._CHECKLIST_MARKERS):
                        could_be_checklist = False
                    content += subcontent
                elif _type == "list":
                    could_be_checklist = False
                    # always take list-shaped items, even if the nested list
                    # on its own would have been detected as a checklist
                    nested_items, _ = cls._collect_items(grandchild)
                    subitems.extend(nested_items)
                else:
                    raise ValueError(f"Unsupported type {_type} in list")

            items.append({"content": content, "items": subitems})

        return items, could_be_checklist

    @classmethod
    def _checklist_item(cls, content: str) -> dict:
        """Turn '[x] text' / '[ ] text' into a checklist item dict."""
        # leading whitespace is ignored, consistent with checklist detection
        text = content.lstrip()
        checked = text.startswith("[x]")
        if text.startswith(cls._CHECKLIST_MARKERS):
            # drop exactly one marker plus the single space that follows it
            text = text[3:].removeprefix(" ")
        return {"text": text, "checked": checked}

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Converts a Markdown list block with nested items and styling into structured block data.

        Args:
            node: A node dictionary with 'ordered' and 'children'.

        Returns:
            A one-element list holding either a 'checklist' block
            (data: {items: [{text, checked}, ...]}) when every paragraph of
            every item starts with '[ ]' or '[x]' and nothing is nested, or a
            'list' block (data: {items: [{content, items}, ...], style}).

        Raises:
            ValueError: If a list item contains something other than
                paragraphs and lists.
        """
        items, could_be_checklist = cls._collect_items(node)

        if could_be_checklist:
            return [
                {
                    "type": "checklist",
                    "data": {
                        "items": [cls._checklist_item(x["content"]) for x in items]
                    },
                }
            ]

        return [
            {
                "data": {
                    "items": items,
                    "style": "ordered" if node.get("ordered") else "unordered",
                },
                "type": "list",
            }
        ]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Lists have no inline text rendering; always returns an empty string."""
        return ""
304
-
305
-
306
@block("checklist")
class ChecklistBlock(ListBlock):
    """Converter registered for the 'checklist' block name (Markdown side only)."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Render each item as '- [x] text' or '- [ ] text', one per line."""

        def render(entry) -> str:
            label = entry.get("text", "").strip()
            mark = "x" if entry.get("checked", False) else " "
            return f"- [{mark}] {label}"

        body = "\n".join(render(entry) for entry in data.get("items", []))
        return "\n" + body + "\n\n"
318
-
319
-
320
@block("thematicBreak", "delimiter")
class DelimiterBlock(EditorJSBlock):
    """Converter registered for the 'thematicBreak' and 'delimiter' names."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Return a thematic break line; `data` is not inspected."""
        return "***\n"

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """Return a single 'delimiter' block with empty data."""
        delimiter = {"type": "delimiter", "data": {}}
        return [delimiter]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """A delimiter has no text; always returns an empty string."""
        return ""
338
-
339
-
340
@block("code")
class CodeBlock(EditorJSBlock):
    """Converter registered for the 'code' block name."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Wrap the block's 'code' in a fenced code block (no language tag)."""
        code = data.get("code", "")
        return f"```\n{code}\n```\n"

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """Return a single 'code' block carrying the node's text."""
        code_block = {"data": {"code": cls.to_text(node)}, "type": "code"}
        return [code_block]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Return the node's 'value', or an empty string when absent."""
        return node.get("value", "")
359
-
360
-
361
@block("image")
class ImageBlock(EditorJSBlock):
    """Converter registered for the 'image' block name."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """
        Render a Markdown image whose alt text and title are both the caption.

        The URL is taken from 'url', falling back to 'file.url'.
        """
        url = data.get("url", "")
        if not url:
            url = data.get("file", {}).get("url", "")
        caption = data.get("caption", "")
        return f"""![{caption}]({url} "{caption}")\n"""

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """Return a single 'image' block with the caption and file URL."""
        image = {
            "type": "image",
            "data": {
                "caption": cls.to_text(node),
                "file": {"url": node.get("url")},
            },
        }
        return [image]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Return the first non-empty of 'alt' and 'caption', else ""."""
        for key in ("alt", "caption"):
            if text := node.get(key):
                return text
        return ""
384
-
385
-
386
@block("blockquote", "quote")
class QuoteBlock(EditorJSBlock):
    """Converter registered for the 'blockquote' and 'quote' names."""

    # the caption travels through Markdown as an inline <cite> element
    re_cite = re.compile(r"<cite>(.+?)<\/cite>")

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Render the quote text, plus a '> <cite>...</cite>' line if captioned."""
        text = data.get("text", "")
        lines = [f"> {text}\n"]

        caption = data.get("caption", "")
        if caption:
            lines.append(f"> <cite>{caption}</cite>\n")

        return "".join(lines)

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Return a single left-aligned 'quote' block.

        Newlines in the text become '<br/>' line breaks. The first
        <cite>...</cite> supplies the caption, and every <cite> element is
        removed from the text.
        """
        text = cls.to_text(node).replace("\n", "<br/>\n")
        caption = ""

        cite = cls.re_cite.search(text)
        if cite:
            # Capture the value of the first group
            caption = cite.group(1)
            # Remove the <cite>...</cite> tags from the text
            text = cls.re_cite.sub("", text)

        quote = {
            "data": {
                "alignment": "left",
                "caption": caption,
                "text": text,
            },
            "type": "quote",
        }
        return [quote]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Render the node through default_to_text."""
        return default_to_text(node)
423
-
424
-
425
@block("raw")
class RawBlock(EditorJSBlock):
    """Converter registered for the 'raw' block name; only Markdown output exists."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Return the block's 'html' verbatim, or "" when absent."""
        html = data.get("html", "")
        return html

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """Not implemented; always raises TODO."""
        raise TODO(node)

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Not implemented; always raises TODO."""
        raise TODO(node)
439
-
440
-
441
@block("table")
class TableBlock(EditorJSBlock):
    """Converter registered for the 'table' block name (text-only tables)."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """
        Render the rows in 'content' as a pipe table, e.g.:

        | Script | Interpreter | User | System |   |
        |--------|-------------|------|--------|---|
        |        |             |      |        |   |

        When 'withHeadings' is false an empty header row is emitted first;
        otherwise the first content row is used as the header.
        """
        rows = data.get("content", [])
        with_headings = data.get("withHeadings", False)
        lines = []

        # Add an empty header row if no headings are provided
        if rows and not with_headings:
            width = len(rows[0])
            lines.append("| " + " | ".join([""] * width) + " |")
            lines.append("|" + " - |" * width)

        # Populate rows
        for idx, tr in enumerate(rows):
            lines.append("| " + " | ".join(map(str, tr)) + " |")

            # Add separator if headings are enabled and it's the first row
            if idx == 0 and with_headings:
                lines.append("|" + " - |" * len(tr))

        table = "".join(f"{line}\n" for line in lines)
        return f"\n{table}\n"

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Parse the pipe table in the node's 'value' into a 'table' block.

        Returns:
            A one-element list: {"type": "table", "data": {"content": rows,
            "withHeadings": bool}}, the same data layout to_markdown reads.
        """
        table = []
        with_headings = False

        # first row is headings or empty. If not empty, withHeadings is True
        # second row (the separator) must be ignored
        for idx, row in enumerate(node.get("value", "").strip().split("\n")):
            tr = [cell.strip() for cell in row.split("|")[1:-1]]
            if idx == 0:
                # An all-empty first row is the placeholder header that
                # to_markdown emits; dropping it keeps round-trips stable.
                if any(tr):
                    with_headings = True
                    table.append(tr)
            elif idx == 1:
                continue
            else:
                table.append(tr)

        return [
            {
                "type": "table",
                # payload lives under "data", like every other block type
                "data": {
                    "content": table,
                    "withHeadings": with_headings,
                },
            }
        ]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Not implemented; always raises TODO."""
        raise TODO(node)
504
-
505
-
506
@block("linkTool")
class LinkBlock(EditorJSBlock):
    """Converter registered for the 'linkTool' block name."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """
        Serialize the link preview as a custom ``<editorjs type="linkTool">`` tag.

        Attribute values are HTML-escaped so quotes or ampersands in the link,
        title or image URL cannot break the tag. The description is emitted
        as the tag body unchanged.
        """
        from html import escape

        meta = data.get("meta", {})
        link = escape(str(data.get("link", "")), quote=True)
        title = escape(str(meta.get("title", "")), quote=True)
        image = escape(str(meta.get("image", {}).get("url", "")), quote=True)
        description = meta.get("description", "")
        return f"""<editorjs type="linkTool" href="{link}" title="{title}" image="{image}">{description}</editorjs>"""

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Build a 'linkTool' block from parsed tag attributes.

        Args:
            node: Mapping with 'href', 'title', 'image' and 'body' entries;
                missing entries default to "".
        """
        return [
            {
                "type": "linkTool",
                "data": {
                    "link": node.get("href", ""),
                    "meta": {
                        "title": node.get("title", ""),
                        "description": node.get("body", ""),
                        "image": {
                            "url": node.get("image", ""),
                        },
                    },
                },
            }
        ]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """
        Render the link preview card as HTML.

        All interpolated values are HTML-escaped; the anchor text is the
        network location (domain) of the link.
        """
        from html import escape

        raw_url = str(node.get("href", ""))
        url = escape(raw_url, quote=True)
        image = escape(str(node.get("image", "")), quote=True)
        title = escape(str(node.get("title", "")), quote=False)
        body = escape(str(node.get("body", "")), quote=False)
        domain = escape(urlparse(raw_url).netloc, quote=False)

        return f"""
        <div class="link-tool">
            <a class="link-tool__content link-tool__content--rendered" target="_blank"
               rel="nofollow noindex noreferrer" href="{url}">
                <div class="link-tool__image"
                     style="background-image: url(&quot;{image}&quot;);"></div>
                <div class="link-tool__title">{title}</div>
                <p class="link-tool__description">{body}</p>
                <span class="link-tool__anchor">{domain}</span>
            </a>
        </div>
        """
555
-
556
-
557
@block("attaches")
class AttachmentBlock(EditorJSBlock):
    """Converter registered for the 'attaches' block name."""

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """
        Serialize the attachment as a custom ``<editorjs type="attaches">`` tag.

        The file URL is HTML-escaped so quotes or ampersands cannot break the
        attribute. The title is emitted as the tag body unchanged.
        """
        from html import escape

        file = escape(str(data.get("file", {}).get("url", "")), quote=True)
        title = data.get("title", "")
        return f"""<editorjs type="attaches" file="{file}">{title}</editorjs>"""

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Build an 'attaches' block from parsed tag attributes.

        Args:
            node: Mapping with 'file' and 'body' entries; missing entries
                default to "".
        """
        return [
            {
                "type": "attaches",
                "data": {
                    "file": {"url": node.get("file", "")},
                    "title": node.get("body", ""),
                },
            }
        ]

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Render the attachment card as HTML, escaping the title and file URL."""
        from html import escape

        title = escape(str(node.get("body", "")), quote=False)
        href = escape(str(node.get("file", "")), quote=True)

        return f"""
        <div class="cdx-attaches cdx-attaches--with-file">
            <div class="cdx-attaches__file-icon">
                <div class="cdx-attaches__file-icon-background">
                    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="none" viewBox="0 0 24 24"><path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13.3236 8.43554L9.49533 12.1908C9.13119 12.5505 8.93118 13.043 8.9393 13.5598C8.94741 14.0767 9.163 14.5757 9.53862 14.947C9.91424 15.3182 10.4191 15.5314 10.9422 15.5397C11.4653 15.5479 11.9637 15.3504 12.3279 14.9908L16.1562 11.2355C16.8845 10.5161 17.2845 9.53123 17.2682 8.4975C17.252 7.46376 16.8208 6.46583 16.0696 5.72324C15.3184 4.98066 14.3086 4.55425 13.2624 4.53782C12.2162 4.52138 11.2193 4.91627 10.4911 5.63562L6.66277 9.39093C5.57035 10.4699 4.97032 11.9473 4.99467 13.4979C5.01903 15.0485 5.66578 16.5454 6.79264 17.6592C7.9195 18.7731 9.43417 19.4127 11.0034 19.4374C12.5727 19.462 14.068 18.8697 15.1604 17.7907L18.9887 14.0354"></path></svg>
                </div>
            </div>
            <div class="cdx-attaches__file-info">
                <div class="cdx-attaches__title" data-placeholder="File title" data-empty="false">
                    {title}
                </div>
            </div>
            <a class="cdx-attaches__download-button" href="{href}" target="_blank" rel="nofollow noindex noreferrer">
                <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="none" viewBox="0 0 24 24"><path stroke="currentColor" stroke-linecap="round" stroke-width="2" d="M7 10L11.8586 14.8586C11.9367 14.9367 12.0633 14.9367 12.1414 14.8586L17 10"></path></svg>
            </a>
        </div>
        """
597
-
598
-
599
class AttributeParser(HTMLParser):
    """
    Extract the attributes of the first tag fed to it, plus all text content.

    After feeding:
        attributes: dict of the first start tag's attributes ({} if no tag).
        data: concatenation of every text chunk seen, or None if there was none.
    """

    def __init__(self):
        super().__init__()
        self.attributes = {}
        self.data = None
        # set once the first (outermost) tag has been recorded
        self._root_seen = False

    def handle_starttag(self, tag, attrs):
        # Only the first tag carries the attributes of interest; tags nested
        # in the body (e.g. <b>) must not overwrite them.
        if self._root_seen:
            return
        self._root_seen = True
        self.attributes = dict(attrs)

    def handle_data(self, data):
        # Text interrupted by nested tags arrives in several chunks; keep all.
        self.data = data if self.data is None else self.data + data
611
-
612
-
613
class EditorJSCustom(EditorJSBlock, markdown2.Extra):
    """
    Special type of block to deal with custom attributes.

    Handles ``<editorjs type="..." ...>body</editorjs>`` tags by delegating to
    the converter registered in ``BLOCKS`` under the tag's 'type'. It is also
    registered as a markdown2 extra (see ``run``).
    """

    name = "editorjs"
    order = (), (markdown2.Stage.POSTPROCESS,)

    @classmethod
    def parse_html(cls, html: str):
        """Return (attributes, text) of the given tag markup."""
        parser = AttributeParser()
        parser.feed(html)
        # close() forces processing of any text the parser is still buffering
        parser.close()

        return parser.attributes, parser.data

    @classmethod
    def _resolve(cls, html: str):
        """
        Parse tag markup and look up the converter for its 'type'.

        Returns:
            (handler, attrs) where attrs has a 'body' entry holding the tag's
            text ("" when there is none) unless a 'body' attribute was present.

        Raises:
            ValueError: If no converter is registered for the tag's type.
        """
        attrs, body = cls.parse_html(html)
        _type = attrs.get("type", "")
        attrs.setdefault("body", body or "")  # only if there is no such attribute yet

        if not (handler := BLOCKS.get(_type)):
            raise ValueError(f"Unknown custom type {_type}")

        return handler, attrs

    @classmethod
    def to_markdown(cls, data: EditorChildData) -> str:
        """Not implemented; always raises TODO."""
        raise TODO()

    @classmethod
    def to_json(cls, node: MDChildNode) -> list[dict]:
        """
        Convert the tag spelled out by the node's children into blocks.

        Args:
            node: Mapping whose 'children' values concatenate to the tag markup.

        Raises:
            ValueError: If the tag's type has no registered converter.
        """
        html = "".join(child.get("value", "") for child in node.get("children", []))
        handler, attrs = cls._resolve(html)
        return handler.to_json(attrs)

    @classmethod
    def to_text(cls, node: MDChildNode) -> str:
        """Not implemented; always raises TODO."""
        raise TODO()

    # markdown2:
    re_short = re.compile(r"<editorjs.*?/>")
    re_long = re.compile(r"<editorjs.*?>.*?</editorjs>")

    def run(self, text: str) -> str:
        """Replace every ``<editorjs>`` tag in `text` with its converter's to_text output."""

        def replace_html(match):
            handler, attrs = self._resolve(match.group())
            return handler.to_text(attrs)

        # Long form first, so an opening tag is consumed together with its
        # body before the self-closing pattern is tried.
        text = self.re_long.sub(replace_html, text)
        text = self.re_short.sub(replace_html, text)

        return text


EditorJSCustom.register()