docling 2.69.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling might be problematic. Click here for more details.

Files changed (138) hide show
  1. docling/__init__.py +0 -0
  2. docling/backend/__init__.py +0 -0
  3. docling/backend/abstract_backend.py +84 -0
  4. docling/backend/asciidoc_backend.py +443 -0
  5. docling/backend/csv_backend.py +125 -0
  6. docling/backend/docling_parse_backend.py +237 -0
  7. docling/backend/docling_parse_v2_backend.py +276 -0
  8. docling/backend/docling_parse_v4_backend.py +260 -0
  9. docling/backend/docx/__init__.py +0 -0
  10. docling/backend/docx/drawingml/utils.py +131 -0
  11. docling/backend/docx/latex/__init__.py +0 -0
  12. docling/backend/docx/latex/latex_dict.py +274 -0
  13. docling/backend/docx/latex/omml.py +459 -0
  14. docling/backend/html_backend.py +1502 -0
  15. docling/backend/image_backend.py +188 -0
  16. docling/backend/json/__init__.py +0 -0
  17. docling/backend/json/docling_json_backend.py +58 -0
  18. docling/backend/md_backend.py +618 -0
  19. docling/backend/mets_gbs_backend.py +399 -0
  20. docling/backend/msexcel_backend.py +686 -0
  21. docling/backend/mspowerpoint_backend.py +398 -0
  22. docling/backend/msword_backend.py +1663 -0
  23. docling/backend/noop_backend.py +51 -0
  24. docling/backend/pdf_backend.py +82 -0
  25. docling/backend/pypdfium2_backend.py +417 -0
  26. docling/backend/webvtt_backend.py +572 -0
  27. docling/backend/xml/__init__.py +0 -0
  28. docling/backend/xml/jats_backend.py +819 -0
  29. docling/backend/xml/uspto_backend.py +1905 -0
  30. docling/chunking/__init__.py +12 -0
  31. docling/cli/__init__.py +0 -0
  32. docling/cli/main.py +974 -0
  33. docling/cli/models.py +196 -0
  34. docling/cli/tools.py +17 -0
  35. docling/datamodel/__init__.py +0 -0
  36. docling/datamodel/accelerator_options.py +69 -0
  37. docling/datamodel/asr_model_specs.py +494 -0
  38. docling/datamodel/backend_options.py +102 -0
  39. docling/datamodel/base_models.py +493 -0
  40. docling/datamodel/document.py +699 -0
  41. docling/datamodel/extraction.py +39 -0
  42. docling/datamodel/layout_model_specs.py +91 -0
  43. docling/datamodel/pipeline_options.py +457 -0
  44. docling/datamodel/pipeline_options_asr_model.py +78 -0
  45. docling/datamodel/pipeline_options_vlm_model.py +136 -0
  46. docling/datamodel/settings.py +65 -0
  47. docling/datamodel/vlm_model_specs.py +365 -0
  48. docling/document_converter.py +559 -0
  49. docling/document_extractor.py +327 -0
  50. docling/exceptions.py +10 -0
  51. docling/experimental/__init__.py +5 -0
  52. docling/experimental/datamodel/__init__.py +1 -0
  53. docling/experimental/datamodel/table_crops_layout_options.py +13 -0
  54. docling/experimental/datamodel/threaded_layout_vlm_pipeline_options.py +45 -0
  55. docling/experimental/models/__init__.py +3 -0
  56. docling/experimental/models/table_crops_layout_model.py +114 -0
  57. docling/experimental/pipeline/__init__.py +1 -0
  58. docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +439 -0
  59. docling/models/__init__.py +0 -0
  60. docling/models/base_layout_model.py +39 -0
  61. docling/models/base_model.py +230 -0
  62. docling/models/base_ocr_model.py +241 -0
  63. docling/models/base_table_model.py +45 -0
  64. docling/models/extraction/__init__.py +0 -0
  65. docling/models/extraction/nuextract_transformers_model.py +305 -0
  66. docling/models/factories/__init__.py +47 -0
  67. docling/models/factories/base_factory.py +122 -0
  68. docling/models/factories/layout_factory.py +7 -0
  69. docling/models/factories/ocr_factory.py +11 -0
  70. docling/models/factories/picture_description_factory.py +11 -0
  71. docling/models/factories/table_factory.py +7 -0
  72. docling/models/picture_description_base_model.py +149 -0
  73. docling/models/plugins/__init__.py +0 -0
  74. docling/models/plugins/defaults.py +60 -0
  75. docling/models/stages/__init__.py +0 -0
  76. docling/models/stages/code_formula/__init__.py +0 -0
  77. docling/models/stages/code_formula/code_formula_model.py +342 -0
  78. docling/models/stages/layout/__init__.py +0 -0
  79. docling/models/stages/layout/layout_model.py +249 -0
  80. docling/models/stages/ocr/__init__.py +0 -0
  81. docling/models/stages/ocr/auto_ocr_model.py +132 -0
  82. docling/models/stages/ocr/easyocr_model.py +200 -0
  83. docling/models/stages/ocr/ocr_mac_model.py +145 -0
  84. docling/models/stages/ocr/rapid_ocr_model.py +328 -0
  85. docling/models/stages/ocr/tesseract_ocr_cli_model.py +331 -0
  86. docling/models/stages/ocr/tesseract_ocr_model.py +262 -0
  87. docling/models/stages/page_assemble/__init__.py +0 -0
  88. docling/models/stages/page_assemble/page_assemble_model.py +156 -0
  89. docling/models/stages/page_preprocessing/__init__.py +0 -0
  90. docling/models/stages/page_preprocessing/page_preprocessing_model.py +145 -0
  91. docling/models/stages/picture_classifier/__init__.py +0 -0
  92. docling/models/stages/picture_classifier/document_picture_classifier.py +246 -0
  93. docling/models/stages/picture_description/__init__.py +0 -0
  94. docling/models/stages/picture_description/picture_description_api_model.py +66 -0
  95. docling/models/stages/picture_description/picture_description_vlm_model.py +123 -0
  96. docling/models/stages/reading_order/__init__.py +0 -0
  97. docling/models/stages/reading_order/readingorder_model.py +431 -0
  98. docling/models/stages/table_structure/__init__.py +0 -0
  99. docling/models/stages/table_structure/table_structure_model.py +305 -0
  100. docling/models/utils/__init__.py +0 -0
  101. docling/models/utils/generation_utils.py +157 -0
  102. docling/models/utils/hf_model_download.py +45 -0
  103. docling/models/vlm_pipeline_models/__init__.py +1 -0
  104. docling/models/vlm_pipeline_models/api_vlm_model.py +180 -0
  105. docling/models/vlm_pipeline_models/hf_transformers_model.py +391 -0
  106. docling/models/vlm_pipeline_models/mlx_model.py +325 -0
  107. docling/models/vlm_pipeline_models/vllm_model.py +344 -0
  108. docling/pipeline/__init__.py +0 -0
  109. docling/pipeline/asr_pipeline.py +431 -0
  110. docling/pipeline/base_extraction_pipeline.py +72 -0
  111. docling/pipeline/base_pipeline.py +326 -0
  112. docling/pipeline/extraction_vlm_pipeline.py +207 -0
  113. docling/pipeline/legacy_standard_pdf_pipeline.py +262 -0
  114. docling/pipeline/simple_pipeline.py +55 -0
  115. docling/pipeline/standard_pdf_pipeline.py +859 -0
  116. docling/pipeline/threaded_standard_pdf_pipeline.py +5 -0
  117. docling/pipeline/vlm_pipeline.py +416 -0
  118. docling/py.typed +1 -0
  119. docling/utils/__init__.py +0 -0
  120. docling/utils/accelerator_utils.py +97 -0
  121. docling/utils/api_image_request.py +205 -0
  122. docling/utils/deepseekocr_utils.py +388 -0
  123. docling/utils/export.py +146 -0
  124. docling/utils/glm_utils.py +361 -0
  125. docling/utils/layout_postprocessor.py +683 -0
  126. docling/utils/locks.py +3 -0
  127. docling/utils/model_downloader.py +168 -0
  128. docling/utils/ocr_utils.py +69 -0
  129. docling/utils/orientation.py +65 -0
  130. docling/utils/profiling.py +65 -0
  131. docling/utils/utils.py +65 -0
  132. docling/utils/visualization.py +85 -0
  133. docling-2.69.0.dist-info/METADATA +237 -0
  134. docling-2.69.0.dist-info/RECORD +138 -0
  135. docling-2.69.0.dist-info/WHEEL +5 -0
  136. docling-2.69.0.dist-info/entry_points.txt +6 -0
  137. docling-2.69.0.dist-info/licenses/LICENSE +21 -0
  138. docling-2.69.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,260 @@
1
+ import logging
2
+ from collections.abc import Iterable
3
+ from io import BytesIO
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING, Optional, Union
6
+
7
+ import pypdfium2 as pdfium
8
+ from docling_core.types.doc import BoundingBox, CoordOrigin
9
+ from docling_core.types.doc.page import SegmentedPdfPage, TextCell
10
+ from docling_parse.pdf_parser import DoclingPdfParser, PdfDocument
11
+ from PIL import Image
12
+ from pypdfium2 import PdfPage
13
+
14
+ from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
15
+ from docling.datamodel.backend_options import PdfBackendOptions
16
+ from docling.datamodel.base_models import Size
17
+ from docling.utils.locks import pypdfium2_lock
18
+
19
+ if TYPE_CHECKING:
20
+ from docling.datamodel.document import InputDocument
21
+
22
+ _log = logging.getLogger(__name__)
23
+
24
+
25
class DoclingParseV4PageBackend(PdfPageBackend):
    """Single-page backend backed by docling-parse and pypdfium2.

    Text cells and bitmap resources are read from the docling-parse document
    (parsed lazily on first access and cached); page size and rendered images
    are read from the pypdfium2 page object under ``pypdfium2_lock``.
    """

    def __init__(
        self,
        *,
        dp_doc: PdfDocument,
        page_obj: PdfPage,
        page_no: int,
        create_words: bool = True,
        create_textlines: bool = True,
        keep_chars: bool = False,
        keep_lines: bool = False,
        keep_images: bool = True,
    ):
        """Store the page handles and parsing flags; no parsing happens here.

        Args:
            dp_doc: docling-parse document the page is read from.
            page_obj: pypdfium2 page used for size queries and rendering.
            page_no: Page index; ``page_no + 1`` is what gets passed to
                docling-parse.
            create_words: Forwarded to ``dp_doc.get_page``.
            create_textlines: Forwarded to ``dp_doc.get_page``.
            keep_chars: Forwarded to ``dp_doc.get_page``.
            keep_lines: Forwarded to ``dp_doc.get_page``.
            keep_images: Forwarded to ``dp_doc.get_page`` as ``keep_bitmaps``.
        """
        self._ppage = page_obj
        self._dp_doc = dp_doc
        self._page_no = page_no

        self._create_words = create_words
        self._create_textlines = create_textlines

        self._keep_chars = keep_chars
        self._keep_lines = keep_lines
        self._keep_images = keep_images

        # Filled in by _ensure_parsed() on first use.
        self._dpage: Optional[SegmentedPdfPage] = None
        self._unloaded = False
        self.valid = (self._ppage is not None) and (self._dp_doc is not None)

    def _ensure_parsed(self) -> None:
        """Parse the page with docling-parse once and cache it in ``_dpage``."""
        if self._dpage is not None:
            return

        seg_page = self._dp_doc.get_page(
            self._page_no + 1,
            keep_chars=self._keep_chars,
            keep_lines=self._keep_lines,
            keep_bitmaps=self._keep_images,
            create_words=self._create_words,
            create_textlines=self._create_textlines,
            enforce_same_font=True,
        )

        # In Docling, all TextCell instances are expected with top-left origin.
        # The conversion is invoked purely for its effect on each cell (its
        # return value is never used), so plain loops are used instead of
        # building throwaway lists.
        page_height = seg_page.dimension.height
        for cells in (
            seg_page.textline_cells,
            seg_page.char_cells,
            seg_page.word_cells,
        ):
            for cell in cells:
                cell.to_top_left_origin(page_height)

        self._dpage = seg_page

    def is_valid(self) -> bool:
        """Return whether both underlying page handles were provided."""
        return self.valid

    def get_text_in_rect(self, bbox: BoundingBox) -> str:
        """Return the space-joined text of text lines lying mostly in ``bbox``.

        A text line is included when more than half of its own area
        (``intersection_over_self``) overlaps ``bbox``.
        """
        self._ensure_parsed()
        assert self._dpage is not None

        # Find intersecting cells on the page
        text_piece = ""
        page_size = self.get_size()

        scale = (
            1  # FIX - Replace with param in get_text_in_rect across backends (optional)
        )

        for cell in self._dpage.textline_cells:
            cell_bbox = (
                cell.rect.to_bounding_box()
                .to_top_left_origin(page_height=page_size.height)
                .scaled(scale)
            )

            overlap_frac = cell_bbox.intersection_over_self(bbox)

            if overlap_frac > 0.5:
                if len(text_piece) > 0:
                    text_piece += " "
                text_piece += cell.text

        return text_piece

    def get_segmented_page(self) -> Optional[SegmentedPdfPage]:
        """Return the parsed docling-parse page, parsing it first if needed."""
        self._ensure_parsed()
        return self._dpage

    def get_text_cells(self) -> Iterable[TextCell]:
        """Return the text-line cells of the parsed page."""
        self._ensure_parsed()
        assert self._dpage is not None

        return self._dpage.textline_cells

    def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
        """Yield the scaled, top-left-origin bounding box of each bitmap.

        Args:
            scale: Factor applied to every yielded bounding box.
        """
        self._ensure_parsed()
        assert self._dpage is not None

        AREA_THRESHOLD = 0  # 32 * 32

        # The page height is loop-invariant; query it once rather than taking
        # the pypdfium2 lock for every image.
        page_height = self.get_size().height

        for img in self._dpage.bitmap_resources:
            cropbox = img.rect.to_bounding_box().to_top_left_origin(page_height)

            if cropbox.area() > AREA_THRESHOLD:
                yield cropbox.scaled(scale=scale)

    def get_page_image(
        self, scale: float = 1, cropbox: Optional[BoundingBox] = None
    ) -> Image.Image:
        """Render the page, or a cropped region of it, as a PIL image.

        Args:
            scale: Output scale; the result is ``cropbox`` size times ``scale``.
            cropbox: Region to render; the full page when ``None``.

        Returns:
            The rendered image, rasterised at 1.5x ``scale`` and resized down.
        """
        page_size = self.get_size()

        if cropbox is None:
            cropbox = BoundingBox(
                l=0,
                r=page_size.width,
                t=0,
                b=page_size.height,
                coord_origin=CoordOrigin.TOPLEFT,
            )
            padbox = BoundingBox(
                l=0, r=0, t=0, b=0, coord_origin=CoordOrigin.BOTTOMLEFT
            )
        else:
            # Turn the crop box into per-edge distances from the page border:
            # l and b already are, r and t are measured from the far edges.
            padbox = cropbox.to_bottom_left_origin(page_size.height).model_copy()
            padbox.r = page_size.width - padbox.r
            padbox.t = page_size.height - padbox.t

        with pypdfium2_lock:
            image = (
                self._ppage.render(
                    scale=scale * 1.5,
                    rotation=0,  # no additional rotation
                    crop=padbox.as_tuple(),
                )
                .to_pil()
                .resize(
                    size=(round(cropbox.width * scale), round(cropbox.height * scale))
                )
            )  # We resize the image from 1.5x the given scale to make it sharper.

        return image

    def get_size(self) -> Size:
        """Return the page width and height as reported by pypdfium2."""
        with pypdfium2_lock:
            return Size(width=self._ppage.get_width(), height=self._ppage.get_height())

        # TODO: Take width and height from docling-parse.
        # return Size(
        #     width=self._dpage.dimension.width,
        #     height=self._dpage.dimension.height,
        # )

    def unload(self):
        """Release the docling-parse page (once) and drop all page references."""
        if not self._unloaded and self._dp_doc is not None:
            # NOTE(review): presumably a half-open page range covering only
            # this page - confirm against the docling-parse API.
            self._dp_doc.unload_pages((self._page_no + 1, self._page_no + 2))
            self._unloaded = True

        self._ppage = None
        self._dpage = None
        self._dp_doc = None
191
+
192
class DoclingParseV4DocumentBackend(PdfDocumentBackend):
    """PDF document backend that opens the input with pypdfium2 and docling-parse.

    Both handles are kept for the lifetime of the backend and handed to every
    ``DoclingParseV4PageBackend`` created by ``load_page``.
    """

    def __init__(
        self,
        in_doc: "InputDocument",
        path_or_stream: Union[BytesIO, Path],
        options: PdfBackendOptions = PdfBackendOptions(),
    ):
        """Open the document with both libraries.

        Args:
            in_doc: Input document descriptor, forwarded to the base class.
            path_or_stream: PDF location or in-memory stream.
            options: Backend options; ``options.password`` (if set) is used to
                open the document in both libraries.

        Raises:
            RuntimeError: If docling-parse returns no document.
        """
        super().__init__(in_doc, path_or_stream, options)

        password = (
            self.options.password.get_secret_value() if self.options.password else None
        )
        with pypdfium2_lock:
            self._pdoc = pdfium.PdfDocument(self.path_or_stream, password=password)
        self.parser = DoclingPdfParser(loglevel="fatal")

        # If docling-parse cannot load the document, the pypdfium2 handle
        # opened above must be closed before the error propagates; otherwise
        # it leaks, because no caller ever receives this backend to unload it.
        try:
            self.dp_doc: PdfDocument = self.parser.load(
                path_or_stream=self.path_or_stream, password=password
            )
        except Exception:
            self._close_pdfium_document()
            raise

        if self.dp_doc is None:
            self._close_pdfium_document()
            raise RuntimeError(
                f"docling-parse v4 could not load document {self.document_hash}."
            )

    def _close_pdfium_document(self) -> None:
        """Close the pypdfium2 document under the lock, tolerating errors."""
        pdoc = getattr(self, "_pdoc", None)
        if pdoc is None:
            return

        with pypdfium2_lock:
            try:
                pdoc.close()
            except Exception:
                # Cleanup is best-effort, but leave a trace for debugging.
                _log.debug(
                    "Ignoring error while closing pypdfium2 document",
                    exc_info=True,
                )
        self._pdoc = None

    def page_count(self) -> int:
        """Return the page count reported by docling-parse.

        Logs an error when pypdfium2 reports a different number of pages.
        """
        # return len(self._pdoc)  # To be replaced with docling-parse API

        len_1 = len(self._pdoc)
        len_2 = self.dp_doc.number_of_pages()

        if len_1 != len_2:
            _log.error("Inconsistent number of pages: %s!=%s", len_1, len_2)

        return len_2

    def load_page(
        self, page_no: int, create_words: bool = True, create_textlines: bool = True
    ) -> DoclingParseV4PageBackend:
        """Create the page backend for ``page_no``.

        Args:
            page_no: Index used to fetch the page from the pypdfium2 document.
            create_words: Forwarded to the page backend.
            create_textlines: Forwarded to the page backend.
        """
        with pypdfium2_lock:
            ppage = self._pdoc[page_no]

        return DoclingParseV4PageBackend(
            dp_doc=self.dp_doc,
            page_obj=ppage,
            page_no=page_no,
            create_words=create_words,
            create_textlines=create_textlines,
        )

    def is_valid(self) -> bool:
        """Return whether the document has at least one page."""
        return self.page_count() > 0

    def unload(self):
        """Release the docling-parse document, then close the pypdfium2 one."""
        super().unload()
        # Unload docling-parse document first
        if self.dp_doc is not None:
            self.dp_doc.unload()
            self.dp_doc = None

        # Then close pypdfium2 document with proper locking
        self._close_pdfium_document()
File without changes
@@ -0,0 +1,131 @@
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ from pathlib import Path
5
+ from tempfile import mkdtemp
6
+ from typing import Callable, Optional
7
+
8
+ import pypdfium2
9
+ from docx.document import Document
10
+ from PIL import Image, ImageChops
11
+
12
+
13
def get_libreoffice_cmd(raise_if_unavailable: bool = False) -> Optional[str]:
    """Locate the LibreOffice executable, optionally verifying that it runs.

    The lookup tries ``libreoffice`` and ``soffice`` on ``PATH`` and then the
    default macOS application bundle location.

    Args:
        raise_if_unavailable: When true, raise instead of returning ``None``
            and additionally run the command with ``-h`` as a smoke test.

    Returns:
        The command path, or ``None`` when nothing was found and
        ``raise_if_unavailable`` is false.

    Raises:
        RuntimeError: If no command was found and ``raise_if_unavailable`` is
            true.
        subprocess.CalledProcessError: If the smoke test exits non-zero.
    """
    macos_bundle_cmd = "/Applications/LibreOffice.app/Contents/MacOS/soffice"

    found_cmd = shutil.which("libreoffice")
    if not found_cmd:
        found_cmd = shutil.which("soffice")
    if not found_cmd:
        found_cmd = macos_bundle_cmd if os.path.isfile(macos_bundle_cmd) else None

    if not raise_if_unavailable:
        return found_cmd

    if found_cmd is None:
        raise RuntimeError("Libreoffice not found")

    # The following test will raise if the command cannot be used
    subprocess.run(
        [found_cmd, "-h"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    return found_cmd
42
+
43
+
44
def get_docx_to_pdf_converter() -> Optional[Callable]:
    """
    Detects the best available DOCX to PDF tool and returns a conversion function.
    The returned function accepts (input_path, output_path).
    Returns None if no tool is available.
    """

    # Try LibreOffice
    libreoffice_cmd = get_libreoffice_cmd()

    if libreoffice_cmd:

        def convert_with_libreoffice(input_path, output_path):
            """Convert ``input_path`` to PDF and place the result at ``output_path``."""
            # Normalise to plain strings: callers may pass pathlib.Path
            # objects, and a str never compares equal to a Path, which made
            # the "is a rename needed?" check below always true.
            input_path = os.fspath(input_path)
            output_path = os.fspath(output_path)
            output_dir = os.path.dirname(output_path)

            subprocess.run(
                [
                    libreoffice_cmd,
                    "--headless",
                    "--convert-to",
                    "pdf",
                    "--outdir",
                    output_dir,
                    input_path,
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )

            # LibreOffice names the output after the input file's stem.
            expected_output = os.path.join(
                output_dir,
                os.path.splitext(os.path.basename(input_path))[0] + ".pdf",
            )
            if expected_output != output_path:
                # os.replace overwrites an existing destination on every
                # platform, whereas os.rename fails on Windows in that case.
                os.replace(expected_output, output_path)

        return convert_with_libreoffice

    ## Space for other DOCX to PDF converters if available

    # No tools found
    return None
85
+
86
+
87
def crop_whitespace(image: Image.Image, bg_color=None, padding=0) -> Image.Image:
    """Crop away the uniform background surrounding the content of ``image``.

    Args:
        image: Image to crop.
        bg_color: Background colour; defaults to the top-left pixel's colour.
        padding: Number of pixels to keep around the detected content,
            clamped to the image bounds.

    Returns:
        The cropped image, or ``image`` itself when no pixel differs from the
        background.
    """
    if bg_color is None:
        bg_color = image.getpixel((0, 0))

    # Pixels equal to the background become zero in the difference image, so
    # its bounding box is the bounding box of the actual content.
    background = Image.new(image.mode, image.size, bg_color)
    content_box = ImageChops.difference(image, background).getbbox()
    if not content_box:
        return image

    left, upper, right, lower = content_box
    crop_region = (
        max(0, left - padding),
        max(0, upper - padding),
        min(image.width, right + padding),
        min(image.height, lower + padding),
    )
    return image.crop(crop_region)
104
+
105
+
106
def get_pil_from_dml_docx(
    docx: Document, converter: Optional[Callable]
) -> Optional[Image.Image]:
    """Render the first page of ``docx`` to a whitespace-cropped PIL image.

    The document is saved to a temporary directory, converted to PDF with
    ``converter`` and its first page is rasterised at scale 2.

    Args:
        docx: Document to render.
        converter: Callable taking ``(input_path, output_path)`` that writes a
            PDF; when ``None`` no rendering is attempted.

    Returns:
        The cropped page image, or ``None`` when ``converter`` is ``None``.
    """
    if converter is None:
        return None

    temp_dir = Path(mkdtemp())
    try:
        temp_docx = temp_dir / "drawing_only.docx"
        temp_pdf = temp_dir / "drawing_only.pdf"

        # 1) Save docx temporarily
        docx.save(str(temp_docx))

        # 2) Export to PDF
        converter(temp_docx, temp_pdf)

        # 3) Load PDF as PNG
        pdf = pypdfium2.PdfDocument(temp_pdf)
        try:
            page = pdf[0]
            try:
                image = crop_whitespace(page.render(scale=2).to_pil())
            finally:
                page.close()
        finally:
            pdf.close()
    finally:
        # Always remove the scratch directory, including when the conversion
        # or rendering fails, so failed runs do not accumulate temp files.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return image
File without changes
@@ -0,0 +1,274 @@
1
+ """
2
+ Adapted from https://github.com/xiilei/dwml/blob/master/dwml/latex_dict.py
3
+ On 23/01/2025
4
+ """
5
+
6
# Characters with a special meaning in LaTeX.
CHARS = ("{", "}", "_", "^", "#", "&", "$", "%", "~")

BLANK = ""
BACKSLASH = "\\"
ALN = "&"

# Accent / grouping characters mapped to LaTeX commands. Every value is a
# str.format template with one positional field; "{{" and "}}" are literal
# braces, so each template must contain exactly "{{{0}}}" - a stray brace
# makes str.format raise ValueError.
CHR = {
    # Unicode : Latex Math Symbols
    # Top accents
    "\u0300": "\\grave{{{0}}}",
    "\u0301": "\\acute{{{0}}}",
    "\u0302": "\\hat{{{0}}}",
    "\u0303": "\\tilde{{{0}}}",
    "\u0304": "\\bar{{{0}}}",
    "\u0305": "\\overbar{{{0}}}",
    "\u0306": "\\breve{{{0}}}",
    "\u0307": "\\dot{{{0}}}",
    "\u0308": "\\ddot{{{0}}}",
    "\u0309": "\\ovhook{{{0}}}",
    "\u030a": "\\ocirc{{{0}}}",
    "\u030c": "\\check{{{0}}}",
    "\u0310": "\\candra{{{0}}}",
    "\u0312": "\\oturnedcomma{{{0}}}",
    "\u0315": "\\ocommatopright{{{0}}}",
    "\u031a": "\\droang{{{0}}}",
    "\u0338": "\\not{{{0}}}",
    "\u20d0": "\\leftharpoonaccent{{{0}}}",
    "\u20d1": "\\rightharpoonaccent{{{0}}}",
    "\u20d2": "\\vertoverlay{{{0}}}",
    "\u20d6": "\\overleftarrow{{{0}}}",
    "\u20d7": "\\vec{{{0}}}",
    "\u20db": "\\dddot{{{0}}}",
    "\u20dc": "\\ddddot{{{0}}}",
    "\u20e1": "\\overleftrightarrow{{{0}}}",
    "\u20e7": "\\annuity{{{0}}}",
    "\u20e9": "\\widebridgeabove{{{0}}}",
    "\u20f0": "\\asteraccent{{{0}}}",
    # Bottom accents
    "\u0330": "\\wideutilde{{{0}}}",
    "\u0331": "\\underbar{{{0}}}",
    "\u20e8": "\\threeunderdot{{{0}}}",
    "\u20ec": "\\underrightharpoondown{{{0}}}",
    "\u20ed": "\\underleftharpoondown{{{0}}}",
    "\u20ee": "\\underleftarrow{{{0}}}",
    "\u20ef": "\\underrightarrow{{{0}}}",
    # Over | group
    "\u23b4": "\\overbracket{{{0}}}",
    "\u23dc": "\\overparen{{{0}}}",
    "\u23de": "\\overbrace{{{0}}}",
    # Under| group
    "\u23b5": "\\underbracket{{{0}}}",
    "\u23dd": "\\underparen{{{0}}}",
    "\u23df": "\\underbrace{{{0}}}",
}
60
+
61
# Big (n-ary) operator characters and the LaTeX command emitted for each.
CHR_BO = {
    "\u2140": r"\Bbbsum",  # double-struck n-ary summation
    "\u220f": r"\prod",  # n-ary product
    "\u2210": r"\coprod",  # n-ary coproduct
    "\u2211": r"\sum",  # n-ary summation
    "\u222b": r"\int",  # integral
    "\u222c": r"\iint",  # double integral
    "\u222d": r"\iiint",  # triple integral
    "\u222e": r"\oint",  # contour integral
    "\u222f": r"\oiint",  # surface integral
    "\u2230": r"\oiiint",  # volume integral
    "\u22c0": r"\bigwedge",  # n-ary logical and
    "\u22c1": r"\bigvee",  # n-ary logical or
    "\u22c2": r"\bigcap",  # n-ary intersection
    "\u22c3": r"\bigcup",  # n-ary union
    "\u2a00": r"\bigodot",  # n-ary circled dot operator
    "\u2a01": r"\bigoplus",  # n-ary circled plus operator
    "\u2a02": r"\bigotimes",  # n-ary circled times operator
}
81
+
82
# Single-character translation table: Unicode math character -> LaTeX text.
# Command values end with a space so that following text is not absorbed into
# the command name.
T = {
    # Greek letters (mathematical italic small alpha .. pi symbol, in
    # code-point order)
    "\U0001d6fc": "\\alpha ",
    "\U0001d6fd": "\\beta ",
    "\U0001d6fe": "\\gamma ",
    "\U0001d6ff": "\\delta ",
    "\U0001d700": "\\epsilon ",
    "\U0001d701": "\\zeta ",
    "\U0001d702": "\\eta ",
    "\U0001d703": "\\theta ",
    "\U0001d704": "\\iota ",
    "\U0001d705": "\\kappa ",
    "\U0001d706": "\\lambda ",
    "\U0001d707": "\\mu ",
    "\U0001d708": "\\nu ",
    "\U0001d709": "\\xi ",
    "\U0001d70a": "\\omicron ",
    "\U0001d70b": "\\pi ",
    "\U0001d70c": "\\rho ",
    "\U0001d70d": "\\varsigma ",
    "\U0001d70e": "\\sigma ",
    "\U0001d70f": "\\tau ",
    "\U0001d710": "\\upsilon ",
    "\U0001d711": "\\phi ",
    "\U0001d712": "\\chi ",
    "\U0001d713": "\\psi ",
    "\U0001d714": "\\omega ",
    "\U0001d715": "\\partial ",
    "\U0001d716": "\\varepsilon ",
    "\U0001d717": "\\vartheta ",
    "\U0001d718": "\\varkappa ",
    "\U0001d719": "\\varphi ",
    "\U0001d71a": "\\varrho ",
    "\U0001d71b": "\\varpi ",
    # Relation symbols
    "\u2190": "\\leftarrow ",
    "\u2191": "\\uparrow ",
    "\u2192": "\\rightarrow ",
    "\u2193": "\\downarrow ",
    "\u2194": "\\leftrightarrow ",
    "\u2195": "\\updownarrow ",
    "\u2196": "\\nwarrow ",
    "\u2197": "\\nearrow ",
    "\u2198": "\\searrow ",
    "\u2199": "\\swarrow ",
    "\u22ee": "\\vdots ",
    "\u22ef": "\\cdots ",
    "\u22f0": "\\adots ",
    "\u22f1": "\\ddots ",
    "\u2260": "\\ne ",
    "\u2264": "\\leq ",
    "\u2265": "\\geq ",
    "\u2266": "\\leqq ",
    "\u2267": "\\geqq ",
    "\u2268": "\\lneqq ",
    "\u2269": "\\gneqq ",
    "\u226a": "\\ll ",
    "\u226b": "\\gg ",
    "\u2208": "\\in ",
    "\u2209": "\\notin ",
    "\u220b": "\\ni ",
    "\u220c": "\\nni ",
    # Ordinary symbols
    "\u221e": "\\infty ",
    # Binary relations
    "\u00b1": "\\pm ",
    "\u2213": "\\mp ",
    # Italic, Latin, uppercase: U+1D434 .. U+1D44D map to "A" .. "Z"
    **{chr(0x1D434 + offset): chr(ord("A") + offset) for offset in range(26)},
    # Italic, Latin, lowercase: U+1D44E .. U+1D467 map to "a" .. "z".
    # U+1D455 (where "h" would fall) is intentionally left unmapped, exactly
    # as before; Unicode encodes italic h as U+210E instead.
    **{
        chr(0x1D44E + offset): chr(ord("a") + offset)
        for offset in range(26)
        if 0x1D44E + offset != 0x1D455
    },
}
203
+
204
# Named functions: every entry wraps its argument in parentheses except
# "mod", which is followed by a space and the bare argument. The values use
# the literal placeholder "{fe}" (see FUNC_PLACE).
FUNC = {
    func_name: (
        "\\mod {fe}" if func_name == "mod" else "\\" + func_name + "({fe})"
    )
    for func_name in (
        "sin",
        "cos",
        "tan",
        "arcsin",
        "arccos",
        "arctan",
        "arccot",
        "sinh",
        "cosh",
        "tanh",
        "coth",
        "sec",
        "csc",
        "mod",
        "max",
        "min",
    )
}

# Placeholder text used inside the FUNC values.
FUNC_PLACE = "{fe}"

# LaTeX line break (two backslashes).
BRK = r"\\"

# Fallback accent template.
CHR_DEFAULT = {"ACC_VAL": r"\hat{{{0}}}"}

# Bar position -> template.
POS = {
    "top": r"\overline{{{0}}}",  # not sure
    "bot": r"\underline{{{0}}}",
}

# Fallback bar template.
POS_DEFAULT = {"BAR_VAL": r"\overline{{{0}}}"}

# Subscript / superscript templates.
SUB = "_{{{0}}}"
SUP = "^{{{0}}}"

# Fraction type -> template with "num" and "den" fields.
F = {
    "bar": r"\frac{{{num}}}{{{den}}}",
    "skw": "^{{{num}}}/_{{{den}}}",
    "noBar": r"\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
    "lin": "{{{num}}}/{{{den}}}",
}
F_DEFAULT = r"\frac{{{num}}}{{{den}}}"

# Delimiter template and its default delimiters.
D = r"\left{left}{text}\right{right}"
D_DEFAULT = dict(left="(", right=")", null=".")

# Radical templates, with and without an explicit degree.
RAD = r"\sqrt[{deg}]{{{text}}}"
RAD_DEFAULT = r"\sqrt{{{text}}}"
ARR = "{text}"

# Limit-style functions: the "lim" field becomes the subscript.
LIM_FUNC = {
    func_name: "\\" + func_name + "_{{{lim}}}"
    for func_name in ("lim", "max", "min")
}

LIM_TO = (r"\rightarrow", r"\to")

LIM_UPP = r"\overset{{{lim}}}{{{text}}}"

# Matrix environment wrapper.
M = r"\begin{{matrix}}{text}\end{{matrix}}"