novelWriter 2.5.2__py3-none-any.whl → 2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/METADATA +5 -4
  2. {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/RECORD +126 -105
  3. {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/WHEEL +1 -1
  4. novelwriter/__init__.py +50 -11
  5. novelwriter/assets/i18n/nw_de_DE.qm +0 -0
  6. novelwriter/assets/i18n/nw_en_US.qm +0 -0
  7. novelwriter/assets/i18n/nw_es_419.qm +0 -0
  8. novelwriter/assets/i18n/nw_fr_FR.qm +0 -0
  9. novelwriter/assets/i18n/nw_it_IT.qm +0 -0
  10. novelwriter/assets/i18n/nw_ja_JP.qm +0 -0
  11. novelwriter/assets/i18n/nw_nb_NO.qm +0 -0
  12. novelwriter/assets/i18n/nw_nl_NL.qm +0 -0
  13. novelwriter/assets/i18n/nw_pl_PL.qm +0 -0
  14. novelwriter/assets/i18n/nw_pt_BR.qm +0 -0
  15. novelwriter/assets/i18n/nw_ru_RU.qm +0 -0
  16. novelwriter/assets/i18n/nw_zh_CN.qm +0 -0
  17. novelwriter/assets/i18n/project_de_DE.json +4 -2
  18. novelwriter/assets/i18n/project_en_GB.json +1 -0
  19. novelwriter/assets/i18n/project_en_US.json +2 -0
  20. novelwriter/assets/i18n/project_it_IT.json +2 -0
  21. novelwriter/assets/i18n/project_ja_JP.json +2 -0
  22. novelwriter/assets/i18n/project_nb_NO.json +2 -0
  23. novelwriter/assets/i18n/project_nl_NL.json +2 -0
  24. novelwriter/assets/i18n/project_pl_PL.json +2 -0
  25. novelwriter/assets/i18n/project_pt_BR.json +2 -0
  26. novelwriter/assets/i18n/project_ru_RU.json +11 -0
  27. novelwriter/assets/i18n/project_zh_CN.json +2 -0
  28. novelwriter/assets/icons/typicons_dark/icons.conf +8 -0
  29. novelwriter/assets/icons/typicons_dark/mixed_copy.svg +4 -0
  30. novelwriter/assets/icons/typicons_dark/mixed_margin-bottom.svg +6 -0
  31. novelwriter/assets/icons/typicons_dark/mixed_margin-left.svg +6 -0
  32. novelwriter/assets/icons/typicons_dark/mixed_margin-right.svg +6 -0
  33. novelwriter/assets/icons/typicons_dark/mixed_margin-top.svg +6 -0
  34. novelwriter/assets/icons/typicons_dark/mixed_size-height.svg +6 -0
  35. novelwriter/assets/icons/typicons_dark/mixed_size-width.svg +6 -0
  36. novelwriter/assets/icons/typicons_dark/nw_toolbar.svg +5 -0
  37. novelwriter/assets/icons/typicons_light/icons.conf +8 -0
  38. novelwriter/assets/icons/typicons_light/mixed_copy.svg +4 -0
  39. novelwriter/assets/icons/typicons_light/mixed_margin-bottom.svg +6 -0
  40. novelwriter/assets/icons/typicons_light/mixed_margin-left.svg +6 -0
  41. novelwriter/assets/icons/typicons_light/mixed_margin-right.svg +6 -0
  42. novelwriter/assets/icons/typicons_light/mixed_margin-top.svg +6 -0
  43. novelwriter/assets/icons/typicons_light/mixed_size-height.svg +6 -0
  44. novelwriter/assets/icons/typicons_light/mixed_size-width.svg +6 -0
  45. novelwriter/assets/icons/typicons_light/nw_toolbar.svg +5 -0
  46. novelwriter/assets/manual.pdf +0 -0
  47. novelwriter/assets/sample.zip +0 -0
  48. novelwriter/assets/text/credits_en.htm +1 -0
  49. novelwriter/assets/themes/default_light.conf +2 -2
  50. novelwriter/common.py +101 -3
  51. novelwriter/config.py +30 -17
  52. novelwriter/constants.py +189 -81
  53. novelwriter/core/buildsettings.py +74 -40
  54. novelwriter/core/coretools.py +146 -148
  55. novelwriter/core/docbuild.py +133 -171
  56. novelwriter/core/document.py +1 -1
  57. novelwriter/core/index.py +39 -38
  58. novelwriter/core/item.py +42 -9
  59. novelwriter/core/itemmodel.py +518 -0
  60. novelwriter/core/options.py +5 -2
  61. novelwriter/core/project.py +68 -90
  62. novelwriter/core/projectdata.py +8 -2
  63. novelwriter/core/projectxml.py +1 -1
  64. novelwriter/core/sessions.py +1 -1
  65. novelwriter/core/spellcheck.py +10 -15
  66. novelwriter/core/status.py +24 -8
  67. novelwriter/core/storage.py +1 -1
  68. novelwriter/core/tree.py +269 -288
  69. novelwriter/dialogs/about.py +1 -1
  70. novelwriter/dialogs/docmerge.py +8 -18
  71. novelwriter/dialogs/docsplit.py +1 -1
  72. novelwriter/dialogs/editlabel.py +1 -1
  73. novelwriter/dialogs/preferences.py +47 -34
  74. novelwriter/dialogs/projectsettings.py +149 -99
  75. novelwriter/dialogs/quotes.py +1 -1
  76. novelwriter/dialogs/wordlist.py +11 -10
  77. novelwriter/enum.py +37 -24
  78. novelwriter/error.py +2 -2
  79. novelwriter/extensions/configlayout.py +28 -13
  80. novelwriter/extensions/eventfilters.py +1 -1
  81. novelwriter/extensions/modified.py +30 -6
  82. novelwriter/extensions/novelselector.py +4 -3
  83. novelwriter/extensions/pagedsidebar.py +9 -9
  84. novelwriter/extensions/progressbars.py +4 -4
  85. novelwriter/extensions/statusled.py +3 -3
  86. novelwriter/extensions/switch.py +3 -3
  87. novelwriter/extensions/switchbox.py +1 -1
  88. novelwriter/extensions/versioninfo.py +1 -1
  89. novelwriter/formats/shared.py +156 -0
  90. novelwriter/formats/todocx.py +1191 -0
  91. novelwriter/formats/tohtml.py +454 -0
  92. novelwriter/{core → formats}/tokenizer.py +497 -495
  93. novelwriter/formats/tomarkdown.py +218 -0
  94. novelwriter/{core → formats}/toodt.py +312 -433
  95. novelwriter/formats/toqdoc.py +486 -0
  96. novelwriter/formats/toraw.py +91 -0
  97. novelwriter/gui/doceditor.py +347 -287
  98. novelwriter/gui/dochighlight.py +97 -85
  99. novelwriter/gui/docviewer.py +90 -33
  100. novelwriter/gui/docviewerpanel.py +18 -26
  101. novelwriter/gui/editordocument.py +18 -3
  102. novelwriter/gui/itemdetails.py +27 -29
  103. novelwriter/gui/mainmenu.py +130 -64
  104. novelwriter/gui/noveltree.py +46 -48
  105. novelwriter/gui/outline.py +202 -256
  106. novelwriter/gui/projtree.py +590 -1238
  107. novelwriter/gui/search.py +11 -19
  108. novelwriter/gui/sidebar.py +8 -7
  109. novelwriter/gui/statusbar.py +20 -3
  110. novelwriter/gui/theme.py +11 -6
  111. novelwriter/guimain.py +101 -201
  112. novelwriter/shared.py +67 -28
  113. novelwriter/text/counting.py +3 -1
  114. novelwriter/text/patterns.py +169 -61
  115. novelwriter/tools/dictionaries.py +3 -3
  116. novelwriter/tools/lipsum.py +1 -1
  117. novelwriter/tools/manusbuild.py +15 -13
  118. novelwriter/tools/manuscript.py +121 -79
  119. novelwriter/tools/manussettings.py +424 -291
  120. novelwriter/tools/noveldetails.py +1 -1
  121. novelwriter/tools/welcome.py +6 -6
  122. novelwriter/tools/writingstats.py +4 -4
  123. novelwriter/types.py +25 -9
  124. novelwriter/core/tohtml.py +0 -530
  125. novelwriter/core/tomarkdown.py +0 -252
  126. novelwriter/core/toqdoc.py +0 -419
  127. {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/LICENSE.md +0 -0
  128. {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/entry_points.txt +0 -0
  129. {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ Created: 2019-05-05 [0.0.1] Tokenizer
  Created: 2023-05-23 [2.1b1] HeadingFormatter

  This file is a part of novelWriter
- Copyright 2018–2024, Veronica Berglyd Olsen
+ Copyright (C) 2019 Veronica Berglyd Olsen and novelWriter contributors

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -24,41 +24,56 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
  """
  from __future__ import annotations

- import json
  import logging
  import re

  from abc import ABC, abstractmethod
- from functools import partial
  from pathlib import Path
- from time import time
+ from typing import NamedTuple

- from PyQt5.QtCore import QCoreApplication, QRegularExpression
- from PyQt5.QtGui import QFont
+ from PyQt5.QtCore import QLocale
+ from PyQt5.QtGui import QColor, QFont

  from novelwriter import CONFIG
- from novelwriter.common import checkInt, formatTimeStamp, numberToRoman
- from novelwriter.constants import nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwUnicode, trConst
+ from novelwriter.common import checkInt, fontMatcher, numberToRoman
+ from novelwriter.constants import (
+ nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwStats, nwStyles, nwUnicode,
+ trConst
+ )
  from novelwriter.core.index import processComment
  from novelwriter.core.project import NWProject
  from novelwriter.enum import nwComment, nwItemLayout
- from novelwriter.text.patterns import REGEX_PATTERNS
+ from novelwriter.formats.shared import (
+ BlockFmt, BlockTyp, T_Block, T_Formats, T_Note, TextDocumentTheme, TextFmt
+ )
+ from novelwriter.text.patterns import REGEX_PATTERNS, DialogParser

  logger = logging.getLogger(__name__)

- ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
- RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)

- T_Formats = list[tuple[int, int, str]]
- T_Comment = tuple[str, T_Formats]
- T_Token = tuple[int, int, str, T_Formats, int]
+ class ComStyle(NamedTuple):

+ label: str = ""
+ labelClass: str = ""
+ textClass: str = ""

- def stripEscape(text: str) -> str:
- """Strip escaped Markdown characters from paragraph text."""
- if "\\" in text:
- return RX_ESC.sub(lambda x: ESCAPES[x.group(0)], text)
- return text
+
+ COMMENT_STYLE = {
+ nwComment.PLAIN: ComStyle("Comment", "comment", "comment"),
+ nwComment.IGNORE: ComStyle(),
+ nwComment.SYNOPSIS: ComStyle("Synopsis", "modifier", "synopsis"),
+ nwComment.SHORT: ComStyle("Short Description", "modifier", "synopsis"),
+ nwComment.NOTE: ComStyle("Note", "modifier", "note"),
+ nwComment.FOOTNOTE: ComStyle("", "modifier", "note"),
+ nwComment.COMMENT: ComStyle(),
+ nwComment.STORY: ComStyle("", "modifier", "note"),
+ }
+ HEADINGS = [
+ BlockTyp.TITLE, BlockTyp.PART, BlockTyp.HEAD1,
+ BlockTyp.HEAD2, BlockTyp.HEAD3, BlockTyp.HEAD4,
+ ]
+ SKIP_INDENT = HEADINGS + [BlockTyp.SEP, BlockTyp.SKIP]
+ B_EMPTY: T_Block = (BlockTyp.EMPTY, "", "", [], BlockFmt.NONE)


  class Tokenizer(ABC):
@@ -70,119 +85,71 @@ class Tokenizer(ABC):
  subclasses.
  """

- # In-Text Format
- FMT_B_B = 1 # Begin bold
- FMT_B_E = 2 # End bold
- FMT_I_B = 3 # Begin italics
- FMT_I_E = 4 # End italics
- FMT_D_B = 5 # Begin strikeout
- FMT_D_E = 6 # End strikeout
- FMT_U_B = 7 # Begin underline
- FMT_U_E = 8 # End underline
- FMT_M_B = 9 # Begin mark
- FMT_M_E = 10 # End mark
- FMT_SUP_B = 11 # Begin superscript
- FMT_SUP_E = 12 # End superscript
- FMT_SUB_B = 13 # Begin subscript
- FMT_SUB_E = 14 # End subscript
- FMT_DL_B = 15 # Begin dialogue
- FMT_DL_E = 16 # End dialogue
- FMT_ADL_B = 17 # Begin alt dialogue
- FMT_ADL_E = 18 # End alt dialogue
- FMT_FNOTE = 19 # Footnote marker
- FMT_STRIP = 20 # Strip the format code
-
- # Block Type
- T_EMPTY = 1 # Empty line (new paragraph)
- T_SYNOPSIS = 2 # Synopsis comment
- T_SHORT = 3 # Short description comment
- T_COMMENT = 4 # Comment line
- T_KEYWORD = 5 # Command line
- T_TITLE = 6 # Title
- T_HEAD1 = 7 # Heading 1
- T_HEAD2 = 8 # Heading 2
- T_HEAD3 = 9 # Heading 3
- T_HEAD4 = 10 # Heading 4
- T_TEXT = 11 # Text line
- T_SEP = 12 # Scene separator
- T_SKIP = 13 # Paragraph break
-
- # Block Style
- A_NONE = 0x0000 # No special style
- A_LEFT = 0x0001 # Left aligned
- A_RIGHT = 0x0002 # Right aligned
- A_CENTRE = 0x0004 # Centred
- A_JUSTIFY = 0x0008 # Justified
- A_PBB = 0x0010 # Page break before
- A_PBA = 0x0020 # Page break after
- A_Z_TOPMRG = 0x0040 # Zero top margin
- A_Z_BTMMRG = 0x0080 # Zero bottom margin
- A_IND_L = 0x0100 # Left indentation
- A_IND_R = 0x0200 # Right indentation
- A_IND_T = 0x0400 # Text indentation
-
- # Masks
- M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY
-
- # Lookups
- L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
- L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
- L_SUMMARY = [T_SYNOPSIS, T_SHORT]
-
  def __init__(self, project: NWProject) -> None:

  self._project = project

  # Data Variables
- self._text = "" # The raw text to be tokenized
- self._handle = None # The item handle currently being processed
- self._result = "" # The result of the last document
- self._keepMD = False # Whether to keep the markdown text
-
- # Tokens and Meta Data (Per Document)
- self._tokens: list[T_Token] = []
- self._footnotes: dict[str, T_Comment] = {}
-
- # Tokens and Meta Data (Per Instance)
+ self._text = "" # The raw text to be tokenized
+ self._handle = None # The item handle currently being processed
+ self._keepRaw = False # Whether to keep the raw text, used by ToRaw
+ self._noTokens = False # Disable tokenization if they're not needed
+
+ # Blocks and Meta Data (Per Document)
+ self._blocks: list[T_Block] = []
+ self._footnotes: dict[str, T_Note] = {}
+
+ # Blocks and Meta Data (Per Instance)
+ self._raw: list[str] = []
+ self._pages: list[str] = []
  self._counts: dict[str, int] = {}
  self._outline: dict[str, str] = {}
- self._markdown: list[str] = []

  # User Settings
+ self._dLocale = CONFIG.locale # The document locale
  self._textFont = QFont("Serif", 11) # Output text font
- self._lineHeight = 1.15 # Line height in units of em
- self._blockIndent = 4.00 # Block indent in units of em
- self._firstIndent = False # Enable first line indent
- self._firstWidth = 1.40 # First line indent in units of em
- self._indentFirst = False # Indent first paragraph
- self._doJustify = False # Justify text
- self._doBodyText = True # Include body text
- self._doSynopsis = False # Also process synopsis comments
- self._doComments = False # Also process comments
- self._doKeywords = False # Also process keywords like tags and references
- self._skipKeywords = set() # Keywords to ignore
- self._keepBreaks = True # Keep line breaks in paragraphs
+ self._lineHeight = 1.15 # Line height in units of em
+ self._colorHeads = True # Colourise headings
+ self._scaleHeads = True # Scale headings to larger font size
+ self._boldHeads = True # Bold headings
+ self._blockIndent = 4.00 # Block indent in units of em
+ self._firstIndent = False # Enable first line indent
+ self._firstWidth = 1.40 # First line indent in units of em
+ self._indentFirst = False # Indent first paragraph
+ self._doJustify = False # Justify text
+ self._doBodyText = True # Include body text
+ self._doSynopsis = False # Also process synopsis comments
+ self._doComments = False # Also process comments
+ self._doKeywords = False # Also process keywords like tags and references
+ self._keepBreaks = True # Keep line breaks in paragraphs
+ self._defaultAlign = "left" # The default text alignment
+
+ self._skipKeywords: set[str] = set() # Keywords to ignore
+
+ # Other Setting
+ self._theme = TextDocumentTheme()
+ self._classes: dict[str, QColor] = {}

  # Margins
- self._marginTitle = (1.417, 0.500)
- self._marginHead1 = (1.417, 0.500)
- self._marginHead2 = (1.668, 0.500)
- self._marginHead3 = (1.168, 0.500)
- self._marginHead4 = (1.168, 0.500)
- self._marginText = (0.000, 0.584)
- self._marginMeta = (0.000, 0.584)
- self._marginFoot = (1.417, 0.467)
- self._marginSep = (1.168, 1.168)
+ self._marginTitle = nwStyles.T_MARGIN["H0"]
+ self._marginHead1 = nwStyles.T_MARGIN["H1"]
+ self._marginHead2 = nwStyles.T_MARGIN["H2"]
+ self._marginHead3 = nwStyles.T_MARGIN["H3"]
+ self._marginHead4 = nwStyles.T_MARGIN["H4"]
+ self._marginText = nwStyles.T_MARGIN["TT"]
+ self._marginMeta = nwStyles.T_MARGIN["MT"]
+ self._marginFoot = nwStyles.T_MARGIN["FT"]
+ self._marginSep = nwStyles.T_MARGIN["SP"]

  # Title Formats
- self._fmtTitle = nwHeadFmt.TITLE # Formatting for titles
+ self._fmtPart = nwHeadFmt.TITLE # Formatting for partitions
  self._fmtChapter = nwHeadFmt.TITLE # Formatting for numbered chapters
  self._fmtUnNum = nwHeadFmt.TITLE # Formatting for unnumbered chapters
  self._fmtScene = nwHeadFmt.TITLE # Formatting for scenes
  self._fmtHScene = nwHeadFmt.TITLE # Formatting for hard scenes
  self._fmtSection = nwHeadFmt.TITLE # Formatting for sections

- self._hideTitle = False # Do not include title headings
+ self._hidePart = False # Do not include partition headings
  self._hideChapter = False # Do not include chapter headings
  self._hideUnNum = False # Do not include unnumbered headings
  self._hideScene = False # Do not include scene headings
@@ -191,15 +158,16 @@ class Tokenizer(ABC):

  self._linkHeadings = False # Add an anchor before headings

- self._titleStyle = self.A_CENTRE | self.A_PBB
- self._chapterStyle = self.A_PBB
- self._sceneStyle = self.A_NONE
+ self._titleStyle = BlockFmt.CENTRE | BlockFmt.PBB
+ self._partStyle = BlockFmt.CENTRE | BlockFmt.PBB
+ self._chapterStyle = BlockFmt.PBB
+ self._sceneStyle = BlockFmt.NONE

  # Instance Variables
  self._hFormatter = HeadingFormatter(self._project)
  self._noSep = True # Flag to indicate that we don't want a scene separator
  self._noIndent = False # Flag to disable text indent on next paragraph
- self._showDialog = False # Flag for dialogue highlighting
+ self._breakNext = False # Add a page break on next token

  # This File
  self._isNovel = False # Document is a novel document
@@ -210,31 +178,33 @@ class Tokenizer(ABC):

  # Function Mapping
  self._localLookup = self._project.localLookup
- self.tr = partial(QCoreApplication.translate, "Tokenizer")

  # Format RegEx
  self._rxMarkdown = [
- (REGEX_PATTERNS.markdownItalic, [0, self.FMT_I_B, 0, self.FMT_I_E]),
- (REGEX_PATTERNS.markdownBold, [0, self.FMT_B_B, 0, self.FMT_B_E]),
- (REGEX_PATTERNS.markdownStrike, [0, self.FMT_D_B, 0, self.FMT_D_E]),
+ (REGEX_PATTERNS.markdownItalic, [0, TextFmt.I_B, 0, TextFmt.I_E]),
+ (REGEX_PATTERNS.markdownBold, [0, TextFmt.B_B, 0, TextFmt.B_E]),
+ (REGEX_PATTERNS.markdownStrike, [0, TextFmt.D_B, 0, TextFmt.D_E]),
  ]
- self._rxShortCodes = REGEX_PATTERNS.shortcodePlain
- self._rxShortCodeVals = REGEX_PATTERNS.shortcodeValue

  self._shortCodeFmt = {
- nwShortcode.ITALIC_O: self.FMT_I_B, nwShortcode.ITALIC_C: self.FMT_I_E,
- nwShortcode.BOLD_O: self.FMT_B_B, nwShortcode.BOLD_C: self.FMT_B_E,
- nwShortcode.STRIKE_O: self.FMT_D_B, nwShortcode.STRIKE_C: self.FMT_D_E,
- nwShortcode.ULINE_O: self.FMT_U_B, nwShortcode.ULINE_C: self.FMT_U_E,
- nwShortcode.MARK_O: self.FMT_M_B, nwShortcode.MARK_C: self.FMT_M_E,
- nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
- nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
+ nwShortcode.ITALIC_O: TextFmt.I_B, nwShortcode.ITALIC_C: TextFmt.I_E,
+ nwShortcode.BOLD_O: TextFmt.B_B, nwShortcode.BOLD_C: TextFmt.B_E,
+ nwShortcode.STRIKE_O: TextFmt.D_B, nwShortcode.STRIKE_C: TextFmt.D_E,
+ nwShortcode.ULINE_O: TextFmt.U_B, nwShortcode.ULINE_C: TextFmt.U_E,
+ nwShortcode.MARK_O: TextFmt.M_B, nwShortcode.MARK_C: TextFmt.M_E,
+ nwShortcode.SUP_O: TextFmt.SUP_B, nwShortcode.SUP_C: TextFmt.SUP_E,
+ nwShortcode.SUB_O: TextFmt.SUB_B, nwShortcode.SUB_C: TextFmt.SUB_E,
  }
  self._shortCodeVals = {
- nwShortcode.FOOTNOTE_B: self.FMT_FNOTE,
+ nwShortcode.FOOTNOTE_B: TextFmt.FNOTE,
+ nwShortcode.FIELD_B: TextFmt.FIELD,
  }

- self._rxDialogue: list[tuple[QRegularExpression, int, int]] = []
+ # Dialogue
+ self._hlightDialog = False
+ self._rxAltDialog = REGEX_PATTERNS.altDialogStyle
+ self._dialogParser = DialogParser()
+ self._dialogParser.initParser()

  return

@@ -242,16 +212,6 @@
  # Properties
  ##

- @property
- def result(self) -> str:
- """The result of the build process."""
- return self._result
-
- @property
- def allMarkdown(self) -> list[str]:
- """The combined novelWriter Markdown text."""
- return self._markdown
-
  @property
  def textStats(self) -> dict[str, int]:
  """The collected stats about the text."""
@@ -271,10 +231,21 @@
  # Setters
  ##

- def setTitleFormat(self, hFormat: str, hide: bool = False) -> None:
- """Set the title format pattern."""
- self._fmtTitle = hFormat.strip()
- self._hideTitle = hide
+ def setLanguage(self, language: str | None) -> None:
+ """Set language for the document."""
+ if language:
+ self._dLocale = QLocale(language)
+ return
+
+ def setTheme(self, theme: TextDocumentTheme) -> None:
+ """Set the document colour theme."""
+ self._theme = theme
+ return
+
+ def setPartitionFormat(self, hFormat: str, hide: bool = False) -> None:
+ """Set the partition format pattern."""
+ self._fmtPart = hFormat.strip()
+ self._hidePart = hide
  return

  def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
@@ -309,28 +280,31 @@

  def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
  """Set the title heading style."""
- self._titleStyle = (
- (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
- )
+ self._titleStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+ self._titleStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
+ return
+
+ def setPartitionStyle(self, center: bool, pageBreak: bool) -> None:
+ """Set the partition heading style."""
+ self._partStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+ self._partStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
  return

  def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
  """Set the chapter heading style."""
- self._chapterStyle = (
- (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
- )
+ self._chapterStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+ self._chapterStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
  return

  def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
  """Set the scene heading style."""
- self._sceneStyle = (
- (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
- )
+ self._sceneStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+ self._sceneStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
  return

- def setFont(self, font: QFont) -> None:
+ def setTextFont(self, font: QFont) -> None:
  """Set the build font."""
- self._textFont = font
+ self._textFont = fontMatcher(font)
  return

  def setLineHeight(self, height: float) -> None:
@@ -338,6 +312,13 @@
  self._lineHeight = min(max(float(height), 0.5), 5.0)
  return

+ def setHeadingStyles(self, color: bool, scale: bool, bold: bool) -> None:
+ """Set text style for headings."""
+ self._colorHeads = color
+ self._scaleHeads = scale
+ self._boldHeads = bold
+ return
+
  def setBlockIndent(self, indent: float) -> None:
  """Set the block indent between 0.0 and 10.0."""
  self._blockIndent = min(max(float(indent), 0.0), 10.0)
@@ -357,27 +338,9 @@
  self._doJustify = state
  return

- def setDialogueHighlight(self, state: bool) -> None:
+ def setDialogHighlight(self, state: bool) -> None:
  """Enable or disable dialogue highlighting."""
- self._rxDialogue = []
- self._showDialog = state
- if state:
- if CONFIG.dialogStyle > 0:
- self._rxDialogue.append((
- REGEX_PATTERNS.dialogStyle, self.FMT_DL_B, self.FMT_DL_E
- ))
- if CONFIG.dialogLine:
- self._rxDialogue.append((
- REGEX_PATTERNS.dialogLine, self.FMT_DL_B, self.FMT_DL_E
- ))
- if CONFIG.narratorBreak:
- self._rxDialogue.append((
- REGEX_PATTERNS.narratorBreak, self.FMT_DL_E, self.FMT_DL_B
- ))
- if CONFIG.altDialogOpen and CONFIG.altDialogClose:
- self._rxDialogue.append((
- REGEX_PATTERNS.altDialogStyle, self.FMT_ADL_B, self.FMT_ADL_E
- ))
+ self._hlightDialog = state
  return

  def setTitleMargins(self, upper: float, lower: float) -> None:
@@ -455,11 +418,6 @@
  self._keepBreaks = state
  return

- def setKeepMarkdown(self, state: bool) -> None:
- """Keep original markdown during build."""
- self._keepMD = state
- return
-
  ##
  # Class Methods
  ##
@@ -468,27 +426,54 @@
  def doConvert(self) -> None:
  raise NotImplementedError

+ @abstractmethod
+ def closeDocument(self) -> None:
+ raise NotImplementedError
+
+ @abstractmethod
+ def saveDocument(self, path: Path) -> None:
+ raise NotImplementedError
+
+ def initDocument(self) -> None:
+ """Initialise data after settings."""
+ self._classes["modifier"] = self._theme.modifier
+ self._classes["synopsis"] = self._theme.note
+ self._classes["comment"] = self._theme.comment
+ self._classes["dialog"] = self._theme.dialog
+ self._classes["altdialog"] = self._theme.altdialog
+ self._classes["tag"] = self._theme.tag
+ self._classes["keyword"] = self._theme.keyword
+ self._classes["optional"] = self._theme.optional
+ return
+
+ def setBreakNext(self) -> None:
+ """Set a page break for next block."""
+ self._breakNext = True
+ return
+
  def addRootHeading(self, tHandle: str) -> None:
  """Add a heading at the start of a new root folder."""
  self._text = ""
  self._handle = None

- if (tItem := self._project.tree[tHandle]) and tItem.isRootType():
+ if (item := self._project.tree[tHandle]) and item.isRootType():
  self._handle = tHandle
+ style = BlockFmt.CENTRE
  if self._isFirst:
- textAlign = self.A_CENTRE
  self._isFirst = False
  else:
- textAlign = self.A_PBB | self.A_CENTRE
+ style |= BlockFmt.PBB

- trNotes = self._localLookup("Notes")
- title = f"{trNotes}: {tItem.itemName}"
- self._tokens = []
- self._tokens.append((
- self.T_TITLE, 1, title, [], textAlign
- ))
- if self._keepMD:
- self._markdown.append(f"#! {title}\n\n")
+ title = item.itemName
+ if not item.isNovelLike():
+ notes = self._localLookup("Notes")
+ title = f"{notes}: {title}"
+
+ self._blocks = [(
+ BlockTyp.TITLE, f"{self._handle}:T0001", title, [], style
+ )]
+ if self._keepRaw:
+ self._raw.append(f"#! {title}\n\n")

  return

@@ -505,20 +490,14 @@
  return

  def doPreProcessing(self) -> None:
- """Run trough the various replace dictionaries."""
+ """Run pre-processing jobs before the text is tokenized."""
  # Process the user's auto-replace dictionary
- autoReplace = self._project.data.autoReplace
- if len(autoReplace) > 0:
+ if autoReplace := self._project.data.autoReplace:
  repDict = {}
  for aKey, aVal in autoReplace.items():
  repDict[f"<{aKey}>"] = aVal
  xRep = re.compile("|".join([re.escape(k) for k in repDict.keys()]), flags=re.DOTALL)
  self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
-
- # Process the character translation map
- trDict = {nwUnicode.U_MAPOS: nwUnicode.U_RSQUO}
- self._text = self._text.translate(str.maketrans(trDict))
-
  return

  def tokenizeText(self) -> None:
@@ -526,42 +505,62 @@
  characters that indicate headings, comments, commands etc, or
  just contain plain text. In the case of plain text, apply the
  same RegExes that the syntax highlighter uses and save the
- locations of these formatting tags into the token array.
+ locations of these formatting tags into the blocks list.

- The format of the token list is an entry with a five-tuple for
+ The format of the blocs list is an entry with a five-tuple for
  each line in the file. The tuple is as follows:
- 1: The type of the block, self.T_*
+ 1: The type of the block, BlockType.*
  2: The heading number under which the text is placed
  3: The text content of the block, without leading tags
- 4: The internal formatting map of the text, self.FMT_*
- 5: The style of the block, self.A_*
+ 4: The internal formatting map of the text, TxtFmt.*
+ 5: The formats of the block, BlockFmt.*
  """
+ if self._keepRaw:
+ self._raw.append(f"{self._text.rstrip()}\n\n")
+ if self._noTokens:
+ return
  if self._isNovel:
  self._hFormatter.setHandle(self._handle)

+ # Cache Flags
+ isNovel = self._isNovel
+ doJustify = self._doJustify
+ keepBreaks = self._keepBreaks
+ indentFirst = self._indentFirst
+ firstIndent = self._firstIndent
+
+ # Replace all instances of [br] with a placeholder character
+ text = REGEX_PATTERNS.lineBreak.sub(nwUnicode.U_NAC2, self._text)
+
+ # Translation Maps
+ transMapA = str.maketrans({
+ nwUnicode.U_NAC2: "", # Used when [br] is ignored
+ nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
+ nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
+ })
+ transMapB = str.maketrans({
+ nwUnicode.U_NAC2: "\n", # Used when [br] is not ignored
+ nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
+ nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
+ })
+
  nHead = 0
- breakNext = False
- tmpMarkdown = []
  tHandle = self._handle or ""
- tokens: list[T_Token] = []
- for aLine in self._text.splitlines():
+ tBlocks: list[T_Block] = [B_EMPTY]
+ for bLine in text.splitlines():
+ aLine = bLine.translate(transMapA)
  sLine = aLine.strip().lower()

  # Check for blank lines
- if len(sLine) == 0:
- tokens.append((
- self.T_EMPTY, nHead, "", [], self.A_NONE
- ))
- if self._keepMD:
- tmpMarkdown.append("\n")
-
+ if not sLine:
+ tBlocks.append(B_EMPTY)
  continue

- if breakNext:
- sAlign = self.A_PBB
- breakNext = False
+ if self._breakNext:
+ tStyle = BlockFmt.PBB
+ self._breakNext = False
  else:
- sAlign = self.A_NONE
+ tStyle = BlockFmt.NONE

  # Check Line Format
  # =================
@@ -574,24 +573,24 @@
  # therefore proceed to check other formats.

  if sLine in ("[newpage]", "[new page]"):
- breakNext = True
+ self._breakNext = True
  continue

  elif sLine == "[vspace]":
- tokens.append(
- (self.T_SKIP, nHead, "", [], sAlign)
+ tBlocks.append(
+ (BlockTyp.SKIP, "", "", [], tStyle)
  )
  continue

  elif sLine.startswith("[vspace:") and sLine.endswith("]"):
  nSkip = checkInt(sLine[8:-1], 0)
  if nSkip >= 1:
- tokens.append(
- (self.T_SKIP, nHead, "", [], sAlign)
+ tBlocks.append(
+ (BlockTyp.SKIP, "", "", [], tStyle)
  )
  if nSkip > 1:
- tokens += (nSkip - 1) * [
- (self.T_SKIP, nHead, "", [], self.A_NONE)
+ tBlocks += (nSkip - 1) * [
+ (BlockTyp.SKIP, "", "", [], BlockFmt.NONE)
  ]
  continue

@@ -605,32 +604,24 @@
  continue

  cStyle, cKey, cText, _, _ = processComment(aLine)
- if cStyle == nwComment.SYNOPSIS:
- tLine, tFmt = self._extractFormats(cText)
- tokens.append((
- self.T_SYNOPSIS, nHead, tLine, tFmt, sAlign
- ))
- if self._doSynopsis and self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")
- elif cStyle == nwComment.SHORT:
- tLine, tFmt = self._extractFormats(cText)
- tokens.append((
- self.T_SHORT, nHead, tLine, tFmt, sAlign
+ if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT) and not self._doSynopsis:
+ continue
+ if cStyle == nwComment.PLAIN and not self._doComments:
+ continue
+
+ if doJustify and not tStyle & BlockFmt.ALIGNED:
+ tStyle |= BlockFmt.JUSTIFY
+
+ if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT, nwComment.PLAIN):
+ bStyle = COMMENT_STYLE[cStyle]
+ tLine, tFmt = self._formatComment(bStyle, cKey, cText)
+ tBlocks.append((
+ BlockTyp.COMMENT, "", tLine, tFmt, tStyle
  ))
- if self._doSynopsis and self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")
+
  elif cStyle == nwComment.FOOTNOTE:
- tLine, tFmt = self._extractFormats(cText, skip=self.FMT_FNOTE)
+ tLine, tFmt = self._extractFormats(cText, skip=TextFmt.FNOTE)
  self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
- if self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")
- else:
- tLine, tFmt = self._extractFormats(cText)
- tokens.append((
- self.T_COMMENT, nHead, tLine, tFmt, sAlign
- ))
- if self._doComments and self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")

  elif aLine.startswith("@"):
  # Keywords
@@ -638,16 +629,12 @@
  # Only valid keyword lines are parsed, and any ignored keywords
  # are automatically skipped.

- valid, bits, _ = self._project.index.scanThis(aLine)
- if (
- valid and bits and bits[0] in nwLabels.KEY_NAME
- and bits[0] not in self._skipKeywords
- ):
- tokens.append((
- self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
- ))
- if self._doKeywords and self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")
+ if self._doKeywords:
+ tTag, tLine, tFmt = self._formatMeta(aLine)
+ if tLine:
+ tBlocks.append((
+ BlockTyp.KEYWORD, tTag[1:], tLine, tFmt, tStyle
+ ))

  elif aLine.startswith(("# ", "#! ")):
  # Title or Partition Headings
@@ -662,28 +649,27 @@

  nHead += 1
  tText = aLine[2:].strip()
- tType = self.T_HEAD1 if isPlain else self.T_TITLE
- tStyle = self.A_NONE if isPlain else (self.A_PBB | self.A_CENTRE)
- sHide = self._hideTitle if isPlain else False
- if self._isNovel:
+ tType = BlockTyp.HEAD1 if isPlain else BlockTyp.TITLE
+ sHide = self._hidePart if isPlain else False
+ if not (isPlain or isNovel and sHide):
+ tStyle |= self._titleStyle
+ if isNovel:
+ tType = BlockTyp.PART if isPlain else BlockTyp.TITLE
  if sHide:
  tText = ""
- tType = self.T_EMPTY
- tStyle = self.A_NONE
+ tType = BlockTyp.EMPTY
  elif isPlain:
- tText = self._hFormatter.apply(self._fmtTitle, tText, nHead)
- tStyle = self._titleStyle
+ tText = self._hFormatter.apply(self._fmtPart, tText, nHead)
+ tStyle |= self._partStyle
  if isPlain:
  self._hFormatter.resetScene()
  else:
  self._hFormatter.resetAll()
  self._noSep = True

- tokens.append((
- tType, nHead, tText, [], tStyle
+ tBlocks.append((
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
  ))
- if self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")

  elif aLine.startswith(("## ", "##! ")):
  # (Unnumbered) Chapter Headings
@@ -698,27 +684,25 @@

  nHead += 1
  tText = aLine[3:].strip()
- tType = self.T_HEAD2
- tStyle = self.A_NONE
+ tType = BlockTyp.HEAD2
  sHide = self._hideChapter if isPlain else self._hideUnNum
  tFormat = self._fmtChapter if isPlain else self._fmtUnNum
- if self._isNovel:
+ if isNovel:
+ tType = BlockTyp.HEAD1 # Promote
  if isPlain:
  self._hFormatter.incChapter()
  if sHide:
  tText = ""
- tType = self.T_EMPTY
+ tType = BlockTyp.EMPTY
  else:
  tText = self._hFormatter.apply(tFormat, tText, nHead)
- tStyle = self._chapterStyle
+ tStyle |= self._chapterStyle
  self._hFormatter.resetScene()
  self._noSep = True

- tokens.append((
- tType, nHead, tText, [], tStyle
+ tBlocks.append((
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
  ))
- if self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")

  elif aLine.startswith(("### ", "###! ")):
  # (Alternative) Scene Headings
@@ -735,31 +719,29 @@

  nHead += 1
  tText = aLine[4:].strip()
- tType = self.T_HEAD3
- tStyle = self.A_NONE
+ tType = BlockTyp.HEAD3
  sHide = self._hideScene if isPlain else self._hideHScene
  tFormat = self._fmtScene if isPlain else self._fmtHScene
- if self._isNovel:
+ if isNovel:
+ tType = BlockTyp.HEAD2 # Promote
  self._hFormatter.incScene()
  if sHide:
  tText = ""
- tType = self.T_EMPTY
+ tType = BlockTyp.EMPTY
  else:
  tText = self._hFormatter.apply(tFormat, tText, nHead)
- tStyle = self._sceneStyle
+ tStyle |= self._sceneStyle
  if tText == "": # Empty Format
- tType = self.T_EMPTY if self._noSep else self.T_SKIP
+ tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SKIP
  elif tText == tFormat: # Static Format
  tText = "" if self._noSep else tText
- tType = self.T_EMPTY if self._noSep else self.T_SEP
- tStyle = self.A_NONE if self._noSep else self.A_CENTRE
+ tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SEP
+ tStyle |= BlockFmt.NONE if self._noSep else BlockFmt.CENTRE
  self._noSep = False

- tokens.append((
- tType, nHead, tText, [], tStyle
+ tBlocks.append((
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
  ))
- if self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")

  elif aLine.startswith("#### "):
  # Section Headings
@@ -771,25 +753,23 @@

  nHead += 1
  tText = aLine[5:].strip()
- tType = self.T_HEAD4
- tStyle = self.A_NONE
- if self._isNovel:
+ tType = BlockTyp.HEAD4
+ if isNovel:
+ tType = BlockTyp.HEAD3 # Promote
  if self._hideSection:
  tText = ""
- tType = self.T_EMPTY
+ tType = BlockTyp.EMPTY
  else:
  tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
  if tText == "": # Empty Format
- tType = self.T_SKIP
+ tType = BlockTyp.SKIP
  elif tText == self._fmtSection: # Static Format
- tType = self.T_SEP
- tStyle = self.A_CENTRE
+ tType = BlockTyp.SEP
+ tStyle |= BlockFmt.CENTRE

- tokens.append((
- tType, nHead, tText, [], tStyle
+ tBlocks.append((
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
  ))
- if self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")

  else:
  # Text Lines
@@ -805,135 +785,133 @@
  alnRight = False
  indLeft = False
  indRight = False
- if aLine.startswith(">>"):
+ if bLine.startswith(">>"):
  alnRight = True
- aLine = aLine[2:].lstrip(" ")
- elif aLine.startswith(">"):
+ bLine = bLine[2:].lstrip(" ")
+ elif bLine.startswith(">"):
  indLeft = True
- aLine = aLine[1:].lstrip(" ")
+ bLine = bLine[1:].lstrip(" ")

- if aLine.endswith("<<"):
+ if bLine.endswith("<<"):
  alnLeft = True
- aLine = aLine[:-2].rstrip(" ")
- elif aLine.endswith("<"):
+ bLine = bLine[:-2].rstrip(" ")
+ elif bLine.endswith("<"):
  indRight = True
- aLine = aLine[:-1].rstrip(" ")
+ bLine = bLine[:-1].rstrip(" ")

  if alnLeft and alnRight:
- sAlign |= self.A_CENTRE
+ tStyle |= BlockFmt.CENTRE
  elif alnLeft:
- sAlign |= self.A_LEFT
+ tStyle |= BlockFmt.LEFT
  elif alnRight:
- sAlign |= self.A_RIGHT
+ tStyle |= BlockFmt.RIGHT

  if indLeft:
- sAlign |= self.A_IND_L
+ tStyle |= BlockFmt.IND_L
  if indRight:
- sAlign |= self.A_IND_R
+ tStyle |= BlockFmt.IND_R

  # Process formats
- tLine, tFmt = self._extractFormats(aLine, hDialog=self._isNovel)
- tokens.append((
- self.T_TEXT, nHead, tLine, tFmt, sAlign
+ tLine, tFmt = self._extractFormats(bLine, hDialog=isNovel)
+ tBlocks.append((
+ BlockTyp.TEXT, "", tLine, tFmt, tStyle
  ))
- if self._keepMD:
- tmpMarkdown.append(f"{aLine}\n")

  # If we have content, turn off the first page flag
- if self._isFirst and tokens:
+ if self._isFirst and len(tBlocks) > 1:
  self._isFirst = False # First document has been processed

- # Make sure the token array doesn't start with a page break
- # on the very first page, adding a blank first page.
- if tokens[0][4] & self.A_PBB:
- cToken = tokens[0]
- tokens[0] = (
- cToken[0], cToken[1], cToken[2], cToken[3], cToken[4] & ~self.A_PBB
- )
+ # Make sure the blocks array doesn't start with a page break
+ # on the very first block, adding a blank first page.
+ for n, cBlock in enumerate(tBlocks):
+ if cBlock[0] != BlockTyp.EMPTY:
+ if cBlock[4] & BlockFmt.PBB:
+ tBlocks[n] = (
+ cBlock[0], cBlock[1], cBlock[2], cBlock[3], cBlock[4] & ~BlockFmt.PBB
+ )
+ break

  # Always add an empty line at the end of the file
- tokens.append((
- self.T_EMPTY, nHead, "", [], self.A_NONE
- ))
- if self._keepMD:
- tmpMarkdown.append("\n")
- self._markdown.append("".join(tmpMarkdown))
+ tBlocks.append(B_EMPTY)

  # Second Pass
  # ===========
  # This second pass strips away consecutive blank lines, and
  # combines consecutive text lines into the same paragraph.
  # It also ensures that there isn't paragraph spacing between
- # meta data lines for formats that has spacing.
+ # meta data lines for formats that have spacing.

- self._tokens = []
- pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
- nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+ lineSep = "\n" if keepBreaks else " "

- lineSep = "\n" if self._keepBreaks else " "
- pLines: list[T_Token] = []
+ pLines: list[T_Block] = []
+ sBlocks: list[T_Block] = []
+ for n, cBlock in enumerate(tBlocks[1:-1], 1):

- tCount = len(tokens)
- for n, cToken in enumerate(tokens):
+ pBlock = tBlocks[n-1] # Look behind
+ nBlock = tBlocks[n+1] # Look ahead

- if n > 0:
- pToken = tokens[n-1] # Look behind
- if n < tCount - 1:
- nToken = tokens[n+1] # Look ahead
-
- if cToken[0] in self.L_SKIP_INDENT and not self._indentFirst:
+ if cBlock[0] in SKIP_INDENT and not indentFirst:
  # Unless the indentFirst flag is set, we set up the next
  # paragraph to not be indented if we see a block of a
  # specific type
  self._noIndent = True

- if cToken[0] == self.T_EMPTY:
+ if cBlock[0] == BlockTyp.EMPTY:
  # We don't need to keep the empty lines after this pass
  pass

- elif cToken[0] == self.T_KEYWORD:
+ elif cBlock[0] == BlockTyp.KEYWORD:
  # Adjust margins for lines in a list of keyword lines
- aStyle = cToken[4]
- if pToken[0] == self.T_KEYWORD:
- aStyle |= self.A_Z_TOPMRG
- if nToken[0] == self.T_KEYWORD:
- aStyle |= self.A_Z_BTMMRG
- self._tokens.append((
- cToken[0], cToken[1], cToken[2], cToken[3], aStyle
+ aStyle = cBlock[4]
+ if pBlock[0] == BlockTyp.KEYWORD:
+ aStyle |= BlockFmt.Z_TOP
+ if nBlock[0] == BlockTyp.KEYWORD:
+ aStyle |= BlockFmt.Z_BTM
+ sBlocks.append((
+ cBlock[0], cBlock[1], cBlock[2], cBlock[3], aStyle
  ))

- elif cToken[0] == self.T_TEXT:
+ elif cBlock[0] == BlockTyp.TEXT:
  # Combine lines from the same paragraph
- pLines.append(cToken)
+ pLines.append(cBlock)

- if nToken[0] != self.T_TEXT:
- # Next token is not text, so we add the buffer to tokens
+ if nBlock[0] != BlockTyp.TEXT:
+ # Next block is not text, so we add the buffer to blocks
  nLines = len(pLines)
  cStyle = pLines[0][4]
- if self._firstIndent and not (self._noIndent or cStyle & self.M_ALIGNED):
+ if firstIndent and not (self._noIndent or cStyle & BlockFmt.ALIGNED):
  # If paragraph indentation is enabled, not temporarily
  # turned off, and the block is not aligned, we add the
  # text indentation flag
- cStyle |= self.A_IND_T
+ cStyle |= BlockFmt.IND_T

  if nLines == 1:
- # The paragraph contains a single line, so we just
- # save that directly to the token list
- self._tokens.append((
- self.T_TEXT, pLines[0][1], pLines[0][2], pLines[0][3], cStyle
+ # The paragraph contains a single line, so we just save
+ # that directly to the blocks list. If justify is
+ # enabled, and there is no alignment, we apply it.
+ if doJustify and not cStyle & BlockFmt.ALIGNED:
+ cStyle |= BlockFmt.JUSTIFY
+
+ pTxt = pLines[0][2].translate(transMapB)
+ sBlocks.append((
+ BlockTyp.TEXT, pLines[0][1], pTxt, pLines[0][3], cStyle
  ))
+
  elif nLines > 1:
  # The paragraph contains multiple lines, so we need to
  # join them according to the line break policy, and
  # recompute all the formatting markers
  tTxt = ""
  tFmt: T_Formats = []
- for aToken in pLines:
+ for aBlock in pLines:
  tLen = len(tTxt)
- tTxt += f"{aToken[2]}{lineSep}"
- tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aToken[3])
- self._tokens.append((
- self.T_TEXT, pLines[0][1], tTxt[:-1], tFmt, cStyle
+ tTxt += f"{aBlock[2]}{lineSep}"
+ tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aBlock[3])
+ cStyle |= aBlock[4]
+
+ pTxt = tTxt[:-1].translate(transMapB)
+ sBlocks.append((
+ BlockTyp.TEXT, pLines[0][1], pTxt, tFmt, cStyle
  ))

  # Reset buffer and make sure text indent is on for next pass
@@ -941,50 +919,52 @@
  self._noIndent = False

  else:
- self._tokens.append(cToken)
+ sBlocks.append(cBlock)
+
+ self._blocks = sBlocks

  return

  def buildOutline(self) -> None:
  """Build an outline of the text up to level 3 headings."""
- tHandle = self._handle or ""
  isNovel = self._isNovel
- for tType, nHead, tText, _, _ in self._tokens:
- if tType == self.T_TITLE:
+ for tType, tKey, tText, _, _ in self._blocks:
+ if tType == BlockTyp.TITLE:
  prefix = "TT"
- elif tType == self.T_HEAD1:
- prefix = "PT" if isNovel else "H1"
- elif tType == self.T_HEAD2:
- prefix = "CH" if isNovel else "H2"
- elif tType == self.T_HEAD3:
- prefix = "SC" if isNovel else "H3"
+ elif tType == BlockTyp.PART:
+ prefix = "PT"
+ elif tType == BlockTyp.HEAD1:
+ prefix = "CH" if isNovel else "H1"
+ elif tType == BlockTyp.HEAD2:
+ prefix = "SC" if isNovel else "H2"
+ elif tType == BlockTyp.HEAD3 and not isNovel:
+ prefix = "H3"
  else:
  continue

- key = f"{tHandle}:T{nHead:04d}"
  text = tText.replace(nwHeadFmt.BR, " ").replace("&amp;", "&")
- self._outline[key] = f"{prefix}|{text}"
+ self._outline[tKey] = f"{prefix}|{text}"

  return

  def countStats(self) -> None:
  """Count stats on the tokenized text."""
- titleCount = self._counts.get("titleCount", 0)
- paragraphCount = self._counts.get("paragraphCount", 0)
+ titleCount = self._counts.get(nwStats.TITLES, 0)
+ paragraphCount = self._counts.get(nwStats.PARAGRAPHS, 0)

- allWords = self._counts.get("allWords", 0)
- textWords = self._counts.get("textWords", 0)
- titleWords = self._counts.get("titleWords", 0)
+ allWords = self._counts.get(nwStats.WORDS, 0)
+ textWords = self._counts.get(nwStats.WORDS_TEXT, 0)
+ titleWords = self._counts.get(nwStats.WORDS_TITLE, 0)

- allChars = self._counts.get("allChars", 0)
- textChars = self._counts.get("textChars", 0)
- titleChars = self._counts.get("titleChars", 0)
+ allChars = self._counts.get(nwStats.CHARS, 0)
+ textChars = self._counts.get(nwStats.CHARS_TEXT, 0)
+ titleChars = self._counts.get(nwStats.CHARS_TITLE, 0)

- allWordChars = self._counts.get("allWordChars", 0)
- textWordChars = self._counts.get("textWordChars", 0)
- titleWordChars = self._counts.get("titleWordChars", 0)
+ allWordChars = self._counts.get(nwStats.WCHARS_ALL, 0)
+ textWordChars = self._counts.get(nwStats.WCHARS_TEXT, 0)
+ titleWordChars = self._counts.get(nwStats.WCHARS_TITLE, 0)

- for tType, _, tText, _, _ in self._tokens:
+ for tType, _, tText, _, _ in self._blocks:
  tText = tText.replace(nwUnicode.U_ENDASH, " ")
  tText = tText.replace(nwUnicode.U_EMDASH, " ")

@@ -993,7 +973,7 @@
  nChars = len(tText)
  nWChars = len("".join(tWords))

- if tType == self.T_TEXT:
+ if tType == BlockTyp.TEXT:
  tPWords = tText.split()
  nPWords = len(tPWords)
  nPChars = len(tText)
@@ -1007,7 +987,7 @@
  allWordChars += nPWChars
  textWordChars += nPWChars

- elif tType in self.L_HEADINGS:
+ elif tType in HEADINGS:
  titleCount += 1
  allWords += nWords
  titleWords += nWords
@@ -1016,88 +996,110 @@
  titleChars += nChars
  titleWordChars += nWChars

- elif tType == self.T_SEP:
+ elif tType == BlockTyp.SEP:
  allWords += nWords
  allChars += nChars
  allWordChars += nWChars

- elif tType == self.T_SYNOPSIS and self._doSynopsis:
- text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
- words = text.split()
- allWords += len(words)
- allChars += len(text)
- allWordChars += len("".join(words))
-
- elif tType == self.T_SHORT and self._doSynopsis:
- text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
- words = text.split()
- allWords += len(words)
- allChars += len(text)
- allWordChars += len("".join(words))
-
- elif tType == self.T_COMMENT and self._doComments:
- text = "{0}: {1}".format(self._localLookup("Comment"), tText)
- words = text.split()
+ elif tType in (BlockTyp.COMMENT, BlockTyp.KEYWORD):
+ words = tText.split()
  allWords += len(words)
- allChars += len(text)
+ allChars += len(tText)
  allWordChars += len("".join(words))

- elif tType == self.T_KEYWORD and self._doKeywords:
- valid, bits, _ = self._project.index.scanThis("@"+tText)
- if valid and bits:
- key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
- text = "{0}: {1}".format(key, ", ".join(bits[1:]))
- words = text.split()
- allWords += len(words)
- allChars += len(text)
- allWordChars += len("".join(words))
+ self._counts[nwStats.TITLES] = titleCount
+ self._counts[nwStats.PARAGRAPHS] = paragraphCount

- self._counts["titleCount"] = titleCount
- self._counts["paragraphCount"] = paragraphCount
+ self._counts[nwStats.WORDS] = allWords
+ self._counts[nwStats.WORDS_TEXT] = textWords
+ self._counts[nwStats.WORDS_TITLE] = titleWords

- self._counts["allWords"] = allWords
- self._counts["textWords"] = textWords
- self._counts["titleWords"] = titleWords
+ self._counts[nwStats.CHARS] = allChars
+ self._counts[nwStats.CHARS_TEXT] = textChars
+ self._counts[nwStats.CHARS_TITLE] = titleChars

- self._counts["allChars"] = allChars
- self._counts["textChars"] = textChars
- self._counts["titleChars"] = titleChars
+ self._counts[nwStats.WCHARS_ALL] = allWordChars
+ self._counts[nwStats.WCHARS_TEXT] = textWordChars
+ self._counts[nwStats.WCHARS_TITLE] = titleWordChars

- self._counts["allWordChars"] = allWordChars
- self._counts["textWordChars"] = textWordChars
- self._counts["titleWordChars"] = titleWordChars
-
- return
-
- def saveRawMarkdown(self, path: str | Path) -> None:
- """Save the raw text to a plain text file."""
- with open(path, mode="w", encoding="utf-8") as outFile:
- for nwdPage in self._markdown:
- outFile.write(nwdPage)
- return
-
- def saveRawMarkdownJSON(self, path: str | Path) -> None:
- """Save the raw text to a JSON file."""
- timeStamp = time()
- data = {
- "meta": {
- "projectName": self._project.data.name,
- "novelAuthor": self._project.data.author,
- "buildTime": int(timeStamp),
- "buildTimeStr": formatTimeStamp(timeStamp),
- },
- "text": {
- "nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
- }
- }
- with open(path, mode="w", encoding="utf-8") as fObj:
- json.dump(data, fObj, indent=2)
  return

  ##
  # Internal Functions
  ##

+ def _formatInt(self, value: int) -> str:
+ """Return a localised integer."""
+ return self._dLocale.toString(value)
+
+ def _formatComment(self, style: ComStyle, key: str, text: str) -> tuple[str, T_Formats]:
+ """Apply formatting to comments and notes."""
+ tTxt, tFmt = self._extractFormats(text)
+ tFmt.insert(0, (0, TextFmt.COL_B, style.textClass))
+ tFmt.append((len(tTxt), TextFmt.COL_E, ""))
+ if label := (self._localLookup(style.label) + (f" ({key})" if key else "")).strip():
+ shift = len(label) + 2
+ tTxt = f"{label}: {tTxt}"
+ rFmt = [(0, TextFmt.B_B, ""), (shift - 1, TextFmt.B_E, "")]
+ if style.labelClass:
+ rFmt.insert(1, (0, TextFmt.COL_B, style.labelClass))
+ rFmt.insert(2, (shift - 1, TextFmt.COL_E, ""))
+ rFmt.extend((p + shift, f, d) for p, f, d in tFmt)
+ return tTxt, rFmt
+
+ def _formatMeta(self, text: str) -> tuple[str, str, T_Formats]:
+ """Apply formatting to a meta data line."""
+ tag = ""
+ txt = []
+ fmt = []
+ valid, bits, _ = self._project.index.scanThis(text)
+ if valid and bits and bits[0] in nwLabels.KEY_NAME and bits[0] not in self._skipKeywords:
+ tag = bits[0]
+ pos = 0
+ lbl = f"{self._localLookup(nwLabels.KEY_NAME[tag])}:"
+ end = len(lbl)
+ fmt = [
+ (pos, TextFmt.B_B, ""),
+ (pos, TextFmt.COL_B, "keyword"),
+ (end, TextFmt.COL_E, ""),
+ (end, TextFmt.B_E, ""),
+ ]
+ txt = [lbl, " "]
+ pos = end + 1
+
+ if (num := len(bits)) > 1:
+ if bits[0] == nwKeyWords.TAG_KEY:
+ one, two = self._project.index.parseValue(bits[1])
+ end = pos + len(one)
+ fmt.append((pos, TextFmt.COL_B, "tag"))
+ fmt.append((pos, TextFmt.ANM_B, f"tag_{one}".lower()))
+ fmt.append((end, TextFmt.ANM_E, ""))
+ fmt.append((end, TextFmt.COL_E, ""))
+ txt.append(one)
+ pos = end
+ if two:
+ txt.append(" | ")
+ pos += 3
+ end = pos + len(two)
+ fmt.append((pos, TextFmt.COL_B, "optional"))
+ fmt.append((end, TextFmt.COL_E, ""))
+ txt.append(two)
+ pos = end
+ else:
+ for n, bit in enumerate(bits[1:], 2):
+ end = pos + len(bit)
+ fmt.append((pos, TextFmt.COL_B, "tag"))
+ fmt.append((pos, TextFmt.ARF_B, f"#tag_{bit}".lower()))
+ fmt.append((end, TextFmt.ARF_E, ""))
+ fmt.append((end, TextFmt.COL_E, ""))
+ txt.append(bit)
+ pos = end
+ if n < num:
+ txt.append(", ")
+ pos += 2
+
+ return tag, "".join(txt), fmt
+
  def _extractFormats(
  self, text: str, skip: int = 0, hDialog: bool = False
  ) -> tuple[str, T_Formats]:
@@ -1109,56 +1111,55 @@

  # Match Markdown
  for regEx, fmts in self._rxMarkdown:
- rxItt = regEx.globalMatch(text, 0)
- while rxItt.hasNext():
- rxMatch = rxItt.next()
+ for res in regEx.finditer(text):
  temp.extend(
- (rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt, "")
+ (res.start(n), res.end(n), fmt, "")
  for n, fmt in enumerate(fmts) if fmt > 0
  )

+ # Match URLs
+ for res in REGEX_PATTERNS.url.finditer(text):
+ temp.append((res.start(0), 0, TextFmt.HRF_B, res.group(0)))
+ temp.append((res.end(0), 0, TextFmt.HRF_E, ""))
+
  # Match Shortcodes
- rxItt = self._rxShortCodes.globalMatch(text, 0)
- while rxItt.hasNext():
- rxMatch = rxItt.next()
+ for res in REGEX_PATTERNS.shortcodePlain.finditer(text):
  temp.append((
- rxMatch.capturedStart(1),
- rxMatch.capturedLength(1),
- self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
+ res.start(1), res.end(1),
+ self._shortCodeFmt.get(res.group(1).lower(), 0),
  "",
  ))

  # Match Shortcode w/Values
- rxItt = self._rxShortCodeVals.globalMatch(text, 0)
  tHandle = self._handle or ""
- while rxItt.hasNext():
- rxMatch = rxItt.next()
- kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
+ for res in REGEX_PATTERNS.shortcodeValue.finditer(text):
+ kind = self._shortCodeVals.get(res.group(1).lower(), 0)
  temp.append((
- rxMatch.capturedStart(0),
- rxMatch.capturedLength(0),
- self.FMT_STRIP if kind == skip else kind,
- f"{tHandle}:{rxMatch.captured(2)}",
+ res.start(0), res.end(0),
+ TextFmt.STRIP if kind == skip else kind,
+ f"{tHandle}:{res.group(2)}",
  ))

  # Match Dialogue
- if self._rxDialogue and hDialog:
- for regEx, fmtB, fmtE in self._rxDialogue:
- rxItt = regEx.globalMatch(text, 0)
- while rxItt.hasNext():
- rxMatch = rxItt.next()
- temp.append((rxMatch.capturedStart(0), 0, fmtB, ""))
- temp.append((rxMatch.capturedEnd(0), 0, fmtE, ""))
+ if self._hlightDialog and hDialog:
+ if self._dialogParser.enabled:
+ for pos, end in self._dialogParser(text):
+ temp.append((pos, 0, TextFmt.COL_B, "dialog"))
+ temp.append((end, 0, TextFmt.COL_E, ""))
+ if self._rxAltDialog:
+ for res in self._rxAltDialog.finditer(text):
+ temp.append((res.start(0), 0, TextFmt.COL_B, "altdialog"))
+ temp.append((res.end(0), 0, TextFmt.COL_E, ""))

  # Post-process text and format
  result = text
  formats = []
- for pos, n, fmt, key in reversed(sorted(temp, key=lambda x: x[0])):
+ for pos, end, fmt, meta in reversed(sorted(temp, key=lambda x: x[0])):
  if fmt > 0:
- if n > 0:
- result = result[:pos] + result[pos+n:]
- formats = [(p-n if p > pos else p, f, k) for p, f, k in formats]
- formats.insert(0, (pos, fmt, key))
+ if end > pos:
+ result = result[:pos] + result[end:]
+ formats = [(p+pos-end if p > pos else p, f, m) for p, f, m in formats]
+ formats.insert(0, (pos, fmt, meta))

  return result, formats

@@ -1204,6 +1205,7 @@ class HeadingFormatter:
  def apply(self, hFormat: str, text: str, nHead: int) -> str:
  """Apply formatting to a specific heading."""
  hFormat = hFormat.replace(nwHeadFmt.TITLE, text)
+ hFormat = hFormat.replace(nwHeadFmt.BR, "\n")
  hFormat = hFormat.replace(nwHeadFmt.CH_NUM, str(self._chCount))
  hFormat = hFormat.replace(nwHeadFmt.SC_NUM, str(self._scChCount))
  hFormat = hFormat.replace(nwHeadFmt.SC_ABS, str(self._scAbsCount))
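For orientation: the tokenizer diff above replaces the old integer-constant token tuples with enum-based five-tuples stored in Tokenizer._blocks. The following is a minimal, hypothetical sketch of what one such entry looks like, based only on the docstring and constants shown in the diff; the variable name and sample values are illustrative and assume novelWriter 2.6 is installed.

from novelwriter.formats.shared import BlockFmt, BlockTyp, TextFmt

# One entry in Tokenizer._blocks, per the tokenizeText() docstring:
# (block type, heading key, text, in-text formats, block format flags)
example_block = (
    BlockTyp.TEXT,                   # 1: the type of the block
    "",                              # 2: heading key, e.g. "<handle>:T0001" for headings
    "Some emphasised text",          # 3: text content without leading tags
    [(5, TextFmt.I_B, ""), (15, TextFmt.I_E, "")],  # 4: (position, format, meta) markers
    BlockFmt.CENTRE | BlockFmt.PBB,  # 5: block formatting flags
)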