novelWriter 2.5.1__py3-none-any.whl → 2.6b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. {novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/METADATA +2 -1
  2. {novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/RECORD +61 -56
  3. {novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/WHEEL +1 -1
  4. novelwriter/__init__.py +3 -3
  5. novelwriter/assets/i18n/project_en_GB.json +1 -0
  6. novelwriter/assets/icons/typicons_dark/icons.conf +1 -0
  7. novelwriter/assets/icons/typicons_dark/mixed_copy.svg +4 -0
  8. novelwriter/assets/icons/typicons_light/icons.conf +1 -0
  9. novelwriter/assets/icons/typicons_light/mixed_copy.svg +4 -0
  10. novelwriter/assets/manual.pdf +0 -0
  11. novelwriter/assets/sample.zip +0 -0
  12. novelwriter/assets/themes/default_light.conf +2 -2
  13. novelwriter/common.py +63 -0
  14. novelwriter/config.py +10 -3
  15. novelwriter/constants.py +153 -60
  16. novelwriter/core/buildsettings.py +66 -39
  17. novelwriter/core/coretools.py +34 -22
  18. novelwriter/core/docbuild.py +130 -169
  19. novelwriter/core/index.py +29 -18
  20. novelwriter/core/item.py +2 -2
  21. novelwriter/core/options.py +4 -1
  22. novelwriter/core/spellcheck.py +9 -14
  23. novelwriter/dialogs/preferences.py +45 -32
  24. novelwriter/dialogs/projectsettings.py +3 -3
  25. novelwriter/enum.py +29 -23
  26. novelwriter/extensions/configlayout.py +24 -11
  27. novelwriter/extensions/modified.py +13 -1
  28. novelwriter/extensions/pagedsidebar.py +5 -5
  29. novelwriter/formats/shared.py +155 -0
  30. novelwriter/formats/todocx.py +1195 -0
  31. novelwriter/formats/tohtml.py +452 -0
  32. novelwriter/{core → formats}/tokenizer.py +483 -485
  33. novelwriter/formats/tomarkdown.py +217 -0
  34. novelwriter/{core → formats}/toodt.py +270 -320
  35. novelwriter/formats/toqdoc.py +436 -0
  36. novelwriter/formats/toraw.py +91 -0
  37. novelwriter/gui/doceditor.py +240 -193
  38. novelwriter/gui/dochighlight.py +96 -84
  39. novelwriter/gui/docviewer.py +56 -30
  40. novelwriter/gui/docviewerpanel.py +3 -3
  41. novelwriter/gui/editordocument.py +17 -2
  42. novelwriter/gui/itemdetails.py +8 -4
  43. novelwriter/gui/mainmenu.py +121 -60
  44. novelwriter/gui/noveltree.py +35 -37
  45. novelwriter/gui/outline.py +186 -238
  46. novelwriter/gui/projtree.py +142 -131
  47. novelwriter/gui/sidebar.py +7 -6
  48. novelwriter/gui/theme.py +5 -4
  49. novelwriter/guimain.py +43 -155
  50. novelwriter/shared.py +14 -4
  51. novelwriter/text/counting.py +2 -0
  52. novelwriter/text/patterns.py +155 -59
  53. novelwriter/tools/manusbuild.py +1 -1
  54. novelwriter/tools/manuscript.py +121 -78
  55. novelwriter/tools/manussettings.py +403 -260
  56. novelwriter/tools/welcome.py +4 -4
  57. novelwriter/tools/writingstats.py +3 -3
  58. novelwriter/types.py +16 -6
  59. novelwriter/core/tohtml.py +0 -530
  60. novelwriter/core/tomarkdown.py +0 -252
  61. novelwriter/core/toqdoc.py +0 -419
  62. {novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/LICENSE.md +0 -0
  63. {novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/entry_points.txt +0 -0
  64. {novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/top_level.txt +0 -0
@@ -24,41 +24,56 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
24
24
  """
25
25
  from __future__ import annotations
26
26
 
27
- import json
28
27
  import logging
29
28
  import re
30
29
 
31
30
  from abc import ABC, abstractmethod
32
- from functools import partial
33
31
  from pathlib import Path
34
- from time import time
32
+ from typing import NamedTuple
35
33
 
36
- from PyQt5.QtCore import QCoreApplication, QRegularExpression
37
- from PyQt5.QtGui import QFont
34
+ from PyQt5.QtCore import QLocale
35
+ from PyQt5.QtGui import QColor, QFont
38
36
 
39
37
  from novelwriter import CONFIG
40
- from novelwriter.common import checkInt, formatTimeStamp, numberToRoman
41
- from novelwriter.constants import nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwUnicode, trConst
38
+ from novelwriter.common import checkInt, numberToRoman
39
+ from novelwriter.constants import (
40
+ nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwStats, nwStyles, nwUnicode,
41
+ trConst
42
+ )
42
43
  from novelwriter.core.index import processComment
43
44
  from novelwriter.core.project import NWProject
44
45
  from novelwriter.enum import nwComment, nwItemLayout
45
- from novelwriter.text.patterns import REGEX_PATTERNS
46
+ from novelwriter.formats.shared import (
47
+ BlockFmt, BlockTyp, T_Block, T_Formats, T_Note, TextDocumentTheme, TextFmt
48
+ )
49
+ from novelwriter.text.patterns import REGEX_PATTERNS, DialogParser
46
50
 
47
51
  logger = logging.getLogger(__name__)
48
52
 
49
- ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
50
- RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
51
53
 
52
- T_Formats = list[tuple[int, int, str]]
53
- T_Comment = tuple[str, T_Formats]
54
- T_Token = tuple[int, int, str, T_Formats, int]
54
+ class ComStyle(NamedTuple):
55
55
 
56
+ label: str = ""
57
+ labelClass: str = ""
58
+ textClass: str = ""
56
59
 
57
- def stripEscape(text: str) -> str:
58
- """Strip escaped Markdown characters from paragraph text."""
59
- if "\\" in text:
60
- return RX_ESC.sub(lambda x: ESCAPES[x.group(0)], text)
61
- return text
60
+
61
+ COMMENT_STYLE = {
62
+ nwComment.PLAIN: ComStyle("Comment", "comment", "comment"),
63
+ nwComment.IGNORE: ComStyle(),
64
+ nwComment.SYNOPSIS: ComStyle("Synopsis", "modifier", "synopsis"),
65
+ nwComment.SHORT: ComStyle("Short Description", "modifier", "synopsis"),
66
+ nwComment.NOTE: ComStyle("Note", "modifier", "note"),
67
+ nwComment.FOOTNOTE: ComStyle("", "modifier", "note"),
68
+ nwComment.COMMENT: ComStyle(),
69
+ nwComment.STORY: ComStyle("", "modifier", "note"),
70
+ }
71
+ HEADINGS = [BlockTyp.TITLE, BlockTyp.HEAD1, BlockTyp.HEAD2, BlockTyp.HEAD3, BlockTyp.HEAD4]
72
+ SKIP_INDENT = [
73
+ BlockTyp.TITLE, BlockTyp.HEAD1, BlockTyp.HEAD2, BlockTyp.HEAD2, BlockTyp.HEAD3,
74
+ BlockTyp.HEAD4, BlockTyp.SEP, BlockTyp.SKIP,
75
+ ]
76
+ B_EMPTY: T_Block = (BlockTyp.EMPTY, "", "", [], BlockFmt.NONE)
62
77
 
63
78
 
64
79
  class Tokenizer(ABC):
@@ -70,119 +85,71 @@ class Tokenizer(ABC):
70
85
  subclasses.
71
86
  """
72
87
 
73
- # In-Text Format
74
- FMT_B_B = 1 # Begin bold
75
- FMT_B_E = 2 # End bold
76
- FMT_I_B = 3 # Begin italics
77
- FMT_I_E = 4 # End italics
78
- FMT_D_B = 5 # Begin strikeout
79
- FMT_D_E = 6 # End strikeout
80
- FMT_U_B = 7 # Begin underline
81
- FMT_U_E = 8 # End underline
82
- FMT_M_B = 9 # Begin mark
83
- FMT_M_E = 10 # End mark
84
- FMT_SUP_B = 11 # Begin superscript
85
- FMT_SUP_E = 12 # End superscript
86
- FMT_SUB_B = 13 # Begin subscript
87
- FMT_SUB_E = 14 # End subscript
88
- FMT_DL_B = 15 # Begin dialogue
89
- FMT_DL_E = 16 # End dialogue
90
- FMT_ADL_B = 17 # Begin alt dialogue
91
- FMT_ADL_E = 18 # End alt dialogue
92
- FMT_FNOTE = 19 # Footnote marker
93
- FMT_STRIP = 20 # Strip the format code
94
-
95
- # Block Type
96
- T_EMPTY = 1 # Empty line (new paragraph)
97
- T_SYNOPSIS = 2 # Synopsis comment
98
- T_SHORT = 3 # Short description comment
99
- T_COMMENT = 4 # Comment line
100
- T_KEYWORD = 5 # Command line
101
- T_TITLE = 6 # Title
102
- T_HEAD1 = 7 # Heading 1
103
- T_HEAD2 = 8 # Heading 2
104
- T_HEAD3 = 9 # Heading 3
105
- T_HEAD4 = 10 # Heading 4
106
- T_TEXT = 11 # Text line
107
- T_SEP = 12 # Scene separator
108
- T_SKIP = 13 # Paragraph break
109
-
110
- # Block Style
111
- A_NONE = 0x0000 # No special style
112
- A_LEFT = 0x0001 # Left aligned
113
- A_RIGHT = 0x0002 # Right aligned
114
- A_CENTRE = 0x0004 # Centred
115
- A_JUSTIFY = 0x0008 # Justified
116
- A_PBB = 0x0010 # Page break before
117
- A_PBA = 0x0020 # Page break after
118
- A_Z_TOPMRG = 0x0040 # Zero top margin
119
- A_Z_BTMMRG = 0x0080 # Zero bottom margin
120
- A_IND_L = 0x0100 # Left indentation
121
- A_IND_R = 0x0200 # Right indentation
122
- A_IND_T = 0x0400 # Text indentation
123
-
124
- # Masks
125
- M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY
126
-
127
- # Lookups
128
- L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
129
- L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
130
- L_SUMMARY = [T_SYNOPSIS, T_SHORT]
131
-
132
88
  def __init__(self, project: NWProject) -> None:
133
89
 
134
90
  self._project = project
135
91
 
136
92
  # Data Variables
137
- self._text = "" # The raw text to be tokenized
138
- self._handle = None # The item handle currently being processed
139
- self._result = "" # The result of the last document
140
- self._keepMD = False # Whether to keep the markdown text
141
-
142
- # Tokens and Meta Data (Per Document)
143
- self._tokens: list[T_Token] = []
144
- self._footnotes: dict[str, T_Comment] = {}
145
-
146
- # Tokens and Meta Data (Per Instance)
93
+ self._text = "" # The raw text to be tokenized
94
+ self._handle = None # The item handle currently being processed
95
+ self._keepRaw = False # Whether to keep the raw text, used by ToRaw
96
+ self._noTokens = False # Disable tokenization if they're not needed
97
+
98
+ # Blocks and Meta Data (Per Document)
99
+ self._blocks: list[T_Block] = []
100
+ self._footnotes: dict[str, T_Note] = {}
101
+
102
+ # Blocks and Meta Data (Per Instance)
103
+ self._raw: list[str] = []
104
+ self._pages: list[str] = []
147
105
  self._counts: dict[str, int] = {}
148
106
  self._outline: dict[str, str] = {}
149
- self._markdown: list[str] = []
150
107
 
151
108
  # User Settings
109
+ self._dLocale = CONFIG.locale # The document locale
152
110
  self._textFont = QFont("Serif", 11) # Output text font
153
- self._lineHeight = 1.15 # Line height in units of em
154
- self._blockIndent = 4.00 # Block indent in units of em
155
- self._firstIndent = False # Enable first line indent
156
- self._firstWidth = 1.40 # First line indent in units of em
157
- self._indentFirst = False # Indent first paragraph
158
- self._doJustify = False # Justify text
159
- self._doBodyText = True # Include body text
160
- self._doSynopsis = False # Also process synopsis comments
161
- self._doComments = False # Also process comments
162
- self._doKeywords = False # Also process keywords like tags and references
163
- self._skipKeywords = set() # Keywords to ignore
164
- self._keepBreaks = True # Keep line breaks in paragraphs
111
+ self._lineHeight = 1.15 # Line height in units of em
112
+ self._colorHeads = True # Colourise headings
113
+ self._scaleHeads = True # Scale headings to larger font size
114
+ self._boldHeads = True # Bold headings
115
+ self._blockIndent = 4.00 # Block indent in units of em
116
+ self._firstIndent = False # Enable first line indent
117
+ self._firstWidth = 1.40 # First line indent in units of em
118
+ self._indentFirst = False # Indent first paragraph
119
+ self._doJustify = False # Justify text
120
+ self._doBodyText = True # Include body text
121
+ self._doSynopsis = False # Also process synopsis comments
122
+ self._doComments = False # Also process comments
123
+ self._doKeywords = False # Also process keywords like tags and references
124
+ self._keepBreaks = True # Keep line breaks in paragraphs
125
+ self._defaultAlign = "left" # The default text alignment
126
+
127
+ self._skipKeywords: set[str] = set() # Keywords to ignore
128
+
129
+ # Other Settings
130
+ self._theme = TextDocumentTheme()
131
+ self._classes: dict[str, QColor] = {}
165
132
 
166
133
  # Margins
167
- self._marginTitle = (1.417, 0.500)
168
- self._marginHead1 = (1.417, 0.500)
169
- self._marginHead2 = (1.668, 0.500)
170
- self._marginHead3 = (1.168, 0.500)
171
- self._marginHead4 = (1.168, 0.500)
172
- self._marginText = (0.000, 0.584)
173
- self._marginMeta = (0.000, 0.584)
174
- self._marginFoot = (1.417, 0.467)
175
- self._marginSep = (1.168, 1.168)
134
+ self._marginTitle = nwStyles.T_MARGIN["H0"]
135
+ self._marginHead1 = nwStyles.T_MARGIN["H1"]
136
+ self._marginHead2 = nwStyles.T_MARGIN["H2"]
137
+ self._marginHead3 = nwStyles.T_MARGIN["H3"]
138
+ self._marginHead4 = nwStyles.T_MARGIN["H4"]
139
+ self._marginText = nwStyles.T_MARGIN["TT"]
140
+ self._marginMeta = nwStyles.T_MARGIN["MT"]
141
+ self._marginFoot = nwStyles.T_MARGIN["FT"]
142
+ self._marginSep = nwStyles.T_MARGIN["SP"]
176
143
 
177
144
  # Title Formats
178
- self._fmtTitle = nwHeadFmt.TITLE # Formatting for titles
145
+ self._fmtPart = nwHeadFmt.TITLE # Formatting for partitions
179
146
  self._fmtChapter = nwHeadFmt.TITLE # Formatting for numbered chapters
180
147
  self._fmtUnNum = nwHeadFmt.TITLE # Formatting for unnumbered chapters
181
148
  self._fmtScene = nwHeadFmt.TITLE # Formatting for scenes
182
149
  self._fmtHScene = nwHeadFmt.TITLE # Formatting for hard scenes
183
150
  self._fmtSection = nwHeadFmt.TITLE # Formatting for sections
184
151
 
185
- self._hideTitle = False # Do not include title headings
152
+ self._hidePart = False # Do not include partition headings
186
153
  self._hideChapter = False # Do not include chapter headings
187
154
  self._hideUnNum = False # Do not include unnumbered headings
188
155
  self._hideScene = False # Do not include scene headings
@@ -191,15 +158,16 @@ class Tokenizer(ABC):
191
158
 
192
159
  self._linkHeadings = False # Add an anchor before headings
193
160
 
194
- self._titleStyle = self.A_CENTRE | self.A_PBB
195
- self._chapterStyle = self.A_PBB
196
- self._sceneStyle = self.A_NONE
161
+ self._titleStyle = BlockFmt.CENTRE | BlockFmt.PBB
162
+ self._partStyle = BlockFmt.CENTRE | BlockFmt.PBB
163
+ self._chapterStyle = BlockFmt.PBB
164
+ self._sceneStyle = BlockFmt.NONE
197
165
 
198
166
  # Instance Variables
199
167
  self._hFormatter = HeadingFormatter(self._project)
200
168
  self._noSep = True # Flag to indicate that we don't want a scene separator
201
169
  self._noIndent = False # Flag to disable text indent on next paragraph
202
- self._showDialog = False # Flag for dialogue highlighting
170
+ self._breakNext = False # Add a page break on next token
203
171
 
204
172
  # This File
205
173
  self._isNovel = False # Document is a novel document
@@ -210,31 +178,33 @@ class Tokenizer(ABC):
210
178
 
211
179
  # Function Mapping
212
180
  self._localLookup = self._project.localLookup
213
- self.tr = partial(QCoreApplication.translate, "Tokenizer")
214
181
 
215
182
  # Format RegEx
216
183
  self._rxMarkdown = [
217
- (REGEX_PATTERNS.markdownItalic, [0, self.FMT_I_B, 0, self.FMT_I_E]),
218
- (REGEX_PATTERNS.markdownBold, [0, self.FMT_B_B, 0, self.FMT_B_E]),
219
- (REGEX_PATTERNS.markdownStrike, [0, self.FMT_D_B, 0, self.FMT_D_E]),
184
+ (REGEX_PATTERNS.markdownItalic, [0, TextFmt.I_B, 0, TextFmt.I_E]),
185
+ (REGEX_PATTERNS.markdownBold, [0, TextFmt.B_B, 0, TextFmt.B_E]),
186
+ (REGEX_PATTERNS.markdownStrike, [0, TextFmt.D_B, 0, TextFmt.D_E]),
220
187
  ]
221
- self._rxShortCodes = REGEX_PATTERNS.shortcodePlain
222
- self._rxShortCodeVals = REGEX_PATTERNS.shortcodeValue
223
188
 
224
189
  self._shortCodeFmt = {
225
- nwShortcode.ITALIC_O: self.FMT_I_B, nwShortcode.ITALIC_C: self.FMT_I_E,
226
- nwShortcode.BOLD_O: self.FMT_B_B, nwShortcode.BOLD_C: self.FMT_B_E,
227
- nwShortcode.STRIKE_O: self.FMT_D_B, nwShortcode.STRIKE_C: self.FMT_D_E,
228
- nwShortcode.ULINE_O: self.FMT_U_B, nwShortcode.ULINE_C: self.FMT_U_E,
229
- nwShortcode.MARK_O: self.FMT_M_B, nwShortcode.MARK_C: self.FMT_M_E,
230
- nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
231
- nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
190
+ nwShortcode.ITALIC_O: TextFmt.I_B, nwShortcode.ITALIC_C: TextFmt.I_E,
191
+ nwShortcode.BOLD_O: TextFmt.B_B, nwShortcode.BOLD_C: TextFmt.B_E,
192
+ nwShortcode.STRIKE_O: TextFmt.D_B, nwShortcode.STRIKE_C: TextFmt.D_E,
193
+ nwShortcode.ULINE_O: TextFmt.U_B, nwShortcode.ULINE_C: TextFmt.U_E,
194
+ nwShortcode.MARK_O: TextFmt.M_B, nwShortcode.MARK_C: TextFmt.M_E,
195
+ nwShortcode.SUP_O: TextFmt.SUP_B, nwShortcode.SUP_C: TextFmt.SUP_E,
196
+ nwShortcode.SUB_O: TextFmt.SUB_B, nwShortcode.SUB_C: TextFmt.SUB_E,
232
197
  }
233
198
  self._shortCodeVals = {
234
- nwShortcode.FOOTNOTE_B: self.FMT_FNOTE,
199
+ nwShortcode.FOOTNOTE_B: TextFmt.FNOTE,
200
+ nwShortcode.FIELD_B: TextFmt.FIELD,
235
201
  }
236
202
 
237
- self._rxDialogue: list[tuple[QRegularExpression, int, int]] = []
203
+ # Dialogue
204
+ self._hlightDialog = False
205
+ self._rxAltDialog = REGEX_PATTERNS.altDialogStyle
206
+ self._dialogParser = DialogParser()
207
+ self._dialogParser.initParser()
238
208
 
239
209
  return
240
210
 
@@ -242,16 +212,6 @@ class Tokenizer(ABC):
242
212
  # Properties
243
213
  ##
244
214
 
245
- @property
246
- def result(self) -> str:
247
- """The result of the build process."""
248
- return self._result
249
-
250
- @property
251
- def allMarkdown(self) -> list[str]:
252
- """The combined novelWriter Markdown text."""
253
- return self._markdown
254
-
255
215
  @property
256
216
  def textStats(self) -> dict[str, int]:
257
217
  """The collected stats about the text."""
@@ -271,10 +231,21 @@ class Tokenizer(ABC):
271
231
  # Setters
272
232
  ##
273
233
 
274
- def setTitleFormat(self, hFormat: str, hide: bool = False) -> None:
275
- """Set the title format pattern."""
276
- self._fmtTitle = hFormat.strip()
277
- self._hideTitle = hide
234
+ def setLanguage(self, language: str | None) -> None:
235
+ """Set language for the document."""
236
+ if language:
237
+ self._dLocale = QLocale(language)
238
+ return
239
+
240
+ def setTheme(self, theme: TextDocumentTheme) -> None:
241
+ """Set the document colour theme."""
242
+ self._theme = theme
243
+ return
244
+
245
+ def setPartitionFormat(self, hFormat: str, hide: bool = False) -> None:
246
+ """Set the partition format pattern."""
247
+ self._fmtPart = hFormat.strip()
248
+ self._hidePart = hide
278
249
  return
279
250
 
280
251
  def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
@@ -309,23 +280,26 @@ class Tokenizer(ABC):
309
280
 
310
281
  def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
311
282
  """Set the title heading style."""
312
- self._titleStyle = (
313
- (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
314
- )
283
+ self._titleStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
284
+ self._titleStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
285
+ return
286
+
287
+ def setPartitionStyle(self, center: bool, pageBreak: bool) -> None:
288
+ """Set the partition heading style."""
289
+ self._partStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
290
+ self._partStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
315
291
  return
316
292
 
317
293
  def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
318
294
  """Set the chapter heading style."""
319
- self._chapterStyle = (
320
- (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
321
- )
295
+ self._chapterStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
296
+ self._chapterStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
322
297
  return
323
298
 
324
299
  def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
325
300
  """Set the scene heading style."""
326
- self._sceneStyle = (
327
- (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
328
- )
301
+ self._sceneStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
302
+ self._sceneStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
329
303
  return
330
304
 
331
305
  def setFont(self, font: QFont) -> None:
@@ -338,6 +312,13 @@ class Tokenizer(ABC):
338
312
  self._lineHeight = min(max(float(height), 0.5), 5.0)
339
313
  return
340
314
 
315
+ def setHeadingStyles(self, color: bool, scale: bool, bold: bool) -> None:
316
+ """Set text style for headings."""
317
+ self._colorHeads = color
318
+ self._scaleHeads = scale
319
+ self._boldHeads = bold
320
+ return
321
+
341
322
  def setBlockIndent(self, indent: float) -> None:
342
323
  """Set the block indent between 0.0 and 10.0."""
343
324
  self._blockIndent = min(max(float(indent), 0.0), 10.0)
@@ -357,27 +338,9 @@ class Tokenizer(ABC):
357
338
  self._doJustify = state
358
339
  return
359
340
 
360
- def setDialogueHighlight(self, state: bool) -> None:
341
+ def setDialogHighlight(self, state: bool) -> None:
361
342
  """Enable or disable dialogue highlighting."""
362
- self._rxDialogue = []
363
- self._showDialog = state
364
- if state:
365
- if CONFIG.dialogStyle > 0:
366
- self._rxDialogue.append((
367
- REGEX_PATTERNS.dialogStyle, self.FMT_DL_B, self.FMT_DL_E
368
- ))
369
- if CONFIG.dialogLine:
370
- self._rxDialogue.append((
371
- REGEX_PATTERNS.dialogLine, self.FMT_DL_B, self.FMT_DL_E
372
- ))
373
- if CONFIG.narratorBreak:
374
- self._rxDialogue.append((
375
- REGEX_PATTERNS.narratorBreak, self.FMT_DL_E, self.FMT_DL_B
376
- ))
377
- if CONFIG.altDialogOpen and CONFIG.altDialogClose:
378
- self._rxDialogue.append((
379
- REGEX_PATTERNS.altDialogStyle, self.FMT_ADL_B, self.FMT_ADL_E
380
- ))
343
+ self._hlightDialog = state
381
344
  return
382
345
 
383
346
  def setTitleMargins(self, upper: float, lower: float) -> None:
@@ -455,11 +418,6 @@ class Tokenizer(ABC):
455
418
  self._keepBreaks = state
456
419
  return
457
420
 
458
- def setKeepMarkdown(self, state: bool) -> None:
459
- """Keep original markdown during build."""
460
- self._keepMD = state
461
- return
462
-
463
421
  ##
464
422
  # Class Methods
465
423
  ##
@@ -468,27 +426,54 @@ class Tokenizer(ABC):
468
426
  def doConvert(self) -> None:
469
427
  raise NotImplementedError
470
428
 
429
+ @abstractmethod
430
+ def closeDocument(self) -> None:
431
+ raise NotImplementedError
432
+
433
+ @abstractmethod
434
+ def saveDocument(self, path: Path) -> None:
435
+ raise NotImplementedError
436
+
437
+ def initDocument(self) -> None:
438
+ """Initialise data after settings."""
439
+ self._classes["modifier"] = self._theme.modifier
440
+ self._classes["synopsis"] = self._theme.note
441
+ self._classes["comment"] = self._theme.comment
442
+ self._classes["dialog"] = self._theme.dialog
443
+ self._classes["altdialog"] = self._theme.altdialog
444
+ self._classes["tag"] = self._theme.tag
445
+ self._classes["keyword"] = self._theme.keyword
446
+ self._classes["optional"] = self._theme.optional
447
+ return
448
+
449
+ def setBreakNext(self) -> None:
450
+ """Set a page break for next block."""
451
+ self._breakNext = True
452
+ return
453
+
471
454
  def addRootHeading(self, tHandle: str) -> None:
472
455
  """Add a heading at the start of a new root folder."""
473
456
  self._text = ""
474
457
  self._handle = None
475
458
 
476
- if (tItem := self._project.tree[tHandle]) and tItem.isRootType():
459
+ if (item := self._project.tree[tHandle]) and item.isRootType():
477
460
  self._handle = tHandle
461
+ style = BlockFmt.CENTRE
478
462
  if self._isFirst:
479
- textAlign = self.A_CENTRE
480
463
  self._isFirst = False
481
464
  else:
482
- textAlign = self.A_PBB | self.A_CENTRE
465
+ style |= BlockFmt.PBB
483
466
 
484
- trNotes = self._localLookup("Notes")
485
- title = f"{trNotes}: {tItem.itemName}"
486
- self._tokens = []
487
- self._tokens.append((
488
- self.T_TITLE, 1, title, [], textAlign
489
- ))
490
- if self._keepMD:
491
- self._markdown.append(f"#! {title}\n\n")
467
+ title = item.itemName
468
+ if not item.isNovelLike():
469
+ notes = self._localLookup("Notes")
470
+ title = f"{notes}: {title}"
471
+
472
+ self._blocks = [(
473
+ BlockTyp.TITLE, f"{self._handle}:T0001", title, [], style
474
+ )]
475
+ if self._keepRaw:
476
+ self._raw.append(f"#! {title}\n\n")
492
477
 
493
478
  return
494
479
 
@@ -515,9 +500,11 @@ class Tokenizer(ABC):
515
500
  xRep = re.compile("|".join([re.escape(k) for k in repDict.keys()]), flags=re.DOTALL)
516
501
  self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
517
502
 
518
- # Process the character translation map
519
- trDict = {nwUnicode.U_MAPOS: nwUnicode.U_RSQUO}
520
- self._text = self._text.translate(str.maketrans(trDict))
503
+ # Process the translation map for placeholder characters
504
+ self._text = self._text.translate(str.maketrans({
505
+ nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
506
+ nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
507
+ }))
521
508
 
522
509
  return
523
510
 
@@ -526,42 +513,50 @@ class Tokenizer(ABC):
526
513
  characters that indicate headings, comments, commands etc, or
527
514
  just contain plain text. In the case of plain text, apply the
528
515
  same RegExes that the syntax highlighter uses and save the
529
- locations of these formatting tags into the token array.
516
+ locations of these formatting tags into the blocks list.
530
517
 
531
- The format of the token list is an entry with a five-tuple for
518
+ The format of the blocs list is an entry with a five-tuple for
532
519
  each line in the file. The tuple is as follows:
533
- 1: The type of the block, self.T_*
520
+ 1: The type of the block, BlockType.*
534
521
  2: The heading number under which the text is placed
535
522
  3: The text content of the block, without leading tags
536
- 4: The internal formatting map of the text, self.FMT_*
537
- 5: The style of the block, self.A_*
523
+ 4: The internal formatting map of the text, TxtFmt.*
524
+ 5: The formats of the block, BlockFmt.*
538
525
  """
526
+ if self._keepRaw:
527
+ self._raw.append(f"{self._text.rstrip()}\n\n")
528
+ if self._noTokens:
529
+ return
539
530
  if self._isNovel:
540
531
  self._hFormatter.setHandle(self._handle)
541
532
 
533
+ # Cache Flags
534
+ isNovel = self._isNovel
535
+ doJustify = self._doJustify
536
+ keepBreaks = self._keepBreaks
537
+ indentFirst = self._indentFirst
538
+ firstIndent = self._firstIndent
539
+
540
+ # Replace all instances of [br] with a placeholder character
541
+ text = REGEX_PATTERNS.lineBreak.sub("\uffff", self._text)
542
+
542
543
  nHead = 0
543
- breakNext = False
544
- tmpMarkdown = []
545
544
  tHandle = self._handle or ""
546
- tokens: list[T_Token] = []
547
- for aLine in self._text.splitlines():
545
+ tBlocks: list[T_Block] = [B_EMPTY]
546
+ for bLine in text.splitlines():
547
+ aLine = bLine.replace("\uffff", "") # Remove placeholder characters
548
548
  sLine = aLine.strip().lower()
549
549
 
550
550
  # Check for blank lines
551
- if len(sLine) == 0:
552
- tokens.append((
553
- self.T_EMPTY, nHead, "", [], self.A_NONE
554
- ))
555
- if self._keepMD:
556
- tmpMarkdown.append("\n")
557
-
551
+ if not sLine:
552
+ tBlocks.append(B_EMPTY)
558
553
  continue
559
554
 
560
- if breakNext:
561
- sAlign = self.A_PBB
562
- breakNext = False
555
+ if self._breakNext:
556
+ tStyle = BlockFmt.PBB
557
+ self._breakNext = False
563
558
  else:
564
- sAlign = self.A_NONE
559
+ tStyle = BlockFmt.NONE
565
560
 
566
561
  # Check Line Format
567
562
  # =================
@@ -574,24 +569,24 @@ class Tokenizer(ABC):
574
569
  # therefore proceed to check other formats.
575
570
 
576
571
  if sLine in ("[newpage]", "[new page]"):
577
- breakNext = True
572
+ self._breakNext = True
578
573
  continue
579
574
 
580
575
  elif sLine == "[vspace]":
581
- tokens.append(
582
- (self.T_SKIP, nHead, "", [], sAlign)
576
+ tBlocks.append(
577
+ (BlockTyp.SKIP, "", "", [], tStyle)
583
578
  )
584
579
  continue
585
580
 
586
581
  elif sLine.startswith("[vspace:") and sLine.endswith("]"):
587
582
  nSkip = checkInt(sLine[8:-1], 0)
588
583
  if nSkip >= 1:
589
- tokens.append(
590
- (self.T_SKIP, nHead, "", [], sAlign)
584
+ tBlocks.append(
585
+ (BlockTyp.SKIP, "", "", [], tStyle)
591
586
  )
592
587
  if nSkip > 1:
593
- tokens += (nSkip - 1) * [
594
- (self.T_SKIP, nHead, "", [], self.A_NONE)
588
+ tBlocks += (nSkip - 1) * [
589
+ (BlockTyp.SKIP, "", "", [], BlockFmt.NONE)
595
590
  ]
596
591
  continue
597
592
 
@@ -605,32 +600,24 @@ class Tokenizer(ABC):
605
600
  continue
606
601
 
607
602
  cStyle, cKey, cText, _, _ = processComment(aLine)
608
- if cStyle == nwComment.SYNOPSIS:
609
- tLine, tFmt = self._extractFormats(cText)
610
- tokens.append((
611
- self.T_SYNOPSIS, nHead, tLine, tFmt, sAlign
612
- ))
613
- if self._doSynopsis and self._keepMD:
614
- tmpMarkdown.append(f"{aLine}\n")
615
- elif cStyle == nwComment.SHORT:
616
- tLine, tFmt = self._extractFormats(cText)
617
- tokens.append((
618
- self.T_SHORT, nHead, tLine, tFmt, sAlign
603
+ if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT) and not self._doSynopsis:
604
+ continue
605
+ if cStyle == nwComment.PLAIN and not self._doComments:
606
+ continue
607
+
608
+ if doJustify and not tStyle & BlockFmt.ALIGNED:
609
+ tStyle |= BlockFmt.JUSTIFY
610
+
611
+ if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT, nwComment.PLAIN):
612
+ bStyle = COMMENT_STYLE[cStyle]
613
+ tLine, tFmt = self._formatComment(bStyle, cKey, cText)
614
+ tBlocks.append((
615
+ BlockTyp.COMMENT, "", tLine, tFmt, tStyle
619
616
  ))
620
- if self._doSynopsis and self._keepMD:
621
- tmpMarkdown.append(f"{aLine}\n")
617
+
622
618
  elif cStyle == nwComment.FOOTNOTE:
623
- tLine, tFmt = self._extractFormats(cText, skip=self.FMT_FNOTE)
619
+ tLine, tFmt = self._extractFormats(cText, skip=TextFmt.FNOTE)
624
620
  self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
625
- if self._keepMD:
626
- tmpMarkdown.append(f"{aLine}\n")
627
- else:
628
- tLine, tFmt = self._extractFormats(cText)
629
- tokens.append((
630
- self.T_COMMENT, nHead, tLine, tFmt, sAlign
631
- ))
632
- if self._doComments and self._keepMD:
633
- tmpMarkdown.append(f"{aLine}\n")
634
621
 
635
622
  elif aLine.startswith("@"):
636
623
  # Keywords
@@ -638,16 +625,12 @@ class Tokenizer(ABC):
638
625
  # Only valid keyword lines are parsed, and any ignored keywords
639
626
  # are automatically skipped.
640
627
 
641
- valid, bits, _ = self._project.index.scanThis(aLine)
642
- if (
643
- valid and bits and bits[0] in nwLabels.KEY_NAME
644
- and bits[0] not in self._skipKeywords
645
- ):
646
- tokens.append((
647
- self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
648
- ))
649
- if self._doKeywords and self._keepMD:
650
- tmpMarkdown.append(f"{aLine}\n")
628
+ if self._doKeywords:
629
+ tTag, tLine, tFmt = self._formatMeta(aLine)
630
+ if tLine:
631
+ tBlocks.append((
632
+ BlockTyp.KEYWORD, tTag[1:], tLine, tFmt, tStyle
633
+ ))
651
634
 
652
635
  elif aLine.startswith(("# ", "#! ")):
653
636
  # Title or Partition Headings
@@ -662,28 +645,26 @@ class Tokenizer(ABC):
662
645
 
663
646
  nHead += 1
664
647
  tText = aLine[2:].strip()
665
- tType = self.T_HEAD1 if isPlain else self.T_TITLE
666
- tStyle = self.A_NONE if isPlain else (self.A_PBB | self.A_CENTRE)
667
- sHide = self._hideTitle if isPlain else False
668
- if self._isNovel:
648
+ tType = BlockTyp.HEAD1 if isPlain else BlockTyp.TITLE
649
+ sHide = self._hidePart if isPlain else False
650
+ if not (isPlain or isNovel and sHide):
651
+ tStyle |= self._titleStyle
652
+ if isNovel:
669
653
  if sHide:
670
654
  tText = ""
671
- tType = self.T_EMPTY
672
- tStyle = self.A_NONE
655
+ tType = BlockTyp.EMPTY
673
656
  elif isPlain:
674
- tText = self._hFormatter.apply(self._fmtTitle, tText, nHead)
675
- tStyle = self._titleStyle
657
+ tText = self._hFormatter.apply(self._fmtPart, tText, nHead)
658
+ tStyle |= self._partStyle
676
659
  if isPlain:
677
660
  self._hFormatter.resetScene()
678
661
  else:
679
662
  self._hFormatter.resetAll()
680
663
  self._noSep = True
681
664
 
682
- tokens.append((
683
- tType, nHead, tText, [], tStyle
665
+ tBlocks.append((
666
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
684
667
  ))
685
- if self._keepMD:
686
- tmpMarkdown.append(f"{aLine}\n")
687
668
 
688
669
  elif aLine.startswith(("## ", "##! ")):
689
670
  # (Unnumbered) Chapter Headings
@@ -698,27 +679,24 @@ class Tokenizer(ABC):
698
679
 
699
680
  nHead += 1
700
681
  tText = aLine[3:].strip()
701
- tType = self.T_HEAD2
702
- tStyle = self.A_NONE
682
+ tType = BlockTyp.HEAD2
703
683
  sHide = self._hideChapter if isPlain else self._hideUnNum
704
684
  tFormat = self._fmtChapter if isPlain else self._fmtUnNum
705
- if self._isNovel:
685
+ if isNovel:
706
686
  if isPlain:
707
687
  self._hFormatter.incChapter()
708
688
  if sHide:
709
689
  tText = ""
710
- tType = self.T_EMPTY
690
+ tType = BlockTyp.EMPTY
711
691
  else:
712
692
  tText = self._hFormatter.apply(tFormat, tText, nHead)
713
- tStyle = self._chapterStyle
693
+ tStyle |= self._chapterStyle
714
694
  self._hFormatter.resetScene()
715
695
  self._noSep = True
716
696
 
717
- tokens.append((
718
- tType, nHead, tText, [], tStyle
697
+ tBlocks.append((
698
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
719
699
  ))
720
- if self._keepMD:
721
- tmpMarkdown.append(f"{aLine}\n")
722
700
 
723
701
  elif aLine.startswith(("### ", "###! ")):
724
702
  # (Alternative) Scene Headings
@@ -735,31 +713,28 @@ class Tokenizer(ABC):
735
713
 
736
714
  nHead += 1
737
715
  tText = aLine[4:].strip()
738
- tType = self.T_HEAD3
739
- tStyle = self.A_NONE
716
+ tType = BlockTyp.HEAD3
740
717
  sHide = self._hideScene if isPlain else self._hideHScene
741
718
  tFormat = self._fmtScene if isPlain else self._fmtHScene
742
- if self._isNovel:
719
+ if isNovel:
743
720
  self._hFormatter.incScene()
744
721
  if sHide:
745
722
  tText = ""
746
- tType = self.T_EMPTY
723
+ tType = BlockTyp.EMPTY
747
724
  else:
748
725
  tText = self._hFormatter.apply(tFormat, tText, nHead)
749
- tStyle = self._sceneStyle
726
+ tStyle |= self._sceneStyle
750
727
  if tText == "": # Empty Format
751
- tType = self.T_EMPTY if self._noSep else self.T_SKIP
728
+ tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SKIP
752
729
  elif tText == tFormat: # Static Format
753
730
  tText = "" if self._noSep else tText
754
- tType = self.T_EMPTY if self._noSep else self.T_SEP
755
- tStyle = self.A_NONE if self._noSep else self.A_CENTRE
731
+ tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SEP
732
+ tStyle |= BlockFmt.NONE if self._noSep else BlockFmt.CENTRE
756
733
  self._noSep = False
757
734
 
758
- tokens.append((
759
- tType, nHead, tText, [], tStyle
735
+ tBlocks.append((
736
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
760
737
  ))
761
- if self._keepMD:
762
- tmpMarkdown.append(f"{aLine}\n")
763
738
 
764
739
  elif aLine.startswith("#### "):
765
740
  # Section Headings
@@ -771,25 +746,22 @@ class Tokenizer(ABC):
771
746
 
772
747
  nHead += 1
773
748
  tText = aLine[5:].strip()
774
- tType = self.T_HEAD4
775
- tStyle = self.A_NONE
776
- if self._isNovel:
749
+ tType = BlockTyp.HEAD4
750
+ if isNovel:
777
751
  if self._hideSection:
778
752
  tText = ""
779
- tType = self.T_EMPTY
753
+ tType = BlockTyp.EMPTY
780
754
  else:
781
755
  tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
782
756
  if tText == "": # Empty Format
783
- tType = self.T_SKIP
757
+ tType = BlockTyp.SKIP
784
758
  elif tText == self._fmtSection: # Static Format
785
- tType = self.T_SEP
786
- tStyle = self.A_CENTRE
759
+ tType = BlockTyp.SEP
760
+ tStyle |= BlockFmt.CENTRE
787
761
 
788
- tokens.append((
789
- tType, nHead, tText, [], tStyle
762
+ tBlocks.append((
763
+ tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
790
764
  ))
791
- if self._keepMD:
792
- tmpMarkdown.append(f"{aLine}\n")
793
765
 
794
766
  else:
795
767
  # Text Lines
@@ -805,135 +777,133 @@ class Tokenizer(ABC):
805
777
  alnRight = False
806
778
  indLeft = False
807
779
  indRight = False
808
- if aLine.startswith(">>"):
780
+ if bLine.startswith(">>"):
809
781
  alnRight = True
810
- aLine = aLine[2:].lstrip(" ")
811
- elif aLine.startswith(">"):
782
+ bLine = bLine[2:].lstrip(" ")
783
+ elif bLine.startswith(">"):
812
784
  indLeft = True
813
- aLine = aLine[1:].lstrip(" ")
785
+ bLine = bLine[1:].lstrip(" ")
814
786
 
815
- if aLine.endswith("<<"):
787
+ if bLine.endswith("<<"):
816
788
  alnLeft = True
817
- aLine = aLine[:-2].rstrip(" ")
818
- elif aLine.endswith("<"):
789
+ bLine = bLine[:-2].rstrip(" ")
790
+ elif bLine.endswith("<"):
819
791
  indRight = True
820
- aLine = aLine[:-1].rstrip(" ")
792
+ bLine = bLine[:-1].rstrip(" ")
821
793
 
822
794
  if alnLeft and alnRight:
823
- sAlign |= self.A_CENTRE
795
+ tStyle |= BlockFmt.CENTRE
824
796
  elif alnLeft:
825
- sAlign |= self.A_LEFT
797
+ tStyle |= BlockFmt.LEFT
826
798
  elif alnRight:
827
- sAlign |= self.A_RIGHT
799
+ tStyle |= BlockFmt.RIGHT
828
800
 
829
801
  if indLeft:
830
- sAlign |= self.A_IND_L
802
+ tStyle |= BlockFmt.IND_L
831
803
  if indRight:
832
- sAlign |= self.A_IND_R
804
+ tStyle |= BlockFmt.IND_R
833
805
 
834
806
  # Process formats
835
- tLine, tFmt = self._extractFormats(aLine)
836
- tokens.append((
837
- self.T_TEXT, nHead, tLine, tFmt, sAlign
807
+ tLine, tFmt = self._extractFormats(bLine, hDialog=isNovel)
808
+ tBlocks.append((
809
+ BlockTyp.TEXT, "", tLine, tFmt, tStyle
838
810
  ))
839
- if self._keepMD:
840
- tmpMarkdown.append(f"{aLine}\n")
841
811
 
842
812
  # If we have content, turn off the first page flag
843
- if self._isFirst and tokens:
813
+ if self._isFirst and len(tBlocks) > 1:
844
814
  self._isFirst = False # First document has been processed
845
815
 
846
- # Make sure the token array doesn't start with a page break
847
- # on the very first page, adding a blank first page.
848
- if tokens[0][4] & self.A_PBB:
849
- cToken = tokens[0]
850
- tokens[0] = (
851
- cToken[0], cToken[1], cToken[2], cToken[3], cToken[4] & ~self.A_PBB
852
- )
816
+ # Make sure the blocks array doesn't start with a page break
817
+ # on the very first block, adding a blank first page.
818
+ for n, cBlock in enumerate(tBlocks):
819
+ if cBlock[0] != BlockTyp.EMPTY:
820
+ if cBlock[4] & BlockFmt.PBB:
821
+ tBlocks[n] = (
822
+ cBlock[0], cBlock[1], cBlock[2], cBlock[3], cBlock[4] & ~BlockFmt.PBB
823
+ )
824
+ break
853
825
 
854
826
  # Always add an empty line at the end of the file
855
- tokens.append((
856
- self.T_EMPTY, nHead, "", [], self.A_NONE
857
- ))
858
- if self._keepMD:
859
- tmpMarkdown.append("\n")
860
- self._markdown.append("".join(tmpMarkdown))
827
+ tBlocks.append(B_EMPTY)
861
828
 
862
829
  # Second Pass
863
830
  # ===========
864
831
  # This second pass strips away consecutive blank lines, and
865
832
  # combines consecutive text lines into the same paragraph.
866
833
  # It also ensures that there isn't paragraph spacing between
867
- # meta data lines for formats that has spacing.
868
-
869
- self._tokens = []
870
- pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
871
- nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
834
+ # meta data lines for formats that have spacing.
872
835
 
873
- lineSep = "\n" if self._keepBreaks else " "
874
- pLines: list[T_Token] = []
836
+ lineSep = "\n" if keepBreaks else " "
875
837
 
876
- tCount = len(tokens)
877
- for n, cToken in enumerate(tokens):
838
+ pLines: list[T_Block] = []
839
+ sBlocks: list[T_Block] = []
840
+ for n, cBlock in enumerate(tBlocks[1:-1], 1):
878
841
 
879
- if n > 0:
880
- pToken = tokens[n-1] # Look behind
881
- if n < tCount - 1:
882
- nToken = tokens[n+1] # Look ahead
842
+ pBlock = tBlocks[n-1] # Look behind
843
+ nBlock = tBlocks[n+1] # Look ahead
883
844
 
884
- if cToken[0] in self.L_SKIP_INDENT and not self._indentFirst:
845
+ if cBlock[0] in SKIP_INDENT and not indentFirst:
885
846
  # Unless the indentFirst flag is set, we set up the next
886
847
  # paragraph to not be indented if we see a block of a
887
848
  # specific type
888
849
  self._noIndent = True
889
850
 
890
- if cToken[0] == self.T_EMPTY:
851
+ if cBlock[0] == BlockTyp.EMPTY:
891
852
  # We don't need to keep the empty lines after this pass
892
853
  pass
893
854
 
894
- elif cToken[0] == self.T_KEYWORD:
855
+ elif cBlock[0] == BlockTyp.KEYWORD:
895
856
  # Adjust margins for lines in a list of keyword lines
896
- aStyle = cToken[4]
897
- if pToken[0] == self.T_KEYWORD:
898
- aStyle |= self.A_Z_TOPMRG
899
- if nToken[0] == self.T_KEYWORD:
900
- aStyle |= self.A_Z_BTMMRG
901
- self._tokens.append((
902
- cToken[0], cToken[1], cToken[2], cToken[3], aStyle
857
+ aStyle = cBlock[4]
858
+ if pBlock[0] == BlockTyp.KEYWORD:
859
+ aStyle |= BlockFmt.Z_TOP
860
+ if nBlock[0] == BlockTyp.KEYWORD:
861
+ aStyle |= BlockFmt.Z_BTM
862
+ sBlocks.append((
863
+ cBlock[0], cBlock[1], cBlock[2], cBlock[3], aStyle
903
864
  ))
904
865
 
905
- elif cToken[0] == self.T_TEXT:
866
+ elif cBlock[0] == BlockTyp.TEXT:
906
867
  # Combine lines from the same paragraph
907
- pLines.append(cToken)
868
+ pLines.append(cBlock)
908
869
 
909
- if nToken[0] != self.T_TEXT:
910
- # Next token is not text, so we add the buffer to tokens
870
+ if nBlock[0] != BlockTyp.TEXT:
871
+ # Next block is not text, so we add the buffer to blocks
911
872
  nLines = len(pLines)
912
873
  cStyle = pLines[0][4]
913
- if self._firstIndent and not (self._noIndent or cStyle & self.M_ALIGNED):
874
+ if firstIndent and not (self._noIndent or cStyle & BlockFmt.ALIGNED):
914
875
  # If paragraph indentation is enabled, not temporarily
915
876
  # turned off, and the block is not aligned, we add the
916
877
  # text indentation flag
917
- cStyle |= self.A_IND_T
878
+ cStyle |= BlockFmt.IND_T
918
879
 
919
880
  if nLines == 1:
920
- # The paragraph contains a single line, so we just
921
- # save that directly to the token list
922
- self._tokens.append((
923
- self.T_TEXT, pLines[0][1], pLines[0][2], pLines[0][3], cStyle
881
+ # The paragraph contains a single line, so we just save
882
+ # that directly to the blocks list. If justify is
883
+ # enabled, and there is no alignment, we apply it.
884
+ if doJustify and not cStyle & BlockFmt.ALIGNED:
885
+ cStyle |= BlockFmt.JUSTIFY
886
+
887
+ pTxt = pLines[0][2].replace("\uffff", "\n")
888
+ sBlocks.append((
889
+ BlockTyp.TEXT, pLines[0][1], pTxt, pLines[0][3], cStyle
924
890
  ))
891
+
925
892
  elif nLines > 1:
926
893
  # The paragraph contains multiple lines, so we need to
927
894
  # join them according to the line break policy, and
928
895
  # recompute all the formatting markers
929
896
  tTxt = ""
930
897
  tFmt: T_Formats = []
931
- for aToken in pLines:
898
+ for aBlock in pLines:
932
899
  tLen = len(tTxt)
933
- tTxt += f"{aToken[2]}{lineSep}"
934
- tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aToken[3])
935
- self._tokens.append((
936
- self.T_TEXT, pLines[0][1], tTxt[:-1], tFmt, cStyle
900
+ tTxt += f"{aBlock[2]}{lineSep}"
901
+ tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aBlock[3])
902
+ cStyle |= aBlock[4]
903
+
904
+ pTxt = tTxt[:-1].replace("\uffff", "\n")
905
+ sBlocks.append((
906
+ BlockTyp.TEXT, pLines[0][1], pTxt, tFmt, cStyle
937
907
  ))
938
908
 
939
909
  # Reset buffer and make sure text indent is on for next pass
@@ -941,50 +911,50 @@ class Tokenizer(ABC):
941
911
  self._noIndent = False
942
912
 
943
913
  else:
944
- self._tokens.append(cToken)
914
+ sBlocks.append(cBlock)
915
+
916
+ self._blocks = sBlocks
945
917
 
946
918
  return
947
919
 
948
920
  def buildOutline(self) -> None:
949
921
  """Build an outline of the text up to level 3 headings."""
950
- tHandle = self._handle or ""
951
922
  isNovel = self._isNovel
952
- for tType, nHead, tText, _, _ in self._tokens:
953
- if tType == self.T_TITLE:
923
+ for tType, tKey, tText, _, _ in self._blocks:
924
+ if tType == BlockTyp.TITLE:
954
925
  prefix = "TT"
955
- elif tType == self.T_HEAD1:
926
+ elif tType == BlockTyp.HEAD1:
956
927
  prefix = "PT" if isNovel else "H1"
957
- elif tType == self.T_HEAD2:
928
+ elif tType == BlockTyp.HEAD2:
958
929
  prefix = "CH" if isNovel else "H2"
959
- elif tType == self.T_HEAD3:
930
+ elif tType == BlockTyp.HEAD3:
960
931
  prefix = "SC" if isNovel else "H3"
961
932
  else:
962
933
  continue
963
934
 
964
- key = f"{tHandle}:T{nHead:04d}"
965
935
  text = tText.replace(nwHeadFmt.BR, " ").replace("&amp;", "&")
966
- self._outline[key] = f"{prefix}|{text}"
936
+ self._outline[tKey] = f"{prefix}|{text}"
967
937
 
968
938
  return
969
939
 
970
940
  def countStats(self) -> None:
971
941
  """Count stats on the tokenized text."""
972
- titleCount = self._counts.get("titleCount", 0)
973
- paragraphCount = self._counts.get("paragraphCount", 0)
942
+ titleCount = self._counts.get(nwStats.TITLES, 0)
943
+ paragraphCount = self._counts.get(nwStats.PARAGRAPHS, 0)
974
944
 
975
- allWords = self._counts.get("allWords", 0)
976
- textWords = self._counts.get("textWords", 0)
977
- titleWords = self._counts.get("titleWords", 0)
945
+ allWords = self._counts.get(nwStats.WORDS_ALL, 0)
946
+ textWords = self._counts.get(nwStats.WORDS_TEXT, 0)
947
+ titleWords = self._counts.get(nwStats.WORDS_TITLE, 0)
978
948
 
979
- allChars = self._counts.get("allChars", 0)
980
- textChars = self._counts.get("textChars", 0)
981
- titleChars = self._counts.get("titleChars", 0)
949
+ allChars = self._counts.get(nwStats.CHARS_ALL, 0)
950
+ textChars = self._counts.get(nwStats.CHARS_TEXT, 0)
951
+ titleChars = self._counts.get(nwStats.CHARS_TITLE, 0)
982
952
 
983
- allWordChars = self._counts.get("allWordChars", 0)
984
- textWordChars = self._counts.get("textWordChars", 0)
985
- titleWordChars = self._counts.get("titleWordChars", 0)
953
+ allWordChars = self._counts.get(nwStats.WCHARS_ALL, 0)
954
+ textWordChars = self._counts.get(nwStats.WCHARS_TEXT, 0)
955
+ titleWordChars = self._counts.get(nwStats.WCHARS_TITLE, 0)
986
956
 
987
- for tType, _, tText, _, _ in self._tokens:
957
+ for tType, _, tText, _, _ in self._blocks:
988
958
  tText = tText.replace(nwUnicode.U_ENDASH, " ")
989
959
  tText = tText.replace(nwUnicode.U_EMDASH, " ")
990
960
 
@@ -993,7 +963,7 @@ class Tokenizer(ABC):
993
963
  nChars = len(tText)
994
964
  nWChars = len("".join(tWords))
995
965
 
996
- if tType == self.T_TEXT:
966
+ if tType == BlockTyp.TEXT:
997
967
  tPWords = tText.split()
998
968
  nPWords = len(tPWords)
999
969
  nPChars = len(tText)
@@ -1007,7 +977,7 @@ class Tokenizer(ABC):
1007
977
  allWordChars += nPWChars
1008
978
  textWordChars += nPWChars
1009
979
 
1010
- elif tType in self.L_HEADINGS:
980
+ elif tType in HEADINGS:
1011
981
  titleCount += 1
1012
982
  allWords += nWords
1013
983
  titleWords += nWords
@@ -1016,143 +986,170 @@ class Tokenizer(ABC):
1016
986
  titleChars += nChars
1017
987
  titleWordChars += nWChars
1018
988
 
1019
- elif tType == self.T_SEP:
989
+ elif tType == BlockTyp.SEP:
1020
990
  allWords += nWords
1021
991
  allChars += nChars
1022
992
  allWordChars += nWChars
1023
993
 
1024
- elif tType == self.T_SYNOPSIS and self._doSynopsis:
1025
- text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
1026
- words = text.split()
1027
- allWords += len(words)
1028
- allChars += len(text)
1029
- allWordChars += len("".join(words))
1030
-
1031
- elif tType == self.T_SHORT and self._doSynopsis:
1032
- text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
1033
- words = text.split()
994
+ elif tType in (BlockTyp.COMMENT, BlockTyp.KEYWORD):
995
+ words = tText.split()
1034
996
  allWords += len(words)
1035
- allChars += len(text)
997
+ allChars += len(tText)
1036
998
  allWordChars += len("".join(words))
1037
999
 
1038
- elif tType == self.T_COMMENT and self._doComments:
1039
- text = "{0}: {1}".format(self._localLookup("Comment"), tText)
1040
- words = text.split()
1041
- allWords += len(words)
1042
- allChars += len(text)
1043
- allWordChars += len("".join(words))
1044
-
1045
- elif tType == self.T_KEYWORD and self._doKeywords:
1046
- valid, bits, _ = self._project.index.scanThis("@"+tText)
1047
- if valid and bits:
1048
- key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
1049
- text = "{0}: {1}".format(key, ", ".join(bits[1:]))
1050
- words = text.split()
1051
- allWords += len(words)
1052
- allChars += len(text)
1053
- allWordChars += len("".join(words))
1054
-
1055
- self._counts["titleCount"] = titleCount
1056
- self._counts["paragraphCount"] = paragraphCount
1000
+ self._counts[nwStats.TITLES] = titleCount
1001
+ self._counts[nwStats.PARAGRAPHS] = paragraphCount
1057
1002
 
1058
- self._counts["allWords"] = allWords
1059
- self._counts["textWords"] = textWords
1060
- self._counts["titleWords"] = titleWords
1003
+ self._counts[nwStats.WORDS_ALL] = allWords
1004
+ self._counts[nwStats.WORDS_TEXT] = textWords
1005
+ self._counts[nwStats.WORDS_TITLE] = titleWords
1061
1006
 
1062
- self._counts["allChars"] = allChars
1063
- self._counts["textChars"] = textChars
1064
- self._counts["titleChars"] = titleChars
1007
+ self._counts[nwStats.CHARS_ALL] = allChars
1008
+ self._counts[nwStats.CHARS_TEXT] = textChars
1009
+ self._counts[nwStats.CHARS_TITLE] = titleChars
1065
1010
 
1066
- self._counts["allWordChars"] = allWordChars
1067
- self._counts["textWordChars"] = textWordChars
1068
- self._counts["titleWordChars"] = titleWordChars
1011
+ self._counts[nwStats.WCHARS_ALL] = allWordChars
1012
+ self._counts[nwStats.WCHARS_TEXT] = textWordChars
1013
+ self._counts[nwStats.WCHARS_TITLE] = titleWordChars
1069
1014
 
1070
1015
  return
1071
1016
 
1072
- def saveRawMarkdown(self, path: str | Path) -> None:
1073
- """Save the raw text to a plain text file."""
1074
- with open(path, mode="w", encoding="utf-8") as outFile:
1075
- for nwdPage in self._markdown:
1076
- outFile.write(nwdPage)
1077
- return
1078
-
1079
- def saveRawMarkdownJSON(self, path: str | Path) -> None:
1080
- """Save the raw text to a JSON file."""
1081
- timeStamp = time()
1082
- data = {
1083
- "meta": {
1084
- "projectName": self._project.data.name,
1085
- "novelAuthor": self._project.data.author,
1086
- "buildTime": int(timeStamp),
1087
- "buildTimeStr": formatTimeStamp(timeStamp),
1088
- },
1089
- "text": {
1090
- "nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
1091
- }
1092
- }
1093
- with open(path, mode="w", encoding="utf-8") as fObj:
1094
- json.dump(data, fObj, indent=2)
1095
- return
1096
-
1097
1017
  ##
1098
1018
  # Internal Functions
1099
1019
  ##
1100
1020
 
1101
- def _extractFormats(self, text: str, skip: int = 0) -> tuple[str, T_Formats]:
1102
- """Extract format markers from a text paragraph."""
1021
+ def _formatInt(self, value: int) -> str:
1022
+ """Return a localised integer."""
1023
+ return self._dLocale.toString(value)
1024
+
1025
+ def _formatComment(self, style: ComStyle, key: str, text: str) -> tuple[str, T_Formats]:
1026
+ """Apply formatting to comments and notes."""
1027
+ tTxt, tFmt = self._extractFormats(text)
1028
+ tFmt.insert(0, (0, TextFmt.COL_B, style.textClass))
1029
+ tFmt.append((len(tTxt), TextFmt.COL_E, ""))
1030
+ if label := (self._localLookup(style.label) + (f" ({key})" if key else "")).strip():
1031
+ shift = len(label) + 2
1032
+ tTxt = f"{label}: {tTxt}"
1033
+ rFmt = [(0, TextFmt.B_B, ""), (shift - 1, TextFmt.B_E, "")]
1034
+ if style.labelClass:
1035
+ rFmt.insert(1, (0, TextFmt.COL_B, style.labelClass))
1036
+ rFmt.insert(2, (shift - 1, TextFmt.COL_E, ""))
1037
+ rFmt.extend((p + shift, f, d) for p, f, d in tFmt)
1038
+ return tTxt, rFmt
1039
+
1040
+ def _formatMeta(self, text: str) -> tuple[str, str, T_Formats]:
1041
+ """Apply formatting to a meta data line."""
1042
+ tag = ""
1043
+ txt = []
1044
+ fmt = []
1045
+ valid, bits, _ = self._project.index.scanThis(text)
1046
+ if valid and bits and bits[0] in nwLabels.KEY_NAME and bits[0] not in self._skipKeywords:
1047
+ tag = bits[0]
1048
+ pos = 0
1049
+ lbl = f"{self._localLookup(nwLabels.KEY_NAME[tag])}:"
1050
+ end = len(lbl)
1051
+ fmt = [
1052
+ (pos, TextFmt.B_B, ""),
1053
+ (pos, TextFmt.COL_B, "keyword"),
1054
+ (end, TextFmt.COL_E, ""),
1055
+ (end, TextFmt.B_E, ""),
1056
+ ]
1057
+ txt = [lbl, " "]
1058
+ pos = end + 1
1059
+
1060
+ if (num := len(bits)) > 1:
1061
+ if bits[0] == nwKeyWords.TAG_KEY:
1062
+ one, two = self._project.index.parseValue(bits[1])
1063
+ end = pos + len(one)
1064
+ fmt.append((pos, TextFmt.COL_B, "tag"))
1065
+ fmt.append((pos, TextFmt.ANM_B, f"tag_{one}".lower()))
1066
+ fmt.append((end, TextFmt.ANM_E, ""))
1067
+ fmt.append((end, TextFmt.COL_E, ""))
1068
+ txt.append(one)
1069
+ pos = end
1070
+ if two:
1071
+ txt.append(" | ")
1072
+ pos += 3
1073
+ end = pos + len(two)
1074
+ fmt.append((pos, TextFmt.COL_B, "optional"))
1075
+ fmt.append((end, TextFmt.COL_E, ""))
1076
+ txt.append(two)
1077
+ pos = end
1078
+ else:
1079
+ for n, bit in enumerate(bits[1:], 2):
1080
+ end = pos + len(bit)
1081
+ fmt.append((pos, TextFmt.COL_B, "tag"))
1082
+ fmt.append((pos, TextFmt.ARF_B, f"#tag_{bit}".lower()))
1083
+ fmt.append((end, TextFmt.ARF_E, ""))
1084
+ fmt.append((end, TextFmt.COL_E, ""))
1085
+ txt.append(bit)
1086
+ pos = end
1087
+ if n < num:
1088
+ txt.append(", ")
1089
+ pos += 2
1090
+
1091
+ return tag, "".join(txt), fmt
1092
+
1093
+ def _extractFormats(
1094
+ self, text: str, skip: int = 0, hDialog: bool = False
1095
+ ) -> tuple[str, T_Formats]:
1096
+ """Extract format markers from a text paragraph. In order to
1097
+ also process dialogue highlighting, the hDialog flag must be set
1098
+ to True. See issues #2011 and #2013.
1099
+ """
1103
1100
  temp: list[tuple[int, int, int, str]] = []
1104
1101
 
1105
1102
  # Match Markdown
1106
1103
  for regEx, fmts in self._rxMarkdown:
1107
- rxItt = regEx.globalMatch(text, 0)
1108
- while rxItt.hasNext():
1109
- rxMatch = rxItt.next()
1104
+ for res in regEx.finditer(text):
1110
1105
  temp.extend(
1111
- (rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt, "")
1106
+ (res.start(n), res.end(n), fmt, "")
1112
1107
  for n, fmt in enumerate(fmts) if fmt > 0
1113
1108
  )
1114
1109
 
1110
+ # Match URLs
1111
+ for res in REGEX_PATTERNS.url.finditer(text):
1112
+ temp.append((res.start(0), 0, TextFmt.HRF_B, res.group(0)))
1113
+ temp.append((res.end(0), 0, TextFmt.HRF_E, ""))
1114
+
1115
1115
  # Match Shortcodes
1116
- rxItt = self._rxShortCodes.globalMatch(text, 0)
1117
- while rxItt.hasNext():
1118
- rxMatch = rxItt.next()
1116
+ for res in REGEX_PATTERNS.shortcodePlain.finditer(text):
1119
1117
  temp.append((
1120
- rxMatch.capturedStart(1),
1121
- rxMatch.capturedLength(1),
1122
- self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
1118
+ res.start(1), res.end(1),
1119
+ self._shortCodeFmt.get(res.group(1).lower(), 0),
1123
1120
  "",
1124
1121
  ))
1125
1122
 
1126
1123
  # Match Shortcode w/Values
1127
- rxItt = self._rxShortCodeVals.globalMatch(text, 0)
1128
1124
  tHandle = self._handle or ""
1129
- while rxItt.hasNext():
1130
- rxMatch = rxItt.next()
1131
- kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
1125
+ for res in REGEX_PATTERNS.shortcodeValue.finditer(text):
1126
+ kind = self._shortCodeVals.get(res.group(1).lower(), 0)
1132
1127
  temp.append((
1133
- rxMatch.capturedStart(0),
1134
- rxMatch.capturedLength(0),
1135
- self.FMT_STRIP if kind == skip else kind,
1136
- f"{tHandle}:{rxMatch.captured(2)}",
1128
+ res.start(0), res.end(0),
1129
+ TextFmt.STRIP if kind == skip else kind,
1130
+ f"{tHandle}:{res.group(2)}",
1137
1131
  ))
1138
1132
 
1139
1133
  # Match Dialogue
1140
- if self._rxDialogue:
1141
- for regEx, fmtB, fmtE in self._rxDialogue:
1142
- rxItt = regEx.globalMatch(text, 0)
1143
- while rxItt.hasNext():
1144
- rxMatch = rxItt.next()
1145
- temp.append((rxMatch.capturedStart(0), 0, fmtB, ""))
1146
- temp.append((rxMatch.capturedEnd(0), 0, fmtE, ""))
1134
+ if self._hlightDialog and hDialog:
1135
+ if self._dialogParser.enabled:
1136
+ for pos, end in self._dialogParser(text):
1137
+ temp.append((pos, 0, TextFmt.COL_B, "dialog"))
1138
+ temp.append((end, 0, TextFmt.COL_E, ""))
1139
+ if self._rxAltDialog:
1140
+ for res in self._rxAltDialog.finditer(text):
1141
+ temp.append((res.start(0), 0, TextFmt.COL_B, "altdialog"))
1142
+ temp.append((res.end(0), 0, TextFmt.COL_E, ""))
1147
1143
 
1148
1144
  # Post-process text and format
1149
1145
  result = text
1150
1146
  formats = []
1151
- for pos, n, fmt, key in reversed(sorted(temp, key=lambda x: x[0])):
1147
+ for pos, end, fmt, meta in reversed(sorted(temp, key=lambda x: x[0])):
1152
1148
  if fmt > 0:
1153
- result = result[:pos] + result[pos+n:]
1154
- formats = [(p-n, f, k) for p, f, k in formats]
1155
- formats.insert(0, (pos, fmt, key))
1149
+ if end > pos:
1150
+ result = result[:pos] + result[end:]
1151
+ formats = [(p+pos-end if p > pos else p, f, m) for p, f, m in formats]
1152
+ formats.insert(0, (pos, fmt, meta))
1156
1153
 
1157
1154
  return result, formats
1158
1155
 
@@ -1198,6 +1195,7 @@ class HeadingFormatter:
1198
1195
  def apply(self, hFormat: str, text: str, nHead: int) -> str:
1199
1196
  """Apply formatting to a specific heading."""
1200
1197
  hFormat = hFormat.replace(nwHeadFmt.TITLE, text)
1198
+ hFormat = hFormat.replace(nwHeadFmt.BR, "\n")
1201
1199
  hFormat = hFormat.replace(nwHeadFmt.CH_NUM, str(self._chCount))
1202
1200
  hFormat = hFormat.replace(nwHeadFmt.SC_NUM, str(self._scChCount))
1203
1201
  hFormat = hFormat.replace(nwHeadFmt.SC_ABS, str(self._scAbsCount))