novelWriter 2.3rc1__py3-none-any.whl → 2.4b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. {novelWriter-2.3rc1.dist-info → novelWriter-2.4b1.dist-info}/METADATA +1 -1
  2. {novelWriter-2.3rc1.dist-info → novelWriter-2.4b1.dist-info}/RECORD +99 -85
  3. {novelWriter-2.3rc1.dist-info → novelWriter-2.4b1.dist-info}/WHEEL +1 -1
  4. novelWriter-2.4b1.dist-info/entry_points.txt +2 -0
  5. novelwriter/__init__.py +5 -5
  6. novelwriter/assets/i18n/nw_de_DE.qm +0 -0
  7. novelwriter/assets/i18n/nw_en_US.qm +0 -0
  8. novelwriter/assets/i18n/nw_es_419.qm +0 -0
  9. novelwriter/assets/i18n/nw_fr_FR.qm +0 -0
  10. novelwriter/assets/i18n/nw_it_IT.qm +0 -0
  11. novelwriter/assets/i18n/nw_ja_JP.qm +0 -0
  12. novelwriter/assets/i18n/nw_nb_NO.qm +0 -0
  13. novelwriter/assets/i18n/nw_nl_NL.qm +0 -0
  14. novelwriter/assets/i18n/project_nl_NL.json +11 -0
  15. novelwriter/assets/i18n/project_pt_BR.json +11 -0
  16. novelwriter/assets/icons/typicons_dark/icons.conf +4 -0
  17. novelwriter/assets/icons/typicons_dark/nw_tb-mark.svg +7 -0
  18. novelwriter/assets/icons/typicons_dark/typ_arrow-down.svg +4 -0
  19. novelwriter/assets/icons/typicons_dark/typ_arrow-right.svg +4 -0
  20. novelwriter/assets/icons/typicons_dark/typ_refresh-flipped.svg +1 -1
  21. novelwriter/assets/icons/typicons_dark/typ_refresh.svg +1 -1
  22. novelwriter/assets/icons/typicons_dark/typ_search-grey.svg +4 -0
  23. novelwriter/assets/icons/typicons_dark/typ_times.svg +1 -1
  24. novelwriter/assets/icons/typicons_light/icons.conf +4 -0
  25. novelwriter/assets/icons/typicons_light/nw_tb-mark.svg +7 -0
  26. novelwriter/assets/icons/typicons_light/typ_arrow-down.svg +4 -0
  27. novelwriter/assets/icons/typicons_light/typ_arrow-right.svg +4 -0
  28. novelwriter/assets/icons/typicons_light/typ_refresh-flipped.svg +1 -1
  29. novelwriter/assets/icons/typicons_light/typ_refresh.svg +1 -1
  30. novelwriter/assets/icons/typicons_light/typ_search-grey.svg +4 -0
  31. novelwriter/assets/icons/typicons_light/typ_times.svg +1 -1
  32. novelwriter/assets/manual.pdf +0 -0
  33. novelwriter/assets/sample.zip +0 -0
  34. novelwriter/assets/syntax/cyberpunk_night.conf +26 -0
  35. novelwriter/assets/syntax/default_dark.conf +1 -0
  36. novelwriter/assets/syntax/default_light.conf +1 -0
  37. novelwriter/assets/syntax/grey_dark.conf +1 -0
  38. novelwriter/assets/syntax/grey_light.conf +1 -0
  39. novelwriter/assets/syntax/light_owl.conf +1 -0
  40. novelwriter/assets/syntax/night_owl.conf +1 -0
  41. novelwriter/assets/syntax/solarized_dark.conf +1 -0
  42. novelwriter/assets/syntax/solarized_light.conf +1 -0
  43. novelwriter/assets/syntax/tango.conf +23 -0
  44. novelwriter/assets/syntax/tomorrow.conf +1 -0
  45. novelwriter/assets/syntax/tomorrow_night.conf +1 -0
  46. novelwriter/assets/syntax/tomorrow_night_blue.conf +1 -0
  47. novelwriter/assets/syntax/tomorrow_night_bright.conf +1 -0
  48. novelwriter/assets/syntax/tomorrow_night_eighties.conf +1 -0
  49. novelwriter/assets/text/credits_en.htm +25 -23
  50. novelwriter/assets/themes/cyberpunk_night.conf +29 -0
  51. novelwriter/common.py +1 -1
  52. novelwriter/config.py +35 -12
  53. novelwriter/constants.py +5 -6
  54. novelwriter/core/buildsettings.py +60 -40
  55. novelwriter/core/coretools.py +98 -13
  56. novelwriter/core/docbuild.py +74 -7
  57. novelwriter/core/document.py +24 -3
  58. novelwriter/core/index.py +31 -112
  59. novelwriter/core/project.py +11 -15
  60. novelwriter/core/projectxml.py +2 -1
  61. novelwriter/core/sessions.py +2 -2
  62. novelwriter/core/status.py +4 -4
  63. novelwriter/core/storage.py +16 -6
  64. novelwriter/core/tohtml.py +22 -25
  65. novelwriter/core/tokenizer.py +416 -236
  66. novelwriter/core/tomd.py +17 -8
  67. novelwriter/core/toodt.py +65 -7
  68. novelwriter/core/tree.py +8 -8
  69. novelwriter/dialogs/about.py +2 -2
  70. novelwriter/dialogs/docsplit.py +7 -8
  71. novelwriter/dialogs/preferences.py +3 -6
  72. novelwriter/dialogs/wordlist.py +1 -1
  73. novelwriter/enum.py +17 -14
  74. novelwriter/extensions/configlayout.py +22 -0
  75. novelwriter/extensions/modified.py +20 -2
  76. novelwriter/extensions/versioninfo.py +1 -1
  77. novelwriter/gui/doceditor.py +257 -279
  78. novelwriter/gui/dochighlight.py +29 -25
  79. novelwriter/gui/docviewer.py +139 -148
  80. novelwriter/gui/docviewerpanel.py +4 -24
  81. novelwriter/gui/editordocument.py +12 -1
  82. novelwriter/gui/itemdetails.py +6 -6
  83. novelwriter/gui/mainmenu.py +37 -17
  84. novelwriter/gui/noveltree.py +11 -19
  85. novelwriter/gui/outline.py +43 -20
  86. novelwriter/gui/projtree.py +88 -88
  87. novelwriter/gui/search.py +316 -0
  88. novelwriter/gui/sidebar.py +25 -30
  89. novelwriter/gui/theme.py +68 -8
  90. novelwriter/guimain.py +183 -178
  91. novelwriter/shared.py +26 -1
  92. novelwriter/text/__init__.py +3 -0
  93. novelwriter/text/counting.py +137 -0
  94. novelwriter/tools/manuscript.py +344 -55
  95. novelwriter/tools/manussettings.py +214 -71
  96. novelwriter/tools/noveldetails.py +1 -1
  97. novelwriter/tools/welcome.py +8 -9
  98. novelWriter-2.3rc1.dist-info/entry_points.txt +0 -5
  99. {novelWriter-2.3rc1.dist-info → novelWriter-2.4b1.dist-info}/LICENSE.md +0 -0
  100. {novelWriter-2.3rc1.dist-info → novelWriter-2.4b1.dist-info}/top_level.txt +0 -0
@@ -35,13 +35,13 @@ from functools import partial
35
35
 
36
36
  from PyQt5.QtCore import QCoreApplication, QRegularExpression
37
37
 
38
- from novelwriter.enum import nwComment, nwItemLayout
39
38
  from novelwriter.common import formatTimeStamp, numberToRoman, checkInt
40
39
  from novelwriter.constants import (
41
40
  nwHeadFmt, nwKeyWords, nwLabels, nwRegEx, nwShortcode, nwUnicode, trConst
42
41
  )
43
42
  from novelwriter.core.index import processComment
44
43
  from novelwriter.core.project import NWProject
44
+ from novelwriter.enum import nwComment, nwItemLayout
45
45
 
46
46
  logger = logging.getLogger(__name__)
47
47
 
@@ -74,10 +74,12 @@ class Tokenizer(ABC):
74
74
  FMT_D_E = 6 # End strikeout
75
75
  FMT_U_B = 7 # Begin underline
76
76
  FMT_U_E = 8 # End underline
77
- FMT_SUP_B = 9 # Begin superscript
78
- FMT_SUP_E = 10 # End superscript
79
- FMT_SUB_B = 11 # Begin subscript
80
- FMT_SUB_E = 12 # End subscript
77
+ FMT_M_B = 9 # Begin mark
78
+ FMT_M_E = 10 # End mark
79
+ FMT_SUP_B = 11 # Begin superscript
80
+ FMT_SUP_E = 12 # End superscript
81
+ FMT_SUB_B = 13 # Begin subscript
82
+ FMT_SUB_E = 14 # End subscript
81
83
 
82
84
  # Block Type
83
85
  T_EMPTY = 1 # Empty line (new paragraph)
@@ -86,14 +88,13 @@ class Tokenizer(ABC):
86
88
  T_COMMENT = 4 # Comment line
87
89
  T_KEYWORD = 5 # Command line
88
90
  T_TITLE = 6 # Title
89
- T_UNNUM = 7 # Unnumbered
90
- T_HEAD1 = 8 # Header 1
91
- T_HEAD2 = 9 # Header 2
92
- T_HEAD3 = 10 # Header 3
93
- T_HEAD4 = 11 # Header 4
94
- T_TEXT = 12 # Text line
95
- T_SEP = 13 # Scene separator
96
- T_SKIP = 14 # Paragraph break
91
+ T_HEAD1 = 7 # Heading 1
92
+ T_HEAD2 = 8 # Heading 2
93
+ T_HEAD3 = 9 # Heading 3
94
+ T_HEAD4 = 10 # Heading 4
95
+ T_TEXT = 11 # Text line
96
+ T_SEP = 12 # Scene separator
97
+ T_SKIP = 13 # Paragraph break
97
98
 
98
99
  # Block Style
99
100
  A_NONE = 0x0000 # No special style
@@ -108,32 +109,38 @@ class Tokenizer(ABC):
108
109
  A_IND_L = 0x0100 # Left indentation
109
110
  A_IND_R = 0x0200 # Right indentation
110
111
 
112
+ # Lookups
113
+ L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
114
+
111
115
  def __init__(self, project: NWProject) -> None:
112
116
 
113
117
  self._project = project
114
118
 
115
119
  # Data Variables
116
120
  self._text = "" # The raw text to be tokenized
117
- self._nwItem = None # The NWItem currently being processed
121
+ self._handle = None # The item handle currently being processed
118
122
  self._result = "" # The result of the last document
119
123
 
120
124
  self._keepMarkdown = False # Whether to keep the markdown text
121
125
  self._allMarkdown = [] # The result novelWriter markdown of all documents
122
126
 
123
- # Processed Tokens
127
+ # Processed Tokens and Meta Data
124
128
  self._tokens: list[tuple[int, int, str, list[tuple[int, int]], int]] = []
129
+ self._counts: dict[str, int] = {}
130
+ self._outline: dict[str, str] = {}
125
131
 
126
132
  # User Settings
127
- self._textFont = "Serif" # Output text font
128
- self._textSize = 11 # Output text size
129
- self._textFixed = False # Fixed width text
130
- self._lineHeight = 1.15 # Line height in units of em
131
- self._blockIndent = 4.00 # Block indent in units of em
132
- self._doJustify = False # Justify text
133
- self._doBodyText = True # Include body text
134
- self._doSynopsis = False # Also process synopsis comments
135
- self._doComments = False # Also process comments
136
- self._doKeywords = False # Also process keywords like tags and references
133
+ self._textFont = "Serif" # Output text font
134
+ self._textSize = 11 # Output text size
135
+ self._textFixed = False # Fixed width text
136
+ self._lineHeight = 1.15 # Line height in units of em
137
+ self._blockIndent = 4.00 # Block indent in units of em
138
+ self._doJustify = False # Justify text
139
+ self._doBodyText = True # Include body text
140
+ self._doSynopsis = False # Also process synopsis comments
141
+ self._doComments = False # Also process comments
142
+ self._doKeywords = False # Also process keywords like tags and references
143
+ self._skipKeywords = set() # Keywords to ignore
137
144
 
138
145
  # Margins
139
146
  self._marginTitle = (1.000, 0.500)
@@ -149,21 +156,28 @@ class Tokenizer(ABC):
149
156
  self._fmtChapter = nwHeadFmt.TITLE # Formatting for numbered chapters
150
157
  self._fmtUnNum = nwHeadFmt.TITLE # Formatting for unnumbered chapters
151
158
  self._fmtScene = nwHeadFmt.TITLE # Formatting for scenes
159
+ self._fmtHScene = nwHeadFmt.TITLE # Formatting for hard scenes
152
160
  self._fmtSection = nwHeadFmt.TITLE # Formatting for sections
153
161
 
154
- self._hideScene = False # Do not include scene headers
155
- self._hideSection = False # Do not include section headers
162
+ self._hideTitle = False # Do not include title headings
163
+ self._hideChapter = False # Do not include chapter headings
164
+ self._hideUnNum = False # Do not include unnumbered headings
165
+ self._hideScene = False # Do not include scene headings
166
+ self._hideHScene = False # Do not include hard scene headings
167
+ self._hideSection = False # Do not include section headings
156
168
 
157
- self._linkHeaders = False # Add an anchor before headers
169
+ self._linkHeadings = False # Add an anchor before headings
170
+
171
+ self._titleStyle = self.A_CENTRE | self.A_PBB
172
+ self._chapterStyle = self.A_PBB
173
+ self._sceneStyle = self.A_NONE
158
174
 
159
175
  # Instance Variables
160
176
  self._hFormatter = HeadingFormatter(self._project)
161
- self._allowSeparator = False # Flag to indicate that the first scene of the chapter
177
+ self._noSep = True # Flag to indicate that we don't want a scene separator
162
178
 
163
179
  # This File
164
- self._isNone = False # Document has unknown layout
165
180
  self._isNovel = False # Document is a novel document
166
- self._isNote = False # Document is a project note
167
181
  self._isFirst = True # Document is the first in a set
168
182
 
169
183
  # Error Handling
@@ -187,6 +201,7 @@ class Tokenizer(ABC):
187
201
  nwShortcode.BOLD_O: self.FMT_B_B, nwShortcode.BOLD_C: self.FMT_B_E,
188
202
  nwShortcode.STRIKE_O: self.FMT_D_B, nwShortcode.STRIKE_C: self.FMT_D_E,
189
203
  nwShortcode.ULINE_O: self.FMT_U_B, nwShortcode.ULINE_C: self.FMT_U_E,
204
+ nwShortcode.MARK_O: self.FMT_M_B, nwShortcode.MARK_C: self.FMT_M_E,
190
205
  nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
191
206
  nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
192
207
  }
@@ -203,12 +218,22 @@ class Tokenizer(ABC):
203
218
  return self._result
204
219
 
205
220
  @property
206
- def allMarkdown(self) -> list:
221
+ def allMarkdown(self) -> list[str]:
207
222
  """The combined novelWriter Markdown text."""
208
223
  return self._allMarkdown
209
224
 
210
225
  @property
211
- def errData(self) -> list:
226
+ def textStats(self) -> dict[str, int]:
227
+ """The collected stats about the text."""
228
+ return self._counts
229
+
230
+ @property
231
+ def textOutline(self) -> dict[str, str]:
232
+ """The generated outline of the text."""
233
+ return self._outline
234
+
235
+ @property
236
+ def errData(self) -> list[str]:
212
237
  """The error data."""
213
238
  return self._errData
214
239
 
@@ -216,33 +241,63 @@ class Tokenizer(ABC):
216
241
  # Setters
217
242
  ##
218
243
 
219
- def setTitleFormat(self, hFormat: str) -> None:
244
+ def setTitleFormat(self, hFormat: str, hide: bool = False) -> None:
220
245
  """Set the title format pattern."""
221
246
  self._fmtTitle = hFormat.strip()
247
+ self._hideTitle = hide
222
248
  return
223
249
 
224
- def setChapterFormat(self, hFormat: str) -> None:
250
+ def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
225
251
  """Set the chapter format pattern."""
226
252
  self._fmtChapter = hFormat.strip()
253
+ self._hideChapter = hide
227
254
  return
228
255
 
229
- def setUnNumberedFormat(self, hFormat: str) -> None:
256
+ def setUnNumberedFormat(self, hFormat: str, hide: bool = False) -> None:
230
257
  """Set the unnumbered format pattern."""
231
258
  self._fmtUnNum = hFormat.strip()
259
+ self._hideUnNum = hide
232
260
  return
233
261
 
234
- def setSceneFormat(self, hFormat: str, hide: bool) -> None:
262
+ def setSceneFormat(self, hFormat: str, hide: bool = False) -> None:
235
263
  """Set the scene format pattern and hidden status."""
236
264
  self._fmtScene = hFormat.strip()
237
265
  self._hideScene = hide
238
266
  return
239
267
 
240
- def setSectionFormat(self, hFormat: str, hide: bool) -> None:
268
+ def setHardSceneFormat(self, hFormat: str, hide: bool = False) -> None:
269
+ """Set the hard scene format pattern and hidden status."""
270
+ self._fmtHScene = hFormat.strip()
271
+ self._hideHScene = hide
272
+ return
273
+
274
+ def setSectionFormat(self, hFormat: str, hide: bool = False) -> None:
241
275
  """Set the section format pattern and hidden status."""
242
276
  self._fmtSection = hFormat.strip()
243
277
  self._hideSection = hide
244
278
  return
245
279
 
280
+ def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
281
+ """Set the title heading style."""
282
+ self._titleStyle = (
283
+ (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
284
+ )
285
+ return
286
+
287
+ def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
288
+ """Set the chapter heading style."""
289
+ self._chapterStyle = (
290
+ (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
291
+ )
292
+ return
293
+
294
+ def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
295
+ """Set the scene heading style."""
296
+ self._sceneStyle = (
297
+ (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
298
+ )
299
+ return
300
+
246
301
  def setFont(self, family: str, size: int, isFixed: bool = False) -> None:
247
302
  """Set the build font."""
248
303
  self._textFont = family
@@ -271,22 +326,22 @@ class Tokenizer(ABC):
271
326
  return
272
327
 
273
328
  def setHead1Margins(self, upper: float, lower: float) -> None:
274
- """Set the upper and lower header 1 margin."""
329
+ """Set the upper and lower heading 1 margin."""
275
330
  self._marginHead1 = (float(upper), float(lower))
276
331
  return
277
332
 
278
333
  def setHead2Margins(self, upper: float, lower: float) -> None:
279
- """Set the upper and lower header 2 margin."""
334
+ """Set the upper and lower heading 2 margin."""
280
335
  self._marginHead2 = (float(upper), float(lower))
281
336
  return
282
337
 
283
338
  def setHead3Margins(self, upper: float, lower: float) -> None:
284
- """Set the upper and lower header 3 margin."""
339
+ """Set the upper and lower heading 3 margin."""
285
340
  self._marginHead3 = (float(upper), float(lower))
286
341
  return
287
342
 
288
343
  def setHead4Margins(self, upper: float, lower: float) -> None:
289
- """Set the upper and lower header 4 margin."""
344
+ """Set the upper and lower heading 4 margin."""
290
345
  self._marginHead4 = (float(upper), float(lower))
291
346
  return
292
347
 
@@ -300,9 +355,9 @@ class Tokenizer(ABC):
300
355
  self._marginMeta = (float(upper), float(lower))
301
356
  return
302
357
 
303
- def setLinkHeaders(self, state: bool) -> None:
304
- """Enable or disable adding an anchor before headers."""
305
- self._linkHeaders = state
358
+ def setLinkHeadings(self, state: bool) -> None:
359
+ """Enable or disable adding an anchor before headings."""
360
+ self._linkHeadings = state
306
361
  return
307
362
 
308
363
  def setBodyText(self, state: bool) -> None:
@@ -325,6 +380,11 @@ class Tokenizer(ABC):
325
380
  self._doKeywords = state
326
381
  return
327
382
 
383
+ def setIgnoredKeywords(self, keywords: str) -> None:
384
+ """Comma separated string of keywords to ignore."""
385
+ self._skipKeywords = set(x.lower().strip() for x in keywords.split(","))
386
+ return
387
+
328
388
  def setKeepMarkdown(self, state: bool) -> None:
329
389
  """Keep original markdown during build."""
330
390
  self._keepMarkdown = state
@@ -338,47 +398,41 @@ class Tokenizer(ABC):
338
398
  def doConvert(self) -> None:
339
399
  raise NotImplementedError
340
400
 
341
- def addRootHeading(self, tHandle: str) -> bool:
401
+ def addRootHeading(self, tHandle: str) -> None:
342
402
  """Add a heading at the start of a new root folder."""
343
- tItem = self._project.tree[tHandle]
344
- if not tItem or not tItem.isRootType():
345
- return False
346
-
347
- if self._isFirst:
348
- textAlign = self.A_CENTRE
349
- self._isFirst = False
350
- else:
351
- textAlign = self.A_PBB | self.A_CENTRE
352
-
353
- trNotes = self._localLookup("Notes")
354
- title = f"{trNotes}: {tItem.itemName}"
355
- self._tokens = []
356
- self._tokens.append((
357
- self.T_TITLE, 0, title, [], textAlign
358
- ))
359
- if self._keepMarkdown:
360
- self._allMarkdown.append(f"# {title}\n\n")
361
-
362
- return True
363
-
364
- def setText(self, tHandle: str, text: str | None = None) -> bool:
365
- """Set the text for the tokenizer from a handle. If text is not
366
- set, load it from the file.
367
- """
368
- self._nwItem = self._project.tree[tHandle]
369
- if self._nwItem is None:
370
- return False
403
+ self._text = ""
404
+ self._handle = None
371
405
 
372
- if text is None:
373
- text = self._project.storage.getDocument(tHandle).readDocument() or ""
406
+ if (tItem := self._project.tree[tHandle]) and tItem.isRootType():
407
+ self._handle = tHandle
408
+ if self._isFirst:
409
+ textAlign = self.A_CENTRE
410
+ self._isFirst = False
411
+ else:
412
+ textAlign = self.A_PBB | self.A_CENTRE
374
413
 
375
- self._text = text
414
+ trNotes = self._localLookup("Notes")
415
+ title = f"{trNotes}: {tItem.itemName}"
416
+ self._tokens = []
417
+ self._tokens.append((
418
+ self.T_TITLE, 1, title, [], textAlign
419
+ ))
420
+ if self._keepMarkdown:
421
+ self._allMarkdown.append(f"#! {title}\n\n")
376
422
 
377
- self._isNone = self._nwItem.itemLayout == nwItemLayout.NO_LAYOUT
378
- self._isNovel = self._nwItem.itemLayout == nwItemLayout.DOCUMENT
379
- self._isNote = self._nwItem.itemLayout == nwItemLayout.NOTE
423
+ return
380
424
 
381
- return True
425
+ def setText(self, tHandle: str, text: str | None = None) -> None:
426
+ """Set the text for the tokenizer from a handle. If text is not
427
+ set, it is loaded from the file.
428
+ """
429
+ self._text = ""
430
+ self._handle = None
431
+ if nwItem := self._project.tree[tHandle]:
432
+ self._text = text or self._project.storage.getDocumentText(tHandle)
433
+ self._handle = tHandle
434
+ self._isNovel = nwItem.itemLayout == nwItemLayout.DOCUMENT
435
+ return
382
436
 
383
437
  def doPreProcessing(self) -> None:
384
438
  """Run through the various replace dictionaries."""
@@ -399,7 +453,7 @@ class Tokenizer(ABC):
399
453
 
400
454
  def tokenizeText(self) -> None:
401
455
  """Scan the text for either lines starting with specific
402
- characters that indicate headers, comments, commands etc, or
456
+ characters that indicate headings, comments, commands etc, or
403
457
  just contain plain text. In the case of plain text, apply the
404
458
  same RegExes that the syntax highlighter uses and save the
405
459
  locations of these formatting tags into the token array.
@@ -407,15 +461,18 @@ class Tokenizer(ABC):
407
461
  The format of the token list is an entry with a five-tuple for
408
462
  each line in the file. The tuple is as follows:
409
463
  1: The type of the block, self.T_*
410
- 2: The header number under which the text is placed
464
+ 2: The heading number under which the text is placed
411
465
  3: The text content of the block, without leading tags
412
466
  4: The internal formatting map of the text, self.FMT_*
413
467
  5: The style of the block, self.A_*
414
468
  """
415
469
  self._tokens = []
416
- tmpMarkdown = []
470
+ if self._isNovel:
471
+ self._hFormatter.setHandle(self._handle)
472
+
417
473
  nHead = 0
418
474
  breakNext = False
475
+ tmpMarkdown = []
419
476
  for aLine in self._text.splitlines():
420
477
  sLine = aLine.strip().lower()
421
478
 
@@ -438,11 +495,12 @@ class Tokenizer(ABC):
438
495
  # Check Line Format
439
496
  # =================
440
497
 
441
- if aLine[0] == "[":
442
- # Parse special formatting line
443
- # This must be a separate if statement, as it may not
444
- # reach a continue statement and must therefore proceed
445
- # to check other formats.
498
+ if aLine.startswith("["):
499
+ # Special Formats
500
+ # ===============
501
+ # Parse special formatting line. This must be a separate if
502
+ # statement, as it may not reach a continue statement and must
503
+ # therefore proceed to check other formats.
446
504
 
447
505
  if sLine in ("[newpage]", "[new page]"):
448
506
  breakNext = True
@@ -466,9 +524,13 @@ class Tokenizer(ABC):
466
524
  ]
467
525
  continue
468
526
 
469
- if aLine[0] == "%":
470
- if aLine[1] == "~":
471
- # Completely ignore the paragraph
527
+ if aLine.startswith("%"):
528
+ # Comments
529
+ # ========
530
+ # All style comments are processed and the exact type and
531
+ # style extracted. Ignored comments on the '%~' format are
532
+ # skipped completely.
533
+ if aLine.startswith("%~"):
472
534
  continue
473
535
 
474
536
  cStyle, cText, _ = processComment(aLine)
@@ -477,94 +539,181 @@ class Tokenizer(ABC):
477
539
  self.T_SYNOPSIS, nHead, cText, [], sAlign
478
540
  ))
479
541
  if self._doSynopsis and self._keepMarkdown:
480
- tmpMarkdown.append("%s\n" % aLine)
542
+ tmpMarkdown.append(f"{aLine}\n")
481
543
  elif cStyle == nwComment.SHORT:
482
544
  self._tokens.append((
483
545
  self.T_SHORT, nHead, cText, [], sAlign
484
546
  ))
485
547
  if self._doSynopsis and self._keepMarkdown:
486
- tmpMarkdown.append("%s\n" % aLine)
548
+ tmpMarkdown.append(f"{aLine}\n")
487
549
  else:
488
550
  self._tokens.append((
489
551
  self.T_COMMENT, nHead, cText, [], sAlign
490
552
  ))
491
553
  if self._doComments and self._keepMarkdown:
492
- tmpMarkdown.append("%s\n" % aLine)
554
+ tmpMarkdown.append(f"{aLine}\n")
493
555
 
494
- elif aLine[0] == "@":
495
- self._tokens.append((
496
- self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
497
- ))
498
- if self._doKeywords and self._keepMarkdown:
499
- tmpMarkdown.append("%s\n" % aLine)
556
+ elif aLine.startswith("@"):
557
+ # Keywords
558
+ # ========
559
+ # Only valid keyword lines are parsed, and any ignored keywords
560
+ # are automatically skipped.
500
561
 
501
- elif aLine[:2] == "# ":
502
- if self._isNovel:
503
- sAlign |= self.A_CENTRE
504
- sAlign |= self.A_PBB
562
+ valid, bits, _ = self._project.index.scanThis(aLine)
563
+ if valid and bits and bits[0] not in self._skipKeywords:
564
+ self._tokens.append((
565
+ self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
566
+ ))
567
+ if self._doKeywords and self._keepMarkdown:
568
+ tmpMarkdown.append(f"{aLine}\n")
569
+
570
+ elif aLine.startswith(("# ", "#! ")):
571
+ # Title or Partition Headings
572
+ # ===========================
573
+ # Main titles are allowed in any document, and they are always
574
+ # centred and start on a new page. For novel documents, we also
575
+ # reset all counters when such a title is encountered.
576
+ # Partition headings are only formatted in novel documents, and
577
+ # otherwise unchanged. Scene separators are disabled
578
+ # immediately after partitions, and scene numbers are reset.
579
+ isPlain = aLine.startswith("# ")
505
580
 
506
581
  nHead += 1
507
- self._tokens.append((
508
- self.T_HEAD1, nHead, aLine[2:].strip(), [], sAlign
509
- ))
510
- if self._keepMarkdown:
511
- tmpMarkdown.append("%s\n" % aLine)
512
-
513
- elif aLine[:3] == "## ":
582
+ tText = aLine[2:].strip()
583
+ tType = self.T_HEAD1 if isPlain else self.T_TITLE
584
+ tStyle = self.A_NONE if isPlain else (self.A_PBB | self.A_CENTRE)
585
+ sHide = self._hideTitle if isPlain else False
514
586
  if self._isNovel:
515
- sAlign |= self.A_PBB
587
+ if sHide:
588
+ tText = ""
589
+ tType = self.T_EMPTY
590
+ tStyle = self.A_NONE
591
+ elif isPlain:
592
+ tText = self._hFormatter.apply(self._fmtTitle, tText, nHead)
593
+ tStyle = self._titleStyle
594
+ if isPlain:
595
+ self._hFormatter.resetScene()
596
+ else:
597
+ self._hFormatter.resetAll()
598
+ self._noSep = True
516
599
 
517
- nHead += 1
518
600
  self._tokens.append((
519
- self.T_HEAD2, nHead, aLine[3:].strip(), [], sAlign
601
+ tType, nHead, tText, [], tStyle
520
602
  ))
521
603
  if self._keepMarkdown:
522
- tmpMarkdown.append("%s\n" % aLine)
604
+ tmpMarkdown.append(f"{aLine}\n")
605
+
606
+ elif aLine.startswith(("## ", "##! ")):
607
+ # (Unnumbered) Chapter Headings
608
+ # =============================
609
+ # Chapter headings are only formatted in novel documents, and
610
+ # otherwise unchanged. Chapter numbers are bumped before the
611
+ # heading is formatted. Scene separators are disabled
612
+ # immediately after chapter headings, and scene numbers are
613
+ # reset. Unnumbered chapters are only meaningful in Novel docs,
614
+ # so if we're in a note, we keep them as level 2 headings.
615
+ isPlain = aLine.startswith("## ")
523
616
 
524
- elif aLine[:4] == "### ":
525
617
  nHead += 1
526
- self._tokens.append((
527
- self.T_HEAD3, nHead, aLine[4:].strip(), [], sAlign
528
- ))
529
- if self._keepMarkdown:
530
- tmpMarkdown.append("%s\n" % aLine)
618
+ tText = aLine[3:].strip()
619
+ tType = self.T_HEAD2
620
+ tStyle = self.A_NONE
621
+ sHide = self._hideChapter if isPlain else self._hideUnNum
622
+ tFormat = self._fmtChapter if isPlain else self._fmtUnNum
623
+ if self._isNovel:
624
+ if isPlain:
625
+ self._hFormatter.incChapter()
626
+ if sHide:
627
+ tText = ""
628
+ tType = self.T_EMPTY
629
+ else:
630
+ tText = self._hFormatter.apply(tFormat, tText, nHead)
631
+ tStyle = self._chapterStyle
632
+ self._hFormatter.resetScene()
633
+ self._noSep = True
531
634
 
532
- elif aLine[:5] == "#### ":
533
- nHead += 1
534
635
  self._tokens.append((
535
- self.T_HEAD4, nHead, aLine[5:].strip(), [], sAlign
636
+ tType, nHead, tText, [], tStyle
536
637
  ))
537
638
  if self._keepMarkdown:
538
- tmpMarkdown.append("%s\n" % aLine)
639
+ tmpMarkdown.append(f"{aLine}\n")
640
+
641
+ elif aLine.startswith(("### ", "###! ")):
642
+ # (Hard) Scene Headings
643
+ # =====================
644
+ # Scene headings in novel documents are treated as centred
645
+ # separators if the formatting does not change the text. If the
646
+ # format is empty, the scene can be hidden or a blank paragraph
647
+ # (skip). When the scene title has static text or no text, it
648
+ # is always ignored if the noSep flag is set. This prevents
649
+ # separators immediately after other titles. Scene numbers are
650
+ # always incremented before formatting. For notes, the heading
651
+ # is unchanged.
652
+ isPlain = aLine.startswith("### ")
539
653
 
540
- elif aLine[:3] == "#! ":
541
654
  nHead += 1
655
+ tText = aLine[4:].strip()
656
+ tType = self.T_HEAD3
657
+ tStyle = self.A_NONE
658
+ sHide = self._hideScene if isPlain else self._hideHScene
659
+ tFormat = self._fmtScene if isPlain else self._fmtHScene
542
660
  if self._isNovel:
543
- tStyle = self.T_TITLE
544
- else:
545
- tStyle = self.T_HEAD1
661
+ self._hFormatter.incScene()
662
+ if sHide:
663
+ tText = ""
664
+ tType = self.T_EMPTY
665
+ else:
666
+ tText = self._hFormatter.apply(tFormat, tText, nHead)
667
+ tStyle = self._sceneStyle
668
+ if tText == "": # Empty Format
669
+ tType = self.T_EMPTY if self._noSep else self.T_SKIP
670
+ elif tText == tFormat: # Static Format
671
+ tText = "" if self._noSep else tText
672
+ tType = self.T_EMPTY if self._noSep else self.T_SEP
673
+ tStyle = self.A_NONE if self._noSep else self.A_CENTRE
674
+ self._noSep = False
546
675
 
547
676
  self._tokens.append((
548
- tStyle, nHead, aLine[3:].strip(), [], sAlign | self.A_CENTRE
677
+ tType, nHead, tText, [], tStyle
549
678
  ))
550
679
  if self._keepMarkdown:
551
- tmpMarkdown.append("%s\n" % aLine)
680
+ tmpMarkdown.append(f"{aLine}\n")
681
+
682
+ elif aLine.startswith("#### "):
683
+ # Section Headings
684
+ # =================
685
+ # Section headings in novel docs are treated as centred
686
+ # separators if the formatting does not change the text. If the
687
+ # format is empty, the section can be hidden or a blank
688
+ # paragraph (skip). For notes, the heading is unchanged.
552
689
 
553
- elif aLine[:4] == "##! ":
554
690
  nHead += 1
691
+ tText = aLine[5:].strip()
692
+ tType = self.T_HEAD4
693
+ tStyle = self.A_NONE
555
694
  if self._isNovel:
556
- tStyle = self.T_UNNUM
557
- sAlign |= self.A_PBB
558
- else:
559
- tStyle = self.T_HEAD2
695
+ if self._hideSection:
696
+ tText = ""
697
+ tType = self.T_EMPTY
698
+ else:
699
+ tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
700
+ if tText == "": # Empty Format
701
+ tType = self.T_SKIP
702
+ elif tText == self._fmtSection: # Static Format
703
+ tType = self.T_SEP
704
+ tStyle = self.A_CENTRE
560
705
 
561
706
  self._tokens.append((
562
- tStyle, nHead, aLine[4:].strip(), [], sAlign
707
+ tType, nHead, tText, [], tStyle
563
708
  ))
564
709
  if self._keepMarkdown:
565
- tmpMarkdown.append("%s\n" % aLine)
710
+ tmpMarkdown.append(f"{aLine}\n")
566
711
 
567
712
  else:
713
+ # Text Lines
714
+ # ==========
715
+ # Anything remaining at this point is body text. If body text
716
+ # is not disabled, we proceed to process text formatting.
568
717
  if not self._doBodyText:
569
718
  # Skip all body text
570
719
  continue
@@ -606,11 +755,11 @@ class Tokenizer(ABC):
606
755
  self.T_TEXT, nHead, tLine, fmtPos, sAlign
607
756
  ))
608
757
  if self._keepMarkdown:
609
- tmpMarkdown.append("%s\n" % aLine)
758
+ tmpMarkdown.append(f"{aLine}\n")
610
759
 
611
760
  # If we have content, turn off the first page flag
612
761
  if self._isFirst and self._tokens:
613
- self._isFirst = False
762
+ self._isFirst = False # First document has been processed
614
763
 
615
764
  # Make sure the token array doesn't start with a page break
616
765
  # on the very first page, adding a blank first page.
@@ -626,8 +775,6 @@ class Tokenizer(ABC):
626
775
  ))
627
776
  if self._keepMarkdown:
628
777
  tmpMarkdown.append("\n")
629
-
630
- if self._keepMarkdown:
631
778
  self._allMarkdown.append("".join(tmpMarkdown))
632
779
 
633
780
  # Second Pass
@@ -654,103 +801,136 @@ class Tokenizer(ABC):
654
801
 
655
802
  return
656
803
 
657
- def doHeaders(self) -> bool:
658
- """Apply formatting to the text headers for novel files. This
659
- also applies chapter and scene numbering.
660
- """
661
- if not self._isNovel:
662
- return False
663
-
664
- self._hFormatter.setHandle(self._nwItem.itemHandle if self._nwItem else None)
665
-
666
- for n, token in enumerate(self._tokens):
667
-
668
- if token[0] == self.T_TEXT:
669
- # If we see text before a scene, we consider it a "scene"
670
- self._allowSeparator = False
671
-
672
- elif token[0] == self.T_TITLE: # Title
673
- # For new titles, we reset all counters
674
- self._allowSeparator = True
675
- self._hFormatter.resetAll()
676
-
677
- elif token[0] == self.T_HEAD1: # Partition
678
-
679
- tTemp = self._hFormatter.apply(self._fmtTitle, token[2], token[1])
680
- self._tokens[n] = (
681
- token[0], token[1], tTemp, [], token[4]
682
- )
683
-
684
- # Set scene variables
685
- self._allowSeparator = True
686
- self._hFormatter.resetScene()
687
-
688
- elif token[0] in (self.T_HEAD2, self.T_UNNUM): # Chapter
689
-
690
- # Numbered or Unnumbered
691
- if token[0] == self.T_UNNUM:
692
- tTemp = self._hFormatter.apply(self._fmtUnNum, token[2], token[1])
693
- else:
694
- self._hFormatter.incChapter()
695
- tTemp = self._hFormatter.apply(self._fmtChapter, token[2], token[1])
696
-
697
- # Format the chapter header
698
- self._tokens[n] = (
699
- token[0], token[1], tTemp, [], token[4]
700
- )
701
-
702
- # Set scene variables
703
- self._allowSeparator = True
704
- self._hFormatter.resetScene()
705
-
706
- elif token[0] == self.T_HEAD3: # Scene
707
-
708
- self._hFormatter.incScene()
709
-
710
- tTemp = self._hFormatter.apply(self._fmtScene, token[2], token[1])
711
- if tTemp == "" and self._hideScene:
712
- self._tokens[n] = (
713
- self.T_EMPTY, token[1], "", [], self.A_NONE
714
- )
715
- elif tTemp == "" and not self._hideScene:
716
- self._tokens[n] = (
717
- self.T_EMPTY if self._allowSeparator else self.T_SKIP, token[1],
718
- "", [], self.A_NONE if self._allowSeparator else token[4]
719
- )
720
- elif tTemp == self._fmtScene:
721
- self._tokens[n] = (
722
- self.T_EMPTY if self._allowSeparator else self.T_SEP, token[1],
723
- "" if self._allowSeparator else tTemp, [],
724
- self.A_NONE if self._allowSeparator else (token[4] | self.A_CENTRE)
725
- )
726
- else:
727
- self._tokens[n] = (
728
- token[0], token[1], tTemp, [], token[4]
729
- )
804
+ def buildOutline(self) -> None:
805
+ """Build an outline of the text up to level 3 headings."""
806
+ tHandle = self._handle or ""
807
+ isNovel = self._isNovel
808
+ for tType, nHead, tText, _, _ in self._tokens:
809
+ if tType == self.T_TITLE:
810
+ prefix = "TT"
811
+ elif tType == self.T_HEAD1:
812
+ prefix = "PT" if isNovel else "H1"
813
+ elif tType == self.T_HEAD2:
814
+ prefix = "CH" if isNovel else "H2"
815
+ elif tType == self.T_HEAD3:
816
+ prefix = "SC" if isNovel else "H3"
817
+ else:
818
+ continue
730
819
 
731
- self._allowSeparator = False
820
+ key = f"{tHandle}:T{nHead:04d}"
821
+ text = tText.replace(nwHeadFmt.BR, " ").replace("&amp;", "&")
822
+ self._outline[key] = f"{prefix}|{text}"
732
823
 
733
- elif token[0] == self.T_HEAD4: # Section
824
+ return
734
825
 
735
- tTemp = self._hFormatter.apply(self._fmtSection, token[2], token[1])
736
- if tTemp == "" and self._hideSection:
737
- self._tokens[n] = (
738
- self.T_EMPTY, token[1], "", [], self.A_NONE
739
- )
740
- elif tTemp == "" and not self._hideSection:
741
- self._tokens[n] = (
742
- self.T_SKIP, token[1], "", [], token[4]
743
- )
744
- elif tTemp == self._fmtSection:
745
- self._tokens[n] = (
746
- self.T_SEP, token[1], tTemp, [], token[4] | self.A_CENTRE
747
- )
748
- else:
749
- self._tokens[n] = (
750
- token[0], token[1], tTemp, [], token[4]
751
- )
826
+ def countStats(self) -> None:
827
+ """Count stats on the tokenized text."""
828
+ titleCount = self._counts.get("titleCount", 0)
829
+ paragraphCount = self._counts.get("paragraphCount", 0)
830
+
831
+ allWords = self._counts.get("allWords", 0)
832
+ textWords = self._counts.get("textWords", 0)
833
+ titleWords = self._counts.get("titleWords", 0)
834
+
835
+ allChars = self._counts.get("allChars", 0)
836
+ textChars = self._counts.get("textChars", 0)
837
+ titleChars = self._counts.get("titleChars", 0)
838
+
839
+ allWordChars = self._counts.get("allWordChars", 0)
840
+ textWordChars = self._counts.get("textWordChars", 0)
841
+ titleWordChars = self._counts.get("titleWordChars", 0)
842
+
843
+ para = []
844
+ for tType, _, tText, _, _ in self._tokens:
845
+ tText = tText.replace(nwUnicode.U_ENDASH, " ")
846
+ tText = tText.replace(nwUnicode.U_EMDASH, " ")
847
+
848
+ tWords = tText.split()
849
+ nWords = len(tWords)
850
+ nChars = len(tText)
851
+ nWChars = len("".join(tWords))
852
+
853
+ if tType == self.T_EMPTY:
854
+ if len(para) > 0:
855
+ tTemp = "\n".join(para)
856
+ tPWords = tTemp.split()
857
+ nPWords = len(tPWords)
858
+ nPChars = len(tTemp)
859
+ nPWChars = len("".join(tPWords))
860
+
861
+ paragraphCount += 1
862
+ allWords += nPWords
863
+ textWords += nPWords
864
+ allChars += nPChars
865
+ textChars += nPChars
866
+ allWordChars += nPWChars
867
+ textWordChars += nPWChars
868
+ para = []
869
+
870
+ elif tType in self.L_HEADINGS:
871
+ titleCount += 1
872
+ allWords += nWords
873
+ titleWords += nWords
874
+ allChars += nChars
875
+ allWordChars += nWChars
876
+ titleChars += nChars
877
+ titleWordChars += nWChars
878
+
879
+ elif tType == self.T_SEP:
880
+ allWords += nWords
881
+ allChars += nChars
882
+ allWordChars += nWChars
883
+
884
+ elif tType == self.T_TEXT:
885
+ para.append(tText.rstrip())
886
+
887
+ elif tType == self.T_SYNOPSIS and self._doSynopsis:
888
+ text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
889
+ words = text.split()
890
+ allWords += len(words)
891
+ allChars += len(text)
892
+ allWordChars += len("".join(words))
893
+
894
+ elif tType == self.T_SHORT and self._doSynopsis:
895
+ text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
896
+ words = text.split()
897
+ allWords += len(words)
898
+ allChars += len(text)
899
+ allWordChars += len("".join(words))
900
+
901
+ elif tType == self.T_COMMENT and self._doComments:
902
+ text = "{0}: {1}".format(self._localLookup("Comment"), tText)
903
+ words = text.split()
904
+ allWords += len(words)
905
+ allChars += len(text)
906
+ allWordChars += len("".join(words))
907
+
908
+ elif tType == self.T_KEYWORD and self._doKeywords:
909
+ valid, bits, _ = self._project.index.scanThis("@"+tText)
910
+ if valid and bits:
911
+ key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
912
+ text = "{0}: {1}".format(key, ", ".join(bits[1:]))
913
+ words = text.split()
914
+ allWords += len(words)
915
+ allChars += len(text)
916
+ allWordChars += len("".join(words))
917
+
918
+ self._counts["titleCount"] = titleCount
919
+ self._counts["paragraphCount"] = paragraphCount
920
+
921
+ self._counts["allWords"] = allWords
922
+ self._counts["textWords"] = textWords
923
+ self._counts["titleWords"] = titleWords
924
+
925
+ self._counts["allChars"] = allChars
926
+ self._counts["textChars"] = textChars
927
+ self._counts["titleChars"] = titleChars
928
+
929
+ self._counts["allWordChars"] = allWordChars
930
+ self._counts["textWordChars"] = textWordChars
931
+ self._counts["titleWordChars"] = titleWordChars
752
932
 
753
- return True
933
+ return
754
934
 
755
935
  def saveRawMarkdown(self, path: str | Path) -> None:
756
936
  """Save the raw text to a plain text file."""