novelWriter 2.4.3__py3-none-any.whl → 2.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
Files changed (123)
  1. {novelWriter-2.4.3.dist-info → novelWriter-2.5.dist-info}/METADATA +4 -5
  2. {novelWriter-2.4.3.dist-info → novelWriter-2.5.dist-info}/RECORD +122 -112
  3. {novelWriter-2.4.3.dist-info → novelWriter-2.5.dist-info}/WHEEL +1 -1
  4. novelwriter/__init__.py +33 -39
  5. novelwriter/assets/i18n/nw_de_DE.qm +0 -0
  6. novelwriter/assets/i18n/nw_en_US.qm +0 -0
  7. novelwriter/assets/i18n/nw_es_419.qm +0 -0
  8. novelwriter/assets/i18n/nw_fr_FR.qm +0 -0
  9. novelwriter/assets/i18n/nw_it_IT.qm +0 -0
  10. novelwriter/assets/i18n/nw_ja_JP.qm +0 -0
  11. novelwriter/assets/i18n/nw_nb_NO.qm +0 -0
  12. novelwriter/assets/i18n/nw_nl_NL.qm +0 -0
  13. novelwriter/assets/i18n/nw_pl_PL.qm +0 -0
  14. novelwriter/assets/i18n/nw_pt_BR.qm +0 -0
  15. novelwriter/assets/i18n/nw_zh_CN.qm +0 -0
  16. novelwriter/assets/i18n/project_en_GB.json +1 -0
  17. novelwriter/assets/i18n/project_pl_PL.json +116 -0
  18. novelwriter/assets/i18n/project_pt_BR.json +74 -74
  19. novelwriter/assets/icons/typicons_dark/icons.conf +2 -0
  20. novelwriter/assets/icons/typicons_dark/nw_font.svg +4 -0
  21. novelwriter/assets/icons/typicons_dark/nw_quote.svg +4 -0
  22. novelwriter/assets/icons/typicons_light/icons.conf +2 -0
  23. novelwriter/assets/icons/typicons_light/nw_font.svg +4 -0
  24. novelwriter/assets/icons/typicons_light/nw_quote.svg +4 -0
  25. novelwriter/assets/manual.pdf +0 -0
  26. novelwriter/assets/sample.zip +0 -0
  27. novelwriter/assets/syntax/cyberpunk_night.conf +5 -3
  28. novelwriter/assets/syntax/default_dark.conf +32 -18
  29. novelwriter/assets/syntax/default_light.conf +24 -10
  30. novelwriter/assets/syntax/dracula.conf +44 -0
  31. novelwriter/assets/syntax/grey_dark.conf +5 -4
  32. novelwriter/assets/syntax/grey_light.conf +5 -4
  33. novelwriter/assets/syntax/light_owl.conf +7 -6
  34. novelwriter/assets/syntax/night_owl.conf +7 -6
  35. novelwriter/assets/syntax/snazzy.conf +42 -0
  36. novelwriter/assets/syntax/solarized_dark.conf +4 -3
  37. novelwriter/assets/syntax/solarized_light.conf +4 -3
  38. novelwriter/assets/syntax/tango.conf +27 -11
  39. novelwriter/assets/syntax/tomorrow.conf +6 -5
  40. novelwriter/assets/syntax/tomorrow_night.conf +7 -6
  41. novelwriter/assets/syntax/tomorrow_night_blue.conf +6 -5
  42. novelwriter/assets/syntax/tomorrow_night_bright.conf +6 -5
  43. novelwriter/assets/syntax/tomorrow_night_eighties.conf +6 -5
  44. novelwriter/assets/text/credits_en.htm +52 -41
  45. novelwriter/assets/themes/cyberpunk_night.conf +3 -0
  46. novelwriter/assets/themes/default_dark.conf +2 -0
  47. novelwriter/assets/themes/default_light.conf +2 -0
  48. novelwriter/assets/themes/dracula.conf +48 -0
  49. novelwriter/assets/themes/solarized_dark.conf +2 -0
  50. novelwriter/assets/themes/solarized_light.conf +2 -0
  51. novelwriter/common.py +33 -12
  52. novelwriter/config.py +184 -98
  53. novelwriter/constants.py +47 -35
  54. novelwriter/core/buildsettings.py +68 -69
  55. novelwriter/core/coretools.py +5 -23
  56. novelwriter/core/docbuild.py +52 -40
  57. novelwriter/core/document.py +3 -5
  58. novelwriter/core/index.py +115 -45
  59. novelwriter/core/item.py +8 -19
  60. novelwriter/core/options.py +2 -4
  61. novelwriter/core/project.py +37 -61
  62. novelwriter/core/projectdata.py +1 -3
  63. novelwriter/core/projectxml.py +12 -15
  64. novelwriter/core/sessions.py +3 -5
  65. novelwriter/core/spellcheck.py +4 -9
  66. novelwriter/core/status.py +211 -164
  67. novelwriter/core/storage.py +0 -8
  68. novelwriter/core/tohtml.py +139 -105
  69. novelwriter/core/tokenizer.py +278 -122
  70. novelwriter/core/{tomd.py → tomarkdown.py} +97 -78
  71. novelwriter/core/toodt.py +257 -166
  72. novelwriter/core/toqdoc.py +419 -0
  73. novelwriter/core/tree.py +5 -7
  74. novelwriter/dialogs/about.py +11 -18
  75. novelwriter/dialogs/docmerge.py +17 -19
  76. novelwriter/dialogs/docsplit.py +17 -19
  77. novelwriter/dialogs/editlabel.py +6 -10
  78. novelwriter/dialogs/preferences.py +200 -164
  79. novelwriter/dialogs/projectsettings.py +225 -189
  80. novelwriter/dialogs/quotes.py +12 -9
  81. novelwriter/dialogs/wordlist.py +9 -15
  82. novelwriter/enum.py +35 -30
  83. novelwriter/error.py +8 -15
  84. novelwriter/extensions/configlayout.py +55 -21
  85. novelwriter/extensions/eventfilters.py +1 -5
  86. novelwriter/extensions/modified.py +70 -14
  87. novelwriter/extensions/novelselector.py +1 -3
  88. novelwriter/extensions/pagedsidebar.py +9 -12
  89. novelwriter/extensions/{circularprogress.py → progressbars.py} +30 -8
  90. novelwriter/extensions/statusled.py +40 -26
  91. novelwriter/extensions/switch.py +4 -6
  92. novelwriter/extensions/switchbox.py +7 -6
  93. novelwriter/extensions/versioninfo.py +3 -9
  94. novelwriter/gui/doceditor.py +120 -139
  95. novelwriter/gui/dochighlight.py +231 -186
  96. novelwriter/gui/docviewer.py +69 -108
  97. novelwriter/gui/docviewerpanel.py +3 -10
  98. novelwriter/gui/editordocument.py +1 -3
  99. novelwriter/gui/itemdetails.py +7 -11
  100. novelwriter/gui/mainmenu.py +22 -18
  101. novelwriter/gui/noveltree.py +11 -24
  102. novelwriter/gui/outline.py +15 -26
  103. novelwriter/gui/projtree.py +39 -65
  104. novelwriter/gui/search.py +10 -3
  105. novelwriter/gui/sidebar.py +2 -6
  106. novelwriter/gui/statusbar.py +29 -37
  107. novelwriter/gui/theme.py +26 -48
  108. novelwriter/guimain.py +162 -160
  109. novelwriter/shared.py +36 -19
  110. novelwriter/text/patterns.py +113 -0
  111. novelwriter/tools/dictionaries.py +10 -20
  112. novelwriter/tools/lipsum.py +10 -16
  113. novelwriter/tools/manusbuild.py +9 -11
  114. novelwriter/tools/manuscript.py +75 -149
  115. novelwriter/tools/manussettings.py +74 -76
  116. novelwriter/tools/noveldetails.py +16 -21
  117. novelwriter/tools/welcome.py +21 -26
  118. novelwriter/tools/writingstats.py +9 -12
  119. novelwriter/types.py +49 -4
  120. novelwriter/extensions/simpleprogress.py +0 -55
  121. {novelWriter-2.4.3.dist-info → novelWriter-2.5.dist-info}/LICENSE.md +0 -0
  122. {novelWriter-2.4.3.dist-info → novelWriter-2.5.dist-info}/entry_points.txt +0 -0
  123. {novelWriter-2.4.3.dist-info → novelWriter-2.5.dist-info}/top_level.txt +0 -0
@@ -24,30 +24,35 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 """
 from __future__ import annotations
 
-import re
 import json
 import logging
+import re
 
 from abc import ABC, abstractmethod
-from time import time
-from pathlib import Path
 from functools import partial
+from pathlib import Path
+from time import time
 
 from PyQt5.QtCore import QCoreApplication, QRegularExpression
+from PyQt5.QtGui import QFont
 
-from novelwriter.common import formatTimeStamp, numberToRoman, checkInt
-from novelwriter.constants import (
-    nwHeadFmt, nwKeyWords, nwLabels, nwRegEx, nwShortcode, nwUnicode, trConst
-)
+from novelwriter import CONFIG
+from novelwriter.common import checkInt, formatTimeStamp, numberToRoman
+from novelwriter.constants import nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwUnicode, trConst
 from novelwriter.core.index import processComment
 from novelwriter.core.project import NWProject
 from novelwriter.enum import nwComment, nwItemLayout
+from novelwriter.text.patterns import REGEX_PATTERNS
 
 logger = logging.getLogger(__name__)
 
 ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
 RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
 
+T_Formats = list[tuple[int, int, str]]
+T_Comment = tuple[str, T_Formats]
+T_Token = tuple[int, int, str, T_Formats, int]
+
 
 def stripEscape(text: str) -> str:
     """Strip escaped Markdown characters from paragraph text."""
@@ -80,6 +85,12 @@ class Tokenizer(ABC):
     FMT_SUP_E = 12  # End superscript
     FMT_SUB_B = 13  # Begin subscript
     FMT_SUB_E = 14  # End subscript
+    FMT_DL_B  = 15  # Begin dialogue
+    FMT_DL_E  = 16  # End dialogue
+    FMT_ADL_B = 17  # Begin alt dialogue
+    FMT_ADL_E = 18  # End alt dialogue
+    FMT_FNOTE = 19  # Footnote marker
+    FMT_STRIP = 20  # Strip the format code
 
     # Block Type
     T_EMPTY = 1  # Empty line (new paragraph)
@@ -108,48 +119,60 @@ class Tokenizer(ABC):
     A_Z_BTMMRG = 0x0080  # Zero bottom margin
     A_IND_L    = 0x0100  # Left indentation
     A_IND_R    = 0x0200  # Right indentation
+    A_IND_T    = 0x0400  # Text indentation
+
+    # Masks
+    M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY
 
     # Lookups
     L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
+    L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
+    L_SUMMARY = [T_SYNOPSIS, T_SHORT]
 
     def __init__(self, project: NWProject) -> None:
 
         self._project = project
 
         # Data Variables
-        self._text = ""      # The raw text to be tokenized
-        self._handle = None  # The item handle currently being processed
-        self._result = ""    # The result of the last document
+        self._text = ""       # The raw text to be tokenized
+        self._handle = None   # The item handle currently being processed
+        self._result = ""     # The result of the last document
+        self._keepMD = False  # Whether to keep the markdown text
 
-        self._keepMarkdown = False  # Whether to keep the markdown text
-        self._allMarkdown = []      # The result novelWriter markdown of all documents
+        # Tokens and Meta Data (Per Document)
+        self._tokens: list[T_Token] = []
+        self._footnotes: dict[str, T_Comment] = {}
 
-        # Processed Tokens and Meta Data
-        self._tokens: list[tuple[int, int, str, list[tuple[int, int]], int]] = []
+        # Tokens and Meta Data (Per Instance)
        self._counts: dict[str, int] = {}
         self._outline: dict[str, str] = {}
+        self._markdown: list[str] = []
 
         # User Settings
-        self._textFont = "Serif"    # Output text font
-        self._textSize = 11         # Output text size
-        self._textFixed = False     # Fixed width text
+        self._textFont = QFont("Serif", 11)  # Output text font
         self._lineHeight = 1.15     # Line height in units of em
         self._blockIndent = 4.00    # Block indent in units of em
+        self._firstIndent = False   # Enable first line indent
+        self._firstWidth = 1.40     # First line indent in units of em
+        self._indentFirst = False   # Indent first paragraph
         self._doJustify = False     # Justify text
         self._doBodyText = True    # Include body text
         self._doSynopsis = False    # Also process synopsis comments
         self._doComments = False    # Also process comments
         self._doKeywords = False    # Also process keywords like tags and references
         self._skipKeywords = set()  # Keywords to ignore
+        self._keepBreaks = True     # Keep line breaks in paragraphs
 
         # Margins
-        self._marginTitle = (1.000, 0.500)
-        self._marginHead1 = (1.000, 0.500)
-        self._marginHead2 = (0.834, 0.500)
-        self._marginHead3 = (0.584, 0.500)
-        self._marginHead4 = (0.584, 0.500)
+        self._marginTitle = (1.417, 0.500)
+        self._marginHead1 = (1.417, 0.500)
+        self._marginHead2 = (1.668, 0.500)
+        self._marginHead3 = (1.168, 0.500)
+        self._marginHead4 = (1.168, 0.500)
         self._marginText = (0.000, 0.584)
         self._marginMeta = (0.000, 0.584)
+        self._marginFoot = (1.417, 0.467)
+        self._marginSep = (1.168, 1.168)
 
         # Title Formats
         self._fmtTitle = nwHeadFmt.TITLE  # Formatting for titles
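
The per-document state is now typed with the new aliases from the top of the module: self._tokens holds T_Token tuples of (type, heading number, text, formats, style), and each T_Formats entry is a (position, format code, key) triple, where the key is only used by markers that reference other data, such as footnotes. A minimal sketch of the shape (the concrete code values here are invented for illustration):

    # Hypothetical values; the real codes are class constants on Tokenizer
    T_TEXT, FMT_B_B, FMT_B_E, A_NONE = 2, 5, 6, 0x0000

    # "Some **bold** text" after marker extraction: bold starts at 5, ends at 9
    token = (T_TEXT, 1, "Some bold text", [(5, FMT_B_B, ""), (9, FMT_B_E, "")], A_NONE)
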
@@ -174,7 +197,9 @@ class Tokenizer(ABC):
 
         # Instance Variables
         self._hFormatter = HeadingFormatter(self._project)
-        self._noSep = True  # Flag to indicate that we don't want a scene separator
+        self._noSep = True        # Flag to indicate that we don't want a scene separator
+        self._noIndent = False    # Flag to disable text indent on next paragraph
+        self._showDialog = False  # Flag for dialogue highlighting
 
         # This File
         self._isNovel = False  # Document is a novel document
@@ -189,12 +214,12 @@ class Tokenizer(ABC):
 
         # Format RegEx
         self._rxMarkdown = [
-            (QRegularExpression(nwRegEx.FMT_EI), [0, self.FMT_I_B, 0, self.FMT_I_E]),
-            (QRegularExpression(nwRegEx.FMT_EB), [0, self.FMT_B_B, 0, self.FMT_B_E]),
-            (QRegularExpression(nwRegEx.FMT_ST), [0, self.FMT_D_B, 0, self.FMT_D_E]),
+            (REGEX_PATTERNS.markdownItalic, [0, self.FMT_I_B, 0, self.FMT_I_E]),
+            (REGEX_PATTERNS.markdownBold, [0, self.FMT_B_B, 0, self.FMT_B_E]),
+            (REGEX_PATTERNS.markdownStrike, [0, self.FMT_D_B, 0, self.FMT_D_E]),
         ]
-        self._rxShortCodes = QRegularExpression(nwRegEx.FMT_SC)
-        self._rxShortCodeVals = QRegularExpression(nwRegEx.FMT_SV)
+        self._rxShortCodes = REGEX_PATTERNS.shortcodePlain
+        self._rxShortCodeVals = REGEX_PATTERNS.shortcodeValue
 
         self._shortCodeFmt = {
             nwShortcode.ITALIC_O: self.FMT_I_B, nwShortcode.ITALIC_C: self.FMT_I_E,
@@ -205,6 +230,11 @@ class Tokenizer(ABC):
             nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
             nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
         }
+        self._shortCodeVals = {
+            nwShortcode.FOOTNOTE_B: self.FMT_FNOTE,
+        }
+
+        self._rxDialogue: list[tuple[QRegularExpression, int, int]] = []
 
         return
 
@@ -220,7 +250,7 @@ class Tokenizer(ABC):
     @property
     def allMarkdown(self) -> list[str]:
         """The combined novelWriter Markdown text."""
-        return self._allMarkdown
+        return self._markdown
 
     @property
     def textStats(self) -> dict[str, int]:
@@ -298,11 +328,9 @@ class Tokenizer(ABC):
         )
         return
 
-    def setFont(self, family: str, size: int, isFixed: bool = False) -> None:
+    def setFont(self, font: QFont) -> None:
         """Set the build font."""
-        self._textFont = family
-        self._textSize = round(int(size))
-        self._textFixed = isFixed
+        self._textFont = font
         return
 
     def setLineHeight(self, height: float) -> None:
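
setFont() now takes a single QFont instead of separate family, size, and fixed-width arguments, so callers construct the font up front. A migration sketch, with `tokenizer` standing in for any concrete Tokenizer subclass:

    from PyQt5.QtGui import QFont

    # tokenizer.setFont("Serif", 11, isFixed=False)  # 2.4.3 signature, removed
    tokenizer.setFont(QFont("Serif", 11))            # 2.5 signature
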
@@ -315,11 +343,43 @@ class Tokenizer(ABC):
         self._blockIndent = min(max(float(indent), 0.0), 10.0)
         return
 
+    def setFirstLineIndent(self, state: bool, indent: float, first: bool) -> None:
+        """Set first line indent and whether to also indent first
+        paragraph after a heading.
+        """
+        self._firstIndent = state
+        self._firstWidth = indent
+        self._indentFirst = first
+        return
+
     def setJustify(self, state: bool) -> None:
         """Enable or disable text justification."""
         self._doJustify = state
         return
 
+    def setDialogueHighlight(self, state: bool) -> None:
+        """Enable or disable dialogue highlighting."""
+        self._rxDialogue = []
+        self._showDialog = state
+        if state:
+            if CONFIG.dialogStyle > 0:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.dialogStyle, self.FMT_DL_B, self.FMT_DL_E
+                ))
+            if CONFIG.dialogLine:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.dialogLine, self.FMT_DL_B, self.FMT_DL_E
+                ))
+            if CONFIG.narratorBreak:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.narratorBreak, self.FMT_DL_E, self.FMT_DL_B
+                ))
+            if CONFIG.altDialogOpen and CONFIG.altDialogClose:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.altDialogStyle, self.FMT_ADL_B, self.FMT_ADL_E
+                ))
+        return
+
     def setTitleMargins(self, upper: float, lower: float) -> None:
         """Set the upper and lower title margin."""
         self._marginTitle = (float(upper), float(lower))
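
These two setters feed later stages: setFirstLineIndent() drives the new A_IND_T flag applied to plain paragraphs in the second pass, and setDialogueHighlight() compiles the dialogue patterns from the user's CONFIG settings for use in _extractFormats(). A configuration sketch, again with `tokenizer` standing in for a concrete subclass:

    # Indent paragraphs by 1.40 em, but not the first one after a heading
    tokenizer.setFirstLineIndent(True, 1.40, False)

    # Populate the dialogue pattern list from CONFIG.dialogStyle et al.
    tokenizer.setDialogueHighlight(True)
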
@@ -355,6 +415,11 @@ class Tokenizer(ABC):
         self._marginMeta = (float(upper), float(lower))
         return
 
+    def setSeparatorMargins(self, upper: float, lower: float) -> None:
+        """Set the upper and lower meta text margin."""
+        self._marginSep = (float(upper), float(lower))
+        return
+
     def setLinkHeadings(self, state: bool) -> None:
         """Enable or disable adding an anchor before headings."""
         self._linkHeadings = state
@@ -385,9 +450,14 @@ class Tokenizer(ABC):
         self._skipKeywords = set(x.lower().strip() for x in keywords.split(","))
         return
 
+    def setKeepLineBreaks(self, state: bool) -> None:
+        """Keep line breaks in paragraphs."""
+        self._keepBreaks = state
+        return
+
     def setKeepMarkdown(self, state: bool) -> None:
         """Keep original markdown during build."""
-        self._keepMarkdown = state
+        self._keepMD = state
         return
 
     ##
@@ -417,8 +487,8 @@ class Tokenizer(ABC):
         self._tokens.append((
             self.T_TITLE, 1, title, [], textAlign
         ))
-        if self._keepMarkdown:
-            self._allMarkdown.append(f"#! {title}\n\n")
+        if self._keepMD:
+            self._markdown.append(f"#! {title}\n\n")
 
         return
 
@@ -446,7 +516,7 @@ class Tokenizer(ABC):
         self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
 
         # Process the character translation map
-        trDict = {nwUnicode.U_MAPOSS: nwUnicode.U_RSQUO}
+        trDict = {nwUnicode.U_MAPOS: nwUnicode.U_RSQUO}
         self._text = self._text.translate(str.maketrans(trDict))
 
         return
@@ -466,22 +536,23 @@ class Tokenizer(ABC):
         4: The internal formatting map of the text, self.FMT_*
         5: The style of the block, self.A_*
         """
-        self._tokens = []
         if self._isNovel:
             self._hFormatter.setHandle(self._handle)
 
         nHead = 0
         breakNext = False
         tmpMarkdown = []
+        tHandle = self._handle or ""
+        tokens: list[T_Token] = []
         for aLine in self._text.splitlines():
             sLine = aLine.strip().lower()
 
             # Check for blank lines
             if len(sLine) == 0:
-                self._tokens.append((
+                tokens.append((
                     self.T_EMPTY, nHead, "", [], self.A_NONE
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append("\n")
 
                 continue
@@ -507,7 +578,7 @@ class Tokenizer(ABC):
                 continue
 
             elif sLine == "[vspace]":
-                self._tokens.append(
+                tokens.append(
                     (self.T_SKIP, nHead, "", [], sAlign)
                 )
                 continue
@@ -515,11 +586,11 @@ class Tokenizer(ABC):
             elif sLine.startswith("[vspace:") and sLine.endswith("]"):
                 nSkip = checkInt(sLine[8:-1], 0)
                 if nSkip >= 1:
-                    self._tokens.append(
+                    tokens.append(
                         (self.T_SKIP, nHead, "", [], sAlign)
                     )
                 if nSkip > 1:
-                    self._tokens += (nSkip - 1) * [
+                    tokens += (nSkip - 1) * [
                         (self.T_SKIP, nHead, "", [], self.A_NONE)
                     ]
                 continue
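
The [vspace] handling is unchanged apart from writing to the local tokens list: the first skip block keeps the current alignment, and any repeats get A_NONE. In novelWriter markup:

    [vspace]      one blank paragraph
    [vspace:3]    three blank paragraphs
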
@@ -533,24 +604,32 @@ class Tokenizer(ABC):
                 if aLine.startswith("%~"):
                     continue
 
-                cStyle, cText, _ = processComment(aLine)
+                cStyle, cKey, cText, _, _ = processComment(aLine)
                 if cStyle == nwComment.SYNOPSIS:
-                    self._tokens.append((
-                        self.T_SYNOPSIS, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_SYNOPSIS, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doSynopsis and self._keepMarkdown:
+                    if self._doSynopsis and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
                 elif cStyle == nwComment.SHORT:
-                    self._tokens.append((
-                        self.T_SHORT, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_SHORT, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doSynopsis and self._keepMarkdown:
+                    if self._doSynopsis and self._keepMD:
+                        tmpMarkdown.append(f"{aLine}\n")
+                elif cStyle == nwComment.FOOTNOTE:
+                    tLine, tFmt = self._extractFormats(cText, skip=self.FMT_FNOTE)
+                    self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
+                    if self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
                 else:
-                    self._tokens.append((
-                        self.T_COMMENT, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_COMMENT, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doComments and self._keepMarkdown:
+                    if self._doComments and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith("@"):
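
processComment() now returns five values instead of three; the new key is what footnote entries are filed under in self._footnotes, namespaced by the document handle. A sketch of the new unpacking (the comment syntax and the key "fn1" are illustrative, not taken from this diff):

    cStyle, cKey, cText, _, _ = processComment("%Footnote.fn1: The footnote text.")
    # cStyle -> nwComment.FOOTNOTE, cKey -> "fn1", cText -> "The footnote text."
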
@@ -560,11 +639,14 @@ class Tokenizer(ABC):
                 # are automatically skipped.
 
                 valid, bits, _ = self._project.index.scanThis(aLine)
-                if valid and bits and bits[0] not in self._skipKeywords:
-                    self._tokens.append((
+                if (
+                    valid and bits and bits[0] in nwLabels.KEY_NAME
+                    and bits[0] not in self._skipKeywords
+                ):
+                    tokens.append((
                         self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
                     ))
-                    if self._doKeywords and self._keepMarkdown:
+                    if self._doKeywords and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("# ", "#! ")):
@@ -597,10 +679,10 @@ class Tokenizer(ABC):
                     self._hFormatter.resetAll()
                     self._noSep = True
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("## ", "##! ")):
@@ -632,10 +714,10 @@ class Tokenizer(ABC):
                     self._hFormatter.resetScene()
                     self._noSep = True
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("### ", "###! ")):
@@ -673,10 +755,10 @@ class Tokenizer(ABC):
                     tStyle = self.A_NONE if self._noSep else self.A_CENTRE
                     self._noSep = False
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith("#### "):
@@ -703,10 +785,10 @@ class Tokenizer(ABC):
                     tType = self.T_SEP
                     tStyle = self.A_CENTRE
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             else:
@@ -750,54 +832,116 @@ class Tokenizer(ABC):
                     sAlign |= self.A_IND_R
 
                 # Process formats
-                tLine, fmtPos = self._extractFormats(aLine)
-                self._tokens.append((
-                    self.T_TEXT, nHead, tLine, fmtPos, sAlign
+                tLine, tFmt = self._extractFormats(aLine)
+                tokens.append((
+                    self.T_TEXT, nHead, tLine, tFmt, sAlign
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
         # If we have content, turn off the first page flag
-        if self._isFirst and self._tokens:
+        if self._isFirst and tokens:
             self._isFirst = False  # First document has been processed
 
             # Make sure the token array doesn't start with a page break
             # on the very first page, adding a blank first page.
-            if self._tokens[0][4] & self.A_PBB:
-                token = self._tokens[0]
-                self._tokens[0] = (
-                    token[0], token[1], token[2], token[3], token[4] & ~self.A_PBB
+            if tokens[0][4] & self.A_PBB:
+                cToken = tokens[0]
+                tokens[0] = (
+                    cToken[0], cToken[1], cToken[2], cToken[3], cToken[4] & ~self.A_PBB
                 )
 
         # Always add an empty line at the end of the file
-        self._tokens.append((
+        tokens.append((
             self.T_EMPTY, nHead, "", [], self.A_NONE
         ))
-        if self._keepMarkdown:
+        if self._keepMD:
             tmpMarkdown.append("\n")
-            self._allMarkdown.append("".join(tmpMarkdown))
+            self._markdown.append("".join(tmpMarkdown))
 
         # Second Pass
         # ===========
-        # Some items need a second pass
+        # This second pass strips away consecutive blank lines, and
+        # combines consecutive text lines into the same paragraph.
+        # It also ensures that there isn't paragraph spacing between
+        # meta data lines for formats that has spacing.
+
+        self._tokens = []
+        pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+        nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+
+        lineSep = "\n" if self._keepBreaks else " "
+        pLines: list[T_Token] = []
 
-        pToken = (self.T_EMPTY, 0, "", [], self.A_NONE)
-        nToken = (self.T_EMPTY, 0, "", [], self.A_NONE)
-        tCount = len(self._tokens)
-        for n, token in enumerate(self._tokens):
+        tCount = len(tokens)
+        for n, cToken in enumerate(tokens):
 
             if n > 0:
-                pToken = self._tokens[n-1]
+                pToken = tokens[n-1]  # Look behind
             if n < tCount - 1:
-                nToken = self._tokens[n+1]
+                nToken = tokens[n+1]  # Look ahead
+
+            if cToken[0] in self.L_SKIP_INDENT and not self._indentFirst:
+                # Unless the indentFirst flag is set, we set up the next
+                # paragraph to not be indented if we see a block of a
+                # specific type
+                self._noIndent = True
+
+            if cToken[0] == self.T_EMPTY:
+                # We don't need to keep the empty lines after this pass
+                pass
 
-            if token[0] == self.T_KEYWORD:
-                aStyle = token[4]
+            elif cToken[0] == self.T_KEYWORD:
+                # Adjust margins for lines in a list of keyword lines
+                aStyle = cToken[4]
                 if pToken[0] == self.T_KEYWORD:
                     aStyle |= self.A_Z_TOPMRG
                 if nToken[0] == self.T_KEYWORD:
                     aStyle |= self.A_Z_BTMMRG
-                self._tokens[n] = (token[0], token[1], token[2], token[3], aStyle)
+                self._tokens.append((
+                    cToken[0], cToken[1], cToken[2], cToken[3], aStyle
+                ))
+
+            elif cToken[0] == self.T_TEXT:
+                # Combine lines from the same paragraph
+                pLines.append(cToken)
+
+                if nToken[0] != self.T_TEXT:
+                    # Next token is not text, so we add the buffer to tokens
+                    nLines = len(pLines)
+                    cStyle = pLines[0][4]
+                    if self._firstIndent and not (self._noIndent or cStyle & self.M_ALIGNED):
+                        # If paragraph indentation is enabled, not temporarily
+                        # turned off, and the block is not aligned, we add the
+                        # text indentation flag
+                        cStyle |= self.A_IND_T
+
+                    if nLines == 1:
+                        # The paragraph contains a single line, so we just
+                        # save that directly to the token list
+                        self._tokens.append((
+                            self.T_TEXT, pLines[0][1], pLines[0][2], pLines[0][3], cStyle
+                        ))
+                    elif nLines > 1:
+                        # The paragraph contains multiple lines, so we need to
+                        # join them according to the line break policy, and
+                        # recompute all the formatting markers
+                        tTxt = ""
+                        tFmt: T_Formats = []
+                        for aToken in pLines:
+                            tLen = len(tTxt)
+                            tTxt += f"{aToken[2]}{lineSep}"
+                            tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aToken[3])
+                        self._tokens.append((
+                            self.T_TEXT, pLines[0][1], tTxt[:-1], tFmt, cStyle
+                        ))
+
+                    # Reset buffer and make sure text indent is on for next pass
+                    pLines = []
+                    self._noIndent = False
+
+            else:
+                self._tokens.append(cToken)
 
         return
@@ -840,7 +984,6 @@ class Tokenizer(ABC):
         textWordChars = self._counts.get("textWordChars", 0)
         titleWordChars = self._counts.get("titleWordChars", 0)
 
-        para = []
         for tType, _, tText, _, _ in self._tokens:
             tText = tText.replace(nwUnicode.U_ENDASH, " ")
             tText = tText.replace(nwUnicode.U_EMDASH, " ")
@@ -850,22 +993,19 @@ class Tokenizer(ABC):
             nChars = len(tText)
             nWChars = len("".join(tWords))
 
-            if tType == self.T_EMPTY:
-                if len(para) > 0:
-                    tTemp = "\n".join(para)
-                    tPWords = tTemp.split()
-                    nPWords = len(tPWords)
-                    nPChars = len(tTemp)
-                    nPWChars = len("".join(tPWords))
-
-                    paragraphCount += 1
-                    allWords += nPWords
-                    textWords += nPWords
-                    allChars += nPChars
-                    textChars += nPChars
-                    allWordChars += nPWChars
-                    textWordChars += nPWChars
-                    para = []
+            if tType == self.T_TEXT:
+                tPWords = tText.split()
+                nPWords = len(tPWords)
+                nPChars = len(tText)
+                nPWChars = len("".join(tPWords))
+
+                paragraphCount += 1
+                allWords += nPWords
+                textWords += nPWords
+                allChars += nPChars
+                textChars += nPChars
+                allWordChars += nPWChars
+                textWordChars += nPWChars
 
             elif tType in self.L_HEADINGS:
                 titleCount += 1
@@ -881,9 +1021,6 @@ class Tokenizer(ABC):
                 allChars += nChars
                 allWordChars += nWChars
 
-            elif tType == self.T_TEXT:
-                para.append(tText.rstrip())
-
             elif tType == self.T_SYNOPSIS and self._doSynopsis:
                 text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
                 words = text.split()
@@ -935,7 +1072,7 @@ class Tokenizer(ABC):
     def saveRawMarkdown(self, path: str | Path) -> None:
         """Save the raw text to a plain text file."""
         with open(path, mode="w", encoding="utf-8") as outFile:
-            for nwdPage in self._allMarkdown:
+            for nwdPage in self._markdown:
                 outFile.write(nwdPage)
         return
 
@@ -950,7 +1087,7 @@ class Tokenizer(ABC):
                 "buildTimeStr": formatTimeStamp(timeStamp),
             },
             "text": {
-                "nwd": [page.rstrip("\n").split("\n") for page in self._allMarkdown],
+                "nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
             }
         }
         with open(path, mode="w", encoding="utf-8") as fObj:
@@ -961,9 +1098,9 @@ class Tokenizer(ABC):
     # Internal Functions
     ##
 
-    def _extractFormats(self, text: str) -> tuple[str, list[tuple[int, int]]]:
+    def _extractFormats(self, text: str, skip: int = 0) -> tuple[str, T_Formats]:
         """Extract format markers from a text paragraph."""
-        temp = []
+        temp: list[tuple[int, int, int, str]] = []
 
         # Match Markdown
         for regEx, fmts in self._rxMarkdown:
@@ -971,7 +1108,7 @@ class Tokenizer(ABC):
             while rxItt.hasNext():
                 rxMatch = rxItt.next()
                 temp.extend(
-                    [rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt]
+                    (rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt, "")
                    for n, fmt in enumerate(fmts) if fmt > 0
                 )
@@ -979,25 +1116,46 @@ class Tokenizer(ABC):
         rxItt = self._rxShortCodes.globalMatch(text, 0)
         while rxItt.hasNext():
             rxMatch = rxItt.next()
-            temp.append([
+            temp.append((
                 rxMatch.capturedStart(1),
                 rxMatch.capturedLength(1),
-                self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0)
-            ])
+                self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
+                "",
+            ))
 
-        # Post-process text and format markers
+        # Match Shortcode w/Values
+        rxItt = self._rxShortCodeVals.globalMatch(text, 0)
+        tHandle = self._handle or ""
+        while rxItt.hasNext():
+            rxMatch = rxItt.next()
+            kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
+            temp.append((
+                rxMatch.capturedStart(0),
+                rxMatch.capturedLength(0),
+                self.FMT_STRIP if kind == skip else kind,
+                f"{tHandle}:{rxMatch.captured(2)}",
+            ))
+
+        # Match Dialogue
+        if self._rxDialogue:
+            for regEx, fmtB, fmtE in self._rxDialogue:
+                rxItt = regEx.globalMatch(text, 0)
+                while rxItt.hasNext():
+                    rxMatch = rxItt.next()
+                    temp.append((rxMatch.capturedStart(0), 0, fmtB, ""))
+                    temp.append((rxMatch.capturedEnd(0), 0, fmtE, ""))
+
+        # Post-process text and format
         result = text
         formats = []
-        for pos, n, fmt in reversed(sorted(temp, key=lambda x: x[0])):
+        for pos, n, fmt, key in reversed(sorted(temp, key=lambda x: x[0])):
             if fmt > 0:
                 result = result[:pos] + result[pos+n:]
-                formats = [(p-n, f) for p, f in formats]
-                formats.insert(0, (pos, fmt))
+                formats = [(p-n, f, k) for p, f, k in formats]
+                formats.insert(0, (pos, fmt, key))
 
         return result, formats
 
-# END Class Tokenizer
-
 
 class HeadingFormatter:
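
The post-processing loop in _extractFormats() removes markers from the text back to front, shifting the already-recorded positions left as characters disappear; the new key element is carried along unchanged. A standalone sketch with invented format codes:

    def extract(text, markers):
        # markers are (pos, length, fmt, key); fmt == 0 means ignore
        formats = []
        for pos, n, fmt, key in reversed(sorted(markers, key=lambda x: x[0])):
            if fmt > 0:
                text = text[:pos] + text[pos+n:]  # cut the marker out
                formats = [(p - n, f, k) for p, f, k in formats]  # entries right of pos move left
                formats.insert(0, (pos, fmt, key))
        return text, formats

    print(extract("**bold**", [(0, 2, 5, ""), (6, 2, 6, "")]))
    # -> ('bold', [(0, 5, ''), (4, 6, '')])
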
 
@@ -1067,5 +1225,3 @@ class HeadingFormatter:
         hFormat = hFormat.replace(nwHeadFmt.CHAR_FOCUS, fText)
 
         return hFormat
-
-# END Class HeadingFormatter