novelWriter 2.4.4__py3-none-any.whl → 2.5rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only, and reflects the changes between the two package versions as they appear in their respective public registries.
Files changed (110)
  1. {novelWriter-2.4.4.dist-info → novelWriter-2.5rc1.dist-info}/METADATA +4 -5
  2. {novelWriter-2.4.4.dist-info → novelWriter-2.5rc1.dist-info}/RECORD +109 -101
  3. {novelWriter-2.4.4.dist-info → novelWriter-2.5rc1.dist-info}/WHEEL +1 -1
  4. novelwriter/__init__.py +33 -39
  5. novelwriter/assets/i18n/project_en_GB.json +1 -0
  6. novelwriter/assets/icons/typicons_dark/icons.conf +2 -0
  7. novelwriter/assets/icons/typicons_dark/nw_font.svg +4 -0
  8. novelwriter/assets/icons/typicons_dark/nw_quote.svg +4 -0
  9. novelwriter/assets/icons/typicons_light/icons.conf +2 -0
  10. novelwriter/assets/icons/typicons_light/nw_font.svg +4 -0
  11. novelwriter/assets/icons/typicons_light/nw_quote.svg +4 -0
  12. novelwriter/assets/manual.pdf +0 -0
  13. novelwriter/assets/sample.zip +0 -0
  14. novelwriter/assets/syntax/cyberpunk_night.conf +5 -3
  15. novelwriter/assets/syntax/default_dark.conf +32 -18
  16. novelwriter/assets/syntax/default_light.conf +24 -10
  17. novelwriter/assets/syntax/dracula.conf +44 -0
  18. novelwriter/assets/syntax/grey_dark.conf +5 -4
  19. novelwriter/assets/syntax/grey_light.conf +5 -4
  20. novelwriter/assets/syntax/light_owl.conf +7 -6
  21. novelwriter/assets/syntax/night_owl.conf +7 -6
  22. novelwriter/assets/syntax/snazzy.conf +42 -0
  23. novelwriter/assets/syntax/solarized_dark.conf +4 -3
  24. novelwriter/assets/syntax/solarized_light.conf +4 -3
  25. novelwriter/assets/syntax/tango.conf +27 -11
  26. novelwriter/assets/syntax/tomorrow.conf +6 -5
  27. novelwriter/assets/syntax/tomorrow_night.conf +7 -6
  28. novelwriter/assets/syntax/tomorrow_night_blue.conf +6 -5
  29. novelwriter/assets/syntax/tomorrow_night_bright.conf +6 -5
  30. novelwriter/assets/syntax/tomorrow_night_eighties.conf +6 -5
  31. novelwriter/assets/text/credits_en.htm +4 -1
  32. novelwriter/assets/themes/cyberpunk_night.conf +3 -0
  33. novelwriter/assets/themes/default_dark.conf +2 -0
  34. novelwriter/assets/themes/default_light.conf +2 -0
  35. novelwriter/assets/themes/dracula.conf +48 -0
  36. novelwriter/assets/themes/solarized_dark.conf +2 -0
  37. novelwriter/assets/themes/solarized_light.conf +2 -0
  38. novelwriter/common.py +33 -12
  39. novelwriter/config.py +184 -98
  40. novelwriter/constants.py +47 -35
  41. novelwriter/core/buildsettings.py +68 -69
  42. novelwriter/core/coretools.py +5 -23
  43. novelwriter/core/docbuild.py +52 -40
  44. novelwriter/core/document.py +3 -5
  45. novelwriter/core/index.py +115 -45
  46. novelwriter/core/item.py +8 -19
  47. novelwriter/core/options.py +2 -4
  48. novelwriter/core/project.py +23 -57
  49. novelwriter/core/projectdata.py +1 -3
  50. novelwriter/core/projectxml.py +12 -15
  51. novelwriter/core/sessions.py +3 -5
  52. novelwriter/core/spellcheck.py +4 -9
  53. novelwriter/core/status.py +211 -164
  54. novelwriter/core/storage.py +0 -8
  55. novelwriter/core/tohtml.py +139 -105
  56. novelwriter/core/tokenizer.py +278 -122
  57. novelwriter/core/{tomd.py → tomarkdown.py} +97 -78
  58. novelwriter/core/toodt.py +257 -166
  59. novelwriter/core/toqdoc.py +419 -0
  60. novelwriter/core/tree.py +5 -7
  61. novelwriter/dialogs/about.py +11 -18
  62. novelwriter/dialogs/docmerge.py +17 -19
  63. novelwriter/dialogs/docsplit.py +17 -19
  64. novelwriter/dialogs/editlabel.py +6 -10
  65. novelwriter/dialogs/preferences.py +193 -144
  66. novelwriter/dialogs/projectsettings.py +225 -189
  67. novelwriter/dialogs/quotes.py +12 -9
  68. novelwriter/dialogs/wordlist.py +9 -15
  69. novelwriter/enum.py +35 -30
  70. novelwriter/error.py +8 -15
  71. novelwriter/extensions/configlayout.py +40 -21
  72. novelwriter/extensions/eventfilters.py +1 -5
  73. novelwriter/extensions/modified.py +58 -14
  74. novelwriter/extensions/novelselector.py +1 -3
  75. novelwriter/extensions/pagedsidebar.py +9 -12
  76. novelwriter/extensions/{circularprogress.py → progressbars.py} +30 -8
  77. novelwriter/extensions/statusled.py +29 -25
  78. novelwriter/extensions/switch.py +4 -6
  79. novelwriter/extensions/switchbox.py +7 -6
  80. novelwriter/extensions/versioninfo.py +3 -9
  81. novelwriter/gui/doceditor.py +118 -137
  82. novelwriter/gui/dochighlight.py +231 -186
  83. novelwriter/gui/docviewer.py +66 -107
  84. novelwriter/gui/docviewerpanel.py +3 -10
  85. novelwriter/gui/editordocument.py +1 -3
  86. novelwriter/gui/itemdetails.py +7 -11
  87. novelwriter/gui/mainmenu.py +22 -18
  88. novelwriter/gui/noveltree.py +11 -24
  89. novelwriter/gui/outline.py +14 -26
  90. novelwriter/gui/projtree.py +35 -60
  91. novelwriter/gui/search.py +10 -3
  92. novelwriter/gui/sidebar.py +2 -6
  93. novelwriter/gui/statusbar.py +29 -37
  94. novelwriter/gui/theme.py +26 -48
  95. novelwriter/guimain.py +134 -148
  96. novelwriter/shared.py +36 -32
  97. novelwriter/text/patterns.py +113 -0
  98. novelwriter/tools/dictionaries.py +10 -20
  99. novelwriter/tools/lipsum.py +10 -16
  100. novelwriter/tools/manusbuild.py +9 -11
  101. novelwriter/tools/manuscript.py +71 -145
  102. novelwriter/tools/manussettings.py +71 -75
  103. novelwriter/tools/noveldetails.py +16 -21
  104. novelwriter/tools/welcome.py +12 -26
  105. novelwriter/tools/writingstats.py +9 -12
  106. novelwriter/types.py +49 -4
  107. novelwriter/extensions/simpleprogress.py +0 -55
  108. {novelWriter-2.4.4.dist-info → novelWriter-2.5rc1.dist-info}/LICENSE.md +0 -0
  109. {novelWriter-2.4.4.dist-info → novelWriter-2.5rc1.dist-info}/entry_points.txt +0 -0
  110. {novelWriter-2.4.4.dist-info → novelWriter-2.5rc1.dist-info}/top_level.txt +0 -0
@@ -24,30 +24,35 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 """
 from __future__ import annotations
 
-import re
 import json
 import logging
+import re
 
 from abc import ABC, abstractmethod
-from time import time
-from pathlib import Path
 from functools import partial
+from pathlib import Path
+from time import time
 
 from PyQt5.QtCore import QCoreApplication, QRegularExpression
+from PyQt5.QtGui import QFont
 
-from novelwriter.common import formatTimeStamp, numberToRoman, checkInt
-from novelwriter.constants import (
-    nwHeadFmt, nwKeyWords, nwLabels, nwRegEx, nwShortcode, nwUnicode, trConst
-)
+from novelwriter import CONFIG
+from novelwriter.common import checkInt, formatTimeStamp, numberToRoman
+from novelwriter.constants import nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwUnicode, trConst
 from novelwriter.core.index import processComment
 from novelwriter.core.project import NWProject
 from novelwriter.enum import nwComment, nwItemLayout
+from novelwriter.text.patterns import REGEX_PATTERNS
 
 logger = logging.getLogger(__name__)
 
 ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
 RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
 
+T_Formats = list[tuple[int, int, str]]
+T_Comment = tuple[str, T_Formats]
+T_Token = tuple[int, int, str, T_Formats, int]
+
 
 def stripEscape(text: str) -> str:
     """Strip escaped Markdown characters from paragraph text."""
@@ -80,6 +85,12 @@ class Tokenizer(ABC):
     FMT_SUP_E = 12  # End superscript
     FMT_SUB_B = 13  # Begin subscript
     FMT_SUB_E = 14  # End subscript
+    FMT_DL_B  = 15  # Begin dialogue
+    FMT_DL_E  = 16  # End dialogue
+    FMT_ADL_B = 17  # Begin alt dialogue
+    FMT_ADL_E = 18  # End alt dialogue
+    FMT_FNOTE = 19  # Footnote marker
+    FMT_STRIP = 20  # Strip the format code
 
     # Block Type
     T_EMPTY = 1  # Empty line (new paragraph)
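
The six new codes follow the same begin/end pairing as the existing ones, with FMT_DL_B/FMT_DL_E delimiting dialogue. A minimal sketch of how a renderer can consume such paired markers; the HTML tags are stand-ins, not novelWriter's actual output:

```python
FMT_DL_B, FMT_DL_E = 15, 16  # dialogue codes from the diff above
TAGS = {FMT_DL_B: '<span class="dialog">', FMT_DL_E: "</span>"}

def render(text: str, formats: list[tuple[int, int, str]]) -> str:
    # Insert tags from the last position first so earlier offsets stay valid
    for pos, fmt, _key in sorted(formats, reverse=True):
        text = text[:pos] + TAGS.get(fmt, "") + text[pos:]
    return text

print(render('"Hello," she said.', [(0, FMT_DL_B, ""), (8, FMT_DL_E, "")]))
# <span class="dialog">"Hello,"</span> she said.
```
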
@@ -108,48 +119,60 @@ class Tokenizer(ABC):
     A_Z_BTMMRG = 0x0080  # Zero bottom margin
     A_IND_L    = 0x0100  # Left indentation
     A_IND_R    = 0x0200  # Right indentation
+    A_IND_T    = 0x0400  # Text indentation
+
+    # Masks
+    M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY
 
     # Lookups
     L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
+    L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
+    L_SUMMARY = [T_SYNOPSIS, T_SHORT]
 
     def __init__(self, project: NWProject) -> None:
 
         self._project = project
 
         # Data Variables
-        self._text = ""      # The raw text to be tokenized
-        self._handle = None  # The item handle currently being processed
-        self._result = ""    # The result of the last document
+        self._text = ""       # The raw text to be tokenized
+        self._handle = None   # The item handle currently being processed
+        self._result = ""     # The result of the last document
+        self._keepMD = False  # Whether to keep the markdown text
 
-        self._keepMarkdown = False  # Whether to keep the markdown text
-        self._allMarkdown = []      # The result novelWriter markdown of all documents
+        # Tokens and Meta Data (Per Document)
+        self._tokens: list[T_Token] = []
+        self._footnotes: dict[str, T_Comment] = {}
 
-        # Processed Tokens and Meta Data
-        self._tokens: list[tuple[int, int, str, list[tuple[int, int]], int]] = []
+        # Tokens and Meta Data (Per Instance)
         self._counts: dict[str, int] = {}
         self._outline: dict[str, str] = {}
+        self._markdown: list[str] = []
 
         # User Settings
-        self._textFont = "Serif"    # Output text font
-        self._textSize = 11         # Output text size
-        self._textFixed = False     # Fixed width text
+        self._textFont = QFont("Serif", 11)  # Output text font
         self._lineHeight = 1.15     # Line height in units of em
         self._blockIndent = 4.00    # Block indent in units of em
+        self._firstIndent = False   # Enable first line indent
+        self._firstWidth = 1.40     # First line indent in units of em
+        self._indentFirst = False   # Indent first paragraph
         self._doJustify = False     # Justify text
        self._doBodyText = True     # Include body text
         self._doSynopsis = False    # Also process synopsis comments
         self._doComments = False    # Also process comments
         self._doKeywords = False    # Also process keywords like tags and references
         self._skipKeywords = set()  # Keywords to ignore
+        self._keepBreaks = True     # Keep line breaks in paragraphs
 
         # Margins
-        self._marginTitle = (1.000, 0.500)
-        self._marginHead1 = (1.000, 0.500)
-        self._marginHead2 = (0.834, 0.500)
-        self._marginHead3 = (0.584, 0.500)
-        self._marginHead4 = (0.584, 0.500)
+        self._marginTitle = (1.417, 0.500)
+        self._marginHead1 = (1.417, 0.500)
+        self._marginHead2 = (1.668, 0.500)
+        self._marginHead3 = (1.168, 0.500)
+        self._marginHead4 = (1.168, 0.500)
         self._marginText = (0.000, 0.584)
         self._marginMeta = (0.000, 0.584)
+        self._marginFoot = (1.417, 0.467)
+        self._marginSep = (1.168, 1.168)
 
         # Title Formats
         self._fmtTitle = nwHeadFmt.TITLE  # Formatting for titles
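
The new M_ALIGNED mask groups the four alignment bits so later passes can test "has any explicit alignment" in one operation. A small demonstration; the four low flag values are assumed here, since only the higher bits appear in this diff:

```python
A_LEFT, A_RIGHT, A_CENTRE, A_JUSTIFY = 0x0001, 0x0002, 0x0004, 0x0008  # assumed
A_IND_L, A_IND_T = 0x0100, 0x0400  # values from the diff above

M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY

style = A_CENTRE | A_IND_L
print(bool(style & M_ALIGNED))  # True: explicitly aligned, no first line indent

style = A_IND_T
print(bool(style & M_ALIGNED))  # False: eligible for first line indent
```
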
@@ -174,7 +197,8 @@ class Tokenizer(ABC):
 
         # Instance Variables
         self._hFormatter = HeadingFormatter(self._project)
-        self._noSep = True  # Flag to indicate that we don't want a scene separator
+        self._noSep = True        # Flag to indicate that we don't want a scene separator
+        self._showDialog = False  # Flag for dialogue highlighting
 
         # This File
         self._isNovel = False  # Document is a novel document
@@ -189,12 +213,12 @@ class Tokenizer(ABC):
 
         # Format RegEx
         self._rxMarkdown = [
-            (QRegularExpression(nwRegEx.FMT_EI), [0, self.FMT_I_B, 0, self.FMT_I_E]),
-            (QRegularExpression(nwRegEx.FMT_EB), [0, self.FMT_B_B, 0, self.FMT_B_E]),
-            (QRegularExpression(nwRegEx.FMT_ST), [0, self.FMT_D_B, 0, self.FMT_D_E]),
+            (REGEX_PATTERNS.markdownItalic, [0, self.FMT_I_B, 0, self.FMT_I_E]),
+            (REGEX_PATTERNS.markdownBold, [0, self.FMT_B_B, 0, self.FMT_B_E]),
+            (REGEX_PATTERNS.markdownStrike, [0, self.FMT_D_B, 0, self.FMT_D_E]),
         ]
-        self._rxShortCodes = QRegularExpression(nwRegEx.FMT_SC)
-        self._rxShortCodeVals = QRegularExpression(nwRegEx.FMT_SV)
+        self._rxShortCodes = REGEX_PATTERNS.shortcodePlain
+        self._rxShortCodeVals = REGEX_PATTERNS.shortcodeValue
 
         self._shortCodeFmt = {
             nwShortcode.ITALIC_O: self.FMT_I_B, nwShortcode.ITALIC_C: self.FMT_I_E,
@@ -205,6 +229,11 @@ class Tokenizer(ABC):
             nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
             nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
         }
+        self._shortCodeVals = {
+            nwShortcode.FOOTNOTE_B: self.FMT_FNOTE,
+        }
+
+        self._rxDialogue: list[tuple[QRegularExpression, int, int]] = []
 
         return
 
@@ -220,7 +249,7 @@ class Tokenizer(ABC):
     @property
     def allMarkdown(self) -> list[str]:
         """The combined novelWriter Markdown text."""
-        return self._allMarkdown
+        return self._markdown
 
     @property
     def textStats(self) -> dict[str, int]:
@@ -298,11 +327,9 @@ class Tokenizer(ABC):
         )
         return
 
-    def setFont(self, family: str, size: int, isFixed: bool = False) -> None:
+    def setFont(self, font: QFont) -> None:
         """Set the build font."""
-        self._textFont = family
-        self._textSize = round(int(size))
-        self._textFixed = isFixed
+        self._textFont = font
         return
 
     def setLineHeight(self, height: float) -> None:
@@ -315,11 +342,43 @@ class Tokenizer(ABC):
         self._blockIndent = min(max(float(indent), 0.0), 10.0)
         return
 
+    def setFirstLineIndent(self, state: bool, indent: float, first: bool) -> None:
+        """Set first line indent and whether to also indent first
+        paragraph after a heading.
+        """
+        self._firstIndent = state
+        self._firstWidth = indent
+        self._indentFirst = first
+        return
+
     def setJustify(self, state: bool) -> None:
         """Enable or disable text justification."""
         self._doJustify = state
         return
 
+    def setDialogueHighlight(self, state: bool) -> None:
+        """Enable or disable dialogue highlighting."""
+        self._rxDialogue = []
+        self._showDialog = state
+        if state:
+            if CONFIG.dialogStyle > 0:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.dialogStyle, self.FMT_DL_B, self.FMT_DL_E
+                ))
+            if CONFIG.dialogLine:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.dialogLine, self.FMT_DL_B, self.FMT_DL_E
+                ))
+            if CONFIG.narratorBreak:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.narratorBreak, self.FMT_DL_E, self.FMT_DL_B
+                ))
+            if CONFIG.altDialogOpen and CONFIG.altDialogClose:
+                self._rxDialogue.append((
+                    REGEX_PATTERNS.altDialogStyle, self.FMT_ADL_B, self.FMT_ADL_E
+                ))
+        return
+
     def setTitleMargins(self, upper: float, lower: float) -> None:
         """Set the upper and lower title margin."""
         self._marginTitle = (float(upper), float(lower))
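
A sketch of what each configured dialogue pattern contributes: every match yields a zero-length begin marker at its start and an end marker at its end, which _extractFormats (further down) merges with the other format markers. The pattern is a simplified stand-in for REGEX_PATTERNS.dialogStyle, and plain re stands in for QRegularExpression:

```python
import re

FMT_DL_B, FMT_DL_E = 15, 16
dialog = re.compile(r'"[^"]*"')  # assumed: straight double-quoted speech

def dialogue_markers(text: str) -> list[tuple[int, int, int, str]]:
    # Entries are (position, stripped length, format code, data key);
    # length 0 means nothing is removed from the text.
    marks = []
    for m in dialog.finditer(text):
        marks.append((m.start(), 0, FMT_DL_B, ""))
        marks.append((m.end(), 0, FMT_DL_E, ""))
    return marks

print(dialogue_markers('She said, "wait here" and left.'))
# [(10, 0, 15, ''), (21, 0, 16, '')]
```
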
@@ -355,6 +414,11 @@ class Tokenizer(ABC):
         self._marginMeta = (float(upper), float(lower))
         return
 
+    def setSeparatorMargins(self, upper: float, lower: float) -> None:
+        """Set the upper and lower separator margin."""
+        self._marginSep = (float(upper), float(lower))
+        return
+
     def setLinkHeadings(self, state: bool) -> None:
         """Enable or disable adding an anchor before headings."""
         self._linkHeadings = state
@@ -385,9 +449,14 @@ class Tokenizer(ABC):
         self._skipKeywords = set(x.lower().strip() for x in keywords.split(","))
         return
 
+    def setKeepLineBreaks(self, state: bool) -> None:
+        """Keep line breaks in paragraphs."""
+        self._keepBreaks = state
+        return
+
     def setKeepMarkdown(self, state: bool) -> None:
         """Keep original markdown during build."""
-        self._keepMarkdown = state
+        self._keepMD = state
         return
 
     ##
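
The new line break policy only matters for paragraphs written across several source lines; the second pass of tokenizeText joins them with a newline when breaks are kept, or a space when they are not:

```python
lines = ["He ran.", "She followed."]
for keep_breaks in (True, False):
    sep = "\n" if keep_breaks else " "  # mirrors lineSep in tokenizeText
    print(repr(sep.join(lines)))
# 'He ran.\nShe followed.'
# 'He ran. She followed.'
```
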
@@ -417,8 +486,8 @@ class Tokenizer(ABC):
         self._tokens.append((
             self.T_TITLE, 1, title, [], textAlign
         ))
-        if self._keepMarkdown:
-            self._allMarkdown.append(f"#! {title}\n\n")
+        if self._keepMD:
+            self._markdown.append(f"#! {title}\n\n")
 
         return
 
@@ -446,7 +515,7 @@ class Tokenizer(ABC):
         self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
 
         # Process the character translation map
-        trDict = {nwUnicode.U_MAPOSS: nwUnicode.U_RSQUO}
+        trDict = {nwUnicode.U_MAPOS: nwUnicode.U_RSQUO}
         self._text = self._text.translate(str.maketrans(trDict))
 
         return
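
A worked example of the translation step with the corrected constant name: U+02BC (modifier letter apostrophe, nwUnicode.U_MAPOS) is mapped to U+2019 (right single quotation mark, nwUnicode.U_RSQUO):

```python
trDict = {"\u02bc": "\u2019"}  # U_MAPOS -> U_RSQUO
print("It\u02bcs done".translate(str.maketrans(trDict)))  # It’s done
```
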
@@ -466,22 +535,23 @@ class Tokenizer(ABC):
         4: The internal formatting map of the text, self.FMT_*
         5: The style of the block, self.A_*
         """
-        self._tokens = []
         if self._isNovel:
             self._hFormatter.setHandle(self._handle)
 
         nHead = 0
         breakNext = False
         tmpMarkdown = []
+        tHandle = self._handle or ""
+        tokens: list[T_Token] = []
         for aLine in self._text.splitlines():
             sLine = aLine.strip().lower()
 
             # Check for blank lines
             if len(sLine) == 0:
-                self._tokens.append((
+                tokens.append((
                     self.T_EMPTY, nHead, "", [], self.A_NONE
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append("\n")
 
                 continue
@@ -507,7 +577,7 @@ class Tokenizer(ABC):
                 continue
 
             elif sLine == "[vspace]":
-                self._tokens.append(
+                tokens.append(
                     (self.T_SKIP, nHead, "", [], sAlign)
                 )
                 continue
@@ -515,11 +585,11 @@ class Tokenizer(ABC):
             elif sLine.startswith("[vspace:") and sLine.endswith("]"):
                 nSkip = checkInt(sLine[8:-1], 0)
                 if nSkip >= 1:
-                    self._tokens.append(
+                    tokens.append(
                         (self.T_SKIP, nHead, "", [], sAlign)
                     )
                 if nSkip > 1:
-                    self._tokens += (nSkip - 1) * [
+                    tokens += (nSkip - 1) * [
                         (self.T_SKIP, nHead, "", [], self.A_NONE)
                     ]
                 continue
@@ -533,24 +603,32 @@ class Tokenizer(ABC):
                 if aLine.startswith("%~"):
                     continue
 
-                cStyle, cText, _ = processComment(aLine)
+                cStyle, cKey, cText, _, _ = processComment(aLine)
                 if cStyle == nwComment.SYNOPSIS:
-                    self._tokens.append((
-                        self.T_SYNOPSIS, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_SYNOPSIS, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doSynopsis and self._keepMarkdown:
+                    if self._doSynopsis and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
                 elif cStyle == nwComment.SHORT:
-                    self._tokens.append((
-                        self.T_SHORT, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_SHORT, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doSynopsis and self._keepMarkdown:
+                    if self._doSynopsis and self._keepMD:
+                        tmpMarkdown.append(f"{aLine}\n")
+                elif cStyle == nwComment.FOOTNOTE:
+                    tLine, tFmt = self._extractFormats(cText, skip=self.FMT_FNOTE)
+                    self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
+                    if self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
                 else:
-                    self._tokens.append((
-                        self.T_COMMENT, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_COMMENT, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doComments and self._keepMarkdown:
+                    if self._doComments and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith("@"):
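
New here: footnote comments are parsed like synopsis comments but stored aside instead of being emitted as tokens, keyed by document handle plus footnote key so the inline FMT_FNOTE marker can resolve them later. A sketch with invented handle and key values:

```python
# Shape of the new self._footnotes store; values are invented.
footnotes: dict[str, tuple[str, list]] = {}

handle, key = "4c4f5a2b6c8d9", "fn1"  # hypothetical document handle and key
footnotes[f"{handle}:{key}"] = ("The footnote text.", [])

print(footnotes[f"{handle}:{key}"][0])  # The footnote text.
```
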
@@ -560,11 +638,14 @@ class Tokenizer(ABC):
                 # are automatically skipped.
 
                 valid, bits, _ = self._project.index.scanThis(aLine)
-                if valid and bits and bits[0] not in self._skipKeywords:
-                    self._tokens.append((
+                if (
+                    valid and bits and bits[0] in nwLabels.KEY_NAME
+                    and bits[0] not in self._skipKeywords
+                ):
+                    tokens.append((
                         self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
                     ))
-                    if self._doKeywords and self._keepMarkdown:
+                    if self._doKeywords and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("# ", "#! ")):
@@ -597,10 +678,10 @@ class Tokenizer(ABC):
                     self._hFormatter.resetAll()
                     self._noSep = True
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("## ", "##! ")):
@@ -632,10 +713,10 @@ class Tokenizer(ABC):
                     self._hFormatter.resetScene()
                     self._noSep = True
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("### ", "###! ")):
@@ -673,10 +754,10 @@ class Tokenizer(ABC):
                     tStyle = self.A_NONE if self._noSep else self.A_CENTRE
                     self._noSep = False
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith("#### "):
@@ -703,10 +784,10 @@ class Tokenizer(ABC):
                     tType = self.T_SEP
                     tStyle = self.A_CENTRE
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             else:
@@ -750,54 +831,117 @@ class Tokenizer(ABC):
                     sAlign |= self.A_IND_R
 
                 # Process formats
-                tLine, fmtPos = self._extractFormats(aLine)
-                self._tokens.append((
-                    self.T_TEXT, nHead, tLine, fmtPos, sAlign
+                tLine, tFmt = self._extractFormats(aLine)
+                tokens.append((
+                    self.T_TEXT, nHead, tLine, tFmt, sAlign
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
         # If we have content, turn off the first page flag
-        if self._isFirst and self._tokens:
+        if self._isFirst and tokens:
             self._isFirst = False  # First document has been processed
 
             # Make sure the token array doesn't start with a page break
             # on the very first page, adding a blank first page.
-            if self._tokens[0][4] & self.A_PBB:
-                token = self._tokens[0]
-                self._tokens[0] = (
-                    token[0], token[1], token[2], token[3], token[4] & ~self.A_PBB
+            if tokens[0][4] & self.A_PBB:
+                cToken = tokens[0]
+                tokens[0] = (
+                    cToken[0], cToken[1], cToken[2], cToken[3], cToken[4] & ~self.A_PBB
                 )
 
         # Always add an empty line at the end of the file
-        self._tokens.append((
+        tokens.append((
             self.T_EMPTY, nHead, "", [], self.A_NONE
         ))
-        if self._keepMarkdown:
+        if self._keepMD:
             tmpMarkdown.append("\n")
-            self._allMarkdown.append("".join(tmpMarkdown))
+            self._markdown.append("".join(tmpMarkdown))
 
         # Second Pass
         # ===========
-        # Some items need a second pass
+        # This second pass strips away consecutive blank lines, and
+        # combines consecutive text lines into the same paragraph.
+        # It also ensures that there isn't paragraph spacing between
+        # meta data lines for formats that have spacing.
+
+        self._tokens = []
+        pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+        nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+
+        lineSep = "\n" if self._keepBreaks else " "
+        pLines: list[T_Token] = []
 
-        pToken = (self.T_EMPTY, 0, "", [], self.A_NONE)
-        nToken = (self.T_EMPTY, 0, "", [], self.A_NONE)
-        tCount = len(self._tokens)
-        for n, token in enumerate(self._tokens):
+        tCount = len(tokens)
+        pIndent = True
+        for n, cToken in enumerate(tokens):
 
             if n > 0:
-                pToken = self._tokens[n-1]
+                pToken = tokens[n-1]  # Look behind
             if n < tCount - 1:
-                nToken = self._tokens[n+1]
+                nToken = tokens[n+1]  # Look ahead
+
+            if not self._indentFirst and cToken[0] in self.L_SKIP_INDENT:
+                # Unless the indentFirst flag is set, we set up the next
+                # paragraph to not be indented if we see a block of a
+                # specific type
+                pIndent = False
+
+            if cToken[0] == self.T_EMPTY:
+                # We don't need to keep the empty lines after this pass
+                pass
 
-            if token[0] == self.T_KEYWORD:
-                aStyle = token[4]
+            elif cToken[0] == self.T_KEYWORD:
+                # Adjust margins for lines in a list of keyword lines
+                aStyle = cToken[4]
                 if pToken[0] == self.T_KEYWORD:
                     aStyle |= self.A_Z_TOPMRG
                 if nToken[0] == self.T_KEYWORD:
                     aStyle |= self.A_Z_BTMMRG
-                self._tokens[n] = (token[0], token[1], token[2], token[3], aStyle)
+                self._tokens.append((
+                    cToken[0], cToken[1], cToken[2], cToken[3], aStyle
+                ))
+
+            elif cToken[0] == self.T_TEXT:
+                # Combine lines from the same paragraph
+                pLines.append(cToken)
+
+                if nToken[0] != self.T_TEXT:
+                    # Next token is not text, so we add the buffer to tokens
+                    nLines = len(pLines)
+                    cStyle = pLines[0][4]
+                    if self._firstIndent and pIndent and not cStyle & self.M_ALIGNED:
+                        # If paragraph indentation is enabled, not temporarily
+                        # turned off, and the block is not aligned, we add the
+                        # text indentation flag
+                        cStyle |= self.A_IND_T
+
+                    if nLines == 1:
+                        # The paragraph contains a single line, so we just
+                        # save that directly to the token list
+                        self._tokens.append((
+                            self.T_TEXT, pLines[0][1], pLines[0][2], pLines[0][3], cStyle
+                        ))
+                    elif nLines > 1:
+                        # The paragraph contains multiple lines, so we need to
+                        # join them according to the line break policy, and
+                        # recompute all the formatting markers
+                        tTxt = ""
+                        tFmt: T_Formats = []
+                        for aToken in pLines:
+                            tLen = len(tTxt)
+                            tTxt += f"{aToken[2]}{lineSep}"
+                            tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aToken[3])
+                        self._tokens.append((
+                            self.T_TEXT, pLines[0][1], tTxt[:-1], tFmt, cStyle
+                        ))
+
+                    # Reset buffer and make sure text indent is on for next pass
+                    pLines = []
+                    pIndent = True
+
+            else:
+                self._tokens.append(cToken)
 
         return
 
@@ -840,7 +984,6 @@ class Tokenizer(ABC):
         textWordChars = self._counts.get("textWordChars", 0)
         titleWordChars = self._counts.get("titleWordChars", 0)
 
-        para = []
         for tType, _, tText, _, _ in self._tokens:
             tText = tText.replace(nwUnicode.U_ENDASH, " ")
             tText = tText.replace(nwUnicode.U_EMDASH, " ")
@@ -850,22 +993,19 @@ class Tokenizer(ABC):
             nChars = len(tText)
             nWChars = len("".join(tWords))
 
-            if tType == self.T_EMPTY:
-                if len(para) > 0:
-                    tTemp = "\n".join(para)
-                    tPWords = tTemp.split()
-                    nPWords = len(tPWords)
-                    nPChars = len(tTemp)
-                    nPWChars = len("".join(tPWords))
-
-                    paragraphCount += 1
-                    allWords += nPWords
-                    textWords += nPWords
-                    allChars += nPChars
-                    textChars += nPChars
-                    allWordChars += nPWChars
-                    textWordChars += nPWChars
-                    para = []
+            if tType == self.T_TEXT:
+                tPWords = tText.split()
+                nPWords = len(tPWords)
+                nPChars = len(tText)
+                nPWChars = len("".join(tPWords))
+
+                paragraphCount += 1
+                allWords += nPWords
+                textWords += nPWords
+                allChars += nPChars
+                textChars += nPChars
+                allWordChars += nPWChars
+                textWordChars += nPWChars
 
             elif tType in self.L_HEADINGS:
                 titleCount += 1
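
Because the second pass now delivers one T_TEXT token per paragraph, the stats pass no longer needs the para buffer and can count each text token directly. The per-token arithmetic in isolation:

```python
t_text = "He ran. It was cold."
t_words = t_text.split()

n_words = len(t_words)                # 5
n_chars = len(t_text)                 # 20
n_word_chars = len("".join(t_words))  # 16 (characters excluding spaces)

print(n_words, n_chars, n_word_chars)
```
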
@@ -881,9 +1021,6 @@ class Tokenizer(ABC):
                 allChars += nChars
                 allWordChars += nWChars
 
-            elif tType == self.T_TEXT:
-                para.append(tText.rstrip())
-
             elif tType == self.T_SYNOPSIS and self._doSynopsis:
                 text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
                 words = text.split()
@@ -935,7 +1072,7 @@ class Tokenizer(ABC):
     def saveRawMarkdown(self, path: str | Path) -> None:
         """Save the raw text to a plain text file."""
         with open(path, mode="w", encoding="utf-8") as outFile:
-            for nwdPage in self._allMarkdown:
+            for nwdPage in self._markdown:
                 outFile.write(nwdPage)
         return
@@ -950,7 +1087,7 @@ class Tokenizer(ABC):
                 "buildTimeStr": formatTimeStamp(timeStamp),
             },
             "text": {
-                "nwd": [page.rstrip("\n").split("\n") for page in self._allMarkdown],
+                "nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
             }
         }
         with open(path, mode="w", encoding="utf-8") as fObj:
@@ -961,9 +1098,9 @@
     # Internal Functions
     ##
 
-    def _extractFormats(self, text: str) -> tuple[str, list[tuple[int, int]]]:
+    def _extractFormats(self, text: str, skip: int = 0) -> tuple[str, T_Formats]:
         """Extract format markers from a text paragraph."""
-        temp = []
+        temp: list[tuple[int, int, int, str]] = []
 
         # Match Markdown
         for regEx, fmts in self._rxMarkdown:
@@ -971,7 +1108,7 @@
             while rxItt.hasNext():
                 rxMatch = rxItt.next()
                 temp.extend(
-                    [rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt]
+                    (rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt, "")
                     for n, fmt in enumerate(fmts) if fmt > 0
                 )
 
@@ -979,25 +1116,46 @@
         rxItt = self._rxShortCodes.globalMatch(text, 0)
         while rxItt.hasNext():
             rxMatch = rxItt.next()
-            temp.append([
+            temp.append((
                 rxMatch.capturedStart(1),
                 rxMatch.capturedLength(1),
-                self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0)
-            ])
+                self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
+                "",
+            ))
 
-        # Post-process text and format markers
+        # Match Shortcode w/Values
+        rxItt = self._rxShortCodeVals.globalMatch(text, 0)
+        tHandle = self._handle or ""
+        while rxItt.hasNext():
+            rxMatch = rxItt.next()
+            kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
+            temp.append((
+                rxMatch.capturedStart(0),
+                rxMatch.capturedLength(0),
+                self.FMT_STRIP if kind == skip else kind,
+                f"{tHandle}:{rxMatch.captured(2)}",
+            ))
+
+        # Match Dialogue
+        if self._rxDialogue:
+            for regEx, fmtB, fmtE in self._rxDialogue:
+                rxItt = regEx.globalMatch(text, 0)
+                while rxItt.hasNext():
+                    rxMatch = rxItt.next()
+                    temp.append((rxMatch.capturedStart(0), 0, fmtB, ""))
+                    temp.append((rxMatch.capturedEnd(0), 0, fmtE, ""))
+
+        # Post-process text and format
         result = text
         formats = []
-        for pos, n, fmt in reversed(sorted(temp, key=lambda x: x[0])):
+        for pos, n, fmt, key in reversed(sorted(temp, key=lambda x: x[0])):
             if fmt > 0:
                 result = result[:pos] + result[pos+n:]
-                formats = [(p-n, f) for p, f in formats]
-                formats.insert(0, (pos, fmt))
+                formats = [(p-n, f, k) for p, f, k in formats]
+                formats.insert(0, (pos, fmt, key))
 
         return result, formats
 
-
-# END Class Tokenizer
-
 
 class HeadingFormatter:
 
@@ -1067,5 +1225,3 @@
         hFormat = hFormat.replace(nwHeadFmt.CHAR_FOCUS, fText)
 
         return hFormat
-
-# END Class HeadingFormatter