novelWriter-2.4.2-py3-none-any.whl → novelWriter-2.5b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {novelWriter-2.4.2.dist-info → novelWriter-2.5b1.dist-info}/METADATA +4 -5
  2. {novelWriter-2.4.2.dist-info → novelWriter-2.5b1.dist-info}/RECORD +109 -101
  3. novelwriter/__init__.py +33 -39
  4. novelwriter/assets/i18n/project_en_GB.json +1 -0
  5. novelwriter/assets/icons/typicons_dark/icons.conf +2 -0
  6. novelwriter/assets/icons/typicons_dark/nw_font.svg +4 -0
  7. novelwriter/assets/icons/typicons_dark/nw_quote.svg +4 -0
  8. novelwriter/assets/icons/typicons_light/icons.conf +2 -0
  9. novelwriter/assets/icons/typicons_light/nw_font.svg +4 -0
  10. novelwriter/assets/icons/typicons_light/nw_quote.svg +4 -0
  11. novelwriter/assets/manual.pdf +0 -0
  12. novelwriter/assets/sample.zip +0 -0
  13. novelwriter/assets/syntax/cyberpunk_night.conf +5 -3
  14. novelwriter/assets/syntax/default_dark.conf +32 -18
  15. novelwriter/assets/syntax/default_light.conf +24 -10
  16. novelwriter/assets/syntax/dracula.conf +44 -0
  17. novelwriter/assets/syntax/grey_dark.conf +5 -4
  18. novelwriter/assets/syntax/grey_light.conf +5 -4
  19. novelwriter/assets/syntax/light_owl.conf +7 -6
  20. novelwriter/assets/syntax/night_owl.conf +7 -6
  21. novelwriter/assets/syntax/snazzy.conf +42 -0
  22. novelwriter/assets/syntax/solarized_dark.conf +4 -3
  23. novelwriter/assets/syntax/solarized_light.conf +4 -3
  24. novelwriter/assets/syntax/tango.conf +27 -11
  25. novelwriter/assets/syntax/tomorrow.conf +6 -5
  26. novelwriter/assets/syntax/tomorrow_night.conf +7 -6
  27. novelwriter/assets/syntax/tomorrow_night_blue.conf +6 -5
  28. novelwriter/assets/syntax/tomorrow_night_bright.conf +6 -5
  29. novelwriter/assets/syntax/tomorrow_night_eighties.conf +6 -5
  30. novelwriter/assets/text/credits_en.htm +4 -1
  31. novelwriter/assets/themes/cyberpunk_night.conf +2 -0
  32. novelwriter/assets/themes/default_dark.conf +1 -0
  33. novelwriter/assets/themes/default_light.conf +1 -0
  34. novelwriter/assets/themes/dracula.conf +47 -0
  35. novelwriter/assets/themes/solarized_dark.conf +1 -0
  36. novelwriter/assets/themes/solarized_light.conf +1 -0
  37. novelwriter/common.py +31 -9
  38. novelwriter/config.py +118 -84
  39. novelwriter/constants.py +40 -26
  40. novelwriter/core/buildsettings.py +63 -66
  41. novelwriter/core/coretools.py +2 -22
  42. novelwriter/core/docbuild.py +51 -40
  43. novelwriter/core/document.py +3 -5
  44. novelwriter/core/index.py +115 -45
  45. novelwriter/core/item.py +8 -19
  46. novelwriter/core/options.py +2 -4
  47. novelwriter/core/project.py +23 -57
  48. novelwriter/core/projectdata.py +1 -3
  49. novelwriter/core/projectxml.py +12 -15
  50. novelwriter/core/sessions.py +3 -5
  51. novelwriter/core/spellcheck.py +4 -9
  52. novelwriter/core/status.py +211 -164
  53. novelwriter/core/storage.py +0 -8
  54. novelwriter/core/tohtml.py +94 -100
  55. novelwriter/core/tokenizer.py +199 -112
  56. novelwriter/core/{tomd.py → tomarkdown.py} +97 -78
  57. novelwriter/core/toodt.py +212 -148
  58. novelwriter/core/toqdoc.py +403 -0
  59. novelwriter/core/tree.py +5 -7
  60. novelwriter/dialogs/about.py +3 -5
  61. novelwriter/dialogs/docmerge.py +1 -3
  62. novelwriter/dialogs/docsplit.py +1 -3
  63. novelwriter/dialogs/editlabel.py +0 -2
  64. novelwriter/dialogs/preferences.py +111 -88
  65. novelwriter/dialogs/projectsettings.py +216 -180
  66. novelwriter/dialogs/quotes.py +3 -4
  67. novelwriter/dialogs/wordlist.py +3 -9
  68. novelwriter/enum.py +31 -25
  69. novelwriter/error.py +8 -15
  70. novelwriter/extensions/circularprogress.py +5 -6
  71. novelwriter/extensions/configlayout.py +18 -18
  72. novelwriter/extensions/eventfilters.py +1 -5
  73. novelwriter/extensions/modified.py +50 -13
  74. novelwriter/extensions/novelselector.py +1 -3
  75. novelwriter/extensions/pagedsidebar.py +9 -12
  76. novelwriter/extensions/simpleprogress.py +1 -3
  77. novelwriter/extensions/statusled.py +1 -3
  78. novelwriter/extensions/switch.py +4 -6
  79. novelwriter/extensions/switchbox.py +7 -6
  80. novelwriter/extensions/versioninfo.py +3 -9
  81. novelwriter/gui/doceditor.py +132 -133
  82. novelwriter/gui/dochighlight.py +237 -183
  83. novelwriter/gui/docviewer.py +61 -97
  84. novelwriter/gui/docviewerpanel.py +3 -10
  85. novelwriter/gui/editordocument.py +1 -3
  86. novelwriter/gui/itemdetails.py +7 -11
  87. novelwriter/gui/mainmenu.py +11 -7
  88. novelwriter/gui/noveltree.py +11 -24
  89. novelwriter/gui/outline.py +11 -23
  90. novelwriter/gui/projtree.py +26 -43
  91. novelwriter/gui/search.py +1 -3
  92. novelwriter/gui/sidebar.py +2 -6
  93. novelwriter/gui/statusbar.py +6 -10
  94. novelwriter/gui/theme.py +26 -51
  95. novelwriter/guimain.py +50 -71
  96. novelwriter/shared.py +30 -15
  97. novelwriter/tools/dictionaries.py +12 -15
  98. novelwriter/tools/lipsum.py +2 -4
  99. novelwriter/tools/manusbuild.py +1 -3
  100. novelwriter/tools/manuscript.py +71 -144
  101. novelwriter/tools/manussettings.py +67 -73
  102. novelwriter/tools/noveldetails.py +6 -11
  103. novelwriter/tools/welcome.py +2 -16
  104. novelwriter/tools/writingstats.py +6 -9
  105. novelwriter/types.py +45 -3
  106. {novelWriter-2.4.2.dist-info → novelWriter-2.5b1.dist-info}/LICENSE.md +0 -0
  107. {novelWriter-2.4.2.dist-info → novelWriter-2.5b1.dist-info}/WHEEL +0 -0
  108. {novelWriter-2.4.2.dist-info → novelWriter-2.5b1.dist-info}/entry_points.txt +0 -0
  109. {novelWriter-2.4.2.dist-info → novelWriter-2.5b1.dist-info}/top_level.txt +0 -0
--- novelwriter/core/tokenizer.py (2.4.2)
+++ novelwriter/core/tokenizer.py (2.5b1)
@@ -24,18 +24,19 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 """
 from __future__ import annotations
 
-import re
 import json
 import logging
+import re
 
 from abc import ABC, abstractmethod
-from time import time
-from pathlib import Path
 from functools import partial
+from pathlib import Path
+from time import time
 
 from PyQt5.QtCore import QCoreApplication, QRegularExpression
+from PyQt5.QtGui import QFont
 
-from novelwriter.common import formatTimeStamp, numberToRoman, checkInt
+from novelwriter.common import checkInt, formatTimeStamp, numberToRoman
 from novelwriter.constants import (
     nwHeadFmt, nwKeyWords, nwLabels, nwRegEx, nwShortcode, nwUnicode, trConst
 )
@@ -48,6 +49,10 @@ logger = logging.getLogger(__name__)
 ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
 RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
 
+T_Formats = list[tuple[int, int, str]]
+T_Comment = tuple[str, T_Formats]
+T_Token = tuple[int, int, str, T_Formats, int]
+
 
 def stripEscape(text: str) -> str:
     """Strip escaped Markdown characters from paragraph text."""
@@ -80,6 +85,8 @@ class Tokenizer(ABC):
     FMT_SUP_E = 12  # End superscript
     FMT_SUB_B = 13  # Begin subscript
     FMT_SUB_E = 14  # End subscript
+    FMT_FNOTE = 15  # Footnote marker
+    FMT_STRIP = 16  # Strip the format code
 
     # Block Type
     T_EMPTY = 1  # Empty line (new paragraph)
@@ -111,45 +118,53 @@ class Tokenizer(ABC):
 
     # Lookups
     L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
+    L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
+    L_SUMMARY = [T_SYNOPSIS, T_SHORT]
 
     def __init__(self, project: NWProject) -> None:
 
         self._project = project
 
         # Data Variables
-        self._text = ""      # The raw text to be tokenized
-        self._handle = None  # The item handle currently being processed
-        self._result = ""    # The result of the last document
+        self._text = ""       # The raw text to be tokenized
+        self._handle = None   # The item handle currently being processed
+        self._result = ""     # The result of the last document
+        self._keepMD = False  # Whether to keep the markdown text
 
-        self._keepMarkdown = False  # Whether to keep the markdown text
-        self._allMarkdown = []      # The result novelWriter markdown of all documents
+        # Tokens and Meta Data (Per Document)
+        self._tokens: list[T_Token] = []
+        self._footnotes: dict[str, T_Comment] = {}
 
-        # Processed Tokens and Meta Data
-        self._tokens: list[tuple[int, int, str, list[tuple[int, int]], int]] = []
+        # Tokens and Meta Data (Per Instance)
         self._counts: dict[str, int] = {}
         self._outline: dict[str, str] = {}
+        self._markdown: list[str] = []
 
         # User Settings
-        self._textFont = "Serif"  # Output text font
-        self._textSize = 11       # Output text size
-        self._textFixed = False   # Fixed width text
+        self._textFont = QFont("Serif", 11)  # Output text font
         self._lineHeight = 1.15    # Line height in units of em
         self._blockIndent = 4.00   # Block indent in units of em
+        self._firstIndent = False  # Enable first line indent
+        self._firstWidth = 1.40    # First line indent in units of em
+        self._indentFirst = False  # Indent first paragraph
         self._doJustify = False    # Justify text
         self._doBodyText = True    # Include body text
         self._doSynopsis = False   # Also process synopsis comments
         self._doComments = False   # Also process comments
         self._doKeywords = False   # Also process keywords like tags and references
         self._skipKeywords = set() # Keywords to ignore
+        self._keepBreaks = True    # Keep line breaks in paragraphs
 
         # Margins
-        self._marginTitle = (1.000, 0.500)
-        self._marginHead1 = (1.000, 0.500)
-        self._marginHead2 = (0.834, 0.500)
-        self._marginHead3 = (0.584, 0.500)
-        self._marginHead4 = (0.584, 0.500)
+        self._marginTitle = (1.417, 0.500)
+        self._marginHead1 = (1.417, 0.500)
+        self._marginHead2 = (1.668, 0.500)
+        self._marginHead3 = (1.168, 0.500)
+        self._marginHead4 = (1.168, 0.500)
         self._marginText = (0.000, 0.584)
         self._marginMeta = (0.000, 0.584)
+        self._marginFoot = (1.417, 0.467)
+        self._marginSep = (1.168, 1.168)
 
         # Title Formats
         self._fmtTitle = nwHeadFmt.TITLE  # Formatting for titles
@@ -205,6 +220,9 @@ class Tokenizer(ABC):
             nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
             nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
         }
+        self._shortCodeVals = {
+            nwShortcode.FOOTNOTE_B: self.FMT_FNOTE,
+        }
 
         return
 
@@ -220,7 +238,7 @@ class Tokenizer(ABC):
     @property
     def allMarkdown(self) -> list[str]:
         """The combined novelWriter Markdown text."""
-        return self._allMarkdown
+        return self._markdown
 
     @property
     def textStats(self) -> dict[str, int]:
@@ -298,11 +316,9 @@ class Tokenizer(ABC):
         )
         return
 
-    def setFont(self, family: str, size: int, isFixed: bool = False) -> None:
+    def setFont(self, font: QFont) -> None:
         """Set the build font."""
-        self._textFont = family
-        self._textSize = round(int(size))
-        self._textFixed = isFixed
+        self._textFont = font
         return
 
     def setLineHeight(self, height: float) -> None:
@@ -315,6 +331,15 @@ class Tokenizer(ABC):
         self._blockIndent = min(max(float(indent), 0.0), 10.0)
         return
 
+    def setFirstLineIndent(self, state: bool, indent: float, first: bool) -> None:
+        """Set first line indent and whether to also indent first
+        paragraph after a heading.
+        """
+        self._firstIndent = state
+        self._firstWidth = indent
+        self._indentFirst = first
+        return
+
     def setJustify(self, state: bool) -> None:
         """Enable or disable text justification."""
         self._doJustify = state
@@ -355,6 +380,11 @@ class Tokenizer(ABC):
         self._marginMeta = (float(upper), float(lower))
         return
 
+    def setSeparatorMargins(self, upper: float, lower: float) -> None:
+        """Set the upper and lower separator margin."""
+        self._marginSep = (float(upper), float(lower))
+        return
+
     def setLinkHeadings(self, state: bool) -> None:
         """Enable or disable adding an anchor before headings."""
         self._linkHeadings = state
@@ -385,9 +415,14 @@ class Tokenizer(ABC):
         self._skipKeywords = set(x.lower().strip() for x in keywords.split(","))
         return
 
+    def setKeepLineBreaks(self, state: bool) -> None:
+        """Keep line breaks in paragraphs."""
+        self._keepBreaks = state
+        return
+
     def setKeepMarkdown(self, state: bool) -> None:
         """Keep original markdown during build."""
-        self._keepMarkdown = state
+        self._keepMD = state
        return
 
     ##
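
Note: taken together, the setter changes above are the visible API change for build configuration in 2.5b1. A minimal sketch against a hypothetical builder argument (any concrete subclass, e.g. ToHtml, which this release also touches); the values are illustrative:

```python
from PyQt5.QtGui import QFont

from novelwriter.core.tokenizer import Tokenizer


def configure(builder: Tokenizer) -> None:
    """Illustrative only: apply the 2.5b1 build settings API."""
    builder.setFont(QFont("Serif", 11))            # was setFont(family, size, isFixed)
    builder.setFirstLineIndent(True, 1.40, False)  # new: 1.4 em indent, skip first paragraph
    builder.setKeepLineBreaks(False)               # new: join paragraph lines with spaces
    builder.setSeparatorMargins(1.168, 1.168)      # new: spacing around separators
    builder.setKeepMarkdown(True)                  # same signature, renamed internal flag
```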
@@ -417,8 +452,8 @@ class Tokenizer(ABC):
         self._tokens.append((
             self.T_TITLE, 1, title, [], textAlign
         ))
-        if self._keepMarkdown:
-            self._allMarkdown.append(f"#! {title}\n\n")
+        if self._keepMD:
+            self._markdown.append(f"#! {title}\n\n")
 
         return
 
@@ -466,22 +501,23 @@ class Tokenizer(ABC):
         4: The internal formatting map of the text, self.FMT_*
         5: The style of the block, self.A_*
         """
-        self._tokens = []
         if self._isNovel:
             self._hFormatter.setHandle(self._handle)
 
         nHead = 0
         breakNext = False
         tmpMarkdown = []
+        tHandle = self._handle or ""
+        tokens: list[T_Token] = []
         for aLine in self._text.splitlines():
             sLine = aLine.strip().lower()
 
             # Check for blank lines
             if len(sLine) == 0:
-                self._tokens.append((
+                tokens.append((
                     self.T_EMPTY, nHead, "", [], self.A_NONE
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append("\n")
 
                 continue
@@ -507,7 +543,7 @@ class Tokenizer(ABC):
                 continue
 
             elif sLine == "[vspace]":
-                self._tokens.append(
+                tokens.append(
                     (self.T_SKIP, nHead, "", [], sAlign)
                 )
                 continue
@@ -515,11 +551,11 @@ class Tokenizer(ABC):
             elif sLine.startswith("[vspace:") and sLine.endswith("]"):
                 nSkip = checkInt(sLine[8:-1], 0)
                 if nSkip >= 1:
-                    self._tokens.append(
+                    tokens.append(
                         (self.T_SKIP, nHead, "", [], sAlign)
                     )
                 if nSkip > 1:
-                    self._tokens += (nSkip - 1) * [
+                    tokens += (nSkip - 1) * [
                         (self.T_SKIP, nHead, "", [], self.A_NONE)
                     ]
                 continue
@@ -533,24 +569,32 @@ class Tokenizer(ABC):
                 if aLine.startswith("%~"):
                     continue
 
-                cStyle, cText, _ = processComment(aLine)
+                cStyle, cKey, cText, _, _ = processComment(aLine)
                 if cStyle == nwComment.SYNOPSIS:
-                    self._tokens.append((
-                        self.T_SYNOPSIS, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_SYNOPSIS, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doSynopsis and self._keepMarkdown:
+                    if self._doSynopsis and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
                 elif cStyle == nwComment.SHORT:
-                    self._tokens.append((
-                        self.T_SHORT, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_SHORT, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doSynopsis and self._keepMarkdown:
+                    if self._doSynopsis and self._keepMD:
+                        tmpMarkdown.append(f"{aLine}\n")
+                elif cStyle == nwComment.FOOTNOTE:
+                    tLine, tFmt = self._extractFormats(cText, skip=self.FMT_FNOTE)
+                    self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
+                    if self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
                 else:
-                    self._tokens.append((
-                        self.T_COMMENT, nHead, cText, [], sAlign
+                    tLine, tFmt = self._extractFormats(cText)
+                    tokens.append((
+                        self.T_COMMENT, nHead, tLine, tFmt, sAlign
                     ))
-                    if self._doComments and self._keepMarkdown:
+                    if self._doComments and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith("@"):
@@ -560,11 +604,14 @@ class Tokenizer(ABC):
                 # are automatically skipped.
 
                 valid, bits, _ = self._project.index.scanThis(aLine)
-                if valid and bits and bits[0] not in self._skipKeywords:
-                    self._tokens.append((
+                if (
+                    valid and bits and bits[0] in nwLabels.KEY_NAME
+                    and bits[0] not in self._skipKeywords
+                ):
+                    tokens.append((
                         self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
                     ))
-                    if self._doKeywords and self._keepMarkdown:
+                    if self._doKeywords and self._keepMD:
                         tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("# ", "#! ")):
@@ -597,10 +644,10 @@ class Tokenizer(ABC):
                     self._hFormatter.resetAll()
                     self._noSep = True
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("## ", "##! ")):
@@ -632,10 +679,10 @@ class Tokenizer(ABC):
                     self._hFormatter.resetScene()
                     self._noSep = True
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith(("### ", "###! ")):
@@ -673,10 +720,10 @@ class Tokenizer(ABC):
                     tStyle = self.A_NONE if self._noSep else self.A_CENTRE
                     self._noSep = False
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             elif aLine.startswith("#### "):
@@ -703,10 +750,10 @@ class Tokenizer(ABC):
                     tType = self.T_SEP
                     tStyle = self.A_CENTRE
 
-                self._tokens.append((
+                tokens.append((
                     tType, nHead, tText, [], tStyle
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
             else:
@@ -750,54 +797,91 @@ class Tokenizer(ABC):
                     sAlign |= self.A_IND_R
 
                 # Process formats
-                tLine, fmtPos = self._extractFormats(aLine)
-                self._tokens.append((
-                    self.T_TEXT, nHead, tLine, fmtPos, sAlign
+                tLine, tFmt = self._extractFormats(aLine)
+                tokens.append((
+                    self.T_TEXT, nHead, tLine, tFmt, sAlign
                 ))
-                if self._keepMarkdown:
+                if self._keepMD:
                     tmpMarkdown.append(f"{aLine}\n")
 
         # If we have content, turn off the first page flag
-        if self._isFirst and self._tokens:
+        if self._isFirst and tokens:
             self._isFirst = False  # First document has been processed
 
             # Make sure the token array doesn't start with a page break
             # on the very first page, adding a blank first page.
-            if self._tokens[0][4] & self.A_PBB:
-                token = self._tokens[0]
-                self._tokens[0] = (
-                    token[0], token[1], token[2], token[3], token[4] & ~self.A_PBB
+            if tokens[0][4] & self.A_PBB:
+                cToken = tokens[0]
+                tokens[0] = (
+                    cToken[0], cToken[1], cToken[2], cToken[3], cToken[4] & ~self.A_PBB
                 )
 
         # Always add an empty line at the end of the file
-        self._tokens.append((
+        tokens.append((
            self.T_EMPTY, nHead, "", [], self.A_NONE
         ))
-        if self._keepMarkdown:
+        if self._keepMD:
             tmpMarkdown.append("\n")
-            self._allMarkdown.append("".join(tmpMarkdown))
+            self._markdown.append("".join(tmpMarkdown))
 
         # Second Pass
         # ===========
-        # Some items need a second pass
+        # This second pass strips away consecutive blank lines, and
+        # combines consecutive text lines into the same paragraph.
+        # It also ensures that there isn't paragraph spacing between
+        # meta data lines for formats that have spacing.
+
+        self._tokens = []
+        pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+        nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+
+        lineSep = "\n" if self._keepBreaks else " "
+        pLines: list[T_Token] = []
 
-        pToken = (self.T_EMPTY, 0, "", [], self.A_NONE)
-        nToken = (self.T_EMPTY, 0, "", [], self.A_NONE)
-        tCount = len(self._tokens)
-        for n, token in enumerate(self._tokens):
+        tCount = len(tokens)
+        for n, cToken in enumerate(tokens):
 
             if n > 0:
-                pToken = self._tokens[n-1]
+                pToken = tokens[n-1]  # Look behind
             if n < tCount - 1:
-                nToken = self._tokens[n+1]
+                nToken = tokens[n+1]  # Look ahead
 
-            if token[0] == self.T_KEYWORD:
-                aStyle = token[4]
+            if cToken[0] == self.T_EMPTY:
+                # We don't need to keep the empty lines after this pass
+                pass
+
+            elif cToken[0] == self.T_KEYWORD:
+                # Adjust margins for lines in a list of keyword lines
+                aStyle = cToken[4]
                 if pToken[0] == self.T_KEYWORD:
                     aStyle |= self.A_Z_TOPMRG
                 if nToken[0] == self.T_KEYWORD:
                     aStyle |= self.A_Z_BTMMRG
-                self._tokens[n] = (token[0], token[1], token[2], token[3], aStyle)
+                self._tokens.append((
+                    cToken[0], cToken[1], cToken[2], cToken[3], aStyle
+                ))
+
+            elif cToken[0] == self.T_TEXT:
+                # Combine lines from the same paragraph
+                pLines.append(cToken)
+                if nToken[0] != self.T_TEXT:
+                    nLines = len(pLines)
+                    if nLines == 1:
+                        self._tokens.append(pLines[0])
+                    elif nLines > 1:
+                        tTxt = ""
+                        tFmt: T_Formats = []
+                        for aToken in pLines:
+                            tLen = len(tTxt)
+                            tTxt += f"{aToken[2]}{lineSep}"
+                            tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aToken[3])
+                        self._tokens.append((
+                            self.T_TEXT, pLines[0][1], tTxt[:-1], tFmt, pLines[0][4]
+                        ))
+                    pLines = []
+
+            else:
+                self._tokens.append(cToken)
 
         return
 
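Note: the offset arithmetic in the paragraph merge above is easiest to see on a small input. A standalone sketch of the same loop (the format codes are placeholders, not real FMT_* values):

```python
# Each line carries (text, formats); when lines are joined into one
# paragraph, every format position is shifted by the length of the text
# accumulated so far (tLen), exactly as in the loop above.
lineSep = " "  # separator when keepBreaks is off; "\n" otherwise
pLines = [
    ("First line", [(6, 5, "")]),   # a marker at offset 6 of the first line
    ("second one", [(0, 6, "")]),   # a marker at offset 0 of the second line
]
tTxt = ""
tFmt = []
for text, fmts in pLines:
    tLen = len(tTxt)
    tTxt += f"{text}{lineSep}"
    tFmt.extend((p + tLen, fmt, key) for p, fmt, key in fmts)
print(tTxt[:-1])  # "First line second one"
print(tFmt)       # [(6, 5, ''), (11, 6, '')]
```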
@@ -840,7 +924,6 @@ class Tokenizer(ABC):
         textWordChars = self._counts.get("textWordChars", 0)
         titleWordChars = self._counts.get("titleWordChars", 0)
 
-        para = []
         for tType, _, tText, _, _ in self._tokens:
             tText = tText.replace(nwUnicode.U_ENDASH, " ")
             tText = tText.replace(nwUnicode.U_EMDASH, " ")
@@ -850,22 +933,19 @@ class Tokenizer(ABC):
             nChars = len(tText)
             nWChars = len("".join(tWords))
 
-            if tType == self.T_EMPTY:
-                if len(para) > 0:
-                    tTemp = "\n".join(para)
-                    tPWords = tTemp.split()
-                    nPWords = len(tPWords)
-                    nPChars = len(tTemp)
-                    nPWChars = len("".join(tPWords))
-
-                    paragraphCount += 1
-                    allWords += nPWords
-                    textWords += nPWords
-                    allChars += nPChars
-                    textChars += nPChars
-                    allWordChars += nPWChars
-                    textWordChars += nPWChars
-                    para = []
+            if tType == self.T_TEXT:
+                tPWords = tText.split()
+                nPWords = len(tPWords)
+                nPChars = len(tText)
+                nPWChars = len("".join(tPWords))
+
+                paragraphCount += 1
+                allWords += nPWords
+                textWords += nPWords
+                allChars += nPChars
+                textChars += nPChars
+                allWordChars += nPWChars
+                textWordChars += nPWChars
 
             elif tType in self.L_HEADINGS:
                 titleCount += 1
@@ -881,9 +961,6 @@ class Tokenizer(ABC):
                 allChars += nChars
                 allWordChars += nWChars
 
-            elif tType == self.T_TEXT:
-                para.append(tText.rstrip())
-
             elif tType == self.T_SYNOPSIS and self._doSynopsis:
                 text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
                 words = text.split()
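Note: after the second pass, a T_TEXT token holds a whole paragraph, so the stats can be taken straight from its text instead of the old para buffer. A worked example of the counters above (not part of the diff):

```python
tText = "Hello world again"
tPWords = tText.split()           # ['Hello', 'world', 'again']
nPWords = len(tPWords)            # 3  -> words
nPChars = len(tText)              # 17 -> characters, spaces included
nPWChars = len("".join(tPWords))  # 15 -> word characters, spaces removed
```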
@@ -935,7 +1012,7 @@ class Tokenizer(ABC):
     def saveRawMarkdown(self, path: str | Path) -> None:
         """Save the raw text to a plain text file."""
         with open(path, mode="w", encoding="utf-8") as outFile:
-            for nwdPage in self._allMarkdown:
+            for nwdPage in self._markdown:
                 outFile.write(nwdPage)
         return
 
@@ -950,7 +1027,7 @@ class Tokenizer(ABC):
                 "buildTimeStr": formatTimeStamp(timeStamp),
             },
             "text": {
-                "nwd": [page.rstrip("\n").split("\n") for page in self._allMarkdown],
+                "nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
             }
         }
         with open(path, mode="w", encoding="utf-8") as fObj:
@@ -961,9 +1038,9 @@ class Tokenizer(ABC):
     #  Internal Functions
     ##
 
-    def _extractFormats(self, text: str) -> tuple[str, list[tuple[int, int]]]:
+    def _extractFormats(self, text: str, skip: int = 0) -> tuple[str, T_Formats]:
         """Extract format markers from a text paragraph."""
-        temp = []
+        temp: list[tuple[int, int, int, str]] = []
 
         # Match Markdown
         for regEx, fmts in self._rxMarkdown:
@@ -971,7 +1048,7 @@ class Tokenizer(ABC):
             while rxItt.hasNext():
                 rxMatch = rxItt.next()
                 temp.extend(
-                    [rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt]
+                    (rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt, "")
                     for n, fmt in enumerate(fmts) if fmt > 0
                 )
 
@@ -979,25 +1056,37 @@ class Tokenizer(ABC):
         rxItt = self._rxShortCodes.globalMatch(text, 0)
         while rxItt.hasNext():
             rxMatch = rxItt.next()
-            temp.append([
+            temp.append((
                 rxMatch.capturedStart(1),
                 rxMatch.capturedLength(1),
-                self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0)
-            ])
+                self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
+                "",
+            ))
 
-        # Post-process text and format markers
+        # Match Shortcode w/Values
+        rxItt = self._rxShortCodeVals.globalMatch(text, 0)
+        tHandle = self._handle or ""
+        while rxItt.hasNext():
+            rxMatch = rxItt.next()
+            kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
+            temp.append((
+                rxMatch.capturedStart(0),
+                rxMatch.capturedLength(0),
+                self.FMT_STRIP if kind == skip else kind,
+                f"{tHandle}:{rxMatch.captured(2)}",
+            ))
+
+        # Post-process text and format
         result = text
         formats = []
-        for pos, n, fmt in reversed(sorted(temp, key=lambda x: x[0])):
+        for pos, n, fmt, key in reversed(sorted(temp, key=lambda x: x[0])):
             if fmt > 0:
                 result = result[:pos] + result[pos+n:]
-                formats = [(p-n, f) for p, f in formats]
-                formats.insert(0, (pos, fmt))
+                formats = [(p-n, f, k) for p, f, k in formats]
+                formats.insert(0, (pos, fmt, key))
 
         return result, formats
 
-# END Class Tokenizer
-
 
 class HeadingFormatter:
 
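Note: the value-carrying shortcode branch is the new part of _extractFormats(). Assuming the footnote marker syntax is [footnote:key] (the _rxShortCodeVals pattern itself is outside this diff), a sketch of the resulting output:

```python
from novelwriter.core.tokenizer import Tokenizer

# Illustrative only: the whole marker is cut from the text, and a
# (position, FMT_FNOTE, "handle:key") entry records where it stood so a
# writer class can emit the footnote reference there. The footnote body
# itself is looked up in self._footnotes, which tokenize() fills from
# footnote comments under the same "handle:key".
text = "Some claim.[footnote:fn1] More text."
result = "Some claim. More text."  # marker stripped at offset 11
formats = [(11, Tokenizer.FMT_FNOTE, "0123456789abc:fn1")]
```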
@@ -1067,5 +1156,3 @@ class HeadingFormatter:
         hFormat = hFormat.replace(nwHeadFmt.CHAR_FOCUS, fText)
 
         return hFormat
-
-# END Class HeadingFormatter