PyPI - novelWriter - Versions diffs - 2.5.1__py3-none-any.whl → 2.6b1__py3-none-any.whl - Mend

novelWriter-2.5.1-py3-none-any.whl → novelWriter-2.6b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

{novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/METADATA +2 -1
{novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/RECORD +61 -56
{novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/WHEEL +1 -1
novelwriter/__init__.py +3 -3
novelwriter/assets/i18n/project_en_GB.json +1 -0
novelwriter/assets/icons/typicons_dark/icons.conf +1 -0
novelwriter/assets/icons/typicons_dark/mixed_copy.svg +4 -0
novelwriter/assets/icons/typicons_light/icons.conf +1 -0
novelwriter/assets/icons/typicons_light/mixed_copy.svg +4 -0
novelwriter/assets/manual.pdf +0 -0
novelwriter/assets/sample.zip +0 -0
novelwriter/assets/themes/default_light.conf +2 -2
novelwriter/common.py +63 -0
novelwriter/config.py +10 -3
novelwriter/constants.py +153 -60
novelwriter/core/buildsettings.py +66 -39
novelwriter/core/coretools.py +34 -22
novelwriter/core/docbuild.py +130 -169
novelwriter/core/index.py +29 -18
novelwriter/core/item.py +2 -2
novelwriter/core/options.py +4 -1
novelwriter/core/spellcheck.py +9 -14
novelwriter/dialogs/preferences.py +45 -32
novelwriter/dialogs/projectsettings.py +3 -3
novelwriter/enum.py +29 -23
novelwriter/extensions/configlayout.py +24 -11
novelwriter/extensions/modified.py +13 -1
novelwriter/extensions/pagedsidebar.py +5 -5
novelwriter/formats/shared.py +155 -0
novelwriter/formats/todocx.py +1195 -0
novelwriter/formats/tohtml.py +452 -0
novelwriter/{core → formats}/tokenizer.py +483 -485
novelwriter/formats/tomarkdown.py +217 -0
novelwriter/{core → formats}/toodt.py +270 -320
novelwriter/formats/toqdoc.py +436 -0
novelwriter/formats/toraw.py +91 -0
novelwriter/gui/doceditor.py +240 -193
novelwriter/gui/dochighlight.py +96 -84
novelwriter/gui/docviewer.py +56 -30
novelwriter/gui/docviewerpanel.py +3 -3
novelwriter/gui/editordocument.py +17 -2
novelwriter/gui/itemdetails.py +8 -4
novelwriter/gui/mainmenu.py +121 -60
novelwriter/gui/noveltree.py +35 -37
novelwriter/gui/outline.py +186 -238
novelwriter/gui/projtree.py +142 -131
novelwriter/gui/sidebar.py +7 -6
novelwriter/gui/theme.py +5 -4
novelwriter/guimain.py +43 -155
novelwriter/shared.py +14 -4
novelwriter/text/counting.py +2 -0
novelwriter/text/patterns.py +155 -59
novelwriter/tools/manusbuild.py +1 -1
novelwriter/tools/manuscript.py +121 -78
novelwriter/tools/manussettings.py +403 -260
novelwriter/tools/welcome.py +4 -4
novelwriter/tools/writingstats.py +3 -3
novelwriter/types.py +16 -6
novelwriter/core/tohtml.py +0 -530
novelwriter/core/tomarkdown.py +0 -252
novelwriter/core/toqdoc.py +0 -419
{novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/LICENSE.md +0 -0
{novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/entry_points.txt +0 -0
{novelWriter-2.5.1.dist-info → novelWriter-2.6b1.dist-info}/top_level.txt +0 -0

novelwriter/{core → formats}/tokenizer.py RENAMED Viewed

@@ -24,41 +24,56 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 """
 from __future__ import annotations
-import json
 import logging
 import re
 from abc import ABC, abstractmethod
-from functools import partial
 from pathlib import Path
-from time import time
+from typing import NamedTuple
-from PyQt5.QtCore import QCoreApplication, QRegularExpression
-from PyQt5.QtGui import QFont
+from PyQt5.QtCore import QLocale
+from PyQt5.QtGui import QColor, QFont
 from novelwriter import CONFIG
-from novelwriter.common import checkInt, formatTimeStamp, numberToRoman
-from novelwriter.constants import nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwUnicode, trConst
+from novelwriter.common import checkInt, numberToRoman
+from novelwriter.constants import (
+    nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwStats, nwStyles, nwUnicode,
+    trConst
+)
 from novelwriter.core.index import processComment
 from novelwriter.core.project import NWProject
 from novelwriter.enum import nwComment, nwItemLayout
-from novelwriter.text.patterns import REGEX_PATTERNS
+from novelwriter.formats.shared import (
+    BlockFmt, BlockTyp, T_Block, T_Formats, T_Note, TextDocumentTheme, TextFmt
+)
+from novelwriter.text.patterns import REGEX_PATTERNS, DialogParser
 logger = logging.getLogger(__name__)
-ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
-RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
-T_Formats = list[tuple[int, int, str]]
-T_Comment = tuple[str, T_Formats]
-T_Token = tuple[int, int, str, T_Formats, int]
+class ComStyle(NamedTuple):
+    label: str = ""
+    labelClass: str = ""
+    textClass: str = ""
-def stripEscape(text: str) -> str:
-    """Strip escaped Markdown characters from paragraph text."""
-    if "\\" in text:
-        return RX_ESC.sub(lambda x: ESCAPES[x.group(0)], text)
-    return text
+COMMENT_STYLE = {
+    nwComment.PLAIN:    ComStyle("Comment", "comment", "comment"),
+    nwComment.IGNORE:   ComStyle(),
+    nwComment.SYNOPSIS: ComStyle("Synopsis", "modifier", "synopsis"),
+    nwComment.SHORT:    ComStyle("Short Description", "modifier", "synopsis"),
+    nwComment.NOTE:     ComStyle("Note", "modifier", "note"),
+    nwComment.FOOTNOTE: ComStyle("", "modifier", "note"),
+    nwComment.COMMENT:  ComStyle(),
+    nwComment.STORY:    ComStyle("", "modifier", "note"),
+}
+HEADINGS = [BlockTyp.TITLE, BlockTyp.HEAD1, BlockTyp.HEAD2, BlockTyp.HEAD3, BlockTyp.HEAD4]
+SKIP_INDENT = [
+    BlockTyp.TITLE, BlockTyp.HEAD1, BlockTyp.HEAD2, BlockTyp.HEAD2, BlockTyp.HEAD3,
+    BlockTyp.HEAD4, BlockTyp.SEP, BlockTyp.SKIP,
+]
+B_EMPTY: T_Block = (BlockTyp.EMPTY, "", "", [], BlockFmt.NONE)
 class Tokenizer(ABC):
@@ -70,119 +85,71 @@ class Tokenizer(ABC):
     subclasses.
     """
-    # In-Text Format
-    FMT_B_B   = 1   # Begin bold
-    FMT_B_E   = 2   # End bold
-    FMT_I_B   = 3   # Begin italics
-    FMT_I_E   = 4   # End italics
-    FMT_D_B   = 5   # Begin strikeout
-    FMT_D_E   = 6   # End strikeout
-    FMT_U_B   = 7   # Begin underline
-    FMT_U_E   = 8   # End underline
-    FMT_M_B   = 9   # Begin mark
-    FMT_M_E   = 10  # End mark
-    FMT_SUP_B = 11  # Begin superscript
-    FMT_SUP_E = 12  # End superscript
-    FMT_SUB_B = 13  # Begin subscript
-    FMT_SUB_E = 14  # End subscript
-    FMT_DL_B  = 15  # Begin dialogue
-    FMT_DL_E  = 16  # End dialogue
-    FMT_ADL_B = 17  # Begin alt dialogue
-    FMT_ADL_E = 18  # End alt dialogue
-    FMT_FNOTE = 19  # Footnote marker
-    FMT_STRIP = 20  # Strip the format code
-    # Block Type
-    T_EMPTY    = 1   # Empty line (new paragraph)
-    T_SYNOPSIS = 2   # Synopsis comment
-    T_SHORT    = 3   # Short description comment
-    T_COMMENT  = 4   # Comment line
-    T_KEYWORD  = 5   # Command line
-    T_TITLE    = 6   # Title
-    T_HEAD1    = 7   # Heading 1
-    T_HEAD2    = 8   # Heading 2
-    T_HEAD3    = 9   # Heading 3
-    T_HEAD4    = 10  # Heading 4
-    T_TEXT     = 11  # Text line
-    T_SEP      = 12  # Scene separator
-    T_SKIP     = 13  # Paragraph break
-    # Block Style
-    A_NONE     = 0x0000  # No special style
-    A_LEFT     = 0x0001  # Left aligned
-    A_RIGHT    = 0x0002  # Right aligned
-    A_CENTRE   = 0x0004  # Centred
-    A_JUSTIFY  = 0x0008  # Justified
-    A_PBB      = 0x0010  # Page break before
-    A_PBA      = 0x0020  # Page break after
-    A_Z_TOPMRG = 0x0040  # Zero top margin
-    A_Z_BTMMRG = 0x0080  # Zero bottom margin
-    A_IND_L    = 0x0100  # Left indentation
-    A_IND_R    = 0x0200  # Right indentation
-    A_IND_T    = 0x0400  # Text indentation
-    # Masks
-    M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY
-    # Lookups
-    L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
-    L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
-    L_SUMMARY = [T_SYNOPSIS, T_SHORT]
     def __init__(self, project: NWProject) -> None:
         self._project = project
         # Data Variables
-        self._text   = ""     # The raw text to be tokenized
-        self._handle = None   # The item handle currently being processed
-        self._result = ""     # The result of the last document
-        self._keepMD = False  # Whether to keep the markdown text
-        # Tokens and Meta Data (Per Document)
-        self._tokens: list[T_Token] = []
-        self._footnotes: dict[str, T_Comment] = {}
-        # Tokens and Meta Data (Per Instance)
+        self._text     = ""     # The raw text to be tokenized
+        self._handle   = None   # The item handle currently being processed
+        self._keepRaw  = False  # Whether to keep the raw text, used by ToRaw
+        self._noTokens = False  # Disable tokenization if they're not needed
+        # Blocks and Meta Data (Per Document)
+        self._blocks: list[T_Block] = []
+        self._footnotes: dict[str, T_Note] = {}
+        # Blocks and Meta Data (Per Instance)
+        self._raw: list[str] = []
+        self._pages: list[str] = []
         self._counts: dict[str, int] = {}
         self._outline: dict[str, str] = {}
-        self._markdown: list[str] = []
         # User Settings
+        self._dLocale      = CONFIG.locale  # The document locale
         self._textFont     = QFont("Serif", 11)  # Output text font
-        self._lineHeight   = 1.15     # Line height in units of em
-        self._blockIndent  = 4.00     # Block indent in units of em
-        self._firstIndent  = False    # Enable first line indent
-        self._firstWidth   = 1.40     # First line indent in units of em
-        self._indentFirst  = False    # Indent first paragraph
-        self._doJustify    = False    # Justify text
-        self._doBodyText   = True     # Include body text
-        self._doSynopsis   = False    # Also process synopsis comments
-        self._doComments   = False    # Also process comments
-        self._doKeywords   = False    # Also process keywords like tags and references
-        self._skipKeywords = set()    # Keywords to ignore
-        self._keepBreaks   = True     # Keep line breaks in paragraphs
+        self._lineHeight   = 1.15    # Line height in units of em
+        self._colorHeads   = True    # Colourise headings
+        self._scaleHeads   = True    # Scale headings to larger font size
+        self._boldHeads    = True    # Bold headings
+        self._blockIndent  = 4.00    # Block indent in units of em
+        self._firstIndent  = False   # Enable first line indent
+        self._firstWidth   = 1.40    # First line indent in units of em
+        self._indentFirst  = False   # Indent first paragraph
+        self._doJustify    = False   # Justify text
+        self._doBodyText   = True    # Include body text
+        self._doSynopsis   = False   # Also process synopsis comments
+        self._doComments   = False   # Also process comments
+        self._doKeywords   = False   # Also process keywords like tags and references
+        self._keepBreaks   = True    # Keep line breaks in paragraphs
+        self._defaultAlign = "left"  # The default text alignment
+        self._skipKeywords: set[str] = set()  # Keywords to ignore
+        # Other Setting
+        self._theme = TextDocumentTheme()
+        self._classes: dict[str, QColor] = {}
         # Margins
-        self._marginTitle = (1.417, 0.500)
-        self._marginHead1 = (1.417, 0.500)
-        self._marginHead2 = (1.668, 0.500)
-        self._marginHead3 = (1.168, 0.500)
-        self._marginHead4 = (1.168, 0.500)
-        self._marginText  = (0.000, 0.584)
-        self._marginMeta  = (0.000, 0.584)
-        self._marginFoot  = (1.417, 0.467)
-        self._marginSep   = (1.168, 1.168)
+        self._marginTitle = nwStyles.T_MARGIN["H0"]
+        self._marginHead1 = nwStyles.T_MARGIN["H1"]
+        self._marginHead2 = nwStyles.T_MARGIN["H2"]
+        self._marginHead3 = nwStyles.T_MARGIN["H3"]
+        self._marginHead4 = nwStyles.T_MARGIN["H4"]
+        self._marginText  = nwStyles.T_MARGIN["TT"]
+        self._marginMeta  = nwStyles.T_MARGIN["MT"]
+        self._marginFoot  = nwStyles.T_MARGIN["FT"]
+        self._marginSep   = nwStyles.T_MARGIN["SP"]
         # Title Formats
-        self._fmtTitle   = nwHeadFmt.TITLE  # Formatting for titles
+        self._fmtPart    = nwHeadFmt.TITLE  # Formatting for partitions
         self._fmtChapter = nwHeadFmt.TITLE  # Formatting for numbered chapters
         self._fmtUnNum   = nwHeadFmt.TITLE  # Formatting for unnumbered chapters
         self._fmtScene   = nwHeadFmt.TITLE  # Formatting for scenes
         self._fmtHScene  = nwHeadFmt.TITLE  # Formatting for hard scenes
         self._fmtSection = nwHeadFmt.TITLE  # Formatting for sections
-        self._hideTitle   = False  # Do not include title headings
+        self._hidePart    = False  # Do not include partition headings
         self._hideChapter = False  # Do not include chapter headings
         self._hideUnNum   = False  # Do not include unnumbered headings
         self._hideScene   = False  # Do not include scene headings
@@ -191,15 +158,16 @@ class Tokenizer(ABC):
         self._linkHeadings = False  # Add an anchor before headings
-        self._titleStyle   = self.A_CENTRE | self.A_PBB
-        self._chapterStyle = self.A_PBB
-        self._sceneStyle   = self.A_NONE
+        self._titleStyle   = BlockFmt.CENTRE | BlockFmt.PBB
+        self._partStyle    = BlockFmt.CENTRE | BlockFmt.PBB
+        self._chapterStyle = BlockFmt.PBB
+        self._sceneStyle   = BlockFmt.NONE
         # Instance Variables
         self._hFormatter = HeadingFormatter(self._project)
         self._noSep      = True   # Flag to indicate that we don't want a scene separator
         self._noIndent   = False  # Flag to disable text indent on next paragraph
-        self._showDialog = False  # Flag for dialogue highlighting
+        self._breakNext  = False  # Add a page break on next token
         # This File
         self._isNovel = False  # Document is a novel document
@@ -210,31 +178,33 @@ class Tokenizer(ABC):
         # Function Mapping
         self._localLookup = self._project.localLookup
-        self.tr = partial(QCoreApplication.translate, "Tokenizer")
         # Format RegEx
         self._rxMarkdown = [
-            (REGEX_PATTERNS.markdownItalic, [0, self.FMT_I_B, 0, self.FMT_I_E]),
-            (REGEX_PATTERNS.markdownBold,   [0, self.FMT_B_B, 0, self.FMT_B_E]),
-            (REGEX_PATTERNS.markdownStrike, [0, self.FMT_D_B, 0, self.FMT_D_E]),
+            (REGEX_PATTERNS.markdownItalic, [0, TextFmt.I_B, 0, TextFmt.I_E]),
+            (REGEX_PATTERNS.markdownBold,   [0, TextFmt.B_B, 0, TextFmt.B_E]),
+            (REGEX_PATTERNS.markdownStrike, [0, TextFmt.D_B, 0, TextFmt.D_E]),
         ]
-        self._rxShortCodes = REGEX_PATTERNS.shortcodePlain
-        self._rxShortCodeVals = REGEX_PATTERNS.shortcodeValue
         self._shortCodeFmt = {
-            nwShortcode.ITALIC_O: self.FMT_I_B,   nwShortcode.ITALIC_C: self.FMT_I_E,
-            nwShortcode.BOLD_O:   self.FMT_B_B,   nwShortcode.BOLD_C:   self.FMT_B_E,
-            nwShortcode.STRIKE_O: self.FMT_D_B,   nwShortcode.STRIKE_C: self.FMT_D_E,
-            nwShortcode.ULINE_O:  self.FMT_U_B,   nwShortcode.ULINE_C:  self.FMT_U_E,
-            nwShortcode.MARK_O:   self.FMT_M_B,   nwShortcode.MARK_C:   self.FMT_M_E,
-            nwShortcode.SUP_O:    self.FMT_SUP_B, nwShortcode.SUP_C:    self.FMT_SUP_E,
-            nwShortcode.SUB_O:    self.FMT_SUB_B, nwShortcode.SUB_C:    self.FMT_SUB_E,
+            nwShortcode.ITALIC_O: TextFmt.I_B,   nwShortcode.ITALIC_C: TextFmt.I_E,
+            nwShortcode.BOLD_O:   TextFmt.B_B,   nwShortcode.BOLD_C:   TextFmt.B_E,
+            nwShortcode.STRIKE_O: TextFmt.D_B,   nwShortcode.STRIKE_C: TextFmt.D_E,
+            nwShortcode.ULINE_O:  TextFmt.U_B,   nwShortcode.ULINE_C:  TextFmt.U_E,
+            nwShortcode.MARK_O:   TextFmt.M_B,   nwShortcode.MARK_C:   TextFmt.M_E,
+            nwShortcode.SUP_O:    TextFmt.SUP_B, nwShortcode.SUP_C:    TextFmt.SUP_E,
+            nwShortcode.SUB_O:    TextFmt.SUB_B, nwShortcode.SUB_C:    TextFmt.SUB_E,
         }
         self._shortCodeVals = {
-            nwShortcode.FOOTNOTE_B: self.FMT_FNOTE,
+            nwShortcode.FOOTNOTE_B: TextFmt.FNOTE,
+            nwShortcode.FIELD_B:    TextFmt.FIELD,
         }
-        self._rxDialogue: list[tuple[QRegularExpression, int, int]] = []
+        # Dialogue
+        self._hlightDialog = False
+        self._rxAltDialog = REGEX_PATTERNS.altDialogStyle
+        self._dialogParser = DialogParser()
+        self._dialogParser.initParser()
         return
@@ -242,16 +212,6 @@ class Tokenizer(ABC):
     #  Properties
     ##
-    @property
-    def result(self) -> str:
-        """The result of the build process."""
-        return self._result
-    @property
-    def allMarkdown(self) -> list[str]:
-        """The combined novelWriter Markdown text."""
-        return self._markdown
     @property
     def textStats(self) -> dict[str, int]:
         """The collected stats about the text."""
@@ -271,10 +231,21 @@ class Tokenizer(ABC):
     #  Setters
     ##
-    def setTitleFormat(self, hFormat: str, hide: bool = False) -> None:
-        """Set the title format pattern."""
-        self._fmtTitle = hFormat.strip()
-        self._hideTitle = hide
+    def setLanguage(self, language: str | None) -> None:
+        """Set language for the document."""
+        if language:
+            self._dLocale = QLocale(language)
+        return
+    def setTheme(self, theme: TextDocumentTheme) -> None:
+        """Set the document colour theme."""
+        self._theme = theme
+        return
+    def setPartitionFormat(self, hFormat: str, hide: bool = False) -> None:
+        """Set the partition format pattern."""
+        self._fmtPart = hFormat.strip()
+        self._hidePart = hide
         return
     def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
@@ -309,23 +280,26 @@ class Tokenizer(ABC):
     def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
         """Set the title heading style."""
-        self._titleStyle = (
-            (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
-        )
+        self._titleStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+        self._titleStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
+        return
+    def setPartitionStyle(self, center: bool, pageBreak: bool) -> None:
+        """Set the partition heading style."""
+        self._partStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+        self._partStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
         return
     def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
         """Set the chapter heading style."""
-        self._chapterStyle = (
-            (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
-        )
+        self._chapterStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+        self._chapterStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
         return
     def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
         """Set the scene heading style."""
-        self._sceneStyle = (
-            (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
-        )
+        self._sceneStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
+        self._sceneStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
         return
     def setFont(self, font: QFont) -> None:
@@ -338,6 +312,13 @@ class Tokenizer(ABC):
         self._lineHeight = min(max(float(height), 0.5), 5.0)
         return
+    def setHeadingStyles(self, color: bool, scale: bool, bold: bool) -> None:
+        """Set text style for headings."""
+        self._colorHeads = color
+        self._scaleHeads = scale
+        self._boldHeads = bold
+        return
     def setBlockIndent(self, indent: float) -> None:
         """Set the block indent between 0.0 and 10.0."""
         self._blockIndent = min(max(float(indent), 0.0), 10.0)
@@ -357,27 +338,9 @@ class Tokenizer(ABC):
         self._doJustify = state
         return
-    def setDialogueHighlight(self, state: bool) -> None:
+    def setDialogHighlight(self, state: bool) -> None:
         """Enable or disable dialogue highlighting."""
-        self._rxDialogue = []
-        self._showDialog = state
-        if state:
-            if CONFIG.dialogStyle > 0:
-                self._rxDialogue.append((
-                    REGEX_PATTERNS.dialogStyle, self.FMT_DL_B, self.FMT_DL_E
-                ))
-            if CONFIG.dialogLine:
-                self._rxDialogue.append((
-                    REGEX_PATTERNS.dialogLine, self.FMT_DL_B, self.FMT_DL_E
-                ))
-            if CONFIG.narratorBreak:
-                self._rxDialogue.append((
-                    REGEX_PATTERNS.narratorBreak, self.FMT_DL_E, self.FMT_DL_B
-                ))
-            if CONFIG.altDialogOpen and CONFIG.altDialogClose:
-                self._rxDialogue.append((
-                    REGEX_PATTERNS.altDialogStyle, self.FMT_ADL_B, self.FMT_ADL_E
-                ))
+        self._hlightDialog = state
         return
     def setTitleMargins(self, upper: float, lower: float) -> None:
@@ -455,11 +418,6 @@ class Tokenizer(ABC):
         self._keepBreaks = state
         return
-    def setKeepMarkdown(self, state: bool) -> None:
-        """Keep original markdown during build."""
-        self._keepMD = state
-        return
     ##
     #  Class Methods
     ##
@@ -468,27 +426,54 @@ class Tokenizer(ABC):
     def doConvert(self) -> None:
         raise NotImplementedError
+    @abstractmethod
+    def closeDocument(self) -> None:
+        raise NotImplementedError
+    @abstractmethod
+    def saveDocument(self, path: Path) -> None:
+        raise NotImplementedError
+    def initDocument(self) -> None:
+        """Initialise data after settings."""
+        self._classes["modifier"] = self._theme.modifier
+        self._classes["synopsis"] = self._theme.note
+        self._classes["comment"] = self._theme.comment
+        self._classes["dialog"] = self._theme.dialog
+        self._classes["altdialog"] = self._theme.altdialog
+        self._classes["tag"] = self._theme.tag
+        self._classes["keyword"] = self._theme.keyword
+        self._classes["optional"] = self._theme.optional
+        return
+    def setBreakNext(self) -> None:
+        """Set a page break for next block."""
+        self._breakNext = True
+        return
     def addRootHeading(self, tHandle: str) -> None:
         """Add a heading at the start of a new root folder."""
         self._text = ""
         self._handle = None
-        if (tItem := self._project.tree[tHandle]) and tItem.isRootType():
+        if (item := self._project.tree[tHandle]) and item.isRootType():
             self._handle = tHandle
+            style = BlockFmt.CENTRE
             if self._isFirst:
-                textAlign = self.A_CENTRE
                 self._isFirst = False
             else:
-                textAlign = self.A_PBB | self.A_CENTRE
+                style |= BlockFmt.PBB
-            trNotes = self._localLookup("Notes")
-            title = f"{trNotes}: {tItem.itemName}"
-            self._tokens = []
-            self._tokens.append((
-                self.T_TITLE, 1, title, [], textAlign
-            ))
-            if self._keepMD:
-                self._markdown.append(f"#! {title}\n\n")
+            title = item.itemName
+            if not item.isNovelLike():
+                notes = self._localLookup("Notes")
+                title = f"{notes}: {title}"
+            self._blocks = [(
+                BlockTyp.TITLE, f"{self._handle}:T0001", title, [], style
+            )]
+            if self._keepRaw:
+                self._raw.append(f"#! {title}\n\n")
         return
@@ -515,9 +500,11 @@ class Tokenizer(ABC):
             xRep = re.compile("|".join([re.escape(k) for k in repDict.keys()]), flags=re.DOTALL)
             self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
-        # Process the character translation map
-        trDict = {nwUnicode.U_MAPOS: nwUnicode.U_RSQUO}
-        self._text = self._text.translate(str.maketrans(trDict))
+        # Process the translation map for placeholder characters
+        self._text = self._text.translate(str.maketrans({
+            nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
+            nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
+        }))
         return
@@ -526,42 +513,50 @@ class Tokenizer(ABC):
         characters that indicate headings, comments, commands etc, or
         just contain plain text. In the case of plain text, apply the
         same RegExes that the syntax highlighter uses and save the
-        locations of these formatting tags into the token array.
+        locations of these formatting tags into the blocks list.
-        The format of the token list is an entry with a five-tuple for
+        The format of the blocks list is an entry with a five-tuple for
         each line in the file. The tuple is as follows:
-          1: The type of the block, self.T_*
+          1: The type of the block, BlockTyp.*
           2: The heading number under which the text is placed
           3: The text content of the block, without leading tags
-          4: The internal formatting map of the text, self.FMT_*
-          5: The style of the block, self.A_*
+          4: The internal formatting map of the text, TextFmt.*
+          5: The formats of the block, BlockFmt.*
         """
+        if self._keepRaw:
+            self._raw.append(f"{self._text.rstrip()}\n\n")
+        if self._noTokens:
+            return
         if self._isNovel:
             self._hFormatter.setHandle(self._handle)
+        # Cache Flags
+        isNovel = self._isNovel
+        doJustify = self._doJustify
+        keepBreaks = self._keepBreaks
+        indentFirst = self._indentFirst
+        firstIndent = self._firstIndent
+        # Replace all instances of [br] with a placeholder character
+        text = REGEX_PATTERNS.lineBreak.sub("\uffff", self._text)
         nHead = 0
-        breakNext = False
-        tmpMarkdown = []
         tHandle = self._handle or ""
-        tokens: list[T_Token] = []
-        for aLine in self._text.splitlines():
+        tBlocks: list[T_Block] = [B_EMPTY]
+        for bLine in text.splitlines():
+            aLine = bLine.replace("\uffff", "")  # Remove placeholder characters
             sLine = aLine.strip().lower()
             # Check for blank lines
-            if len(sLine) == 0:
-                tokens.append((
-                    self.T_EMPTY, nHead, "", [], self.A_NONE
-                ))
-                if self._keepMD:
-                    tmpMarkdown.append("\n")
+            if not sLine:
+                tBlocks.append(B_EMPTY)
                 continue
-            if breakNext:
-                sAlign = self.A_PBB
-                breakNext = False
+            if self._breakNext:
+                tStyle = BlockFmt.PBB
+                self._breakNext = False
             else:
-                sAlign = self.A_NONE
+                tStyle = BlockFmt.NONE
             # Check Line Format
             # =================
@@ -574,24 +569,24 @@ class Tokenizer(ABC):
                 # therefore proceed to check other formats.
                 if sLine in ("[newpage]", "[new page]"):
-                    breakNext = True
+                    self._breakNext = True
                     continue
                 elif sLine == "[vspace]":
-                    tokens.append(
-                        (self.T_SKIP, nHead, "", [], sAlign)
+                    tBlocks.append(
+                        (BlockTyp.SKIP, "", "", [], tStyle)
                     )
                     continue
                 elif sLine.startswith("[vspace:") and sLine.endswith("]"):
                     nSkip = checkInt(sLine[8:-1], 0)
                     if nSkip >= 1:
-                        tokens.append(
-                            (self.T_SKIP, nHead, "", [], sAlign)
+                        tBlocks.append(
+                            (BlockTyp.SKIP, "", "", [], tStyle)
                         )
                     if nSkip > 1:
-                        tokens += (nSkip - 1) * [
-                            (self.T_SKIP, nHead, "", [], self.A_NONE)
+                        tBlocks += (nSkip - 1) * [
+                            (BlockTyp.SKIP, "", "", [], BlockFmt.NONE)
                         ]
                     continue
@@ -605,32 +600,24 @@ class Tokenizer(ABC):
                     continue
                 cStyle, cKey, cText, _, _ = processComment(aLine)
-                if cStyle == nwComment.SYNOPSIS:
-                    tLine, tFmt = self._extractFormats(cText)
-                    tokens.append((
-                        self.T_SYNOPSIS, nHead, tLine, tFmt, sAlign
-                    ))
-                    if self._doSynopsis and self._keepMD:
-                        tmpMarkdown.append(f"{aLine}\n")
-                elif cStyle == nwComment.SHORT:
-                    tLine, tFmt = self._extractFormats(cText)
-                    tokens.append((
-                        self.T_SHORT, nHead, tLine, tFmt, sAlign
+                if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT) and not self._doSynopsis:
+                    continue
+                if cStyle == nwComment.PLAIN and not self._doComments:
+                    continue
+                if doJustify and not tStyle & BlockFmt.ALIGNED:
+                    tStyle |= BlockFmt.JUSTIFY
+                if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT, nwComment.PLAIN):
+                    bStyle = COMMENT_STYLE[cStyle]
+                    tLine, tFmt = self._formatComment(bStyle, cKey, cText)
+                    tBlocks.append((
+                        BlockTyp.COMMENT, "", tLine, tFmt, tStyle
                     ))
-                    if self._doSynopsis and self._keepMD:
-                        tmpMarkdown.append(f"{aLine}\n")
                 elif cStyle == nwComment.FOOTNOTE:
-                    tLine, tFmt = self._extractFormats(cText, skip=self.FMT_FNOTE)
+                    tLine, tFmt = self._extractFormats(cText, skip=TextFmt.FNOTE)
                     self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
-                    if self._keepMD:
-                        tmpMarkdown.append(f"{aLine}\n")
-                else:
-                    tLine, tFmt = self._extractFormats(cText)
-                    tokens.append((
-                        self.T_COMMENT, nHead, tLine, tFmt, sAlign
-                    ))
-                    if self._doComments and self._keepMD:
-                        tmpMarkdown.append(f"{aLine}\n")
             elif aLine.startswith("@"):
                 # Keywords
@@ -638,16 +625,12 @@ class Tokenizer(ABC):
                 # Only valid keyword lines are parsed, and any ignored keywords
                 # are automatically skipped.
-                valid, bits, _ = self._project.index.scanThis(aLine)
-                if (
-                    valid and bits and bits[0] in nwLabels.KEY_NAME
-                    and bits[0] not in self._skipKeywords
-                ):
-                    tokens.append((
-                        self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
-                    ))
-                    if self._doKeywords and self._keepMD:
-                        tmpMarkdown.append(f"{aLine}\n")
+                if self._doKeywords:
+                    tTag, tLine, tFmt = self._formatMeta(aLine)
+                    if tLine:
+                        tBlocks.append((
+                            BlockTyp.KEYWORD, tTag[1:], tLine, tFmt, tStyle
+                        ))
             elif aLine.startswith(("# ", "#! ")):
                 # Title or Partition Headings
@@ -662,28 +645,26 @@ class Tokenizer(ABC):
                 nHead += 1
                 tText = aLine[2:].strip()
-                tType = self.T_HEAD1 if isPlain else self.T_TITLE
-                tStyle = self.A_NONE if isPlain else (self.A_PBB | self.A_CENTRE)
-                sHide = self._hideTitle if isPlain else False
-                if self._isNovel:
+                tType = BlockTyp.HEAD1 if isPlain else BlockTyp.TITLE
+                sHide = self._hidePart if isPlain else False
+                if not (isPlain or isNovel and sHide):
+                    tStyle |= self._titleStyle
+                if isNovel:
                     if sHide:
                         tText = ""
-                        tType = self.T_EMPTY
-                        tStyle = self.A_NONE
+                        tType = BlockTyp.EMPTY
                     elif isPlain:
-                        tText = self._hFormatter.apply(self._fmtTitle, tText, nHead)
-                        tStyle = self._titleStyle
+                        tText = self._hFormatter.apply(self._fmtPart, tText, nHead)
+                        tStyle |= self._partStyle
                     if isPlain:
                         self._hFormatter.resetScene()
                     else:
                         self._hFormatter.resetAll()
                     self._noSep = True
-                tokens.append((
-                    tType, nHead, tText, [], tStyle
+                tBlocks.append((
+                    tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
                 ))
-                if self._keepMD:
-                    tmpMarkdown.append(f"{aLine}\n")
             elif aLine.startswith(("## ", "##! ")):
                 # (Unnumbered) Chapter Headings
@@ -698,27 +679,24 @@ class Tokenizer(ABC):
                 nHead += 1
                 tText = aLine[3:].strip()
-                tType = self.T_HEAD2
-                tStyle = self.A_NONE
+                tType = BlockTyp.HEAD2
                 sHide = self._hideChapter if isPlain else self._hideUnNum
                 tFormat = self._fmtChapter if isPlain else self._fmtUnNum
-                if self._isNovel:
+                if isNovel:
                     if isPlain:
                         self._hFormatter.incChapter()
                     if sHide:
                         tText = ""
-                        tType = self.T_EMPTY
+                        tType = BlockTyp.EMPTY
                     else:
                         tText = self._hFormatter.apply(tFormat, tText, nHead)
-                        tStyle = self._chapterStyle
+                        tStyle |= self._chapterStyle
                     self._hFormatter.resetScene()
                     self._noSep = True
-                tokens.append((
-                    tType, nHead, tText, [], tStyle
+                tBlocks.append((
+                    tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
                 ))
-                if self._keepMD:
-                    tmpMarkdown.append(f"{aLine}\n")
             elif aLine.startswith(("### ", "###! ")):
                 # (Alternative) Scene Headings
@@ -735,31 +713,28 @@ class Tokenizer(ABC):
                 nHead += 1
                 tText = aLine[4:].strip()
-                tType = self.T_HEAD3
-                tStyle = self.A_NONE
+                tType = BlockTyp.HEAD3
                 sHide = self._hideScene if isPlain else self._hideHScene
                 tFormat = self._fmtScene if isPlain else self._fmtHScene
-                if self._isNovel:
+                if isNovel:
                     self._hFormatter.incScene()
                     if sHide:
                         tText = ""
-                        tType = self.T_EMPTY
+                        tType = BlockTyp.EMPTY
                     else:
                         tText = self._hFormatter.apply(tFormat, tText, nHead)
-                        tStyle = self._sceneStyle
+                        tStyle |= self._sceneStyle
                         if tText == "":  # Empty Format
-                            tType = self.T_EMPTY if self._noSep else self.T_SKIP
+                            tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SKIP
                         elif tText == tFormat:  # Static Format
                             tText = "" if self._noSep else tText
-                            tType = self.T_EMPTY if self._noSep else self.T_SEP
-                            tStyle = self.A_NONE if self._noSep else self.A_CENTRE
+                            tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SEP
+                            tStyle |= BlockFmt.NONE if self._noSep else BlockFmt.CENTRE
                     self._noSep = False
-                tokens.append((
-                    tType, nHead, tText, [], tStyle
+                tBlocks.append((
+                    tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
                 ))
-                if self._keepMD:
-                    tmpMarkdown.append(f"{aLine}\n")
             elif aLine.startswith("#### "):
                 # Section Headings
@@ -771,25 +746,22 @@ class Tokenizer(ABC):
                 nHead += 1
                 tText = aLine[5:].strip()
-                tType = self.T_HEAD4
-                tStyle = self.A_NONE
-                if self._isNovel:
+                tType = BlockTyp.HEAD4
+                if isNovel:
                     if self._hideSection:
                         tText = ""
-                        tType = self.T_EMPTY
+                        tType = BlockTyp.EMPTY
                     else:
                         tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
                         if tText == "":  # Empty Format
-                            tType = self.T_SKIP
+                            tType = BlockTyp.SKIP
                         elif tText == self._fmtSection:  # Static Format
-                            tType = self.T_SEP
-                            tStyle = self.A_CENTRE
+                            tType = BlockTyp.SEP
+                            tStyle |= BlockFmt.CENTRE
-                tokens.append((
-                    tType, nHead, tText, [], tStyle
+                tBlocks.append((
+                    tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
                 ))
-                if self._keepMD:
-                    tmpMarkdown.append(f"{aLine}\n")
             else:
                 # Text Lines
@@ -805,135 +777,133 @@ class Tokenizer(ABC):
                 alnRight = False
                 indLeft = False
                 indRight = False
-                if aLine.startswith(">>"):
+                if bLine.startswith(">>"):
                     alnRight = True
-                    aLine = aLine[2:].lstrip(" ")
-                elif aLine.startswith(">"):
+                    bLine = bLine[2:].lstrip(" ")
+                elif bLine.startswith(">"):
                     indLeft = True
-                    aLine = aLine[1:].lstrip(" ")
+                    bLine = bLine[1:].lstrip(" ")
-                if aLine.endswith("<<"):
+                if bLine.endswith("<<"):
                     alnLeft = True
-                    aLine = aLine[:-2].rstrip(" ")
-                elif aLine.endswith("<"):
+                    bLine = bLine[:-2].rstrip(" ")
+                elif bLine.endswith("<"):
                     indRight = True
-                    aLine = aLine[:-1].rstrip(" ")
+                    bLine = bLine[:-1].rstrip(" ")
                 if alnLeft and alnRight:
-                    sAlign |= self.A_CENTRE
+                    tStyle |= BlockFmt.CENTRE
                 elif alnLeft:
-                    sAlign |= self.A_LEFT
+                    tStyle |= BlockFmt.LEFT
                 elif alnRight:
-                    sAlign |= self.A_RIGHT
+                    tStyle |= BlockFmt.RIGHT
                 if indLeft:
-                    sAlign |= self.A_IND_L
+                    tStyle |= BlockFmt.IND_L
                 if indRight:
-                    sAlign |= self.A_IND_R
+                    tStyle |= BlockFmt.IND_R
                 # Process formats
-                tLine, tFmt = self._extractFormats(aLine)
-                tokens.append((
-                    self.T_TEXT, nHead, tLine, tFmt, sAlign
+                tLine, tFmt = self._extractFormats(bLine, hDialog=isNovel)
+                tBlocks.append((
+                    BlockTyp.TEXT, "", tLine, tFmt, tStyle
                 ))
-                if self._keepMD:
-                    tmpMarkdown.append(f"{aLine}\n")
         # If we have content, turn off the first page flag
-        if self._isFirst and tokens:
+        if self._isFirst and len(tBlocks) > 1:
             self._isFirst = False  # First document has been processed
-            # Make sure the token array doesn't start with a page break
-            # on the very first page, adding a blank first page.
-            if tokens[0][4] & self.A_PBB:
-                cToken = tokens[0]
-                tokens[0] = (
-                    cToken[0], cToken[1], cToken[2], cToken[3], cToken[4] & ~self.A_PBB
-                )
+            # Make sure the blocks array doesn't start with a page break
+            # on the very first block, adding a blank first page.
+            for n, cBlock in enumerate(tBlocks):
+                if cBlock[0] != BlockTyp.EMPTY:
+                    if cBlock[4] & BlockFmt.PBB:
+                        tBlocks[n] = (
+                            cBlock[0], cBlock[1], cBlock[2], cBlock[3], cBlock[4] & ~BlockFmt.PBB
+                        )
+                    break
         # Always add an empty line at the end of the file
-        tokens.append((
-            self.T_EMPTY, nHead, "", [], self.A_NONE
-        ))
-        if self._keepMD:
-            tmpMarkdown.append("\n")
-            self._markdown.append("".join(tmpMarkdown))
+        tBlocks.append(B_EMPTY)
         # Second Pass
         # ===========
         # This second pass strips away consecutive blank lines, and
         # combines consecutive text lines into the same paragraph.
         # It also ensures that there isn't paragraph spacing between
-        # meta data lines for formats that has spacing.
-        self._tokens = []
-        pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
-        nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
+        # meta data lines for formats that have spacing.
-        lineSep = "\n" if self._keepBreaks else " "
-        pLines: list[T_Token] = []
+        lineSep = "\n" if keepBreaks else " "
-        tCount = len(tokens)
-        for n, cToken in enumerate(tokens):
+        pLines: list[T_Block] = []
+        sBlocks: list[T_Block] = []
+        for n, cBlock in enumerate(tBlocks[1:-1], 1):
-            if n > 0:
-                pToken = tokens[n-1]  # Look behind
-            if n < tCount - 1:
-                nToken = tokens[n+1]  # Look ahead
+            pBlock = tBlocks[n-1]  # Look behind
+            nBlock = tBlocks[n+1]  # Look ahead
-            if cToken[0] in self.L_SKIP_INDENT and not self._indentFirst:
+            if cBlock[0] in SKIP_INDENT and not indentFirst:
                 # Unless the indentFirst flag is set, we set up the next
                 # paragraph to not be indented if we see a block of a
                 # specific type
                 self._noIndent = True
-            if cToken[0] == self.T_EMPTY:
+            if cBlock[0] == BlockTyp.EMPTY:
                 # We don't need to keep the empty lines after this pass
                 pass
-            elif cToken[0] == self.T_KEYWORD:
+            elif cBlock[0] == BlockTyp.KEYWORD:
                 # Adjust margins for lines in a list of keyword lines
-                aStyle = cToken[4]
-                if pToken[0] == self.T_KEYWORD:
-                    aStyle |= self.A_Z_TOPMRG
-                if nToken[0] == self.T_KEYWORD:
-                    aStyle |= self.A_Z_BTMMRG
-                self._tokens.append((
-                    cToken[0], cToken[1], cToken[2], cToken[3], aStyle
+                aStyle = cBlock[4]
+                if pBlock[0] == BlockTyp.KEYWORD:
+                    aStyle |= BlockFmt.Z_TOP
+                if nBlock[0] == BlockTyp.KEYWORD:
+                    aStyle |= BlockFmt.Z_BTM
+                sBlocks.append((
+                    cBlock[0], cBlock[1], cBlock[2], cBlock[3], aStyle
                 ))
-            elif cToken[0] == self.T_TEXT:
+            elif cBlock[0] == BlockTyp.TEXT:
                 # Combine lines from the same paragraph
-                pLines.append(cToken)
+                pLines.append(cBlock)
-                if nToken[0] != self.T_TEXT:
-                    # Next token is not text, so we add the buffer to tokens
+                if nBlock[0] != BlockTyp.TEXT:
+                    # Next block is not text, so we add the buffer to blocks
                     nLines = len(pLines)
                     cStyle = pLines[0][4]
-                    if self._firstIndent and not (self._noIndent or cStyle & self.M_ALIGNED):
+                    if firstIndent and not (self._noIndent or cStyle & BlockFmt.ALIGNED):
                         # If paragraph indentation is enabled, not temporarily
                         # turned off, and the block is not aligned, we add the
                         # text indentation flag
-                        cStyle |= self.A_IND_T
+                        cStyle |= BlockFmt.IND_T
                     if nLines == 1:
-                        # The paragraph contains a single line, so we just
-                        # save that directly to the token list
-                        self._tokens.append((
-                            self.T_TEXT, pLines[0][1], pLines[0][2], pLines[0][3], cStyle
+                        # The paragraph contains a single line, so we just save
+                        # that directly to the blocks list. If justify is
+                        # enabled, and there is no alignment, we apply it.
+                        if doJustify and not cStyle & BlockFmt.ALIGNED:
+                            cStyle |= BlockFmt.JUSTIFY
+                        pTxt = pLines[0][2].replace("\uffff", "\n")
+                        sBlocks.append((
+                            BlockTyp.TEXT, pLines[0][1], pTxt, pLines[0][3], cStyle
                         ))
                     elif nLines > 1:
                         # The paragraph contains multiple lines, so we need to
                         # join them according to the line break policy, and
                         # recompute all the formatting markers
                         tTxt = ""
                         tFmt: T_Formats = []
-                        for aToken in pLines:
+                        for aBlock in pLines:
                             tLen = len(tTxt)
-                            tTxt += f"{aToken[2]}{lineSep}"
-                            tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aToken[3])
-                        self._tokens.append((
-                            self.T_TEXT, pLines[0][1], tTxt[:-1], tFmt, cStyle
+                            tTxt += f"{aBlock[2]}{lineSep}"
+                            tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aBlock[3])
+                            cStyle |= aBlock[4]
+                        pTxt = tTxt[:-1].replace("\uffff", "\n")
+                        sBlocks.append((
+                            BlockTyp.TEXT, pLines[0][1], pTxt, tFmt, cStyle
                         ))
                     # Reset buffer and make sure text indent is on for next pass
@@ -941,50 +911,50 @@ class Tokenizer(ABC):
                     self._noIndent = False
             else:
-                self._tokens.append(cToken)
+                sBlocks.append(cBlock)
+        self._blocks = sBlocks
         return
     def buildOutline(self) -> None:
         """Build an outline of the text up to level 3 headings."""
-        tHandle = self._handle or ""
         isNovel = self._isNovel
-        for tType, nHead, tText, _, _ in self._tokens:
-            if tType == self.T_TITLE:
+        for tType, tKey, tText, _, _ in self._blocks:
+            if tType == BlockTyp.TITLE:
                 prefix = "TT"
-            elif tType == self.T_HEAD1:
+            elif tType == BlockTyp.HEAD1:
                 prefix = "PT" if isNovel else "H1"
-            elif tType == self.T_HEAD2:
+            elif tType == BlockTyp.HEAD2:
                 prefix = "CH" if isNovel else "H2"
-            elif tType == self.T_HEAD3:
+            elif tType == BlockTyp.HEAD3:
                 prefix = "SC" if isNovel else "H3"
             else:
                 continue
-            key = f"{tHandle}:T{nHead:04d}"
             text = tText.replace(nwHeadFmt.BR, " ").replace("&amp;", "&")
-            self._outline[key] = f"{prefix}|{text}"
+            self._outline[tKey] = f"{prefix}|{text}"
         return
     def countStats(self) -> None:
         """Count stats on the tokenized text."""
-        titleCount = self._counts.get("titleCount", 0)
-        paragraphCount = self._counts.get("paragraphCount", 0)
+        titleCount = self._counts.get(nwStats.TITLES, 0)
+        paragraphCount = self._counts.get(nwStats.PARAGRAPHS, 0)
-        allWords = self._counts.get("allWords", 0)
-        textWords = self._counts.get("textWords", 0)
-        titleWords = self._counts.get("titleWords", 0)
+        allWords = self._counts.get(nwStats.WORDS_ALL, 0)
+        textWords = self._counts.get(nwStats.WORDS_TEXT, 0)
+        titleWords = self._counts.get(nwStats.WORDS_TITLE, 0)
-        allChars = self._counts.get("allChars", 0)
-        textChars = self._counts.get("textChars", 0)
-        titleChars = self._counts.get("titleChars", 0)
+        allChars = self._counts.get(nwStats.CHARS_ALL, 0)
+        textChars = self._counts.get(nwStats.CHARS_TEXT, 0)
+        titleChars = self._counts.get(nwStats.CHARS_TITLE, 0)
-        allWordChars = self._counts.get("allWordChars", 0)
-        textWordChars = self._counts.get("textWordChars", 0)
-        titleWordChars = self._counts.get("titleWordChars", 0)
+        allWordChars = self._counts.get(nwStats.WCHARS_ALL, 0)
+        textWordChars = self._counts.get(nwStats.WCHARS_TEXT, 0)
+        titleWordChars = self._counts.get(nwStats.WCHARS_TITLE, 0)
-        for tType, _, tText, _, _ in self._tokens:
+        for tType, _, tText, _, _ in self._blocks:
             tText = tText.replace(nwUnicode.U_ENDASH, " ")
             tText = tText.replace(nwUnicode.U_EMDASH, " ")
@@ -993,7 +963,7 @@ class Tokenizer(ABC):
             nChars = len(tText)
             nWChars = len("".join(tWords))
-            if tType == self.T_TEXT:
+            if tType == BlockTyp.TEXT:
                 tPWords = tText.split()
                 nPWords = len(tPWords)
                 nPChars = len(tText)
@@ -1007,7 +977,7 @@ class Tokenizer(ABC):
                 allWordChars += nPWChars
                 textWordChars += nPWChars
-            elif tType in self.L_HEADINGS:
+            elif tType in HEADINGS:
                 titleCount += 1
                 allWords += nWords
                 titleWords += nWords
@@ -1016,143 +986,170 @@ class Tokenizer(ABC):
                 titleChars += nChars
                 titleWordChars += nWChars
-            elif tType == self.T_SEP:
+            elif tType == BlockTyp.SEP:
                 allWords += nWords
                 allChars += nChars
                 allWordChars += nWChars
-            elif tType == self.T_SYNOPSIS and self._doSynopsis:
-                text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
-                words = text.split()
-                allWords += len(words)
-                allChars += len(text)
-                allWordChars += len("".join(words))
-            elif tType == self.T_SHORT and self._doSynopsis:
-                text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
-                words = text.split()
+            elif tType in (BlockTyp.COMMENT, BlockTyp.KEYWORD):
+                words = tText.split()
                 allWords += len(words)
-                allChars += len(text)
+                allChars += len(tText)
                 allWordChars += len("".join(words))
-            elif tType == self.T_COMMENT and self._doComments:
-                text = "{0}: {1}".format(self._localLookup("Comment"), tText)
-                words = text.split()
-                allWords += len(words)
-                allChars += len(text)
-                allWordChars += len("".join(words))
-            elif tType == self.T_KEYWORD and self._doKeywords:
-                valid, bits, _ = self._project.index.scanThis("@"+tText)
-                if valid and bits:
-                    key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
-                    text = "{0}: {1}".format(key, ", ".join(bits[1:]))
-                    words = text.split()
-                    allWords += len(words)
-                    allChars += len(text)
-                    allWordChars += len("".join(words))
-        self._counts["titleCount"] = titleCount
-        self._counts["paragraphCount"] = paragraphCount
+        self._counts[nwStats.TITLES] = titleCount
+        self._counts[nwStats.PARAGRAPHS] = paragraphCount
-        self._counts["allWords"] = allWords
-        self._counts["textWords"] = textWords
-        self._counts["titleWords"] = titleWords
+        self._counts[nwStats.WORDS_ALL] = allWords
+        self._counts[nwStats.WORDS_TEXT] = textWords
+        self._counts[nwStats.WORDS_TITLE] = titleWords
-        self._counts["allChars"] = allChars
-        self._counts["textChars"] = textChars
-        self._counts["titleChars"] = titleChars
+        self._counts[nwStats.CHARS_ALL] = allChars
+        self._counts[nwStats.CHARS_TEXT] = textChars
+        self._counts[nwStats.CHARS_TITLE] = titleChars
-        self._counts["allWordChars"] = allWordChars
-        self._counts["textWordChars"] = textWordChars
-        self._counts["titleWordChars"] = titleWordChars
+        self._counts[nwStats.WCHARS_ALL] = allWordChars
+        self._counts[nwStats.WCHARS_TEXT] = textWordChars
+        self._counts[nwStats.WCHARS_TITLE] = titleWordChars
         return
-    def saveRawMarkdown(self, path: str | Path) -> None:
-        """Save the raw text to a plain text file."""
-        with open(path, mode="w", encoding="utf-8") as outFile:
-            for nwdPage in self._markdown:
-                outFile.write(nwdPage)
-        return
-    def saveRawMarkdownJSON(self, path: str | Path) -> None:
-        """Save the raw text to a JSON file."""
-        timeStamp = time()
-        data = {
-            "meta": {
-                "projectName": self._project.data.name,
-                "novelAuthor": self._project.data.author,
-                "buildTime": int(timeStamp),
-                "buildTimeStr": formatTimeStamp(timeStamp),
-            },
-            "text": {
-                "nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
-            }
-        }
-        with open(path, mode="w", encoding="utf-8") as fObj:
-            json.dump(data, fObj, indent=2)
-        return
     ##
     #  Internal Functions
     ##
-    def _extractFormats(self, text: str, skip: int = 0) -> tuple[str, T_Formats]:
-        """Extract format markers from a text paragraph."""
+    def _formatInt(self, value: int) -> str:
+        """Return a localised integer."""
+        return self._dLocale.toString(value)
+    def _formatComment(self, style: ComStyle, key: str, text: str) -> tuple[str, T_Formats]:
+        """Apply formatting to comments and notes."""
+        tTxt, tFmt = self._extractFormats(text)
+        tFmt.insert(0, (0, TextFmt.COL_B, style.textClass))
+        tFmt.append((len(tTxt), TextFmt.COL_E, ""))
+        if label := (self._localLookup(style.label) + (f" ({key})" if key else "")).strip():
+            shift = len(label) + 2
+            tTxt = f"{label}: {tTxt}"
+            rFmt = [(0, TextFmt.B_B, ""), (shift - 1, TextFmt.B_E, "")]
+            if style.labelClass:
+                rFmt.insert(1, (0, TextFmt.COL_B, style.labelClass))
+                rFmt.insert(2, (shift - 1, TextFmt.COL_E, ""))
+            rFmt.extend((p + shift, f, d) for p, f, d in tFmt)
+        return tTxt, rFmt
+    def _formatMeta(self, text: str) -> tuple[str, str, T_Formats]:
+        """Apply formatting to a meta data line."""
+        tag = ""
+        txt = []
+        fmt = []
+        valid, bits, _ = self._project.index.scanThis(text)
+        if valid and bits and bits[0] in nwLabels.KEY_NAME and bits[0] not in self._skipKeywords:
+            tag = bits[0]
+            pos = 0
+            lbl = f"{self._localLookup(nwLabels.KEY_NAME[tag])}:"
+            end = len(lbl)
+            fmt = [
+                (pos, TextFmt.B_B, ""),
+                (pos, TextFmt.COL_B, "keyword"),
+                (end, TextFmt.COL_E, ""),
+                (end, TextFmt.B_E, ""),
+            ]
+            txt = [lbl, " "]
+            pos = end + 1
+            if (num := len(bits)) > 1:
+                if bits[0] == nwKeyWords.TAG_KEY:
+                    one, two = self._project.index.parseValue(bits[1])
+                    end = pos + len(one)
+                    fmt.append((pos, TextFmt.COL_B, "tag"))
+                    fmt.append((pos, TextFmt.ANM_B, f"tag_{one}".lower()))
+                    fmt.append((end, TextFmt.ANM_E, ""))
+                    fmt.append((end, TextFmt.COL_E, ""))
+                    txt.append(one)
+                    pos = end
+                    if two:
+                        txt.append(" | ")
+                        pos += 3
+                        end = pos + len(two)
+                        fmt.append((pos, TextFmt.COL_B, "optional"))
+                        fmt.append((end, TextFmt.COL_E, ""))
+                        txt.append(two)
+                        pos = end
+                else:
+                    for n, bit in enumerate(bits[1:], 2):
+                        end = pos + len(bit)
+                        fmt.append((pos, TextFmt.COL_B, "tag"))
+                        fmt.append((pos, TextFmt.ARF_B, f"#tag_{bit}".lower()))
+                        fmt.append((end, TextFmt.ARF_E, ""))
+                        fmt.append((end, TextFmt.COL_E, ""))
+                        txt.append(bit)
+                        pos = end
+                        if n < num:
+                            txt.append(", ")
+                            pos += 2
+        return tag, "".join(txt), fmt
+    def _extractFormats(
+        self, text: str, skip: int = 0, hDialog: bool = False
+    ) -> tuple[str, T_Formats]:
+        """Extract format markers from a text paragraph. In order to
+        also process dialogue highlighting, the hDialog flag must be set
+        to True. See issues #2011 and #2013.
+        """
         temp: list[tuple[int, int, int, str]] = []
         # Match Markdown
         for regEx, fmts in self._rxMarkdown:
-            rxItt = regEx.globalMatch(text, 0)
-            while rxItt.hasNext():
-                rxMatch = rxItt.next()
+            for res in regEx.finditer(text):
                 temp.extend(
-                    (rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt, "")
+                    (res.start(n), res.end(n), fmt, "")
                     for n, fmt in enumerate(fmts) if fmt > 0
                 )
+        # Match URLs
+        for res in REGEX_PATTERNS.url.finditer(text):
+            temp.append((res.start(0), 0, TextFmt.HRF_B, res.group(0)))
+            temp.append((res.end(0), 0, TextFmt.HRF_E, ""))
         # Match Shortcodes
-        rxItt = self._rxShortCodes.globalMatch(text, 0)
-        while rxItt.hasNext():
-            rxMatch = rxItt.next()
+        for res in REGEX_PATTERNS.shortcodePlain.finditer(text):
             temp.append((
-                rxMatch.capturedStart(1),
-                rxMatch.capturedLength(1),
-                self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
+                res.start(1), res.end(1),
+                self._shortCodeFmt.get(res.group(1).lower(), 0),
                 "",
             ))
         # Match Shortcode w/Values
-        rxItt = self._rxShortCodeVals.globalMatch(text, 0)
         tHandle = self._handle or ""
-        while rxItt.hasNext():
-            rxMatch = rxItt.next()
-            kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
+        for res in REGEX_PATTERNS.shortcodeValue.finditer(text):
+            kind = self._shortCodeVals.get(res.group(1).lower(), 0)
             temp.append((
-                rxMatch.capturedStart(0),
-                rxMatch.capturedLength(0),
-                self.FMT_STRIP if kind == skip else kind,
-                f"{tHandle}:{rxMatch.captured(2)}",
+                res.start(0), res.end(0),
+                TextFmt.STRIP if kind == skip else kind,
+                f"{tHandle}:{res.group(2)}",
             ))
         # Match Dialogue
-        if self._rxDialogue:
-            for regEx, fmtB, fmtE in self._rxDialogue:
-                rxItt = regEx.globalMatch(text, 0)
-                while rxItt.hasNext():
-                    rxMatch = rxItt.next()
-                    temp.append((rxMatch.capturedStart(0), 0, fmtB, ""))
-                    temp.append((rxMatch.capturedEnd(0), 0, fmtE, ""))
+        if self._hlightDialog and hDialog:
+            if self._dialogParser.enabled:
+                for pos, end in self._dialogParser(text):
+                    temp.append((pos, 0, TextFmt.COL_B, "dialog"))
+                    temp.append((end, 0, TextFmt.COL_E, ""))
+            if self._rxAltDialog:
+                for res in self._rxAltDialog.finditer(text):
+                    temp.append((res.start(0), 0, TextFmt.COL_B, "altdialog"))
+                    temp.append((res.end(0), 0, TextFmt.COL_E, ""))
         # Post-process text and format
         result = text
         formats = []
-        for pos, n, fmt, key in reversed(sorted(temp, key=lambda x: x[0])):
+        for pos, end, fmt, meta in reversed(sorted(temp, key=lambda x: x[0])):
             if fmt > 0:
-                result = result[:pos] + result[pos+n:]
-                formats = [(p-n, f, k) for p, f, k in formats]
-                formats.insert(0, (pos, fmt, key))
+                if end > pos:
+                    result = result[:pos] + result[end:]
+                    formats = [(p+pos-end if p > pos else p, f, m) for p, f, m in formats]
+                formats.insert(0, (pos, fmt, meta))
         return result, formats
@@ -1198,6 +1195,7 @@ class HeadingFormatter:
     def apply(self, hFormat: str, text: str, nHead: int) -> str:
         """Apply formatting to a specific heading."""
         hFormat = hFormat.replace(nwHeadFmt.TITLE, text)
+        hFormat = hFormat.replace(nwHeadFmt.BR, "\n")
         hFormat = hFormat.replace(nwHeadFmt.CH_NUM, str(self._chCount))
         hFormat = hFormat.replace(nwHeadFmt.SC_NUM, str(self._scChCount))
         hFormat = hFormat.replace(nwHeadFmt.SC_ABS, str(self._scAbsCount))

novelWriter 2.5.1__py3-none-any.whl → 2.6b1__py3-none-any.whl

novelWriter 2.5.1py3-none-any.whl → 2.6b1py3-none-any.whl