PyPI - novelWriter - Versions diffs - 2.3.1__py3-none-any.whl → 2.4rc1__py3-none-any.whl - Mend

novelWriter 2.3.1-py3-none-any.whl → 2.4rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107)

{novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/METADATA +5 -6
{novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/RECORD +102 -95
novelwriter/__init__.py +7 -7
novelwriter/assets/icons/none.svg +4 -0
novelwriter/assets/icons/typicons_dark/icons.conf +4 -0
novelwriter/assets/icons/typicons_dark/nw_tb-mark.svg +7 -0
novelwriter/assets/icons/typicons_dark/typ_refresh-flipped.svg +1 -1
novelwriter/assets/icons/typicons_dark/typ_refresh.svg +1 -1
novelwriter/assets/icons/typicons_dark/typ_search-grey.svg +4 -0
novelwriter/assets/icons/typicons_dark/typ_times.svg +1 -1
novelwriter/assets/icons/typicons_dark/typ_unfold-hidden.svg +4 -0
novelwriter/assets/icons/typicons_dark/typ_unfold-visible.svg +4 -0
novelwriter/assets/icons/typicons_light/icons.conf +4 -0
novelwriter/assets/icons/typicons_light/nw_tb-mark.svg +7 -0
novelwriter/assets/icons/typicons_light/typ_refresh-flipped.svg +1 -1
novelwriter/assets/icons/typicons_light/typ_refresh.svg +1 -1
novelwriter/assets/icons/typicons_light/typ_search-grey.svg +4 -0
novelwriter/assets/icons/typicons_light/typ_times.svg +1 -1
novelwriter/assets/icons/typicons_light/typ_unfold-hidden.svg +4 -0
novelwriter/assets/icons/typicons_light/typ_unfold-visible.svg +4 -0
novelwriter/assets/manual.pdf +0 -0
novelwriter/assets/sample.zip +0 -0
novelwriter/assets/syntax/default_dark.conf +1 -0
novelwriter/assets/syntax/default_light.conf +1 -0
novelwriter/assets/syntax/grey_dark.conf +1 -0
novelwriter/assets/syntax/grey_light.conf +1 -0
novelwriter/assets/syntax/light_owl.conf +1 -0
novelwriter/assets/syntax/night_owl.conf +1 -0
novelwriter/assets/syntax/solarized_dark.conf +1 -0
novelwriter/assets/syntax/solarized_light.conf +1 -0
novelwriter/assets/syntax/tomorrow.conf +1 -0
novelwriter/assets/syntax/tomorrow_night.conf +1 -0
novelwriter/assets/syntax/tomorrow_night_blue.conf +1 -0
novelwriter/assets/syntax/tomorrow_night_bright.conf +1 -0
novelwriter/assets/syntax/tomorrow_night_eighties.conf +1 -0
novelwriter/assets/text/credits_en.htm +25 -23
novelwriter/common.py +7 -2
novelwriter/config.py +43 -16
novelwriter/constants.py +5 -6
novelwriter/core/buildsettings.py +60 -40
novelwriter/core/coretools.py +97 -13
novelwriter/core/docbuild.py +74 -7
novelwriter/core/document.py +24 -3
novelwriter/core/index.py +31 -112
novelwriter/core/project.py +10 -15
novelwriter/core/sessions.py +2 -2
novelwriter/core/status.py +6 -5
novelwriter/core/storage.py +8 -2
novelwriter/core/tohtml.py +22 -25
novelwriter/core/tokenizer.py +416 -232
novelwriter/core/tomd.py +17 -8
novelwriter/core/toodt.py +385 -350
novelwriter/core/tree.py +8 -8
novelwriter/dialogs/about.py +9 -11
novelwriter/dialogs/docmerge.py +17 -14
novelwriter/dialogs/docsplit.py +20 -19
novelwriter/dialogs/editlabel.py +5 -4
novelwriter/dialogs/preferences.py +31 -39
novelwriter/dialogs/projectsettings.py +29 -26
novelwriter/dialogs/quotes.py +10 -9
novelwriter/dialogs/wordlist.py +15 -12
novelwriter/enum.py +17 -14
novelwriter/error.py +13 -11
novelwriter/extensions/circularprogress.py +12 -8
novelwriter/extensions/configlayout.py +1 -3
novelwriter/extensions/modified.py +51 -2
novelwriter/extensions/pagedsidebar.py +16 -14
novelwriter/extensions/simpleprogress.py +3 -1
novelwriter/extensions/statusled.py +3 -1
novelwriter/extensions/switch.py +10 -9
novelwriter/extensions/switchbox.py +14 -13
novelwriter/extensions/versioninfo.py +1 -1
novelwriter/gui/doceditor.py +413 -478
novelwriter/gui/dochighlight.py +33 -29
novelwriter/gui/docviewer.py +162 -175
novelwriter/gui/docviewerpanel.py +20 -37
novelwriter/gui/editordocument.py +15 -4
novelwriter/gui/itemdetails.py +51 -54
novelwriter/gui/mainmenu.py +37 -16
novelwriter/gui/noveltree.py +30 -36
novelwriter/gui/outline.py +114 -92
novelwriter/gui/projtree.py +60 -66
novelwriter/gui/search.py +362 -0
novelwriter/gui/sidebar.py +36 -45
novelwriter/gui/statusbar.py +14 -14
novelwriter/gui/theme.py +93 -28
novelwriter/guimain.py +207 -200
novelwriter/shared.py +31 -6
novelwriter/text/counting.py +137 -0
novelwriter/tools/dictionaries.py +13 -12
novelwriter/tools/lipsum.py +20 -17
novelwriter/tools/manusbuild.py +35 -27
novelwriter/tools/manuscript.py +374 -90
novelwriter/tools/manussettings.py +261 -124
novelwriter/tools/noveldetails.py +20 -18
novelwriter/tools/welcome.py +48 -44
novelwriter/tools/writingstats.py +61 -55
novelwriter/types.py +90 -0
novelwriter/core/__init__.py +0 -3
novelwriter/dialogs/__init__.py +0 -3
novelwriter/extensions/__init__.py +0 -3
novelwriter/gui/__init__.py +0 -3
novelwriter/tools/__init__.py +0 -3
{novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/LICENSE.md +0 -0
{novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/WHEEL +0 -0
{novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/entry_points.txt +0 -0
{novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/top_level.txt +0 -0

novelwriter/core/tokenizer.py CHANGED Viewed

@@ -35,13 +35,13 @@ from functools import partial
 from PyQt5.QtCore import QCoreApplication, QRegularExpression
-from novelwriter.enum import nwComment, nwItemLayout
 from novelwriter.common import formatTimeStamp, numberToRoman, checkInt
 from novelwriter.constants import (
     nwHeadFmt, nwKeyWords, nwLabels, nwRegEx, nwShortcode, nwUnicode, trConst
 )
 from novelwriter.core.index import processComment
 from novelwriter.core.project import NWProject
+from novelwriter.enum import nwComment, nwItemLayout
 logger = logging.getLogger(__name__)
@@ -74,10 +74,12 @@ class Tokenizer(ABC):
     FMT_D_E   = 6   # End strikeout
     FMT_U_B   = 7   # Begin underline
     FMT_U_E   = 8   # End underline
-    FMT_SUP_B = 9   # Begin superscript
-    FMT_SUP_E = 10  # End superscript
-    FMT_SUB_B = 11  # Begin subscript
-    FMT_SUB_E = 12  # End subscript
+    FMT_M_B   = 9   # Begin mark
+    FMT_M_E   = 10  # End mark
+    FMT_SUP_B = 11  # Begin superscript
+    FMT_SUP_E = 12  # End superscript
+    FMT_SUB_B = 13  # Begin subscript
+    FMT_SUB_E = 14  # End subscript
     # Block Type
     T_EMPTY    = 1   # Empty line (new paragraph)
@@ -86,14 +88,13 @@ class Tokenizer(ABC):
     T_COMMENT  = 4   # Comment line
     T_KEYWORD  = 5   # Command line
     T_TITLE    = 6   # Title
-    T_UNNUM    = 7   # Unnumbered
-    T_HEAD1    = 8   # Header 1
-    T_HEAD2    = 9   # Header 2
-    T_HEAD3    = 10  # Header 3
-    T_HEAD4    = 11  # Header 4
-    T_TEXT     = 12  # Text line
-    T_SEP      = 13  # Scene separator
-    T_SKIP     = 14  # Paragraph break
+    T_HEAD1    = 7   # Heading 1
+    T_HEAD2    = 8   # Heading 2
+    T_HEAD3    = 9   # Heading 3
+    T_HEAD4    = 10  # Heading 4
+    T_TEXT     = 11  # Text line
+    T_SEP      = 12  # Scene separator
+    T_SKIP     = 13  # Paragraph break
     # Block Style
     A_NONE     = 0x0000  # No special style
@@ -108,32 +109,38 @@ class Tokenizer(ABC):
     A_IND_L    = 0x0100  # Left indentation
     A_IND_R    = 0x0200  # Right indentation
+    # Lookups
+    L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
     def __init__(self, project: NWProject) -> None:
         self._project = project
         # Data Variables
         self._text   = ""    # The raw text to be tokenized
-        self._nwItem = None  # The NWItem currently being processed
+        self._handle = None  # The item handle currently being processed
         self._result = ""    # The result of the last document
         self._keepMarkdown = False  # Whether to keep the markdown text
         self._allMarkdown  = []     # The result novelWriter markdown of all documents
-        # Processed Tokens
+        # Processed Tokens and Meta Data
         self._tokens: list[tuple[int, int, str, list[tuple[int, int]], int]] = []
+        self._counts: dict[str, int] = {}
+        self._outline: dict[str, str] = {}
         # User Settings
-        self._textFont    = "Serif"  # Output text font
-        self._textSize    = 11       # Output text size
-        self._textFixed   = False    # Fixed width text
-        self._lineHeight  = 1.15     # Line height in units of em
-        self._blockIndent = 4.00     # Block indent in units of em
-        self._doJustify   = False    # Justify text
-        self._doBodyText  = True     # Include body text
-        self._doSynopsis  = False    # Also process synopsis comments
-        self._doComments  = False    # Also process comments
-        self._doKeywords  = False    # Also process keywords like tags and references
+        self._textFont     = "Serif"  # Output text font
+        self._textSize     = 11       # Output text size
+        self._textFixed    = False    # Fixed width text
+        self._lineHeight   = 1.15     # Line height in units of em
+        self._blockIndent  = 4.00     # Block indent in units of em
+        self._doJustify    = False    # Justify text
+        self._doBodyText   = True     # Include body text
+        self._doSynopsis   = False    # Also process synopsis comments
+        self._doComments   = False    # Also process comments
+        self._doKeywords   = False    # Also process keywords like tags and references
+        self._skipKeywords = set()    # Keywords to ignore
         # Margins
         self._marginTitle = (1.000, 0.500)
@@ -149,21 +156,28 @@ class Tokenizer(ABC):
         self._fmtChapter = nwHeadFmt.TITLE  # Formatting for numbered chapters
         self._fmtUnNum   = nwHeadFmt.TITLE  # Formatting for unnumbered chapters
         self._fmtScene   = nwHeadFmt.TITLE  # Formatting for scenes
+        self._fmtHScene  = nwHeadFmt.TITLE  # Formatting for hard scenes
         self._fmtSection = nwHeadFmt.TITLE  # Formatting for sections
-        self._hideScene   = False  # Do not include scene headers
-        self._hideSection = False  # Do not include section headers
+        self._hideTitle   = False  # Do not include title headings
+        self._hideChapter = False  # Do not include chapter headings
+        self._hideUnNum   = False  # Do not include unnumbered headings
+        self._hideScene   = False  # Do not include scene headings
+        self._hideHScene  = False  # Do not include hard scene headings
+        self._hideSection = False  # Do not include section headings
-        self._linkHeaders = False  # Add an anchor before headers
+        self._linkHeadings = False  # Add an anchor before headings
+        self._titleStyle   = self.A_CENTRE | self.A_PBB
+        self._chapterStyle = self.A_PBB
+        self._sceneStyle   = self.A_NONE
         # Instance Variables
         self._hFormatter = HeadingFormatter(self._project)
-        self._skipSeparator = False  # Flag to indicate that we skip the scene separator
+        self._noSep      = True  # Flag to indicate that we don't want a scene separator
         # This File
-        self._isNone  = False  # Document has unknown layout
         self._isNovel = False  # Document is a novel document
-        self._isNote  = False  # Document is a project note
         self._isFirst = True   # Document is the first in a set
         # Error Handling
@@ -187,6 +201,7 @@ class Tokenizer(ABC):
             nwShortcode.BOLD_O:   self.FMT_B_B,   nwShortcode.BOLD_C:   self.FMT_B_E,
             nwShortcode.STRIKE_O: self.FMT_D_B,   nwShortcode.STRIKE_C: self.FMT_D_E,
             nwShortcode.ULINE_O:  self.FMT_U_B,   nwShortcode.ULINE_C:  self.FMT_U_E,
+            nwShortcode.MARK_O:   self.FMT_M_B,   nwShortcode.MARK_C:   self.FMT_M_E,
             nwShortcode.SUP_O:    self.FMT_SUP_B, nwShortcode.SUP_C:    self.FMT_SUP_E,
             nwShortcode.SUB_O:    self.FMT_SUB_B, nwShortcode.SUB_C:    self.FMT_SUB_E,
         }
@@ -203,12 +218,22 @@ class Tokenizer(ABC):
         return self._result
     @property
-    def allMarkdown(self) -> list:
+    def allMarkdown(self) -> list[str]:
         """The combined novelWriter Markdown text."""
         return self._allMarkdown
     @property
-    def errData(self) -> list:
+    def textStats(self) -> dict[str, int]:
+        """The collected stats about the text."""
+        return self._counts
+    @property
+    def textOutline(self) -> dict[str, str]:
+        """The generated outline of the text."""
+        return self._outline
+    @property
+    def errData(self) -> list[str]:
         """The error data."""
         return self._errData
@@ -216,33 +241,63 @@ class Tokenizer(ABC):
     #  Setters
     ##
-    def setTitleFormat(self, hFormat: str) -> None:
+    def setTitleFormat(self, hFormat: str, hide: bool = False) -> None:
         """Set the title format pattern."""
         self._fmtTitle = hFormat.strip()
+        self._hideTitle = hide
         return
-    def setChapterFormat(self, hFormat: str) -> None:
+    def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
         """Set the chapter format pattern."""
         self._fmtChapter = hFormat.strip()
+        self._hideChapter = hide
         return
-    def setUnNumberedFormat(self, hFormat: str) -> None:
+    def setUnNumberedFormat(self, hFormat: str, hide: bool = False) -> None:
         """Set the unnumbered format pattern."""
         self._fmtUnNum = hFormat.strip()
+        self._hideUnNum = hide
         return
-    def setSceneFormat(self, hFormat: str, hide: bool) -> None:
+    def setSceneFormat(self, hFormat: str, hide: bool = False) -> None:
         """Set the scene format pattern and hidden status."""
         self._fmtScene = hFormat.strip()
         self._hideScene = hide
         return
-    def setSectionFormat(self, hFormat: str, hide: bool) -> None:
+    def setHardSceneFormat(self, hFormat: str, hide: bool = False) -> None:
+        """Set the hard scene format pattern and hidden status."""
+        self._fmtHScene = hFormat.strip()
+        self._hideHScene = hide
+        return
+    def setSectionFormat(self, hFormat: str, hide: bool = False) -> None:
         """Set the section format pattern and hidden status."""
         self._fmtSection = hFormat.strip()
         self._hideSection = hide
         return
+    def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
+        """Set the title heading style."""
+        self._titleStyle = (
+            (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
+        )
+        return
+    def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
+        """Set the chapter heading style."""
+        self._chapterStyle = (
+            (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
+        )
+        return
+    def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
+        """Set the scene heading style."""
+        self._sceneStyle = (
+            (self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
+        )
+        return
     def setFont(self, family: str, size: int, isFixed: bool = False) -> None:
         """Set the build font."""
         self._textFont = family
@@ -271,22 +326,22 @@ class Tokenizer(ABC):
         return
     def setHead1Margins(self, upper: float, lower: float) -> None:
-        """Set the upper and lower header 1 margin."""
+        """Set the upper and lower heading 1 margin."""
         self._marginHead1 = (float(upper), float(lower))
         return
     def setHead2Margins(self, upper: float, lower: float) -> None:
-        """Set the upper and lower header 2 margin."""
+        """Set the upper and lower heading 2 margin."""
         self._marginHead2 = (float(upper), float(lower))
         return
     def setHead3Margins(self, upper: float, lower: float) -> None:
-        """Set the upper and lower header 3 margin."""
+        """Set the upper and lower heading 3 margin."""
         self._marginHead3 = (float(upper), float(lower))
         return
     def setHead4Margins(self, upper: float, lower: float) -> None:
-        """Set the upper and lower header 4 margin."""
+        """Set the upper and lower heading 4 margin."""
         self._marginHead4 = (float(upper), float(lower))
         return
@@ -300,9 +355,9 @@ class Tokenizer(ABC):
         self._marginMeta = (float(upper), float(lower))
         return
-    def setLinkHeaders(self, state: bool) -> None:
-        """Enable or disable adding an anchor before headers."""
-        self._linkHeaders = state
+    def setLinkHeadings(self, state: bool) -> None:
+        """Enable or disable adding an anchor before headings."""
+        self._linkHeadings = state
         return
     def setBodyText(self, state: bool) -> None:
@@ -325,6 +380,11 @@ class Tokenizer(ABC):
         self._doKeywords = state
         return
+    def setIgnoredKeywords(self, keywords: str) -> None:
+        """Comma separated string of keywords to ignore."""
+        self._skipKeywords = set(x.lower().strip() for x in keywords.split(","))
+        return
     def setKeepMarkdown(self, state: bool) -> None:
         """Keep original markdown during build."""
         self._keepMarkdown = state
@@ -338,47 +398,41 @@ class Tokenizer(ABC):
     def doConvert(self) -> None:
         raise NotImplementedError
-    def addRootHeading(self, tHandle: str) -> bool:
+    def addRootHeading(self, tHandle: str) -> None:
         """Add a heading at the start of a new root folder."""
-        tItem = self._project.tree[tHandle]
-        if not tItem or not tItem.isRootType():
-            return False
-        if self._isFirst:
-            textAlign = self.A_CENTRE
-            self._isFirst = False
-        else:
-            textAlign = self.A_PBB | self.A_CENTRE
-        trNotes = self._localLookup("Notes")
-        title = f"{trNotes}: {tItem.itemName}"
-        self._tokens = []
-        self._tokens.append((
-            self.T_TITLE, 0, title, [], textAlign
-        ))
-        if self._keepMarkdown:
-            self._allMarkdown.append(f"# {title}\n\n")
-        return True
-    def setText(self, tHandle: str, text: str | None = None) -> bool:
-        """Set the text for the tokenizer from a handle. If text is not
-        set, load it from the file.
-        """
-        self._nwItem = self._project.tree[tHandle]
-        if self._nwItem is None:
-            return False
+        self._text = ""
+        self._handle = None
-        if text is None:
-            text = self._project.storage.getDocument(tHandle).readDocument() or ""
+        if (tItem := self._project.tree[tHandle]) and tItem.isRootType():
+            self._handle = tHandle
+            if self._isFirst:
+                textAlign = self.A_CENTRE
+                self._isFirst = False
+            else:
+                textAlign = self.A_PBB | self.A_CENTRE
-        self._text = text
+            trNotes = self._localLookup("Notes")
+            title = f"{trNotes}: {tItem.itemName}"
+            self._tokens = []
+            self._tokens.append((
+                self.T_TITLE, 1, title, [], textAlign
+            ))
+            if self._keepMarkdown:
+                self._allMarkdown.append(f"#! {title}\n\n")
-        self._isNone  = self._nwItem.itemLayout == nwItemLayout.NO_LAYOUT
-        self._isNovel = self._nwItem.itemLayout == nwItemLayout.DOCUMENT
-        self._isNote  = self._nwItem.itemLayout == nwItemLayout.NOTE
+        return
-        return True
+    def setText(self, tHandle: str, text: str | None = None) -> None:
+        """Set the text for the tokenizer from a handle. If text is not
+        set, it is loaded from the file.
+        """
+        self._text = ""
+        self._handle = None
+        if nwItem := self._project.tree[tHandle]:
+            self._text = text or self._project.storage.getDocumentText(tHandle)
+            self._handle = tHandle
+            self._isNovel = nwItem.itemLayout == nwItemLayout.DOCUMENT
+        return
     def doPreProcessing(self) -> None:
         """Run through the various replace dictionaries."""
@@ -399,7 +453,7 @@ class Tokenizer(ABC):
     def tokenizeText(self) -> None:
         """Scan the text for either lines starting with specific
-        characters that indicate headers, comments, commands etc, or
+        characters that indicate headings, comments, commands etc, or
         just contain plain text. In the case of plain text, apply the
         same RegExes that the syntax highlighter uses and save the
         locations of these formatting tags into the token array.
@@ -407,15 +461,18 @@ class Tokenizer(ABC):
         The format of the token list is an entry with a five-tuple for
         each line in the file. The tuple is as follows:
           1: The type of the block, self.T_*
-          2: The header number under which the text is placed
+          2: The heading number under which the text is placed
           3: The text content of the block, without leading tags
           4: The internal formatting map of the text, self.FMT_*
           5: The style of the block, self.A_*
         """
         self._tokens = []
-        tmpMarkdown = []
+        if self._isNovel:
+            self._hFormatter.setHandle(self._handle)
         nHead = 0
         breakNext = False
+        tmpMarkdown = []
         for aLine in self._text.splitlines():
             sLine = aLine.strip().lower()
@@ -438,11 +495,12 @@ class Tokenizer(ABC):
             # Check Line Format
             # =================
-            if aLine[0] == "[":
-                # Parse special formatting line
-                # This must be a separate if statement, as it may not
-                # reach a continue statement and must therefore proceed
-                # to check other formats.
+            if aLine.startswith("["):
+                # Special Formats
+                # ===============
+                # Parse special formatting line. This must be a separate if
+                # statement, as it may not reach a continue statement and must
+                # therefore proceed to check other formats.
                 if sLine in ("[newpage]", "[new page]"):
                     breakNext = True
@@ -466,9 +524,13 @@ class Tokenizer(ABC):
                         ]
                     continue
-            if aLine[0] == "%":
-                if aLine[1] == "~":
-                    # Completely ignore the paragraph
+            if aLine.startswith("%"):
+                # Comments
+                # ========
+                # All style comments are processed and the exact style is
+                # extracted. Ignored comments on the '%~' format are skipped
+                # completely.
+                if aLine.startswith("%~"):
                     continue
                 cStyle, cText, _ = processComment(aLine)
@@ -477,94 +539,181 @@ class Tokenizer(ABC):
                         self.T_SYNOPSIS, nHead, cText, [], sAlign
                     ))
                     if self._doSynopsis and self._keepMarkdown:
-                        tmpMarkdown.append("%s\n" % aLine)
+                        tmpMarkdown.append(f"{aLine}\n")
                 elif cStyle == nwComment.SHORT:
                     self._tokens.append((
                         self.T_SHORT, nHead, cText, [], sAlign
                     ))
                     if self._doSynopsis and self._keepMarkdown:
-                        tmpMarkdown.append("%s\n" % aLine)
+                        tmpMarkdown.append(f"{aLine}\n")
                 else:
                     self._tokens.append((
                         self.T_COMMENT, nHead, cText, [], sAlign
                     ))
                     if self._doComments and self._keepMarkdown:
-                        tmpMarkdown.append("%s\n" % aLine)
+                        tmpMarkdown.append(f"{aLine}\n")
-            elif aLine[0] == "@":
-                self._tokens.append((
-                    self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
-                ))
-                if self._doKeywords and self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
+            elif aLine.startswith("@"):
+                # Keywords
+                # ========
+                # Only valid keyword lines are parsed, and any ignored keywords
+                # are automatically skipped.
-            elif aLine[:2] == "# ":
-                if self._isNovel:
-                    sAlign |= self.A_CENTRE
-                    sAlign |= self.A_PBB
+                valid, bits, _ = self._project.index.scanThis(aLine)
+                if valid and bits and bits[0] not in self._skipKeywords:
+                    self._tokens.append((
+                        self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
+                    ))
+                    if self._doKeywords and self._keepMarkdown:
+                        tmpMarkdown.append(f"{aLine}\n")
+            elif aLine.startswith(("# ", "#! ")):
+                # Title or Partition Headings
+                # ===========================
+                # Main titles are allowed in any document, and they are always
+                # centred and start on a new page. For novel documents, we also
+                # reset all counters when such a title is encountered.
+                # Partition headings are only formatted in novel documents, and
+                # otherwise unchanged. Scene separators are disabled
+                # immediately after partitions, and scene numbers are reset.
+                isPlain = aLine.startswith("# ")
                 nHead += 1
-                self._tokens.append((
-                    self.T_HEAD1, nHead, aLine[2:].strip(), [], sAlign
-                ))
-                if self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
-            elif aLine[:3] == "## ":
+                tText = aLine[2:].strip()
+                tType = self.T_HEAD1 if isPlain else self.T_TITLE
+                tStyle = self.A_NONE if isPlain else (self.A_PBB | self.A_CENTRE)
+                sHide = self._hideTitle if isPlain else False
                 if self._isNovel:
-                    sAlign |= self.A_PBB
+                    if sHide:
+                        tText = ""
+                        tType = self.T_EMPTY
+                        tStyle = self.A_NONE
+                    elif isPlain:
+                        tText = self._hFormatter.apply(self._fmtTitle, tText, nHead)
+                        tStyle = self._titleStyle
+                    if isPlain:
+                        self._hFormatter.resetScene()
+                    else:
+                        self._hFormatter.resetAll()
+                    self._noSep = True
-                nHead += 1
                 self._tokens.append((
-                    self.T_HEAD2, nHead, aLine[3:].strip(), [], sAlign
+                    tType, nHead, tText, [], tStyle
                 ))
                 if self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
+                    tmpMarkdown.append(f"{aLine}\n")
+            elif aLine.startswith(("## ", "##! ")):
+                # (Unnumbered) Chapter Headings
+                # =============================
+                # Chapter headings are only formatted in novel documents, and
+                # otherwise unchanged. Chapter numbers are bumped before the
+                # heading is formatted. Scene separators are disabled
+                # immediately after chapter headings, and scene numbers are
+                # reset. Unnumbered chapters are only meaningful in Novel docs,
+                # so if we're in a note, we keep them as level 2 headings.
+                isPlain = aLine.startswith("## ")
-            elif aLine[:4] == "### ":
                 nHead += 1
-                self._tokens.append((
-                    self.T_HEAD3, nHead, aLine[4:].strip(), [], sAlign
-                ))
-                if self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
+                tText = aLine[3:].strip()
+                tType = self.T_HEAD2
+                tStyle = self.A_NONE
+                sHide = self._hideChapter if isPlain else self._hideUnNum
+                tFormat = self._fmtChapter if isPlain else self._fmtUnNum
+                if self._isNovel:
+                    if isPlain:
+                        self._hFormatter.incChapter()
+                    if sHide:
+                        tText = ""
+                        tType = self.T_EMPTY
+                    else:
+                        tText = self._hFormatter.apply(tFormat, tText, nHead)
+                        tStyle = self._chapterStyle
+                    self._hFormatter.resetScene()
+                    self._noSep = True
-            elif aLine[:5] == "#### ":
-                nHead += 1
                 self._tokens.append((
-                    self.T_HEAD4, nHead, aLine[5:].strip(), [], sAlign
+                    tType, nHead, tText, [], tStyle
                 ))
                 if self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
+                    tmpMarkdown.append(f"{aLine}\n")
+            elif aLine.startswith(("### ", "###! ")):
+                # (Hard) Scene Headings
+                # =====================
+                # Scene headings in novel documents are treated as centred
+                # separators if the formatting does not change the text. If the
+                # format is empty, the scene can be hidden or a blank paragraph
+                # (skip). When the scene title has static text or no text, it
+                # is always ignored if the noSep flag is set. This prevents
+                # separators immediately after other titles. Scene numbers are
+                # always incremented before formatting. For notes, the heading
+                # is unchanged.
+                isPlain = aLine.startswith("### ")
-            elif aLine[:3] == "#! ":
                 nHead += 1
+                tText = aLine[4:].strip()
+                tType = self.T_HEAD3
+                tStyle = self.A_NONE
+                sHide = self._hideScene if isPlain else self._hideHScene
+                tFormat = self._fmtScene if isPlain else self._fmtHScene
                 if self._isNovel:
-                    tStyle = self.T_TITLE
-                else:
-                    tStyle = self.T_HEAD1
+                    self._hFormatter.incScene()
+                    if sHide:
+                        tText = ""
+                        tType = self.T_EMPTY
+                    else:
+                        tText = self._hFormatter.apply(tFormat, tText, nHead)
+                        tStyle = self._sceneStyle
+                        if tText == "":  # Empty Format
+                            tType = self.T_EMPTY if self._noSep else self.T_SKIP
+                        elif tText == tFormat:  # Static Format
+                            tText = "" if self._noSep else tText
+                            tType = self.T_EMPTY if self._noSep else self.T_SEP
+                            tStyle = self.A_NONE if self._noSep else self.A_CENTRE
+                    self._noSep = False
                 self._tokens.append((
-                    tStyle, nHead, aLine[3:].strip(), [], sAlign | self.A_CENTRE
+                    tType, nHead, tText, [], tStyle
                 ))
                 if self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
+                    tmpMarkdown.append(f"{aLine}\n")
+            elif aLine.startswith("#### "):
+                # Section Headings
+                # =================
+                # Section headings in novel docs are treated as centred
+                # separators if the formatting does not change the text. If the
+                # format is empty, the section can be hidden or a blank
+                # paragraph (skip). For notes, the heading is unchanged.
-            elif aLine[:4] == "##! ":
                 nHead += 1
+                tText = aLine[5:].strip()
+                tType = self.T_HEAD4
+                tStyle = self.A_NONE
                 if self._isNovel:
-                    tStyle = self.T_UNNUM
-                    sAlign |= self.A_PBB
-                else:
-                    tStyle = self.T_HEAD2
+                    if self._hideSection:
+                        tText = ""
+                        tType = self.T_EMPTY
+                    else:
+                        tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
+                        if tText == "":  # Empty Format
+                            tType = self.T_SKIP
+                        elif tText == self._fmtSection:  # Static Format
+                            tType = self.T_SEP
+                            tStyle = self.A_CENTRE
                 self._tokens.append((
-                    tStyle, nHead, aLine[4:].strip(), [], sAlign
+                    tType, nHead, tText, [], tStyle
                 ))
                 if self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
+                    tmpMarkdown.append(f"{aLine}\n")
             else:
+                # Text Lines
+                # ==========
+                # Anything remaining at this point is body text. If body text
+                # is not disabled, we proceed to process text formatting.
                 if not self._doBodyText:
                     # Skip all body text
                     continue
@@ -606,11 +755,11 @@ class Tokenizer(ABC):
                     self.T_TEXT, nHead, tLine, fmtPos, sAlign
                 ))
                 if self._keepMarkdown:
-                    tmpMarkdown.append("%s\n" % aLine)
+                    tmpMarkdown.append(f"{aLine}\n")
         # If we have content, turn off the first page flag
         if self._isFirst and self._tokens:
-            self._isFirst = False
+            self._isFirst = False  # First document has been processed
             # Make sure the token array doesn't start with a page break
             # on the very first page, adding a blank first page.
@@ -626,8 +775,6 @@ class Tokenizer(ABC):
         ))
         if self._keepMarkdown:
             tmpMarkdown.append("\n")
-        if self._keepMarkdown:
             self._allMarkdown.append("".join(tmpMarkdown))
         # Second Pass
@@ -654,99 +801,136 @@ class Tokenizer(ABC):
         return
-    def doHeaders(self) -> bool:
-        """Apply formatting to the text headers for novel files. This
-        also applies chapter and scene numbering.
-        """
-        if not self._isNovel:
-            return False
-        self._hFormatter.setHandle(self._nwItem.itemHandle if self._nwItem else None)
-        for n, token in enumerate(self._tokens):
-            if token[0] == self.T_TITLE:  # Title
-                # For new titles, we reset all counters
-                self._skipSeparator = True
-                self._hFormatter.resetAll()
-            elif token[0] == self.T_HEAD1:  # Partition
-                tTemp = self._hFormatter.apply(self._fmtTitle, token[2], token[1])
-                self._tokens[n] = (
-                    token[0], token[1], tTemp, [], token[4]
-                )
-                # Set scene variables
-                self._skipSeparator = True
-                self._hFormatter.resetScene()
-            elif token[0] in (self.T_HEAD2, self.T_UNNUM):  # Chapter, Unnumbered
-                # Numbered or Unnumbered
-                if token[0] == self.T_UNNUM:
-                    tTemp = self._hFormatter.apply(self._fmtUnNum, token[2], token[1])
-                else:
-                    self._hFormatter.incChapter()
-                    tTemp = self._hFormatter.apply(self._fmtChapter, token[2], token[1])
-                # Format the chapter header
-                self._tokens[n] = (
-                    token[0], token[1], tTemp, [], token[4]
-                )
-                # Set scene variables
-                self._skipSeparator = True
-                self._hFormatter.resetScene()
-            elif token[0] == self.T_HEAD3:  # Scene
-                self._hFormatter.incScene()
-                tTemp = self._hFormatter.apply(self._fmtScene, token[2], token[1])
-                if tTemp == "" and self._hideScene:
-                    self._tokens[n] = (
-                        self.T_EMPTY, token[1], "", [], self.A_NONE
-                    )
-                elif tTemp == "" and not self._hideScene:
-                    self._tokens[n] = (
-                        self.T_EMPTY if self._skipSeparator else self.T_SKIP, token[1],
-                        "", [], self.A_NONE if self._skipSeparator else token[4]
-                    )
-                elif tTemp == self._fmtScene:
-                    self._tokens[n] = (
-                        self.T_EMPTY if self._skipSeparator else self.T_SEP, token[1],
-                        "" if self._skipSeparator else tTemp, [],
-                        self.A_NONE if self._skipSeparator else (token[4] | self.A_CENTRE)
-                    )
-                else:
-                    self._tokens[n] = (
-                        token[0], token[1], tTemp, [], token[4]
-                    )
+    def buildOutline(self) -> None:
+        """Build an outline of the text up to level 3 headings."""
+        tHandle = self._handle or ""
+        isNovel = self._isNovel
+        for tType, nHead, tText, _, _ in self._tokens:
+            if tType == self.T_TITLE:
+                prefix = "TT"
+            elif tType == self.T_HEAD1:
+                prefix = "PT" if isNovel else "H1"
+            elif tType == self.T_HEAD2:
+                prefix = "CH" if isNovel else "H2"
+            elif tType == self.T_HEAD3:
+                prefix = "SC" if isNovel else "H3"
+            else:
+                continue
-                self._skipSeparator = False
+            key = f"{tHandle}:T{nHead:04d}"
+            text = tText.replace(nwHeadFmt.BR, " ").replace("&amp;", "&")
+            self._outline[key] = f"{prefix}|{text}"
-            elif token[0] == self.T_HEAD4:  # Section
+        return
-                tTemp = self._hFormatter.apply(self._fmtSection, token[2], token[1])
-                if tTemp == "" and self._hideSection:
-                    self._tokens[n] = (
-                        self.T_EMPTY, token[1], "", [], self.A_NONE
-                    )
-                elif tTemp == "" and not self._hideSection:
-                    self._tokens[n] = (
-                        self.T_SKIP, token[1], "", [], token[4]
-                    )
-                elif tTemp == self._fmtSection:
-                    self._tokens[n] = (
-                        self.T_SEP, token[1], tTemp, [], token[4] | self.A_CENTRE
-                    )
-                else:
-                    self._tokens[n] = (
-                        token[0], token[1], tTemp, [], token[4]
-                    )
+    def countStats(self) -> None:
+        """Count stats on the tokenized text."""
+        titleCount = self._counts.get("titleCount", 0)
+        paragraphCount = self._counts.get("paragraphCount", 0)
+        allWords = self._counts.get("allWords", 0)
+        textWords = self._counts.get("textWords", 0)
+        titleWords = self._counts.get("titleWords", 0)
+        allChars = self._counts.get("allChars", 0)
+        textChars = self._counts.get("textChars", 0)
+        titleChars = self._counts.get("titleChars", 0)
+        allWordChars = self._counts.get("allWordChars", 0)
+        textWordChars = self._counts.get("textWordChars", 0)
+        titleWordChars = self._counts.get("titleWordChars", 0)
+        para = []
+        for tType, _, tText, _, _ in self._tokens:
+            tText = tText.replace(nwUnicode.U_ENDASH, " ")
+            tText = tText.replace(nwUnicode.U_EMDASH, " ")
+            tWords = tText.split()
+            nWords = len(tWords)
+            nChars = len(tText)
+            nWChars = len("".join(tWords))
+            if tType == self.T_EMPTY:
+                if len(para) > 0:
+                    tTemp = "\n".join(para)
+                    tPWords = tTemp.split()
+                    nPWords = len(tPWords)
+                    nPChars = len(tTemp)
+                    nPWChars = len("".join(tPWords))
+                    paragraphCount += 1
+                    allWords += nPWords
+                    textWords += nPWords
+                    allChars += nPChars
+                    textChars += nPChars
+                    allWordChars += nPWChars
+                    textWordChars += nPWChars
+                para = []
+            elif tType in self.L_HEADINGS:
+                titleCount += 1
+                allWords += nWords
+                titleWords += nWords
+                allChars += nChars
+                allWordChars += nWChars
+                titleChars += nChars
+                titleWordChars += nWChars
+            elif tType == self.T_SEP:
+                allWords += nWords
+                allChars += nChars
+                allWordChars += nWChars
+            elif tType == self.T_TEXT:
+                para.append(tText.rstrip())
+            elif tType == self.T_SYNOPSIS and self._doSynopsis:
+                text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
+                words = text.split()
+                allWords += len(words)
+                allChars += len(text)
+                allWordChars += len("".join(words))
+            elif tType == self.T_SHORT and self._doSynopsis:
+                text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
+                words = text.split()
+                allWords += len(words)
+                allChars += len(text)
+                allWordChars += len("".join(words))
+            elif tType == self.T_COMMENT and self._doComments:
+                text = "{0}: {1}".format(self._localLookup("Comment"), tText)
+                words = text.split()
+                allWords += len(words)
+                allChars += len(text)
+                allWordChars += len("".join(words))
+            elif tType == self.T_KEYWORD and self._doKeywords:
+                valid, bits, _ = self._project.index.scanThis("@"+tText)
+                if valid and bits:
+                    key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
+                    text = "{0}: {1}".format(key, ", ".join(bits[1:]))
+                    words = text.split()
+                    allWords += len(words)
+                    allChars += len(text)
+                    allWordChars += len("".join(words))
+        self._counts["titleCount"] = titleCount
+        self._counts["paragraphCount"] = paragraphCount
+        self._counts["allWords"] = allWords
+        self._counts["textWords"] = textWords
+        self._counts["titleWords"] = titleWords
+        self._counts["allChars"] = allChars
+        self._counts["textChars"] = textChars
+        self._counts["titleChars"] = titleChars
+        self._counts["allWordChars"] = allWordChars
+        self._counts["textWordChars"] = textWordChars
+        self._counts["titleWordChars"] = titleWordChars
-        return True
+        return
     def saveRawMarkdown(self, path: str | Path) -> None:
         """Save the raw text to a plain text file."""

novelWriter 2.3.1__py3-none-any.whl → 2.4rc1__py3-none-any.whl

novelWriter 2.3.1__py3-none-any.whl → 2.4rc1__py3-none-any.whl