novelWriter 2.5.2__py3-none-any.whl → 2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/METADATA +5 -4
- {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/RECORD +126 -105
- {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/WHEEL +1 -1
- novelwriter/__init__.py +50 -11
- novelwriter/assets/i18n/nw_de_DE.qm +0 -0
- novelwriter/assets/i18n/nw_en_US.qm +0 -0
- novelwriter/assets/i18n/nw_es_419.qm +0 -0
- novelwriter/assets/i18n/nw_fr_FR.qm +0 -0
- novelwriter/assets/i18n/nw_it_IT.qm +0 -0
- novelwriter/assets/i18n/nw_ja_JP.qm +0 -0
- novelwriter/assets/i18n/nw_nb_NO.qm +0 -0
- novelwriter/assets/i18n/nw_nl_NL.qm +0 -0
- novelwriter/assets/i18n/nw_pl_PL.qm +0 -0
- novelwriter/assets/i18n/nw_pt_BR.qm +0 -0
- novelwriter/assets/i18n/nw_ru_RU.qm +0 -0
- novelwriter/assets/i18n/nw_zh_CN.qm +0 -0
- novelwriter/assets/i18n/project_de_DE.json +4 -2
- novelwriter/assets/i18n/project_en_GB.json +1 -0
- novelwriter/assets/i18n/project_en_US.json +2 -0
- novelwriter/assets/i18n/project_it_IT.json +2 -0
- novelwriter/assets/i18n/project_ja_JP.json +2 -0
- novelwriter/assets/i18n/project_nb_NO.json +2 -0
- novelwriter/assets/i18n/project_nl_NL.json +2 -0
- novelwriter/assets/i18n/project_pl_PL.json +2 -0
- novelwriter/assets/i18n/project_pt_BR.json +2 -0
- novelwriter/assets/i18n/project_ru_RU.json +11 -0
- novelwriter/assets/i18n/project_zh_CN.json +2 -0
- novelwriter/assets/icons/typicons_dark/icons.conf +8 -0
- novelwriter/assets/icons/typicons_dark/mixed_copy.svg +4 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-bottom.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-left.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-right.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-top.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_size-height.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_size-width.svg +6 -0
- novelwriter/assets/icons/typicons_dark/nw_toolbar.svg +5 -0
- novelwriter/assets/icons/typicons_light/icons.conf +8 -0
- novelwriter/assets/icons/typicons_light/mixed_copy.svg +4 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-bottom.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-left.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-right.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-top.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_size-height.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_size-width.svg +6 -0
- novelwriter/assets/icons/typicons_light/nw_toolbar.svg +5 -0
- novelwriter/assets/manual.pdf +0 -0
- novelwriter/assets/sample.zip +0 -0
- novelwriter/assets/text/credits_en.htm +1 -0
- novelwriter/assets/themes/default_light.conf +2 -2
- novelwriter/common.py +101 -3
- novelwriter/config.py +30 -17
- novelwriter/constants.py +189 -81
- novelwriter/core/buildsettings.py +74 -40
- novelwriter/core/coretools.py +146 -148
- novelwriter/core/docbuild.py +133 -171
- novelwriter/core/document.py +1 -1
- novelwriter/core/index.py +39 -38
- novelwriter/core/item.py +42 -9
- novelwriter/core/itemmodel.py +518 -0
- novelwriter/core/options.py +5 -2
- novelwriter/core/project.py +68 -90
- novelwriter/core/projectdata.py +8 -2
- novelwriter/core/projectxml.py +1 -1
- novelwriter/core/sessions.py +1 -1
- novelwriter/core/spellcheck.py +10 -15
- novelwriter/core/status.py +24 -8
- novelwriter/core/storage.py +1 -1
- novelwriter/core/tree.py +269 -288
- novelwriter/dialogs/about.py +1 -1
- novelwriter/dialogs/docmerge.py +8 -18
- novelwriter/dialogs/docsplit.py +1 -1
- novelwriter/dialogs/editlabel.py +1 -1
- novelwriter/dialogs/preferences.py +47 -34
- novelwriter/dialogs/projectsettings.py +149 -99
- novelwriter/dialogs/quotes.py +1 -1
- novelwriter/dialogs/wordlist.py +11 -10
- novelwriter/enum.py +37 -24
- novelwriter/error.py +2 -2
- novelwriter/extensions/configlayout.py +28 -13
- novelwriter/extensions/eventfilters.py +1 -1
- novelwriter/extensions/modified.py +30 -6
- novelwriter/extensions/novelselector.py +4 -3
- novelwriter/extensions/pagedsidebar.py +9 -9
- novelwriter/extensions/progressbars.py +4 -4
- novelwriter/extensions/statusled.py +3 -3
- novelwriter/extensions/switch.py +3 -3
- novelwriter/extensions/switchbox.py +1 -1
- novelwriter/extensions/versioninfo.py +1 -1
- novelwriter/formats/shared.py +156 -0
- novelwriter/formats/todocx.py +1191 -0
- novelwriter/formats/tohtml.py +454 -0
- novelwriter/{core → formats}/tokenizer.py +497 -495
- novelwriter/formats/tomarkdown.py +218 -0
- novelwriter/{core → formats}/toodt.py +312 -433
- novelwriter/formats/toqdoc.py +486 -0
- novelwriter/formats/toraw.py +91 -0
- novelwriter/gui/doceditor.py +347 -287
- novelwriter/gui/dochighlight.py +97 -85
- novelwriter/gui/docviewer.py +90 -33
- novelwriter/gui/docviewerpanel.py +18 -26
- novelwriter/gui/editordocument.py +18 -3
- novelwriter/gui/itemdetails.py +27 -29
- novelwriter/gui/mainmenu.py +130 -64
- novelwriter/gui/noveltree.py +46 -48
- novelwriter/gui/outline.py +202 -256
- novelwriter/gui/projtree.py +590 -1238
- novelwriter/gui/search.py +11 -19
- novelwriter/gui/sidebar.py +8 -7
- novelwriter/gui/statusbar.py +20 -3
- novelwriter/gui/theme.py +11 -6
- novelwriter/guimain.py +101 -201
- novelwriter/shared.py +67 -28
- novelwriter/text/counting.py +3 -1
- novelwriter/text/patterns.py +169 -61
- novelwriter/tools/dictionaries.py +3 -3
- novelwriter/tools/lipsum.py +1 -1
- novelwriter/tools/manusbuild.py +15 -13
- novelwriter/tools/manuscript.py +121 -79
- novelwriter/tools/manussettings.py +424 -291
- novelwriter/tools/noveldetails.py +1 -1
- novelwriter/tools/welcome.py +6 -6
- novelwriter/tools/writingstats.py +4 -4
- novelwriter/types.py +25 -9
- novelwriter/core/tohtml.py +0 -530
- novelwriter/core/tomarkdown.py +0 -252
- novelwriter/core/toqdoc.py +0 -419
- {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/LICENSE.md +0 -0
- {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/entry_points.txt +0 -0
- {novelWriter-2.5.2.dist-info → novelWriter-2.6.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ Created: 2019-05-05 [0.0.1] Tokenizer
|
|
7
7
|
Created: 2023-05-23 [2.1b1] HeadingFormatter
|
8
8
|
|
9
9
|
This file is a part of novelWriter
|
10
|
-
Copyright
|
10
|
+
Copyright (C) 2019 Veronica Berglyd Olsen and novelWriter contributors
|
11
11
|
|
12
12
|
This program is free software: you can redistribute it and/or modify
|
13
13
|
it under the terms of the GNU General Public License as published by
|
@@ -24,41 +24,56 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
24
24
|
"""
|
25
25
|
from __future__ import annotations
|
26
26
|
|
27
|
-
import json
|
28
27
|
import logging
|
29
28
|
import re
|
30
29
|
|
31
30
|
from abc import ABC, abstractmethod
|
32
|
-
from functools import partial
|
33
31
|
from pathlib import Path
|
34
|
-
from
|
32
|
+
from typing import NamedTuple
|
35
33
|
|
36
|
-
from PyQt5.QtCore import
|
37
|
-
from PyQt5.QtGui import QFont
|
34
|
+
from PyQt5.QtCore import QLocale
|
35
|
+
from PyQt5.QtGui import QColor, QFont
|
38
36
|
|
39
37
|
from novelwriter import CONFIG
|
40
|
-
from novelwriter.common import checkInt,
|
41
|
-
from novelwriter.constants import
|
38
|
+
from novelwriter.common import checkInt, fontMatcher, numberToRoman
|
39
|
+
from novelwriter.constants import (
|
40
|
+
nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwStats, nwStyles, nwUnicode,
|
41
|
+
trConst
|
42
|
+
)
|
42
43
|
from novelwriter.core.index import processComment
|
43
44
|
from novelwriter.core.project import NWProject
|
44
45
|
from novelwriter.enum import nwComment, nwItemLayout
|
45
|
-
from novelwriter.
|
46
|
+
from novelwriter.formats.shared import (
|
47
|
+
BlockFmt, BlockTyp, T_Block, T_Formats, T_Note, TextDocumentTheme, TextFmt
|
48
|
+
)
|
49
|
+
from novelwriter.text.patterns import REGEX_PATTERNS, DialogParser
|
46
50
|
|
47
51
|
logger = logging.getLogger(__name__)
|
48
52
|
|
49
|
-
ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
|
50
|
-
RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
|
51
53
|
|
52
|
-
|
53
|
-
T_Comment = tuple[str, T_Formats]
|
54
|
-
T_Token = tuple[int, int, str, T_Formats, int]
|
54
|
+
class ComStyle(NamedTuple):
|
55
55
|
|
56
|
+
label: str = ""
|
57
|
+
labelClass: str = ""
|
58
|
+
textClass: str = ""
|
56
59
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
60
|
+
|
61
|
+
COMMENT_STYLE = {
|
62
|
+
nwComment.PLAIN: ComStyle("Comment", "comment", "comment"),
|
63
|
+
nwComment.IGNORE: ComStyle(),
|
64
|
+
nwComment.SYNOPSIS: ComStyle("Synopsis", "modifier", "synopsis"),
|
65
|
+
nwComment.SHORT: ComStyle("Short Description", "modifier", "synopsis"),
|
66
|
+
nwComment.NOTE: ComStyle("Note", "modifier", "note"),
|
67
|
+
nwComment.FOOTNOTE: ComStyle("", "modifier", "note"),
|
68
|
+
nwComment.COMMENT: ComStyle(),
|
69
|
+
nwComment.STORY: ComStyle("", "modifier", "note"),
|
70
|
+
}
|
71
|
+
HEADINGS = [
|
72
|
+
BlockTyp.TITLE, BlockTyp.PART, BlockTyp.HEAD1,
|
73
|
+
BlockTyp.HEAD2, BlockTyp.HEAD3, BlockTyp.HEAD4,
|
74
|
+
]
|
75
|
+
SKIP_INDENT = HEADINGS + [BlockTyp.SEP, BlockTyp.SKIP]
|
76
|
+
B_EMPTY: T_Block = (BlockTyp.EMPTY, "", "", [], BlockFmt.NONE)
|
62
77
|
|
63
78
|
|
64
79
|
class Tokenizer(ABC):
|
@@ -70,119 +85,71 @@ class Tokenizer(ABC):
|
|
70
85
|
subclasses.
|
71
86
|
"""
|
72
87
|
|
73
|
-
# In-Text Format
|
74
|
-
FMT_B_B = 1 # Begin bold
|
75
|
-
FMT_B_E = 2 # End bold
|
76
|
-
FMT_I_B = 3 # Begin italics
|
77
|
-
FMT_I_E = 4 # End italics
|
78
|
-
FMT_D_B = 5 # Begin strikeout
|
79
|
-
FMT_D_E = 6 # End strikeout
|
80
|
-
FMT_U_B = 7 # Begin underline
|
81
|
-
FMT_U_E = 8 # End underline
|
82
|
-
FMT_M_B = 9 # Begin mark
|
83
|
-
FMT_M_E = 10 # End mark
|
84
|
-
FMT_SUP_B = 11 # Begin superscript
|
85
|
-
FMT_SUP_E = 12 # End superscript
|
86
|
-
FMT_SUB_B = 13 # Begin subscript
|
87
|
-
FMT_SUB_E = 14 # End subscript
|
88
|
-
FMT_DL_B = 15 # Begin dialogue
|
89
|
-
FMT_DL_E = 16 # End dialogue
|
90
|
-
FMT_ADL_B = 17 # Begin alt dialogue
|
91
|
-
FMT_ADL_E = 18 # End alt dialogue
|
92
|
-
FMT_FNOTE = 19 # Footnote marker
|
93
|
-
FMT_STRIP = 20 # Strip the format code
|
94
|
-
|
95
|
-
# Block Type
|
96
|
-
T_EMPTY = 1 # Empty line (new paragraph)
|
97
|
-
T_SYNOPSIS = 2 # Synopsis comment
|
98
|
-
T_SHORT = 3 # Short description comment
|
99
|
-
T_COMMENT = 4 # Comment line
|
100
|
-
T_KEYWORD = 5 # Command line
|
101
|
-
T_TITLE = 6 # Title
|
102
|
-
T_HEAD1 = 7 # Heading 1
|
103
|
-
T_HEAD2 = 8 # Heading 2
|
104
|
-
T_HEAD3 = 9 # Heading 3
|
105
|
-
T_HEAD4 = 10 # Heading 4
|
106
|
-
T_TEXT = 11 # Text line
|
107
|
-
T_SEP = 12 # Scene separator
|
108
|
-
T_SKIP = 13 # Paragraph break
|
109
|
-
|
110
|
-
# Block Style
|
111
|
-
A_NONE = 0x0000 # No special style
|
112
|
-
A_LEFT = 0x0001 # Left aligned
|
113
|
-
A_RIGHT = 0x0002 # Right aligned
|
114
|
-
A_CENTRE = 0x0004 # Centred
|
115
|
-
A_JUSTIFY = 0x0008 # Justified
|
116
|
-
A_PBB = 0x0010 # Page break before
|
117
|
-
A_PBA = 0x0020 # Page break after
|
118
|
-
A_Z_TOPMRG = 0x0040 # Zero top margin
|
119
|
-
A_Z_BTMMRG = 0x0080 # Zero bottom margin
|
120
|
-
A_IND_L = 0x0100 # Left indentation
|
121
|
-
A_IND_R = 0x0200 # Right indentation
|
122
|
-
A_IND_T = 0x0400 # Text indentation
|
123
|
-
|
124
|
-
# Masks
|
125
|
-
M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY
|
126
|
-
|
127
|
-
# Lookups
|
128
|
-
L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
|
129
|
-
L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
|
130
|
-
L_SUMMARY = [T_SYNOPSIS, T_SHORT]
|
131
|
-
|
132
88
|
def __init__(self, project: NWProject) -> None:
|
133
89
|
|
134
90
|
self._project = project
|
135
91
|
|
136
92
|
# Data Variables
|
137
|
-
self._text
|
138
|
-
self._handle
|
139
|
-
self.
|
140
|
-
self.
|
141
|
-
|
142
|
-
#
|
143
|
-
self.
|
144
|
-
self._footnotes: dict[str,
|
145
|
-
|
146
|
-
#
|
93
|
+
self._text = "" # The raw text to be tokenized
|
94
|
+
self._handle = None # The item handle currently being processed
|
95
|
+
self._keepRaw = False # Whether to keep the raw text, used by ToRaw
|
96
|
+
self._noTokens = False # Disable tokenization if they're not needed
|
97
|
+
|
98
|
+
# Blocks and Meta Data (Per Document)
|
99
|
+
self._blocks: list[T_Block] = []
|
100
|
+
self._footnotes: dict[str, T_Note] = {}
|
101
|
+
|
102
|
+
# Blocks and Meta Data (Per Instance)
|
103
|
+
self._raw: list[str] = []
|
104
|
+
self._pages: list[str] = []
|
147
105
|
self._counts: dict[str, int] = {}
|
148
106
|
self._outline: dict[str, str] = {}
|
149
|
-
self._markdown: list[str] = []
|
150
107
|
|
151
108
|
# User Settings
|
109
|
+
self._dLocale = CONFIG.locale # The document locale
|
152
110
|
self._textFont = QFont("Serif", 11) # Output text font
|
153
|
-
self._lineHeight = 1.15
|
154
|
-
self.
|
155
|
-
self.
|
156
|
-
self.
|
157
|
-
self.
|
158
|
-
self.
|
159
|
-
self.
|
160
|
-
self.
|
161
|
-
self.
|
162
|
-
self.
|
163
|
-
self.
|
164
|
-
self.
|
111
|
+
self._lineHeight = 1.15 # Line height in units of em
|
112
|
+
self._colorHeads = True # Colourise headings
|
113
|
+
self._scaleHeads = True # Scale headings to larger font size
|
114
|
+
self._boldHeads = True # Bold headings
|
115
|
+
self._blockIndent = 4.00 # Block indent in units of em
|
116
|
+
self._firstIndent = False # Enable first line indent
|
117
|
+
self._firstWidth = 1.40 # First line indent in units of em
|
118
|
+
self._indentFirst = False # Indent first paragraph
|
119
|
+
self._doJustify = False # Justify text
|
120
|
+
self._doBodyText = True # Include body text
|
121
|
+
self._doSynopsis = False # Also process synopsis comments
|
122
|
+
self._doComments = False # Also process comments
|
123
|
+
self._doKeywords = False # Also process keywords like tags and references
|
124
|
+
self._keepBreaks = True # Keep line breaks in paragraphs
|
125
|
+
self._defaultAlign = "left" # The default text alignment
|
126
|
+
|
127
|
+
self._skipKeywords: set[str] = set() # Keywords to ignore
|
128
|
+
|
129
|
+
# Other Settings
|
130
|
+
self._theme = TextDocumentTheme()
|
131
|
+
self._classes: dict[str, QColor] = {}
|
165
132
|
|
166
133
|
# Margins
|
167
|
-
self._marginTitle =
|
168
|
-
self._marginHead1 =
|
169
|
-
self._marginHead2 =
|
170
|
-
self._marginHead3 =
|
171
|
-
self._marginHead4 =
|
172
|
-
self._marginText =
|
173
|
-
self._marginMeta =
|
174
|
-
self._marginFoot =
|
175
|
-
self._marginSep =
|
134
|
+
self._marginTitle = nwStyles.T_MARGIN["H0"]
|
135
|
+
self._marginHead1 = nwStyles.T_MARGIN["H1"]
|
136
|
+
self._marginHead2 = nwStyles.T_MARGIN["H2"]
|
137
|
+
self._marginHead3 = nwStyles.T_MARGIN["H3"]
|
138
|
+
self._marginHead4 = nwStyles.T_MARGIN["H4"]
|
139
|
+
self._marginText = nwStyles.T_MARGIN["TT"]
|
140
|
+
self._marginMeta = nwStyles.T_MARGIN["MT"]
|
141
|
+
self._marginFoot = nwStyles.T_MARGIN["FT"]
|
142
|
+
self._marginSep = nwStyles.T_MARGIN["SP"]
|
176
143
|
|
177
144
|
# Title Formats
|
178
|
-
self.
|
145
|
+
self._fmtPart = nwHeadFmt.TITLE # Formatting for partitions
|
179
146
|
self._fmtChapter = nwHeadFmt.TITLE # Formatting for numbered chapters
|
180
147
|
self._fmtUnNum = nwHeadFmt.TITLE # Formatting for unnumbered chapters
|
181
148
|
self._fmtScene = nwHeadFmt.TITLE # Formatting for scenes
|
182
149
|
self._fmtHScene = nwHeadFmt.TITLE # Formatting for hard scenes
|
183
150
|
self._fmtSection = nwHeadFmt.TITLE # Formatting for sections
|
184
151
|
|
185
|
-
self.
|
152
|
+
self._hidePart = False # Do not include partition headings
|
186
153
|
self._hideChapter = False # Do not include chapter headings
|
187
154
|
self._hideUnNum = False # Do not include unnumbered headings
|
188
155
|
self._hideScene = False # Do not include scene headings
|
@@ -191,15 +158,16 @@ class Tokenizer(ABC):
|
|
191
158
|
|
192
159
|
self._linkHeadings = False # Add an anchor before headings
|
193
160
|
|
194
|
-
self._titleStyle =
|
195
|
-
self.
|
196
|
-
self.
|
161
|
+
self._titleStyle = BlockFmt.CENTRE | BlockFmt.PBB
|
162
|
+
self._partStyle = BlockFmt.CENTRE | BlockFmt.PBB
|
163
|
+
self._chapterStyle = BlockFmt.PBB
|
164
|
+
self._sceneStyle = BlockFmt.NONE
|
197
165
|
|
198
166
|
# Instance Variables
|
199
167
|
self._hFormatter = HeadingFormatter(self._project)
|
200
168
|
self._noSep = True # Flag to indicate that we don't want a scene separator
|
201
169
|
self._noIndent = False # Flag to disable text indent on next paragraph
|
202
|
-
self.
|
170
|
+
self._breakNext = False # Add a page break on next token
|
203
171
|
|
204
172
|
# This File
|
205
173
|
self._isNovel = False # Document is a novel document
|
@@ -210,31 +178,33 @@ class Tokenizer(ABC):
|
|
210
178
|
|
211
179
|
# Function Mapping
|
212
180
|
self._localLookup = self._project.localLookup
|
213
|
-
self.tr = partial(QCoreApplication.translate, "Tokenizer")
|
214
181
|
|
215
182
|
# Format RegEx
|
216
183
|
self._rxMarkdown = [
|
217
|
-
(REGEX_PATTERNS.markdownItalic, [0,
|
218
|
-
(REGEX_PATTERNS.markdownBold, [0,
|
219
|
-
(REGEX_PATTERNS.markdownStrike, [0,
|
184
|
+
(REGEX_PATTERNS.markdownItalic, [0, TextFmt.I_B, 0, TextFmt.I_E]),
|
185
|
+
(REGEX_PATTERNS.markdownBold, [0, TextFmt.B_B, 0, TextFmt.B_E]),
|
186
|
+
(REGEX_PATTERNS.markdownStrike, [0, TextFmt.D_B, 0, TextFmt.D_E]),
|
220
187
|
]
|
221
|
-
self._rxShortCodes = REGEX_PATTERNS.shortcodePlain
|
222
|
-
self._rxShortCodeVals = REGEX_PATTERNS.shortcodeValue
|
223
188
|
|
224
189
|
self._shortCodeFmt = {
|
225
|
-
nwShortcode.ITALIC_O:
|
226
|
-
nwShortcode.BOLD_O:
|
227
|
-
nwShortcode.STRIKE_O:
|
228
|
-
nwShortcode.ULINE_O:
|
229
|
-
nwShortcode.MARK_O:
|
230
|
-
nwShortcode.SUP_O:
|
231
|
-
nwShortcode.SUB_O:
|
190
|
+
nwShortcode.ITALIC_O: TextFmt.I_B, nwShortcode.ITALIC_C: TextFmt.I_E,
|
191
|
+
nwShortcode.BOLD_O: TextFmt.B_B, nwShortcode.BOLD_C: TextFmt.B_E,
|
192
|
+
nwShortcode.STRIKE_O: TextFmt.D_B, nwShortcode.STRIKE_C: TextFmt.D_E,
|
193
|
+
nwShortcode.ULINE_O: TextFmt.U_B, nwShortcode.ULINE_C: TextFmt.U_E,
|
194
|
+
nwShortcode.MARK_O: TextFmt.M_B, nwShortcode.MARK_C: TextFmt.M_E,
|
195
|
+
nwShortcode.SUP_O: TextFmt.SUP_B, nwShortcode.SUP_C: TextFmt.SUP_E,
|
196
|
+
nwShortcode.SUB_O: TextFmt.SUB_B, nwShortcode.SUB_C: TextFmt.SUB_E,
|
232
197
|
}
|
233
198
|
self._shortCodeVals = {
|
234
|
-
nwShortcode.FOOTNOTE_B:
|
199
|
+
nwShortcode.FOOTNOTE_B: TextFmt.FNOTE,
|
200
|
+
nwShortcode.FIELD_B: TextFmt.FIELD,
|
235
201
|
}
|
236
202
|
|
237
|
-
|
203
|
+
# Dialogue
|
204
|
+
self._hlightDialog = False
|
205
|
+
self._rxAltDialog = REGEX_PATTERNS.altDialogStyle
|
206
|
+
self._dialogParser = DialogParser()
|
207
|
+
self._dialogParser.initParser()
|
238
208
|
|
239
209
|
return
|
240
210
|
|
@@ -242,16 +212,6 @@ class Tokenizer(ABC):
|
|
242
212
|
# Properties
|
243
213
|
##
|
244
214
|
|
245
|
-
@property
|
246
|
-
def result(self) -> str:
|
247
|
-
"""The result of the build process."""
|
248
|
-
return self._result
|
249
|
-
|
250
|
-
@property
|
251
|
-
def allMarkdown(self) -> list[str]:
|
252
|
-
"""The combined novelWriter Markdown text."""
|
253
|
-
return self._markdown
|
254
|
-
|
255
215
|
@property
|
256
216
|
def textStats(self) -> dict[str, int]:
|
257
217
|
"""The collected stats about the text."""
|
@@ -271,10 +231,21 @@ class Tokenizer(ABC):
|
|
271
231
|
# Setters
|
272
232
|
##
|
273
233
|
|
274
|
-
def
|
275
|
-
"""Set
|
276
|
-
|
277
|
-
|
234
|
+
def setLanguage(self, language: str | None) -> None:
|
235
|
+
"""Set language for the document."""
|
236
|
+
if language:
|
237
|
+
self._dLocale = QLocale(language)
|
238
|
+
return
|
239
|
+
|
240
|
+
def setTheme(self, theme: TextDocumentTheme) -> None:
|
241
|
+
"""Set the document colour theme."""
|
242
|
+
self._theme = theme
|
243
|
+
return
|
244
|
+
|
245
|
+
def setPartitionFormat(self, hFormat: str, hide: bool = False) -> None:
|
246
|
+
"""Set the partition format pattern."""
|
247
|
+
self._fmtPart = hFormat.strip()
|
248
|
+
self._hidePart = hide
|
278
249
|
return
|
279
250
|
|
280
251
|
def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
|
@@ -309,28 +280,31 @@ class Tokenizer(ABC):
|
|
309
280
|
|
310
281
|
def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
|
311
282
|
"""Set the title heading style."""
|
312
|
-
self._titleStyle =
|
313
|
-
|
314
|
-
|
283
|
+
self._titleStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
284
|
+
self._titleStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
285
|
+
return
|
286
|
+
|
287
|
+
def setPartitionStyle(self, center: bool, pageBreak: bool) -> None:
|
288
|
+
"""Set the partition heading style."""
|
289
|
+
self._partStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
290
|
+
self._partStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
315
291
|
return
|
316
292
|
|
317
293
|
def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
|
318
294
|
"""Set the chapter heading style."""
|
319
|
-
self._chapterStyle =
|
320
|
-
|
321
|
-
)
|
295
|
+
self._chapterStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
296
|
+
self._chapterStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
322
297
|
return
|
323
298
|
|
324
299
|
def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
|
325
300
|
"""Set the scene heading style."""
|
326
|
-
self._sceneStyle =
|
327
|
-
|
328
|
-
)
|
301
|
+
self._sceneStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
302
|
+
self._sceneStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
329
303
|
return
|
330
304
|
|
331
|
-
def
|
305
|
+
def setTextFont(self, font: QFont) -> None:
|
332
306
|
"""Set the build font."""
|
333
|
-
self._textFont = font
|
307
|
+
self._textFont = fontMatcher(font)
|
334
308
|
return
|
335
309
|
|
336
310
|
def setLineHeight(self, height: float) -> None:
|
@@ -338,6 +312,13 @@ class Tokenizer(ABC):
|
|
338
312
|
self._lineHeight = min(max(float(height), 0.5), 5.0)
|
339
313
|
return
|
340
314
|
|
315
|
+
def setHeadingStyles(self, color: bool, scale: bool, bold: bool) -> None:
|
316
|
+
"""Set text style for headings."""
|
317
|
+
self._colorHeads = color
|
318
|
+
self._scaleHeads = scale
|
319
|
+
self._boldHeads = bold
|
320
|
+
return
|
321
|
+
|
341
322
|
def setBlockIndent(self, indent: float) -> None:
|
342
323
|
"""Set the block indent between 0.0 and 10.0."""
|
343
324
|
self._blockIndent = min(max(float(indent), 0.0), 10.0)
|
@@ -357,27 +338,9 @@ class Tokenizer(ABC):
|
|
357
338
|
self._doJustify = state
|
358
339
|
return
|
359
340
|
|
360
|
-
def
|
341
|
+
def setDialogHighlight(self, state: bool) -> None:
|
361
342
|
"""Enable or disable dialogue highlighting."""
|
362
|
-
self.
|
363
|
-
self._showDialog = state
|
364
|
-
if state:
|
365
|
-
if CONFIG.dialogStyle > 0:
|
366
|
-
self._rxDialogue.append((
|
367
|
-
REGEX_PATTERNS.dialogStyle, self.FMT_DL_B, self.FMT_DL_E
|
368
|
-
))
|
369
|
-
if CONFIG.dialogLine:
|
370
|
-
self._rxDialogue.append((
|
371
|
-
REGEX_PATTERNS.dialogLine, self.FMT_DL_B, self.FMT_DL_E
|
372
|
-
))
|
373
|
-
if CONFIG.narratorBreak:
|
374
|
-
self._rxDialogue.append((
|
375
|
-
REGEX_PATTERNS.narratorBreak, self.FMT_DL_E, self.FMT_DL_B
|
376
|
-
))
|
377
|
-
if CONFIG.altDialogOpen and CONFIG.altDialogClose:
|
378
|
-
self._rxDialogue.append((
|
379
|
-
REGEX_PATTERNS.altDialogStyle, self.FMT_ADL_B, self.FMT_ADL_E
|
380
|
-
))
|
343
|
+
self._hlightDialog = state
|
381
344
|
return
|
382
345
|
|
383
346
|
def setTitleMargins(self, upper: float, lower: float) -> None:
|
@@ -455,11 +418,6 @@ class Tokenizer(ABC):
|
|
455
418
|
self._keepBreaks = state
|
456
419
|
return
|
457
420
|
|
458
|
-
def setKeepMarkdown(self, state: bool) -> None:
|
459
|
-
"""Keep original markdown during build."""
|
460
|
-
self._keepMD = state
|
461
|
-
return
|
462
|
-
|
463
421
|
##
|
464
422
|
# Class Methods
|
465
423
|
##
|
@@ -468,27 +426,54 @@ class Tokenizer(ABC):
|
|
468
426
|
def doConvert(self) -> None:
|
469
427
|
raise NotImplementedError
|
470
428
|
|
429
|
+
@abstractmethod
|
430
|
+
def closeDocument(self) -> None:
|
431
|
+
raise NotImplementedError
|
432
|
+
|
433
|
+
@abstractmethod
|
434
|
+
def saveDocument(self, path: Path) -> None:
|
435
|
+
raise NotImplementedError
|
436
|
+
|
437
|
+
def initDocument(self) -> None:
|
438
|
+
"""Initialise data after settings."""
|
439
|
+
self._classes["modifier"] = self._theme.modifier
|
440
|
+
self._classes["synopsis"] = self._theme.note
|
441
|
+
self._classes["comment"] = self._theme.comment
|
442
|
+
self._classes["dialog"] = self._theme.dialog
|
443
|
+
self._classes["altdialog"] = self._theme.altdialog
|
444
|
+
self._classes["tag"] = self._theme.tag
|
445
|
+
self._classes["keyword"] = self._theme.keyword
|
446
|
+
self._classes["optional"] = self._theme.optional
|
447
|
+
return
|
448
|
+
|
449
|
+
def setBreakNext(self) -> None:
|
450
|
+
"""Set a page break for next block."""
|
451
|
+
self._breakNext = True
|
452
|
+
return
|
453
|
+
|
471
454
|
def addRootHeading(self, tHandle: str) -> None:
|
472
455
|
"""Add a heading at the start of a new root folder."""
|
473
456
|
self._text = ""
|
474
457
|
self._handle = None
|
475
458
|
|
476
|
-
if (
|
459
|
+
if (item := self._project.tree[tHandle]) and item.isRootType():
|
477
460
|
self._handle = tHandle
|
461
|
+
style = BlockFmt.CENTRE
|
478
462
|
if self._isFirst:
|
479
|
-
textAlign = self.A_CENTRE
|
480
463
|
self._isFirst = False
|
481
464
|
else:
|
482
|
-
|
465
|
+
style |= BlockFmt.PBB
|
483
466
|
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
467
|
+
title = item.itemName
|
468
|
+
if not item.isNovelLike():
|
469
|
+
notes = self._localLookup("Notes")
|
470
|
+
title = f"{notes}: {title}"
|
471
|
+
|
472
|
+
self._blocks = [(
|
473
|
+
BlockTyp.TITLE, f"{self._handle}:T0001", title, [], style
|
474
|
+
)]
|
475
|
+
if self._keepRaw:
|
476
|
+
self._raw.append(f"#! {title}\n\n")
|
492
477
|
|
493
478
|
return
|
494
479
|
|
@@ -505,20 +490,14 @@ class Tokenizer(ABC):
|
|
505
490
|
return
|
506
491
|
|
507
492
|
def doPreProcessing(self) -> None:
|
508
|
-
"""Run
|
493
|
+
"""Run pre-processing jobs before the text is tokenized."""
|
509
494
|
# Process the user's auto-replace dictionary
|
510
|
-
autoReplace
|
511
|
-
if len(autoReplace) > 0:
|
495
|
+
if autoReplace := self._project.data.autoReplace:
|
512
496
|
repDict = {}
|
513
497
|
for aKey, aVal in autoReplace.items():
|
514
498
|
repDict[f"<{aKey}>"] = aVal
|
515
499
|
xRep = re.compile("|".join([re.escape(k) for k in repDict.keys()]), flags=re.DOTALL)
|
516
500
|
self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
|
517
|
-
|
518
|
-
# Process the character translation map
|
519
|
-
trDict = {nwUnicode.U_MAPOS: nwUnicode.U_RSQUO}
|
520
|
-
self._text = self._text.translate(str.maketrans(trDict))
|
521
|
-
|
522
501
|
return
|
523
502
|
|
524
503
|
def tokenizeText(self) -> None:
|
@@ -526,42 +505,62 @@ class Tokenizer(ABC):
|
|
526
505
|
characters that indicate headings, comments, commands etc, or
|
527
506
|
just contain plain text. In the case of plain text, apply the
|
528
507
|
same RegExes that the syntax highlighter uses and save the
|
529
|
-
locations of these formatting tags into the
|
508
|
+
locations of these formatting tags into the blocks list.
|
530
509
|
|
531
|
-
The format of the
|
510
|
+
The format of the blocks list is an entry with a five-tuple for
|
532
511
|
each line in the file. The tuple is as follows:
|
533
|
-
1: The type of the block,
|
512
|
+
1: The type of the block, BlockTyp.*
|
534
513
|
2: The heading number under which the text is placed
|
535
514
|
3: The text content of the block, without leading tags
|
536
|
-
4: The internal formatting map of the text,
|
537
|
-
5: The
|
515
|
+
4: The internal formatting map of the text, TextFmt.*
|
516
|
+
5: The formats of the block, BlockFmt.*
|
538
517
|
"""
|
518
|
+
if self._keepRaw:
|
519
|
+
self._raw.append(f"{self._text.rstrip()}\n\n")
|
520
|
+
if self._noTokens:
|
521
|
+
return
|
539
522
|
if self._isNovel:
|
540
523
|
self._hFormatter.setHandle(self._handle)
|
541
524
|
|
525
|
+
# Cache Flags
|
526
|
+
isNovel = self._isNovel
|
527
|
+
doJustify = self._doJustify
|
528
|
+
keepBreaks = self._keepBreaks
|
529
|
+
indentFirst = self._indentFirst
|
530
|
+
firstIndent = self._firstIndent
|
531
|
+
|
532
|
+
# Replace all instances of [br] with a placeholder character
|
533
|
+
text = REGEX_PATTERNS.lineBreak.sub(nwUnicode.U_NAC2, self._text)
|
534
|
+
|
535
|
+
# Translation Maps
|
536
|
+
transMapA = str.maketrans({
|
537
|
+
nwUnicode.U_NAC2: "", # Used when [br] is ignored
|
538
|
+
nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
|
539
|
+
nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
|
540
|
+
})
|
541
|
+
transMapB = str.maketrans({
|
542
|
+
nwUnicode.U_NAC2: "\n", # Used when [br] is not ignored
|
543
|
+
nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
|
544
|
+
nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
|
545
|
+
})
|
546
|
+
|
542
547
|
nHead = 0
|
543
|
-
breakNext = False
|
544
|
-
tmpMarkdown = []
|
545
548
|
tHandle = self._handle or ""
|
546
|
-
|
547
|
-
for
|
549
|
+
tBlocks: list[T_Block] = [B_EMPTY]
|
550
|
+
for bLine in text.splitlines():
|
551
|
+
aLine = bLine.translate(transMapA)
|
548
552
|
sLine = aLine.strip().lower()
|
549
553
|
|
550
554
|
# Check for blank lines
|
551
|
-
if
|
552
|
-
|
553
|
-
self.T_EMPTY, nHead, "", [], self.A_NONE
|
554
|
-
))
|
555
|
-
if self._keepMD:
|
556
|
-
tmpMarkdown.append("\n")
|
557
|
-
|
555
|
+
if not sLine:
|
556
|
+
tBlocks.append(B_EMPTY)
|
558
557
|
continue
|
559
558
|
|
560
|
-
if
|
561
|
-
|
562
|
-
|
559
|
+
if self._breakNext:
|
560
|
+
tStyle = BlockFmt.PBB
|
561
|
+
self._breakNext = False
|
563
562
|
else:
|
564
|
-
|
563
|
+
tStyle = BlockFmt.NONE
|
565
564
|
|
566
565
|
# Check Line Format
|
567
566
|
# =================
|
@@ -574,24 +573,24 @@ class Tokenizer(ABC):
|
|
574
573
|
# therefore proceed to check other formats.
|
575
574
|
|
576
575
|
if sLine in ("[newpage]", "[new page]"):
|
577
|
-
|
576
|
+
self._breakNext = True
|
578
577
|
continue
|
579
578
|
|
580
579
|
elif sLine == "[vspace]":
|
581
|
-
|
582
|
-
(
|
580
|
+
tBlocks.append(
|
581
|
+
(BlockTyp.SKIP, "", "", [], tStyle)
|
583
582
|
)
|
584
583
|
continue
|
585
584
|
|
586
585
|
elif sLine.startswith("[vspace:") and sLine.endswith("]"):
|
587
586
|
nSkip = checkInt(sLine[8:-1], 0)
|
588
587
|
if nSkip >= 1:
|
589
|
-
|
590
|
-
(
|
588
|
+
tBlocks.append(
|
589
|
+
(BlockTyp.SKIP, "", "", [], tStyle)
|
591
590
|
)
|
592
591
|
if nSkip > 1:
|
593
|
-
|
594
|
-
(
|
592
|
+
tBlocks += (nSkip - 1) * [
|
593
|
+
(BlockTyp.SKIP, "", "", [], BlockFmt.NONE)
|
595
594
|
]
|
596
595
|
continue
|
597
596
|
|
@@ -605,32 +604,24 @@ class Tokenizer(ABC):
|
|
605
604
|
continue
|
606
605
|
|
607
606
|
cStyle, cKey, cText, _, _ = processComment(aLine)
|
608
|
-
if cStyle
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
607
|
+
if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT) and not self._doSynopsis:
|
608
|
+
continue
|
609
|
+
if cStyle == nwComment.PLAIN and not self._doComments:
|
610
|
+
continue
|
611
|
+
|
612
|
+
if doJustify and not tStyle & BlockFmt.ALIGNED:
|
613
|
+
tStyle |= BlockFmt.JUSTIFY
|
614
|
+
|
615
|
+
if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT, nwComment.PLAIN):
|
616
|
+
bStyle = COMMENT_STYLE[cStyle]
|
617
|
+
tLine, tFmt = self._formatComment(bStyle, cKey, cText)
|
618
|
+
tBlocks.append((
|
619
|
+
BlockTyp.COMMENT, "", tLine, tFmt, tStyle
|
619
620
|
))
|
620
|
-
|
621
|
-
tmpMarkdown.append(f"{aLine}\n")
|
621
|
+
|
622
622
|
elif cStyle == nwComment.FOOTNOTE:
|
623
|
-
tLine, tFmt = self._extractFormats(cText, skip=
|
623
|
+
tLine, tFmt = self._extractFormats(cText, skip=TextFmt.FNOTE)
|
624
624
|
self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
|
625
|
-
if self._keepMD:
|
626
|
-
tmpMarkdown.append(f"{aLine}\n")
|
627
|
-
else:
|
628
|
-
tLine, tFmt = self._extractFormats(cText)
|
629
|
-
tokens.append((
|
630
|
-
self.T_COMMENT, nHead, tLine, tFmt, sAlign
|
631
|
-
))
|
632
|
-
if self._doComments and self._keepMD:
|
633
|
-
tmpMarkdown.append(f"{aLine}\n")
|
634
625
|
|
635
626
|
elif aLine.startswith("@"):
|
636
627
|
# Keywords
|
@@ -638,16 +629,12 @@ class Tokenizer(ABC):
|
|
638
629
|
# Only valid keyword lines are parsed, and any ignored keywords
|
639
630
|
# are automatically skipped.
|
640
631
|
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
|
648
|
-
))
|
649
|
-
if self._doKeywords and self._keepMD:
|
650
|
-
tmpMarkdown.append(f"{aLine}\n")
|
632
|
+
if self._doKeywords:
|
633
|
+
tTag, tLine, tFmt = self._formatMeta(aLine)
|
634
|
+
if tLine:
|
635
|
+
tBlocks.append((
|
636
|
+
BlockTyp.KEYWORD, tTag[1:], tLine, tFmt, tStyle
|
637
|
+
))
|
651
638
|
|
652
639
|
elif aLine.startswith(("# ", "#! ")):
|
653
640
|
# Title or Partition Headings
|
@@ -662,28 +649,27 @@ class Tokenizer(ABC):
|
|
662
649
|
|
663
650
|
nHead += 1
|
664
651
|
tText = aLine[2:].strip()
|
665
|
-
tType =
|
666
|
-
|
667
|
-
|
668
|
-
|
652
|
+
tType = BlockTyp.HEAD1 if isPlain else BlockTyp.TITLE
|
653
|
+
sHide = self._hidePart if isPlain else False
|
654
|
+
if not (isPlain or isNovel and sHide):
|
655
|
+
tStyle |= self._titleStyle
|
656
|
+
if isNovel:
|
657
|
+
tType = BlockTyp.PART if isPlain else BlockTyp.TITLE
|
669
658
|
if sHide:
|
670
659
|
tText = ""
|
671
|
-
tType =
|
672
|
-
tStyle = self.A_NONE
|
660
|
+
tType = BlockTyp.EMPTY
|
673
661
|
elif isPlain:
|
674
|
-
tText = self._hFormatter.apply(self.
|
675
|
-
tStyle
|
662
|
+
tText = self._hFormatter.apply(self._fmtPart, tText, nHead)
|
663
|
+
tStyle |= self._partStyle
|
676
664
|
if isPlain:
|
677
665
|
self._hFormatter.resetScene()
|
678
666
|
else:
|
679
667
|
self._hFormatter.resetAll()
|
680
668
|
self._noSep = True
|
681
669
|
|
682
|
-
|
683
|
-
tType, nHead, tText, [], tStyle
|
670
|
+
tBlocks.append((
|
671
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
684
672
|
))
|
685
|
-
if self._keepMD:
|
686
|
-
tmpMarkdown.append(f"{aLine}\n")
|
687
673
|
|
688
674
|
elif aLine.startswith(("## ", "##! ")):
|
689
675
|
# (Unnumbered) Chapter Headings
|
@@ -698,27 +684,25 @@ class Tokenizer(ABC):
|
|
698
684
|
|
699
685
|
nHead += 1
|
700
686
|
tText = aLine[3:].strip()
|
701
|
-
tType =
|
702
|
-
tStyle = self.A_NONE
|
687
|
+
tType = BlockTyp.HEAD2
|
703
688
|
sHide = self._hideChapter if isPlain else self._hideUnNum
|
704
689
|
tFormat = self._fmtChapter if isPlain else self._fmtUnNum
|
705
|
-
if
|
690
|
+
if isNovel:
|
691
|
+
tType = BlockTyp.HEAD1 # Promote
|
706
692
|
if isPlain:
|
707
693
|
self._hFormatter.incChapter()
|
708
694
|
if sHide:
|
709
695
|
tText = ""
|
710
|
-
tType =
|
696
|
+
tType = BlockTyp.EMPTY
|
711
697
|
else:
|
712
698
|
tText = self._hFormatter.apply(tFormat, tText, nHead)
|
713
|
-
tStyle
|
699
|
+
tStyle |= self._chapterStyle
|
714
700
|
self._hFormatter.resetScene()
|
715
701
|
self._noSep = True
|
716
702
|
|
717
|
-
|
718
|
-
tType, nHead, tText, [], tStyle
|
703
|
+
tBlocks.append((
|
704
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
719
705
|
))
|
720
|
-
if self._keepMD:
|
721
|
-
tmpMarkdown.append(f"{aLine}\n")
|
722
706
|
|
723
707
|
elif aLine.startswith(("### ", "###! ")):
|
724
708
|
# (Alternative) Scene Headings
|
@@ -735,31 +719,29 @@ class Tokenizer(ABC):
|
|
735
719
|
|
736
720
|
nHead += 1
|
737
721
|
tText = aLine[4:].strip()
|
738
|
-
tType =
|
739
|
-
tStyle = self.A_NONE
|
722
|
+
tType = BlockTyp.HEAD3
|
740
723
|
sHide = self._hideScene if isPlain else self._hideHScene
|
741
724
|
tFormat = self._fmtScene if isPlain else self._fmtHScene
|
742
|
-
if
|
725
|
+
if isNovel:
|
726
|
+
tType = BlockTyp.HEAD2 # Promote
|
743
727
|
self._hFormatter.incScene()
|
744
728
|
if sHide:
|
745
729
|
tText = ""
|
746
|
-
tType =
|
730
|
+
tType = BlockTyp.EMPTY
|
747
731
|
else:
|
748
732
|
tText = self._hFormatter.apply(tFormat, tText, nHead)
|
749
|
-
tStyle
|
733
|
+
tStyle |= self._sceneStyle
|
750
734
|
if tText == "": # Empty Format
|
751
|
-
tType =
|
735
|
+
tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SKIP
|
752
736
|
elif tText == tFormat: # Static Format
|
753
737
|
tText = "" if self._noSep else tText
|
754
|
-
tType =
|
755
|
-
tStyle
|
738
|
+
tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SEP
|
739
|
+
tStyle |= BlockFmt.NONE if self._noSep else BlockFmt.CENTRE
|
756
740
|
self._noSep = False
|
757
741
|
|
758
|
-
|
759
|
-
tType, nHead, tText, [], tStyle
|
742
|
+
tBlocks.append((
|
743
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
760
744
|
))
|
761
|
-
if self._keepMD:
|
762
|
-
tmpMarkdown.append(f"{aLine}\n")
|
763
745
|
|
764
746
|
elif aLine.startswith("#### "):
|
765
747
|
# Section Headings
|
@@ -771,25 +753,23 @@ class Tokenizer(ABC):
|
|
771
753
|
|
772
754
|
nHead += 1
|
773
755
|
tText = aLine[5:].strip()
|
774
|
-
tType =
|
775
|
-
|
776
|
-
|
756
|
+
tType = BlockTyp.HEAD4
|
757
|
+
if isNovel:
|
758
|
+
tType = BlockTyp.HEAD3 # Promote
|
777
759
|
if self._hideSection:
|
778
760
|
tText = ""
|
779
|
-
tType =
|
761
|
+
tType = BlockTyp.EMPTY
|
780
762
|
else:
|
781
763
|
tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
|
782
764
|
if tText == "": # Empty Format
|
783
|
-
tType =
|
765
|
+
tType = BlockTyp.SKIP
|
784
766
|
elif tText == self._fmtSection: # Static Format
|
785
|
-
tType =
|
786
|
-
tStyle
|
767
|
+
tType = BlockTyp.SEP
|
768
|
+
tStyle |= BlockFmt.CENTRE
|
787
769
|
|
788
|
-
|
789
|
-
tType, nHead, tText, [], tStyle
|
770
|
+
tBlocks.append((
|
771
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
790
772
|
))
|
791
|
-
if self._keepMD:
|
792
|
-
tmpMarkdown.append(f"{aLine}\n")
|
793
773
|
|
794
774
|
else:
|
795
775
|
# Text Lines
|
@@ -805,135 +785,133 @@ class Tokenizer(ABC):
|
|
805
785
|
alnRight = False
|
806
786
|
indLeft = False
|
807
787
|
indRight = False
|
808
|
-
if
|
788
|
+
if bLine.startswith(">>"):
|
809
789
|
alnRight = True
|
810
|
-
|
811
|
-
elif
|
790
|
+
bLine = bLine[2:].lstrip(" ")
|
791
|
+
elif bLine.startswith(">"):
|
812
792
|
indLeft = True
|
813
|
-
|
793
|
+
bLine = bLine[1:].lstrip(" ")
|
814
794
|
|
815
|
-
if
|
795
|
+
if bLine.endswith("<<"):
|
816
796
|
alnLeft = True
|
817
|
-
|
818
|
-
elif
|
797
|
+
bLine = bLine[:-2].rstrip(" ")
|
798
|
+
elif bLine.endswith("<"):
|
819
799
|
indRight = True
|
820
|
-
|
800
|
+
bLine = bLine[:-1].rstrip(" ")
|
821
801
|
|
822
802
|
if alnLeft and alnRight:
|
823
|
-
|
803
|
+
tStyle |= BlockFmt.CENTRE
|
824
804
|
elif alnLeft:
|
825
|
-
|
805
|
+
tStyle |= BlockFmt.LEFT
|
826
806
|
elif alnRight:
|
827
|
-
|
807
|
+
tStyle |= BlockFmt.RIGHT
|
828
808
|
|
829
809
|
if indLeft:
|
830
|
-
|
810
|
+
tStyle |= BlockFmt.IND_L
|
831
811
|
if indRight:
|
832
|
-
|
812
|
+
tStyle |= BlockFmt.IND_R
|
833
813
|
|
834
814
|
# Process formats
|
835
|
-
tLine, tFmt = self._extractFormats(
|
836
|
-
|
837
|
-
|
815
|
+
tLine, tFmt = self._extractFormats(bLine, hDialog=isNovel)
|
816
|
+
tBlocks.append((
|
817
|
+
BlockTyp.TEXT, "", tLine, tFmt, tStyle
|
838
818
|
))
|
839
|
-
if self._keepMD:
|
840
|
-
tmpMarkdown.append(f"{aLine}\n")
|
841
819
|
|
842
820
|
# If we have content, turn off the first page flag
|
843
|
-
if self._isFirst and
|
821
|
+
if self._isFirst and len(tBlocks) > 1:
|
844
822
|
self._isFirst = False # First document has been processed
|
845
823
|
|
846
|
-
# Make sure the
|
847
|
-
# on the very first
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
824
|
+
# Make sure the blocks array doesn't start with a page break
|
825
|
+
# on the very first block, adding a blank first page.
|
826
|
+
for n, cBlock in enumerate(tBlocks):
|
827
|
+
if cBlock[0] != BlockTyp.EMPTY:
|
828
|
+
if cBlock[4] & BlockFmt.PBB:
|
829
|
+
tBlocks[n] = (
|
830
|
+
cBlock[0], cBlock[1], cBlock[2], cBlock[3], cBlock[4] & ~BlockFmt.PBB
|
831
|
+
)
|
832
|
+
break
|
853
833
|
|
854
834
|
# Always add an empty line at the end of the file
|
855
|
-
|
856
|
-
self.T_EMPTY, nHead, "", [], self.A_NONE
|
857
|
-
))
|
858
|
-
if self._keepMD:
|
859
|
-
tmpMarkdown.append("\n")
|
860
|
-
self._markdown.append("".join(tmpMarkdown))
|
835
|
+
tBlocks.append(B_EMPTY)
|
861
836
|
|
862
837
|
# Second Pass
|
863
838
|
# ===========
|
864
839
|
# This second pass strips away consecutive blank lines, and
|
865
840
|
# combines consecutive text lines into the same paragraph.
|
866
841
|
# It also ensures that there isn't paragraph spacing between
|
867
|
-
# meta data lines for formats that
|
842
|
+
# meta data lines for formats that have spacing.
|
868
843
|
|
869
|
-
|
870
|
-
pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
|
871
|
-
nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
|
844
|
+
lineSep = "\n" if keepBreaks else " "
|
872
845
|
|
873
|
-
|
874
|
-
|
846
|
+
pLines: list[T_Block] = []
|
847
|
+
sBlocks: list[T_Block] = []
|
848
|
+
for n, cBlock in enumerate(tBlocks[1:-1], 1):
|
875
849
|
|
876
|
-
|
877
|
-
|
850
|
+
pBlock = tBlocks[n-1] # Look behind
|
851
|
+
nBlock = tBlocks[n+1] # Look ahead
|
878
852
|
|
879
|
-
if
|
880
|
-
pToken = tokens[n-1] # Look behind
|
881
|
-
if n < tCount - 1:
|
882
|
-
nToken = tokens[n+1] # Look ahead
|
883
|
-
|
884
|
-
if cToken[0] in self.L_SKIP_INDENT and not self._indentFirst:
|
853
|
+
if cBlock[0] in SKIP_INDENT and not indentFirst:
|
885
854
|
# Unless the indentFirst flag is set, we set up the next
|
886
855
|
# paragraph to not be indented if we see a block of a
|
887
856
|
# specific type
|
888
857
|
self._noIndent = True
|
889
858
|
|
890
|
-
if
|
859
|
+
if cBlock[0] == BlockTyp.EMPTY:
|
891
860
|
# We don't need to keep the empty lines after this pass
|
892
861
|
pass
|
893
862
|
|
894
|
-
elif
|
863
|
+
elif cBlock[0] == BlockTyp.KEYWORD:
|
895
864
|
# Adjust margins for lines in a list of keyword lines
|
896
|
-
aStyle =
|
897
|
-
if
|
898
|
-
aStyle |=
|
899
|
-
if
|
900
|
-
aStyle |=
|
901
|
-
|
902
|
-
|
865
|
+
aStyle = cBlock[4]
|
866
|
+
if pBlock[0] == BlockTyp.KEYWORD:
|
867
|
+
aStyle |= BlockFmt.Z_TOP
|
868
|
+
if nBlock[0] == BlockTyp.KEYWORD:
|
869
|
+
aStyle |= BlockFmt.Z_BTM
|
870
|
+
sBlocks.append((
|
871
|
+
cBlock[0], cBlock[1], cBlock[2], cBlock[3], aStyle
|
903
872
|
))
|
904
873
|
|
905
|
-
elif
|
874
|
+
elif cBlock[0] == BlockTyp.TEXT:
|
906
875
|
# Combine lines from the same paragraph
|
907
|
-
pLines.append(
|
876
|
+
pLines.append(cBlock)
|
908
877
|
|
909
|
-
if
|
910
|
-
# Next
|
878
|
+
if nBlock[0] != BlockTyp.TEXT:
|
879
|
+
# Next block is not text, so we add the buffer to blocks
|
911
880
|
nLines = len(pLines)
|
912
881
|
cStyle = pLines[0][4]
|
913
|
-
if
|
882
|
+
if firstIndent and not (self._noIndent or cStyle & BlockFmt.ALIGNED):
|
914
883
|
# If paragraph indentation is enabled, not temporarily
|
915
884
|
# turned off, and the block is not aligned, we add the
|
916
885
|
# text indentation flag
|
917
|
-
cStyle |=
|
886
|
+
cStyle |= BlockFmt.IND_T
|
918
887
|
|
919
888
|
if nLines == 1:
|
920
|
-
# The paragraph contains a single line, so we just
|
921
|
-
#
|
922
|
-
|
923
|
-
|
889
|
+
# The paragraph contains a single line, so we just save
|
890
|
+
# that directly to the blocks list. If justify is
|
891
|
+
# enabled, and there is no alignment, we apply it.
|
892
|
+
if doJustify and not cStyle & BlockFmt.ALIGNED:
|
893
|
+
cStyle |= BlockFmt.JUSTIFY
|
894
|
+
|
895
|
+
pTxt = pLines[0][2].translate(transMapB)
|
896
|
+
sBlocks.append((
|
897
|
+
BlockTyp.TEXT, pLines[0][1], pTxt, pLines[0][3], cStyle
|
924
898
|
))
|
899
|
+
|
925
900
|
elif nLines > 1:
|
926
901
|
# The paragraph contains multiple lines, so we need to
|
927
902
|
# join them according to the line break policy, and
|
928
903
|
# recompute all the formatting markers
|
929
904
|
tTxt = ""
|
930
905
|
tFmt: T_Formats = []
|
931
|
-
for
|
906
|
+
for aBlock in pLines:
|
932
907
|
tLen = len(tTxt)
|
933
|
-
tTxt += f"{
|
934
|
-
tFmt.extend((p+tLen, fmt, key) for p, fmt, key in
|
935
|
-
|
936
|
-
|
908
|
+
tTxt += f"{aBlock[2]}{lineSep}"
|
909
|
+
tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aBlock[3])
|
910
|
+
cStyle |= aBlock[4]
|
911
|
+
|
912
|
+
pTxt = tTxt[:-1].translate(transMapB)
|
913
|
+
sBlocks.append((
|
914
|
+
BlockTyp.TEXT, pLines[0][1], pTxt, tFmt, cStyle
|
937
915
|
))
|
938
916
|
|
939
917
|
# Reset buffer and make sure text indent is on for next pass
|
@@ -941,50 +919,52 @@ class Tokenizer(ABC):
|
|
941
919
|
self._noIndent = False
|
942
920
|
|
943
921
|
else:
|
944
|
-
|
922
|
+
sBlocks.append(cBlock)
|
923
|
+
|
924
|
+
self._blocks = sBlocks
|
945
925
|
|
946
926
|
return
|
947
927
|
|
948
928
|
def buildOutline(self) -> None:
|
949
929
|
"""Build an outline of the text up to level 3 headings."""
|
950
|
-
tHandle = self._handle or ""
|
951
930
|
isNovel = self._isNovel
|
952
|
-
for tType,
|
953
|
-
if tType ==
|
931
|
+
for tType, tKey, tText, _, _ in self._blocks:
|
932
|
+
if tType == BlockTyp.TITLE:
|
954
933
|
prefix = "TT"
|
955
|
-
elif tType ==
|
956
|
-
prefix = "PT"
|
957
|
-
elif tType ==
|
958
|
-
prefix = "CH" if isNovel else "
|
959
|
-
elif tType ==
|
960
|
-
prefix = "SC" if isNovel else "
|
934
|
+
elif tType == BlockTyp.PART:
|
935
|
+
prefix = "PT"
|
936
|
+
elif tType == BlockTyp.HEAD1:
|
937
|
+
prefix = "CH" if isNovel else "H1"
|
938
|
+
elif tType == BlockTyp.HEAD2:
|
939
|
+
prefix = "SC" if isNovel else "H2"
|
940
|
+
elif tType == BlockTyp.HEAD3 and not isNovel:
|
941
|
+
prefix = "H3"
|
961
942
|
else:
|
962
943
|
continue
|
963
944
|
|
964
|
-
key = f"{tHandle}:T{nHead:04d}"
|
965
945
|
text = tText.replace(nwHeadFmt.BR, " ").replace("&", "&amp;")
|
966
|
-
self._outline[
|
946
|
+
self._outline[tKey] = f"{prefix}|{text}"
|
967
947
|
|
968
948
|
return
|
969
949
|
|
970
950
|
def countStats(self) -> None:
|
971
951
|
"""Count stats on the tokenized text."""
|
972
|
-
titleCount = self._counts.get(
|
973
|
-
paragraphCount = self._counts.get(
|
952
|
+
titleCount = self._counts.get(nwStats.TITLES, 0)
|
953
|
+
paragraphCount = self._counts.get(nwStats.PARAGRAPHS, 0)
|
974
954
|
|
975
|
-
allWords = self._counts.get(
|
976
|
-
textWords = self._counts.get(
|
977
|
-
titleWords = self._counts.get(
|
955
|
+
allWords = self._counts.get(nwStats.WORDS, 0)
|
956
|
+
textWords = self._counts.get(nwStats.WORDS_TEXT, 0)
|
957
|
+
titleWords = self._counts.get(nwStats.WORDS_TITLE, 0)
|
978
958
|
|
979
|
-
allChars = self._counts.get(
|
980
|
-
textChars = self._counts.get(
|
981
|
-
titleChars = self._counts.get(
|
959
|
+
allChars = self._counts.get(nwStats.CHARS, 0)
|
960
|
+
textChars = self._counts.get(nwStats.CHARS_TEXT, 0)
|
961
|
+
titleChars = self._counts.get(nwStats.CHARS_TITLE, 0)
|
982
962
|
|
983
|
-
allWordChars = self._counts.get(
|
984
|
-
textWordChars = self._counts.get(
|
985
|
-
titleWordChars = self._counts.get(
|
963
|
+
allWordChars = self._counts.get(nwStats.WCHARS_ALL, 0)
|
964
|
+
textWordChars = self._counts.get(nwStats.WCHARS_TEXT, 0)
|
965
|
+
titleWordChars = self._counts.get(nwStats.WCHARS_TITLE, 0)
|
986
966
|
|
987
|
-
for tType, _, tText, _, _ in self.
|
967
|
+
for tType, _, tText, _, _ in self._blocks:
|
988
968
|
tText = tText.replace(nwUnicode.U_ENDASH, " ")
|
989
969
|
tText = tText.replace(nwUnicode.U_EMDASH, " ")
|
990
970
|
|
@@ -993,7 +973,7 @@ class Tokenizer(ABC):
|
|
993
973
|
nChars = len(tText)
|
994
974
|
nWChars = len("".join(tWords))
|
995
975
|
|
996
|
-
if tType ==
|
976
|
+
if tType == BlockTyp.TEXT:
|
997
977
|
tPWords = tText.split()
|
998
978
|
nPWords = len(tPWords)
|
999
979
|
nPChars = len(tText)
|
@@ -1007,7 +987,7 @@ class Tokenizer(ABC):
|
|
1007
987
|
allWordChars += nPWChars
|
1008
988
|
textWordChars += nPWChars
|
1009
989
|
|
1010
|
-
elif tType in
|
990
|
+
elif tType in HEADINGS:
|
1011
991
|
titleCount += 1
|
1012
992
|
allWords += nWords
|
1013
993
|
titleWords += nWords
|
@@ -1016,88 +996,110 @@ class Tokenizer(ABC):
|
|
1016
996
|
titleChars += nChars
|
1017
997
|
titleWordChars += nWChars
|
1018
998
|
|
1019
|
-
elif tType ==
|
999
|
+
elif tType == BlockTyp.SEP:
|
1020
1000
|
allWords += nWords
|
1021
1001
|
allChars += nChars
|
1022
1002
|
allWordChars += nWChars
|
1023
1003
|
|
1024
|
-
elif tType
|
1025
|
-
|
1026
|
-
words = text.split()
|
1027
|
-
allWords += len(words)
|
1028
|
-
allChars += len(text)
|
1029
|
-
allWordChars += len("".join(words))
|
1030
|
-
|
1031
|
-
elif tType == self.T_SHORT and self._doSynopsis:
|
1032
|
-
text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
|
1033
|
-
words = text.split()
|
1034
|
-
allWords += len(words)
|
1035
|
-
allChars += len(text)
|
1036
|
-
allWordChars += len("".join(words))
|
1037
|
-
|
1038
|
-
elif tType == self.T_COMMENT and self._doComments:
|
1039
|
-
text = "{0}: {1}".format(self._localLookup("Comment"), tText)
|
1040
|
-
words = text.split()
|
1004
|
+
elif tType in (BlockTyp.COMMENT, BlockTyp.KEYWORD):
|
1005
|
+
words = tText.split()
|
1041
1006
|
allWords += len(words)
|
1042
|
-
allChars += len(
|
1007
|
+
allChars += len(tText)
|
1043
1008
|
allWordChars += len("".join(words))
|
1044
1009
|
|
1045
|
-
|
1046
|
-
|
1047
|
-
if valid and bits:
|
1048
|
-
key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
|
1049
|
-
text = "{0}: {1}".format(key, ", ".join(bits[1:]))
|
1050
|
-
words = text.split()
|
1051
|
-
allWords += len(words)
|
1052
|
-
allChars += len(text)
|
1053
|
-
allWordChars += len("".join(words))
|
1010
|
+
self._counts[nwStats.TITLES] = titleCount
|
1011
|
+
self._counts[nwStats.PARAGRAPHS] = paragraphCount
|
1054
1012
|
|
1055
|
-
self._counts[
|
1056
|
-
self._counts[
|
1013
|
+
self._counts[nwStats.WORDS] = allWords
|
1014
|
+
self._counts[nwStats.WORDS_TEXT] = textWords
|
1015
|
+
self._counts[nwStats.WORDS_TITLE] = titleWords
|
1057
1016
|
|
1058
|
-
self._counts[
|
1059
|
-
self._counts[
|
1060
|
-
self._counts[
|
1017
|
+
self._counts[nwStats.CHARS] = allChars
|
1018
|
+
self._counts[nwStats.CHARS_TEXT] = textChars
|
1019
|
+
self._counts[nwStats.CHARS_TITLE] = titleChars
|
1061
1020
|
|
1062
|
-
self._counts[
|
1063
|
-
self._counts[
|
1064
|
-
self._counts[
|
1021
|
+
self._counts[nwStats.WCHARS_ALL] = allWordChars
|
1022
|
+
self._counts[nwStats.WCHARS_TEXT] = textWordChars
|
1023
|
+
self._counts[nwStats.WCHARS_TITLE] = titleWordChars
|
1065
1024
|
|
1066
|
-
self._counts["allWordChars"] = allWordChars
|
1067
|
-
self._counts["textWordChars"] = textWordChars
|
1068
|
-
self._counts["titleWordChars"] = titleWordChars
|
1069
|
-
|
1070
|
-
return
|
1071
|
-
|
1072
|
-
def saveRawMarkdown(self, path: str | Path) -> None:
|
1073
|
-
"""Save the raw text to a plain text file."""
|
1074
|
-
with open(path, mode="w", encoding="utf-8") as outFile:
|
1075
|
-
for nwdPage in self._markdown:
|
1076
|
-
outFile.write(nwdPage)
|
1077
|
-
return
|
1078
|
-
|
1079
|
-
def saveRawMarkdownJSON(self, path: str | Path) -> None:
|
1080
|
-
"""Save the raw text to a JSON file."""
|
1081
|
-
timeStamp = time()
|
1082
|
-
data = {
|
1083
|
-
"meta": {
|
1084
|
-
"projectName": self._project.data.name,
|
1085
|
-
"novelAuthor": self._project.data.author,
|
1086
|
-
"buildTime": int(timeStamp),
|
1087
|
-
"buildTimeStr": formatTimeStamp(timeStamp),
|
1088
|
-
},
|
1089
|
-
"text": {
|
1090
|
-
"nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
|
1091
|
-
}
|
1092
|
-
}
|
1093
|
-
with open(path, mode="w", encoding="utf-8") as fObj:
|
1094
|
-
json.dump(data, fObj, indent=2)
|
1095
1025
|
return
|
1096
1026
|
|
1097
1027
|
##
|
1098
1028
|
# Internal Functions
|
1099
1029
|
##
|
1100
1030
|
|
1031
|
+
def _formatInt(self, value: int) -> str:
|
1032
|
+
"""Return a localised integer."""
|
1033
|
+
return self._dLocale.toString(value)
|
1034
|
+
|
1035
|
+
def _formatComment(self, style: ComStyle, key: str, text: str) -> tuple[str, T_Formats]:
|
1036
|
+
"""Apply formatting to comments and notes."""
|
1037
|
+
tTxt, tFmt = self._extractFormats(text)
|
1038
|
+
tFmt.insert(0, (0, TextFmt.COL_B, style.textClass))
|
1039
|
+
tFmt.append((len(tTxt), TextFmt.COL_E, ""))
|
1040
|
+
if label := (self._localLookup(style.label) + (f" ({key})" if key else "")).strip():
|
1041
|
+
shift = len(label) + 2
|
1042
|
+
tTxt = f"{label}: {tTxt}"
|
1043
|
+
rFmt = [(0, TextFmt.B_B, ""), (shift - 1, TextFmt.B_E, "")]
|
1044
|
+
if style.labelClass:
|
1045
|
+
rFmt.insert(1, (0, TextFmt.COL_B, style.labelClass))
|
1046
|
+
rFmt.insert(2, (shift - 1, TextFmt.COL_E, ""))
|
1047
|
+
rFmt.extend((p + shift, f, d) for p, f, d in tFmt)
|
1048
|
+
return tTxt, rFmt
|
1049
|
+
|
1050
|
+
def _formatMeta(self, text: str) -> tuple[str, str, T_Formats]:
|
1051
|
+
"""Apply formatting to a meta data line."""
|
1052
|
+
tag = ""
|
1053
|
+
txt = []
|
1054
|
+
fmt = []
|
1055
|
+
valid, bits, _ = self._project.index.scanThis(text)
|
1056
|
+
if valid and bits and bits[0] in nwLabels.KEY_NAME and bits[0] not in self._skipKeywords:
|
1057
|
+
tag = bits[0]
|
1058
|
+
pos = 0
|
1059
|
+
lbl = f"{self._localLookup(nwLabels.KEY_NAME[tag])}:"
|
1060
|
+
end = len(lbl)
|
1061
|
+
fmt = [
|
1062
|
+
(pos, TextFmt.B_B, ""),
|
1063
|
+
(pos, TextFmt.COL_B, "keyword"),
|
1064
|
+
(end, TextFmt.COL_E, ""),
|
1065
|
+
(end, TextFmt.B_E, ""),
|
1066
|
+
]
|
1067
|
+
txt = [lbl, " "]
|
1068
|
+
pos = end + 1
|
1069
|
+
|
1070
|
+
if (num := len(bits)) > 1:
|
1071
|
+
if bits[0] == nwKeyWords.TAG_KEY:
|
1072
|
+
one, two = self._project.index.parseValue(bits[1])
|
1073
|
+
end = pos + len(one)
|
1074
|
+
fmt.append((pos, TextFmt.COL_B, "tag"))
|
1075
|
+
fmt.append((pos, TextFmt.ANM_B, f"tag_{one}".lower()))
|
1076
|
+
fmt.append((end, TextFmt.ANM_E, ""))
|
1077
|
+
fmt.append((end, TextFmt.COL_E, ""))
|
1078
|
+
txt.append(one)
|
1079
|
+
pos = end
|
1080
|
+
if two:
|
1081
|
+
txt.append(" | ")
|
1082
|
+
pos += 3
|
1083
|
+
end = pos + len(two)
|
1084
|
+
fmt.append((pos, TextFmt.COL_B, "optional"))
|
1085
|
+
fmt.append((end, TextFmt.COL_E, ""))
|
1086
|
+
txt.append(two)
|
1087
|
+
pos = end
|
1088
|
+
else:
|
1089
|
+
for n, bit in enumerate(bits[1:], 2):
|
1090
|
+
end = pos + len(bit)
|
1091
|
+
fmt.append((pos, TextFmt.COL_B, "tag"))
|
1092
|
+
fmt.append((pos, TextFmt.ARF_B, f"#tag_{bit}".lower()))
|
1093
|
+
fmt.append((end, TextFmt.ARF_E, ""))
|
1094
|
+
fmt.append((end, TextFmt.COL_E, ""))
|
1095
|
+
txt.append(bit)
|
1096
|
+
pos = end
|
1097
|
+
if n < num:
|
1098
|
+
txt.append(", ")
|
1099
|
+
pos += 2
|
1100
|
+
|
1101
|
+
return tag, "".join(txt), fmt
|
1102
|
+
|
1101
1103
|
def _extractFormats(
|
1102
1104
|
self, text: str, skip: int = 0, hDialog: bool = False
|
1103
1105
|
) -> tuple[str, T_Formats]:
|
@@ -1109,56 +1111,55 @@ class Tokenizer(ABC):
|
|
1109
1111
|
|
1110
1112
|
# Match Markdown
|
1111
1113
|
for regEx, fmts in self._rxMarkdown:
|
1112
|
-
|
1113
|
-
while rxItt.hasNext():
|
1114
|
-
rxMatch = rxItt.next()
|
1114
|
+
for res in regEx.finditer(text):
|
1115
1115
|
temp.extend(
|
1116
|
-
(
|
1116
|
+
(res.start(n), res.end(n), fmt, "")
|
1117
1117
|
for n, fmt in enumerate(fmts) if fmt > 0
|
1118
1118
|
)
|
1119
1119
|
|
1120
|
+
# Match URLs
|
1121
|
+
for res in REGEX_PATTERNS.url.finditer(text):
|
1122
|
+
temp.append((res.start(0), 0, TextFmt.HRF_B, res.group(0)))
|
1123
|
+
temp.append((res.end(0), 0, TextFmt.HRF_E, ""))
|
1124
|
+
|
1120
1125
|
# Match Shortcodes
|
1121
|
-
|
1122
|
-
while rxItt.hasNext():
|
1123
|
-
rxMatch = rxItt.next()
|
1126
|
+
for res in REGEX_PATTERNS.shortcodePlain.finditer(text):
|
1124
1127
|
temp.append((
|
1125
|
-
|
1126
|
-
|
1127
|
-
self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
|
1128
|
+
res.start(1), res.end(1),
|
1129
|
+
self._shortCodeFmt.get(res.group(1).lower(), 0),
|
1128
1130
|
"",
|
1129
1131
|
))
|
1130
1132
|
|
1131
1133
|
# Match Shortcode w/Values
|
1132
|
-
rxItt = self._rxShortCodeVals.globalMatch(text, 0)
|
1133
1134
|
tHandle = self._handle or ""
|
1134
|
-
|
1135
|
-
|
1136
|
-
kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
|
1135
|
+
for res in REGEX_PATTERNS.shortcodeValue.finditer(text):
|
1136
|
+
kind = self._shortCodeVals.get(res.group(1).lower(), 0)
|
1137
1137
|
temp.append((
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
f"{tHandle}:{rxMatch.captured(2)}",
|
1138
|
+
res.start(0), res.end(0),
|
1139
|
+
TextFmt.STRIP if kind == skip else kind,
|
1140
|
+
f"{tHandle}:{res.group(2)}",
|
1142
1141
|
))
|
1143
1142
|
|
1144
1143
|
# Match Dialogue
|
1145
|
-
if self.
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1144
|
+
if self._hlightDialog and hDialog:
|
1145
|
+
if self._dialogParser.enabled:
|
1146
|
+
for pos, end in self._dialogParser(text):
|
1147
|
+
temp.append((pos, 0, TextFmt.COL_B, "dialog"))
|
1148
|
+
temp.append((end, 0, TextFmt.COL_E, ""))
|
1149
|
+
if self._rxAltDialog:
|
1150
|
+
for res in self._rxAltDialog.finditer(text):
|
1151
|
+
temp.append((res.start(0), 0, TextFmt.COL_B, "altdialog"))
|
1152
|
+
temp.append((res.end(0), 0, TextFmt.COL_E, ""))
|
1152
1153
|
|
1153
1154
|
# Post-process text and format
|
1154
1155
|
result = text
|
1155
1156
|
formats = []
|
1156
|
-
for pos,
|
1157
|
+
for pos, end, fmt, meta in reversed(sorted(temp, key=lambda x: x[0])):
|
1157
1158
|
if fmt > 0:
|
1158
|
-
if
|
1159
|
-
result = result[:pos] + result[
|
1160
|
-
formats = [(p-
|
1161
|
-
formats.insert(0, (pos, fmt,
|
1159
|
+
if end > pos:
|
1160
|
+
result = result[:pos] + result[end:]
|
1161
|
+
formats = [(p+pos-end if p > pos else p, f, m) for p, f, m in formats]
|
1162
|
+
formats.insert(0, (pos, fmt, meta))
|
1162
1163
|
|
1163
1164
|
return result, formats
|
1164
1165
|
|
@@ -1204,6 +1205,7 @@ class HeadingFormatter:
|
|
1204
1205
|
def apply(self, hFormat: str, text: str, nHead: int) -> str:
|
1205
1206
|
"""Apply formatting to a specific heading."""
|
1206
1207
|
hFormat = hFormat.replace(nwHeadFmt.TITLE, text)
|
1208
|
+
hFormat = hFormat.replace(nwHeadFmt.BR, "\n")
|
1207
1209
|
hFormat = hFormat.replace(nwHeadFmt.CH_NUM, str(self._chCount))
|
1208
1210
|
hFormat = hFormat.replace(nwHeadFmt.SC_NUM, str(self._scChCount))
|
1209
1211
|
hFormat = hFormat.replace(nwHeadFmt.SC_ABS, str(self._scAbsCount))
|