novelWriter 2.5.3__py3-none-any.whl → 2.6b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {novelWriter-2.5.3.dist-info → novelWriter-2.6b2.dist-info}/METADATA +1 -1
- {novelWriter-2.5.3.dist-info → novelWriter-2.6b2.dist-info}/RECORD +80 -60
- novelwriter/__init__.py +49 -10
- novelwriter/assets/i18n/project_en_GB.json +1 -0
- novelwriter/assets/icons/typicons_dark/icons.conf +8 -0
- novelwriter/assets/icons/typicons_dark/mixed_copy.svg +4 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-bottom.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-left.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-right.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_margin-top.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_size-height.svg +6 -0
- novelwriter/assets/icons/typicons_dark/mixed_size-width.svg +6 -0
- novelwriter/assets/icons/typicons_dark/nw_toolbar.svg +5 -0
- novelwriter/assets/icons/typicons_light/icons.conf +8 -0
- novelwriter/assets/icons/typicons_light/mixed_copy.svg +4 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-bottom.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-left.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-right.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_margin-top.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_size-height.svg +6 -0
- novelwriter/assets/icons/typicons_light/mixed_size-width.svg +6 -0
- novelwriter/assets/icons/typicons_light/nw_toolbar.svg +5 -0
- novelwriter/assets/manual.pdf +0 -0
- novelwriter/assets/sample.zip +0 -0
- novelwriter/common.py +100 -2
- novelwriter/config.py +25 -15
- novelwriter/constants.py +168 -60
- novelwriter/core/buildsettings.py +66 -39
- novelwriter/core/coretools.py +145 -147
- novelwriter/core/docbuild.py +132 -170
- novelwriter/core/index.py +38 -37
- novelwriter/core/item.py +41 -8
- novelwriter/core/itemmodel.py +518 -0
- novelwriter/core/options.py +4 -1
- novelwriter/core/project.py +67 -89
- novelwriter/core/spellcheck.py +9 -14
- novelwriter/core/status.py +7 -5
- novelwriter/core/tree.py +268 -287
- novelwriter/dialogs/docmerge.py +7 -17
- novelwriter/dialogs/preferences.py +46 -33
- novelwriter/dialogs/projectsettings.py +5 -5
- novelwriter/enum.py +36 -23
- novelwriter/extensions/configlayout.py +27 -12
- novelwriter/extensions/modified.py +13 -1
- novelwriter/extensions/pagedsidebar.py +5 -5
- novelwriter/formats/shared.py +155 -0
- novelwriter/formats/todocx.py +1191 -0
- novelwriter/formats/tohtml.py +451 -0
- novelwriter/{core → formats}/tokenizer.py +487 -491
- novelwriter/formats/tomarkdown.py +217 -0
- novelwriter/{core → formats}/toodt.py +311 -432
- novelwriter/formats/toqdoc.py +484 -0
- novelwriter/formats/toraw.py +91 -0
- novelwriter/gui/doceditor.py +342 -284
- novelwriter/gui/dochighlight.py +96 -84
- novelwriter/gui/docviewer.py +88 -31
- novelwriter/gui/docviewerpanel.py +17 -25
- novelwriter/gui/editordocument.py +17 -2
- novelwriter/gui/itemdetails.py +25 -28
- novelwriter/gui/mainmenu.py +129 -63
- novelwriter/gui/noveltree.py +45 -47
- novelwriter/gui/outline.py +196 -249
- novelwriter/gui/projtree.py +594 -1241
- novelwriter/gui/search.py +9 -10
- novelwriter/gui/sidebar.py +7 -6
- novelwriter/gui/theme.py +10 -5
- novelwriter/guimain.py +100 -196
- novelwriter/shared.py +66 -27
- novelwriter/text/counting.py +2 -0
- novelwriter/text/patterns.py +168 -60
- novelwriter/tools/manusbuild.py +14 -12
- novelwriter/tools/manuscript.py +120 -78
- novelwriter/tools/manussettings.py +424 -291
- novelwriter/tools/welcome.py +4 -4
- novelwriter/tools/writingstats.py +3 -3
- novelwriter/types.py +23 -7
- novelwriter/core/tohtml.py +0 -530
- novelwriter/core/tomarkdown.py +0 -252
- novelwriter/core/toqdoc.py +0 -419
- {novelWriter-2.5.3.dist-info → novelWriter-2.6b2.dist-info}/LICENSE.md +0 -0
- {novelWriter-2.5.3.dist-info → novelWriter-2.6b2.dist-info}/WHEEL +0 -0
- {novelWriter-2.5.3.dist-info → novelWriter-2.6b2.dist-info}/entry_points.txt +0 -0
- {novelWriter-2.5.3.dist-info → novelWriter-2.6b2.dist-info}/top_level.txt +0 -0
@@ -24,41 +24,56 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
24
24
|
"""
|
25
25
|
from __future__ import annotations
|
26
26
|
|
27
|
-
import json
|
28
27
|
import logging
|
29
28
|
import re
|
30
29
|
|
31
30
|
from abc import ABC, abstractmethod
|
32
|
-
from functools import partial
|
33
31
|
from pathlib import Path
|
34
|
-
from
|
32
|
+
from typing import NamedTuple
|
35
33
|
|
36
|
-
from PyQt5.QtCore import
|
37
|
-
from PyQt5.QtGui import QFont
|
34
|
+
from PyQt5.QtCore import QLocale
|
35
|
+
from PyQt5.QtGui import QColor, QFont
|
38
36
|
|
39
37
|
from novelwriter import CONFIG
|
40
|
-
from novelwriter.common import checkInt,
|
41
|
-
from novelwriter.constants import
|
38
|
+
from novelwriter.common import checkInt, fontMatcher, numberToRoman
|
39
|
+
from novelwriter.constants import (
|
40
|
+
nwHeadFmt, nwKeyWords, nwLabels, nwShortcode, nwStats, nwStyles, nwUnicode,
|
41
|
+
trConst
|
42
|
+
)
|
42
43
|
from novelwriter.core.index import processComment
|
43
44
|
from novelwriter.core.project import NWProject
|
44
45
|
from novelwriter.enum import nwComment, nwItemLayout
|
45
|
-
from novelwriter.
|
46
|
+
from novelwriter.formats.shared import (
|
47
|
+
BlockFmt, BlockTyp, T_Block, T_Formats, T_Note, TextDocumentTheme, TextFmt
|
48
|
+
)
|
49
|
+
from novelwriter.text.patterns import REGEX_PATTERNS, DialogParser
|
46
50
|
|
47
51
|
logger = logging.getLogger(__name__)
|
48
52
|
|
49
|
-
ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
|
50
|
-
RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
|
51
53
|
|
52
|
-
|
53
|
-
T_Comment = tuple[str, T_Formats]
|
54
|
-
T_Token = tuple[int, int, str, T_Formats, int]
|
54
|
+
class ComStyle(NamedTuple):
|
55
55
|
|
56
|
+
label: str = ""
|
57
|
+
labelClass: str = ""
|
58
|
+
textClass: str = ""
|
56
59
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
60
|
+
|
61
|
+
COMMENT_STYLE = {
|
62
|
+
nwComment.PLAIN: ComStyle("Comment", "comment", "comment"),
|
63
|
+
nwComment.IGNORE: ComStyle(),
|
64
|
+
nwComment.SYNOPSIS: ComStyle("Synopsis", "modifier", "synopsis"),
|
65
|
+
nwComment.SHORT: ComStyle("Short Description", "modifier", "synopsis"),
|
66
|
+
nwComment.NOTE: ComStyle("Note", "modifier", "note"),
|
67
|
+
nwComment.FOOTNOTE: ComStyle("", "modifier", "note"),
|
68
|
+
nwComment.COMMENT: ComStyle(),
|
69
|
+
nwComment.STORY: ComStyle("", "modifier", "note"),
|
70
|
+
}
|
71
|
+
HEADINGS = [BlockTyp.TITLE, BlockTyp.HEAD1, BlockTyp.HEAD2, BlockTyp.HEAD3, BlockTyp.HEAD4]
|
72
|
+
SKIP_INDENT = [
|
73
|
+
BlockTyp.TITLE, BlockTyp.HEAD1, BlockTyp.HEAD2, BlockTyp.HEAD2, BlockTyp.HEAD3,
|
74
|
+
BlockTyp.HEAD4, BlockTyp.SEP, BlockTyp.SKIP,
|
75
|
+
]
|
76
|
+
B_EMPTY: T_Block = (BlockTyp.EMPTY, "", "", [], BlockFmt.NONE)
|
62
77
|
|
63
78
|
|
64
79
|
class Tokenizer(ABC):
|
@@ -70,119 +85,71 @@ class Tokenizer(ABC):
|
|
70
85
|
subclasses.
|
71
86
|
"""
|
72
87
|
|
73
|
-
# In-Text Format
|
74
|
-
FMT_B_B = 1 # Begin bold
|
75
|
-
FMT_B_E = 2 # End bold
|
76
|
-
FMT_I_B = 3 # Begin italics
|
77
|
-
FMT_I_E = 4 # End italics
|
78
|
-
FMT_D_B = 5 # Begin strikeout
|
79
|
-
FMT_D_E = 6 # End strikeout
|
80
|
-
FMT_U_B = 7 # Begin underline
|
81
|
-
FMT_U_E = 8 # End underline
|
82
|
-
FMT_M_B = 9 # Begin mark
|
83
|
-
FMT_M_E = 10 # End mark
|
84
|
-
FMT_SUP_B = 11 # Begin superscript
|
85
|
-
FMT_SUP_E = 12 # End superscript
|
86
|
-
FMT_SUB_B = 13 # Begin subscript
|
87
|
-
FMT_SUB_E = 14 # End subscript
|
88
|
-
FMT_DL_B = 15 # Begin dialogue
|
89
|
-
FMT_DL_E = 16 # End dialogue
|
90
|
-
FMT_ADL_B = 17 # Begin alt dialogue
|
91
|
-
FMT_ADL_E = 18 # End alt dialogue
|
92
|
-
FMT_FNOTE = 19 # Footnote marker
|
93
|
-
FMT_STRIP = 20 # Strip the format code
|
94
|
-
|
95
|
-
# Block Type
|
96
|
-
T_EMPTY = 1 # Empty line (new paragraph)
|
97
|
-
T_SYNOPSIS = 2 # Synopsis comment
|
98
|
-
T_SHORT = 3 # Short description comment
|
99
|
-
T_COMMENT = 4 # Comment line
|
100
|
-
T_KEYWORD = 5 # Command line
|
101
|
-
T_TITLE = 6 # Title
|
102
|
-
T_HEAD1 = 7 # Heading 1
|
103
|
-
T_HEAD2 = 8 # Heading 2
|
104
|
-
T_HEAD3 = 9 # Heading 3
|
105
|
-
T_HEAD4 = 10 # Heading 4
|
106
|
-
T_TEXT = 11 # Text line
|
107
|
-
T_SEP = 12 # Scene separator
|
108
|
-
T_SKIP = 13 # Paragraph break
|
109
|
-
|
110
|
-
# Block Style
|
111
|
-
A_NONE = 0x0000 # No special style
|
112
|
-
A_LEFT = 0x0001 # Left aligned
|
113
|
-
A_RIGHT = 0x0002 # Right aligned
|
114
|
-
A_CENTRE = 0x0004 # Centred
|
115
|
-
A_JUSTIFY = 0x0008 # Justified
|
116
|
-
A_PBB = 0x0010 # Page break before
|
117
|
-
A_PBA = 0x0020 # Page break after
|
118
|
-
A_Z_TOPMRG = 0x0040 # Zero top margin
|
119
|
-
A_Z_BTMMRG = 0x0080 # Zero bottom margin
|
120
|
-
A_IND_L = 0x0100 # Left indentation
|
121
|
-
A_IND_R = 0x0200 # Right indentation
|
122
|
-
A_IND_T = 0x0400 # Text indentation
|
123
|
-
|
124
|
-
# Masks
|
125
|
-
M_ALIGNED = A_LEFT | A_RIGHT | A_CENTRE | A_JUSTIFY
|
126
|
-
|
127
|
-
# Lookups
|
128
|
-
L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
|
129
|
-
L_SKIP_INDENT = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD2, T_HEAD3, T_HEAD4, T_SEP, T_SKIP]
|
130
|
-
L_SUMMARY = [T_SYNOPSIS, T_SHORT]
|
131
|
-
|
132
88
|
def __init__(self, project: NWProject) -> None:
|
133
89
|
|
134
90
|
self._project = project
|
135
91
|
|
136
92
|
# Data Variables
|
137
|
-
self._text
|
138
|
-
self._handle
|
139
|
-
self.
|
140
|
-
self.
|
141
|
-
|
142
|
-
#
|
143
|
-
self.
|
144
|
-
self._footnotes: dict[str,
|
145
|
-
|
146
|
-
#
|
93
|
+
self._text = "" # The raw text to be tokenized
|
94
|
+
self._handle = None # The item handle currently being processed
|
95
|
+
self._keepRaw = False # Whether to keep the raw text, used by ToRaw
|
96
|
+
self._noTokens = False # Disable tokenization if they're not needed
|
97
|
+
|
98
|
+
# Blocks and Meta Data (Per Document)
|
99
|
+
self._blocks: list[T_Block] = []
|
100
|
+
self._footnotes: dict[str, T_Note] = {}
|
101
|
+
|
102
|
+
# Blocks and Meta Data (Per Instance)
|
103
|
+
self._raw: list[str] = []
|
104
|
+
self._pages: list[str] = []
|
147
105
|
self._counts: dict[str, int] = {}
|
148
106
|
self._outline: dict[str, str] = {}
|
149
|
-
self._markdown: list[str] = []
|
150
107
|
|
151
108
|
# User Settings
|
109
|
+
self._dLocale = CONFIG.locale # The document locale
|
152
110
|
self._textFont = QFont("Serif", 11) # Output text font
|
153
|
-
self._lineHeight = 1.15
|
154
|
-
self.
|
155
|
-
self.
|
156
|
-
self.
|
157
|
-
self.
|
158
|
-
self.
|
159
|
-
self.
|
160
|
-
self.
|
161
|
-
self.
|
162
|
-
self.
|
163
|
-
self.
|
164
|
-
self.
|
111
|
+
self._lineHeight = 1.15 # Line height in units of em
|
112
|
+
self._colorHeads = True # Colourise headings
|
113
|
+
self._scaleHeads = True # Scale headings to larger font size
|
114
|
+
self._boldHeads = True # Bold headings
|
115
|
+
self._blockIndent = 4.00 # Block indent in units of em
|
116
|
+
self._firstIndent = False # Enable first line indent
|
117
|
+
self._firstWidth = 1.40 # First line indent in units of em
|
118
|
+
self._indentFirst = False # Indent first paragraph
|
119
|
+
self._doJustify = False # Justify text
|
120
|
+
self._doBodyText = True # Include body text
|
121
|
+
self._doSynopsis = False # Also process synopsis comments
|
122
|
+
self._doComments = False # Also process comments
|
123
|
+
self._doKeywords = False # Also process keywords like tags and references
|
124
|
+
self._keepBreaks = True # Keep line breaks in paragraphs
|
125
|
+
self._defaultAlign = "left" # The default text alignment
|
126
|
+
|
127
|
+
self._skipKeywords: set[str] = set() # Keywords to ignore
|
128
|
+
|
129
|
+
# Other Settings
|
130
|
+
self._theme = TextDocumentTheme()
|
131
|
+
self._classes: dict[str, QColor] = {}
|
165
132
|
|
166
133
|
# Margins
|
167
|
-
self._marginTitle =
|
168
|
-
self._marginHead1 =
|
169
|
-
self._marginHead2 =
|
170
|
-
self._marginHead3 =
|
171
|
-
self._marginHead4 =
|
172
|
-
self._marginText =
|
173
|
-
self._marginMeta =
|
174
|
-
self._marginFoot =
|
175
|
-
self._marginSep =
|
134
|
+
self._marginTitle = nwStyles.T_MARGIN["H0"]
|
135
|
+
self._marginHead1 = nwStyles.T_MARGIN["H1"]
|
136
|
+
self._marginHead2 = nwStyles.T_MARGIN["H2"]
|
137
|
+
self._marginHead3 = nwStyles.T_MARGIN["H3"]
|
138
|
+
self._marginHead4 = nwStyles.T_MARGIN["H4"]
|
139
|
+
self._marginText = nwStyles.T_MARGIN["TT"]
|
140
|
+
self._marginMeta = nwStyles.T_MARGIN["MT"]
|
141
|
+
self._marginFoot = nwStyles.T_MARGIN["FT"]
|
142
|
+
self._marginSep = nwStyles.T_MARGIN["SP"]
|
176
143
|
|
177
144
|
# Title Formats
|
178
|
-
self.
|
145
|
+
self._fmtPart = nwHeadFmt.TITLE # Formatting for partitions
|
179
146
|
self._fmtChapter = nwHeadFmt.TITLE # Formatting for numbered chapters
|
180
147
|
self._fmtUnNum = nwHeadFmt.TITLE # Formatting for unnumbered chapters
|
181
148
|
self._fmtScene = nwHeadFmt.TITLE # Formatting for scenes
|
182
149
|
self._fmtHScene = nwHeadFmt.TITLE # Formatting for hard scenes
|
183
150
|
self._fmtSection = nwHeadFmt.TITLE # Formatting for sections
|
184
151
|
|
185
|
-
self.
|
152
|
+
self._hidePart = False # Do not include partition headings
|
186
153
|
self._hideChapter = False # Do not include chapter headings
|
187
154
|
self._hideUnNum = False # Do not include unnumbered headings
|
188
155
|
self._hideScene = False # Do not include scene headings
|
@@ -191,15 +158,16 @@ class Tokenizer(ABC):
|
|
191
158
|
|
192
159
|
self._linkHeadings = False # Add an anchor before headings
|
193
160
|
|
194
|
-
self._titleStyle =
|
195
|
-
self.
|
196
|
-
self.
|
161
|
+
self._titleStyle = BlockFmt.CENTRE | BlockFmt.PBB
|
162
|
+
self._partStyle = BlockFmt.CENTRE | BlockFmt.PBB
|
163
|
+
self._chapterStyle = BlockFmt.PBB
|
164
|
+
self._sceneStyle = BlockFmt.NONE
|
197
165
|
|
198
166
|
# Instance Variables
|
199
167
|
self._hFormatter = HeadingFormatter(self._project)
|
200
168
|
self._noSep = True # Flag to indicate that we don't want a scene separator
|
201
169
|
self._noIndent = False # Flag to disable text indent on next paragraph
|
202
|
-
self.
|
170
|
+
self._breakNext = False # Add a page break on next token
|
203
171
|
|
204
172
|
# This File
|
205
173
|
self._isNovel = False # Document is a novel document
|
@@ -210,31 +178,33 @@ class Tokenizer(ABC):
|
|
210
178
|
|
211
179
|
# Function Mapping
|
212
180
|
self._localLookup = self._project.localLookup
|
213
|
-
self.tr = partial(QCoreApplication.translate, "Tokenizer")
|
214
181
|
|
215
182
|
# Format RegEx
|
216
183
|
self._rxMarkdown = [
|
217
|
-
(REGEX_PATTERNS.markdownItalic, [0,
|
218
|
-
(REGEX_PATTERNS.markdownBold, [0,
|
219
|
-
(REGEX_PATTERNS.markdownStrike, [0,
|
184
|
+
(REGEX_PATTERNS.markdownItalic, [0, TextFmt.I_B, 0, TextFmt.I_E]),
|
185
|
+
(REGEX_PATTERNS.markdownBold, [0, TextFmt.B_B, 0, TextFmt.B_E]),
|
186
|
+
(REGEX_PATTERNS.markdownStrike, [0, TextFmt.D_B, 0, TextFmt.D_E]),
|
220
187
|
]
|
221
|
-
self._rxShortCodes = REGEX_PATTERNS.shortcodePlain
|
222
|
-
self._rxShortCodeVals = REGEX_PATTERNS.shortcodeValue
|
223
188
|
|
224
189
|
self._shortCodeFmt = {
|
225
|
-
nwShortcode.ITALIC_O:
|
226
|
-
nwShortcode.BOLD_O:
|
227
|
-
nwShortcode.STRIKE_O:
|
228
|
-
nwShortcode.ULINE_O:
|
229
|
-
nwShortcode.MARK_O:
|
230
|
-
nwShortcode.SUP_O:
|
231
|
-
nwShortcode.SUB_O:
|
190
|
+
nwShortcode.ITALIC_O: TextFmt.I_B, nwShortcode.ITALIC_C: TextFmt.I_E,
|
191
|
+
nwShortcode.BOLD_O: TextFmt.B_B, nwShortcode.BOLD_C: TextFmt.B_E,
|
192
|
+
nwShortcode.STRIKE_O: TextFmt.D_B, nwShortcode.STRIKE_C: TextFmt.D_E,
|
193
|
+
nwShortcode.ULINE_O: TextFmt.U_B, nwShortcode.ULINE_C: TextFmt.U_E,
|
194
|
+
nwShortcode.MARK_O: TextFmt.M_B, nwShortcode.MARK_C: TextFmt.M_E,
|
195
|
+
nwShortcode.SUP_O: TextFmt.SUP_B, nwShortcode.SUP_C: TextFmt.SUP_E,
|
196
|
+
nwShortcode.SUB_O: TextFmt.SUB_B, nwShortcode.SUB_C: TextFmt.SUB_E,
|
232
197
|
}
|
233
198
|
self._shortCodeVals = {
|
234
|
-
nwShortcode.FOOTNOTE_B:
|
199
|
+
nwShortcode.FOOTNOTE_B: TextFmt.FNOTE,
|
200
|
+
nwShortcode.FIELD_B: TextFmt.FIELD,
|
235
201
|
}
|
236
202
|
|
237
|
-
|
203
|
+
# Dialogue
|
204
|
+
self._hlightDialog = False
|
205
|
+
self._rxAltDialog = REGEX_PATTERNS.altDialogStyle
|
206
|
+
self._dialogParser = DialogParser()
|
207
|
+
self._dialogParser.initParser()
|
238
208
|
|
239
209
|
return
|
240
210
|
|
@@ -242,16 +212,6 @@ class Tokenizer(ABC):
|
|
242
212
|
# Properties
|
243
213
|
##
|
244
214
|
|
245
|
-
@property
|
246
|
-
def result(self) -> str:
|
247
|
-
"""The result of the build process."""
|
248
|
-
return self._result
|
249
|
-
|
250
|
-
@property
|
251
|
-
def allMarkdown(self) -> list[str]:
|
252
|
-
"""The combined novelWriter Markdown text."""
|
253
|
-
return self._markdown
|
254
|
-
|
255
215
|
@property
|
256
216
|
def textStats(self) -> dict[str, int]:
|
257
217
|
"""The collected stats about the text."""
|
@@ -271,10 +231,21 @@ class Tokenizer(ABC):
|
|
271
231
|
# Setters
|
272
232
|
##
|
273
233
|
|
274
|
-
def
|
275
|
-
"""Set
|
276
|
-
|
277
|
-
|
234
|
+
def setLanguage(self, language: str | None) -> None:
|
235
|
+
"""Set language for the document."""
|
236
|
+
if language:
|
237
|
+
self._dLocale = QLocale(language)
|
238
|
+
return
|
239
|
+
|
240
|
+
def setTheme(self, theme: TextDocumentTheme) -> None:
|
241
|
+
"""Set the document colour theme."""
|
242
|
+
self._theme = theme
|
243
|
+
return
|
244
|
+
|
245
|
+
def setPartitionFormat(self, hFormat: str, hide: bool = False) -> None:
|
246
|
+
"""Set the partition format pattern."""
|
247
|
+
self._fmtPart = hFormat.strip()
|
248
|
+
self._hidePart = hide
|
278
249
|
return
|
279
250
|
|
280
251
|
def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
|
@@ -309,28 +280,31 @@ class Tokenizer(ABC):
|
|
309
280
|
|
310
281
|
def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
|
311
282
|
"""Set the title heading style."""
|
312
|
-
self._titleStyle =
|
313
|
-
|
314
|
-
|
283
|
+
self._titleStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
284
|
+
self._titleStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
285
|
+
return
|
286
|
+
|
287
|
+
def setPartitionStyle(self, center: bool, pageBreak: bool) -> None:
|
288
|
+
"""Set the partition heading style."""
|
289
|
+
self._partStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
290
|
+
self._partStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
315
291
|
return
|
316
292
|
|
317
293
|
def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
|
318
294
|
"""Set the chapter heading style."""
|
319
|
-
self._chapterStyle =
|
320
|
-
|
321
|
-
)
|
295
|
+
self._chapterStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
296
|
+
self._chapterStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
322
297
|
return
|
323
298
|
|
324
299
|
def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
|
325
300
|
"""Set the scene heading style."""
|
326
|
-
self._sceneStyle =
|
327
|
-
|
328
|
-
)
|
301
|
+
self._sceneStyle = BlockFmt.CENTRE if center else BlockFmt.NONE
|
302
|
+
self._sceneStyle |= BlockFmt.PBB if pageBreak else BlockFmt.NONE
|
329
303
|
return
|
330
304
|
|
331
|
-
def
|
305
|
+
def setTextFont(self, font: QFont) -> None:
|
332
306
|
"""Set the build font."""
|
333
|
-
self._textFont = font
|
307
|
+
self._textFont = fontMatcher(font)
|
334
308
|
return
|
335
309
|
|
336
310
|
def setLineHeight(self, height: float) -> None:
|
@@ -338,6 +312,13 @@ class Tokenizer(ABC):
|
|
338
312
|
self._lineHeight = min(max(float(height), 0.5), 5.0)
|
339
313
|
return
|
340
314
|
|
315
|
+
def setHeadingStyles(self, color: bool, scale: bool, bold: bool) -> None:
|
316
|
+
"""Set text style for headings."""
|
317
|
+
self._colorHeads = color
|
318
|
+
self._scaleHeads = scale
|
319
|
+
self._boldHeads = bold
|
320
|
+
return
|
321
|
+
|
341
322
|
def setBlockIndent(self, indent: float) -> None:
|
342
323
|
"""Set the block indent between 0.0 and 10.0."""
|
343
324
|
self._blockIndent = min(max(float(indent), 0.0), 10.0)
|
@@ -357,27 +338,9 @@ class Tokenizer(ABC):
|
|
357
338
|
self._doJustify = state
|
358
339
|
return
|
359
340
|
|
360
|
-
def
|
341
|
+
def setDialogHighlight(self, state: bool) -> None:
|
361
342
|
"""Enable or disable dialogue highlighting."""
|
362
|
-
self.
|
363
|
-
self._showDialog = state
|
364
|
-
if state:
|
365
|
-
if CONFIG.dialogStyle > 0:
|
366
|
-
self._rxDialogue.append((
|
367
|
-
REGEX_PATTERNS.dialogStyle, self.FMT_DL_B, self.FMT_DL_E
|
368
|
-
))
|
369
|
-
if CONFIG.dialogLine:
|
370
|
-
self._rxDialogue.append((
|
371
|
-
REGEX_PATTERNS.dialogLine, self.FMT_DL_B, self.FMT_DL_E
|
372
|
-
))
|
373
|
-
if CONFIG.narratorBreak:
|
374
|
-
self._rxDialogue.append((
|
375
|
-
REGEX_PATTERNS.narratorBreak, self.FMT_DL_E, self.FMT_DL_B
|
376
|
-
))
|
377
|
-
if CONFIG.altDialogOpen and CONFIG.altDialogClose:
|
378
|
-
self._rxDialogue.append((
|
379
|
-
REGEX_PATTERNS.altDialogStyle, self.FMT_ADL_B, self.FMT_ADL_E
|
380
|
-
))
|
343
|
+
self._hlightDialog = state
|
381
344
|
return
|
382
345
|
|
383
346
|
def setTitleMargins(self, upper: float, lower: float) -> None:
|
@@ -455,11 +418,6 @@ class Tokenizer(ABC):
|
|
455
418
|
self._keepBreaks = state
|
456
419
|
return
|
457
420
|
|
458
|
-
def setKeepMarkdown(self, state: bool) -> None:
|
459
|
-
"""Keep original markdown during build."""
|
460
|
-
self._keepMD = state
|
461
|
-
return
|
462
|
-
|
463
421
|
##
|
464
422
|
# Class Methods
|
465
423
|
##
|
@@ -468,27 +426,54 @@ class Tokenizer(ABC):
|
|
468
426
|
def doConvert(self) -> None:
|
469
427
|
raise NotImplementedError
|
470
428
|
|
429
|
+
@abstractmethod
|
430
|
+
def closeDocument(self) -> None:
|
431
|
+
raise NotImplementedError
|
432
|
+
|
433
|
+
@abstractmethod
|
434
|
+
def saveDocument(self, path: Path) -> None:
|
435
|
+
raise NotImplementedError
|
436
|
+
|
437
|
+
def initDocument(self) -> None:
|
438
|
+
"""Initialise data after settings."""
|
439
|
+
self._classes["modifier"] = self._theme.modifier
|
440
|
+
self._classes["synopsis"] = self._theme.note
|
441
|
+
self._classes["comment"] = self._theme.comment
|
442
|
+
self._classes["dialog"] = self._theme.dialog
|
443
|
+
self._classes["altdialog"] = self._theme.altdialog
|
444
|
+
self._classes["tag"] = self._theme.tag
|
445
|
+
self._classes["keyword"] = self._theme.keyword
|
446
|
+
self._classes["optional"] = self._theme.optional
|
447
|
+
return
|
448
|
+
|
449
|
+
def setBreakNext(self) -> None:
|
450
|
+
"""Set a page break for next block."""
|
451
|
+
self._breakNext = True
|
452
|
+
return
|
453
|
+
|
471
454
|
def addRootHeading(self, tHandle: str) -> None:
|
472
455
|
"""Add a heading at the start of a new root folder."""
|
473
456
|
self._text = ""
|
474
457
|
self._handle = None
|
475
458
|
|
476
|
-
if (
|
459
|
+
if (item := self._project.tree[tHandle]) and item.isRootType():
|
477
460
|
self._handle = tHandle
|
461
|
+
style = BlockFmt.CENTRE
|
478
462
|
if self._isFirst:
|
479
|
-
textAlign = self.A_CENTRE
|
480
463
|
self._isFirst = False
|
481
464
|
else:
|
482
|
-
|
465
|
+
style |= BlockFmt.PBB
|
483
466
|
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
467
|
+
title = item.itemName
|
468
|
+
if not item.isNovelLike():
|
469
|
+
notes = self._localLookup("Notes")
|
470
|
+
title = f"{notes}: {title}"
|
471
|
+
|
472
|
+
self._blocks = [(
|
473
|
+
BlockTyp.TITLE, f"{self._handle}:T0001", title, [], style
|
474
|
+
)]
|
475
|
+
if self._keepRaw:
|
476
|
+
self._raw.append(f"#! {title}\n\n")
|
492
477
|
|
493
478
|
return
|
494
479
|
|
@@ -505,20 +490,14 @@ class Tokenizer(ABC):
|
|
505
490
|
return
|
506
491
|
|
507
492
|
def doPreProcessing(self) -> None:
|
508
|
-
"""Run
|
493
|
+
"""Run pre-processing jobs before the text is tokenized."""
|
509
494
|
# Process the user's auto-replace dictionary
|
510
|
-
autoReplace
|
511
|
-
if len(autoReplace) > 0:
|
495
|
+
if autoReplace := self._project.data.autoReplace:
|
512
496
|
repDict = {}
|
513
497
|
for aKey, aVal in autoReplace.items():
|
514
498
|
repDict[f"<{aKey}>"] = aVal
|
515
499
|
xRep = re.compile("|".join([re.escape(k) for k in repDict.keys()]), flags=re.DOTALL)
|
516
500
|
self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
|
517
|
-
|
518
|
-
# Process the character translation map
|
519
|
-
trDict = {nwUnicode.U_MAPOS: nwUnicode.U_RSQUO}
|
520
|
-
self._text = self._text.translate(str.maketrans(trDict))
|
521
|
-
|
522
501
|
return
|
523
502
|
|
524
503
|
def tokenizeText(self) -> None:
|
@@ -526,42 +505,62 @@ class Tokenizer(ABC):
|
|
526
505
|
characters that indicate headings, comments, commands etc, or
|
527
506
|
just contain plain text. In the case of plain text, apply the
|
528
507
|
same RegExes that the syntax highlighter uses and save the
|
529
|
-
locations of these formatting tags into the
|
508
|
+
locations of these formatting tags into the blocks list.
|
530
509
|
|
531
|
-
The format of the
|
510
|
+
The format of the blocks list is an entry with a five-tuple for
|
532
511
|
each line in the file. The tuple is as follows:
|
533
|
-
1: The type of the block,
|
512
|
+
1: The type of the block, BlockTyp.*
|
534
513
|
2: The block key: a heading key for headings, a tag for keywords
|
535
514
|
3: The text content of the block, without leading tags
|
536
|
-
4: The internal formatting map of the text,
|
537
|
-
5: The
|
515
|
+
4: The internal formatting map of the text, TextFmt.*
|
516
|
+
5: The formats of the block, BlockFmt.*
|
538
517
|
"""
|
518
|
+
if self._keepRaw:
|
519
|
+
self._raw.append(f"{self._text.rstrip()}\n\n")
|
520
|
+
if self._noTokens:
|
521
|
+
return
|
539
522
|
if self._isNovel:
|
540
523
|
self._hFormatter.setHandle(self._handle)
|
541
524
|
|
525
|
+
# Cache Flags
|
526
|
+
isNovel = self._isNovel
|
527
|
+
doJustify = self._doJustify
|
528
|
+
keepBreaks = self._keepBreaks
|
529
|
+
indentFirst = self._indentFirst
|
530
|
+
firstIndent = self._firstIndent
|
531
|
+
|
532
|
+
# Replace all instances of [br] with a placeholder character
|
533
|
+
text = REGEX_PATTERNS.lineBreak.sub(nwUnicode.U_NAC2, self._text)
|
534
|
+
|
535
|
+
# Translation Maps
|
536
|
+
transMapA = str.maketrans({
|
537
|
+
nwUnicode.U_NAC2: "", # Used when [br] is ignored
|
538
|
+
nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
|
539
|
+
nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
|
540
|
+
})
|
541
|
+
transMapB = str.maketrans({
|
542
|
+
nwUnicode.U_NAC2: "\n", # Used when [br] is not ignored
|
543
|
+
nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
|
544
|
+
nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
|
545
|
+
})
|
546
|
+
|
542
547
|
nHead = 0
|
543
|
-
breakNext = False
|
544
|
-
tmpMarkdown = []
|
545
548
|
tHandle = self._handle or ""
|
546
|
-
|
547
|
-
for
|
549
|
+
tBlocks: list[T_Block] = [B_EMPTY]
|
550
|
+
for bLine in text.splitlines():
|
551
|
+
aLine = bLine.translate(transMapA)
|
548
552
|
sLine = aLine.strip().lower()
|
549
553
|
|
550
554
|
# Check for blank lines
|
551
|
-
if
|
552
|
-
|
553
|
-
self.T_EMPTY, nHead, "", [], self.A_NONE
|
554
|
-
))
|
555
|
-
if self._keepMD:
|
556
|
-
tmpMarkdown.append("\n")
|
557
|
-
|
555
|
+
if not sLine:
|
556
|
+
tBlocks.append(B_EMPTY)
|
558
557
|
continue
|
559
558
|
|
560
|
-
if
|
561
|
-
|
562
|
-
|
559
|
+
if self._breakNext:
|
560
|
+
tStyle = BlockFmt.PBB
|
561
|
+
self._breakNext = False
|
563
562
|
else:
|
564
|
-
|
563
|
+
tStyle = BlockFmt.NONE
|
565
564
|
|
566
565
|
# Check Line Format
|
567
566
|
# =================
|
@@ -574,24 +573,24 @@ class Tokenizer(ABC):
|
|
574
573
|
# therefore proceed to check other formats.
|
575
574
|
|
576
575
|
if sLine in ("[newpage]", "[new page]"):
|
577
|
-
|
576
|
+
self._breakNext = True
|
578
577
|
continue
|
579
578
|
|
580
579
|
elif sLine == "[vspace]":
|
581
|
-
|
582
|
-
(
|
580
|
+
tBlocks.append(
|
581
|
+
(BlockTyp.SKIP, "", "", [], tStyle)
|
583
582
|
)
|
584
583
|
continue
|
585
584
|
|
586
585
|
elif sLine.startswith("[vspace:") and sLine.endswith("]"):
|
587
586
|
nSkip = checkInt(sLine[8:-1], 0)
|
588
587
|
if nSkip >= 1:
|
589
|
-
|
590
|
-
(
|
588
|
+
tBlocks.append(
|
589
|
+
(BlockTyp.SKIP, "", "", [], tStyle)
|
591
590
|
)
|
592
591
|
if nSkip > 1:
|
593
|
-
|
594
|
-
(
|
592
|
+
tBlocks += (nSkip - 1) * [
|
593
|
+
(BlockTyp.SKIP, "", "", [], BlockFmt.NONE)
|
595
594
|
]
|
596
595
|
continue
|
597
596
|
|
@@ -605,32 +604,24 @@ class Tokenizer(ABC):
|
|
605
604
|
continue
|
606
605
|
|
607
606
|
cStyle, cKey, cText, _, _ = processComment(aLine)
|
608
|
-
if cStyle
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
607
|
+
if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT) and not self._doSynopsis:
|
608
|
+
continue
|
609
|
+
if cStyle == nwComment.PLAIN and not self._doComments:
|
610
|
+
continue
|
611
|
+
|
612
|
+
if doJustify and not tStyle & BlockFmt.ALIGNED:
|
613
|
+
tStyle |= BlockFmt.JUSTIFY
|
614
|
+
|
615
|
+
if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT, nwComment.PLAIN):
|
616
|
+
bStyle = COMMENT_STYLE[cStyle]
|
617
|
+
tLine, tFmt = self._formatComment(bStyle, cKey, cText)
|
618
|
+
tBlocks.append((
|
619
|
+
BlockTyp.COMMENT, "", tLine, tFmt, tStyle
|
619
620
|
))
|
620
|
-
|
621
|
-
tmpMarkdown.append(f"{aLine}\n")
|
621
|
+
|
622
622
|
elif cStyle == nwComment.FOOTNOTE:
|
623
|
-
tLine, tFmt = self._extractFormats(cText, skip=
|
623
|
+
tLine, tFmt = self._extractFormats(cText, skip=TextFmt.FNOTE)
|
624
624
|
self._footnotes[f"{tHandle}:{cKey}"] = (tLine, tFmt)
|
625
|
-
if self._keepMD:
|
626
|
-
tmpMarkdown.append(f"{aLine}\n")
|
627
|
-
else:
|
628
|
-
tLine, tFmt = self._extractFormats(cText)
|
629
|
-
tokens.append((
|
630
|
-
self.T_COMMENT, nHead, tLine, tFmt, sAlign
|
631
|
-
))
|
632
|
-
if self._doComments and self._keepMD:
|
633
|
-
tmpMarkdown.append(f"{aLine}\n")
|
634
625
|
|
635
626
|
elif aLine.startswith("@"):
|
636
627
|
# Keywords
|
@@ -638,16 +629,12 @@ class Tokenizer(ABC):
|
|
638
629
|
# Only valid keyword lines are parsed, and any ignored keywords
|
639
630
|
# are automatically skipped.
|
640
631
|
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
|
648
|
-
))
|
649
|
-
if self._doKeywords and self._keepMD:
|
650
|
-
tmpMarkdown.append(f"{aLine}\n")
|
632
|
+
if self._doKeywords:
|
633
|
+
tTag, tLine, tFmt = self._formatMeta(aLine)
|
634
|
+
if tLine:
|
635
|
+
tBlocks.append((
|
636
|
+
BlockTyp.KEYWORD, tTag[1:], tLine, tFmt, tStyle
|
637
|
+
))
|
651
638
|
|
652
639
|
elif aLine.startswith(("# ", "#! ")):
|
653
640
|
# Title or Partition Headings
|
@@ -662,28 +649,26 @@ class Tokenizer(ABC):
|
|
662
649
|
|
663
650
|
nHead += 1
|
664
651
|
tText = aLine[2:].strip()
|
665
|
-
tType =
|
666
|
-
|
667
|
-
|
668
|
-
|
652
|
+
tType = BlockTyp.HEAD1 if isPlain else BlockTyp.TITLE
|
653
|
+
sHide = self._hidePart if isPlain else False
|
654
|
+
if not (isPlain or isNovel and sHide):
|
655
|
+
tStyle |= self._titleStyle
|
656
|
+
if isNovel:
|
669
657
|
if sHide:
|
670
658
|
tText = ""
|
671
|
-
tType =
|
672
|
-
tStyle = self.A_NONE
|
659
|
+
tType = BlockTyp.EMPTY
|
673
660
|
elif isPlain:
|
674
|
-
tText = self._hFormatter.apply(self.
|
675
|
-
tStyle
|
661
|
+
tText = self._hFormatter.apply(self._fmtPart, tText, nHead)
|
662
|
+
tStyle |= self._partStyle
|
676
663
|
if isPlain:
|
677
664
|
self._hFormatter.resetScene()
|
678
665
|
else:
|
679
666
|
self._hFormatter.resetAll()
|
680
667
|
self._noSep = True
|
681
668
|
|
682
|
-
|
683
|
-
tType, nHead, tText, [], tStyle
|
669
|
+
tBlocks.append((
|
670
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
684
671
|
))
|
685
|
-
if self._keepMD:
|
686
|
-
tmpMarkdown.append(f"{aLine}\n")
|
687
672
|
|
688
673
|
elif aLine.startswith(("## ", "##! ")):
|
689
674
|
# (Unnumbered) Chapter Headings
|
@@ -698,27 +683,24 @@ class Tokenizer(ABC):
|
|
698
683
|
|
699
684
|
nHead += 1
|
700
685
|
tText = aLine[3:].strip()
|
701
|
-
tType =
|
702
|
-
tStyle = self.A_NONE
|
686
|
+
tType = BlockTyp.HEAD2
|
703
687
|
sHide = self._hideChapter if isPlain else self._hideUnNum
|
704
688
|
tFormat = self._fmtChapter if isPlain else self._fmtUnNum
|
705
|
-
if
|
689
|
+
if isNovel:
|
706
690
|
if isPlain:
|
707
691
|
self._hFormatter.incChapter()
|
708
692
|
if sHide:
|
709
693
|
tText = ""
|
710
|
-
tType =
|
694
|
+
tType = BlockTyp.EMPTY
|
711
695
|
else:
|
712
696
|
tText = self._hFormatter.apply(tFormat, tText, nHead)
|
713
|
-
tStyle
|
697
|
+
tStyle |= self._chapterStyle
|
714
698
|
self._hFormatter.resetScene()
|
715
699
|
self._noSep = True
|
716
700
|
|
717
|
-
|
718
|
-
tType, nHead, tText, [], tStyle
|
701
|
+
tBlocks.append((
|
702
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
719
703
|
))
|
720
|
-
if self._keepMD:
|
721
|
-
tmpMarkdown.append(f"{aLine}\n")
|
722
704
|
|
723
705
|
elif aLine.startswith(("### ", "###! ")):
|
724
706
|
# (Alternative) Scene Headings
|
@@ -735,31 +717,28 @@ class Tokenizer(ABC):
|
|
735
717
|
|
736
718
|
nHead += 1
|
737
719
|
tText = aLine[4:].strip()
|
738
|
-
tType =
|
739
|
-
tStyle = self.A_NONE
|
720
|
+
tType = BlockTyp.HEAD3
|
740
721
|
sHide = self._hideScene if isPlain else self._hideHScene
|
741
722
|
tFormat = self._fmtScene if isPlain else self._fmtHScene
|
742
|
-
if
|
723
|
+
if isNovel:
|
743
724
|
self._hFormatter.incScene()
|
744
725
|
if sHide:
|
745
726
|
tText = ""
|
746
|
-
tType =
|
727
|
+
tType = BlockTyp.EMPTY
|
747
728
|
else:
|
748
729
|
tText = self._hFormatter.apply(tFormat, tText, nHead)
|
749
|
-
tStyle
|
730
|
+
tStyle |= self._sceneStyle
|
750
731
|
if tText == "": # Empty Format
|
751
|
-
tType =
|
732
|
+
tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SKIP
|
752
733
|
elif tText == tFormat: # Static Format
|
753
734
|
tText = "" if self._noSep else tText
|
754
|
-
tType =
|
755
|
-
tStyle
|
735
|
+
tType = BlockTyp.EMPTY if self._noSep else BlockTyp.SEP
|
736
|
+
tStyle |= BlockFmt.NONE if self._noSep else BlockFmt.CENTRE
|
756
737
|
self._noSep = False
|
757
738
|
|
758
|
-
|
759
|
-
tType, nHead, tText, [], tStyle
|
739
|
+
tBlocks.append((
|
740
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
760
741
|
))
|
761
|
-
if self._keepMD:
|
762
|
-
tmpMarkdown.append(f"{aLine}\n")
|
763
742
|
|
764
743
|
elif aLine.startswith("#### "):
|
765
744
|
# Section Headings
|
@@ -771,25 +750,22 @@ class Tokenizer(ABC):
|
|
771
750
|
|
772
751
|
nHead += 1
|
773
752
|
tText = aLine[5:].strip()
|
774
|
-
tType =
|
775
|
-
|
776
|
-
if self._isNovel:
|
753
|
+
tType = BlockTyp.HEAD4
|
754
|
+
if isNovel:
|
777
755
|
if self._hideSection:
|
778
756
|
tText = ""
|
779
|
-
tType =
|
757
|
+
tType = BlockTyp.EMPTY
|
780
758
|
else:
|
781
759
|
tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
|
782
760
|
if tText == "": # Empty Format
|
783
|
-
tType =
|
761
|
+
tType = BlockTyp.SKIP
|
784
762
|
elif tText == self._fmtSection: # Static Format
|
785
|
-
tType =
|
786
|
-
tStyle
|
763
|
+
tType = BlockTyp.SEP
|
764
|
+
tStyle |= BlockFmt.CENTRE
|
787
765
|
|
788
|
-
|
789
|
-
tType, nHead, tText, [], tStyle
|
766
|
+
tBlocks.append((
|
767
|
+
tType, f"{tHandle}:T{nHead:04d}", tText, [], tStyle
|
790
768
|
))
|
791
|
-
if self._keepMD:
|
792
|
-
tmpMarkdown.append(f"{aLine}\n")
|
793
769
|
|
794
770
|
else:
|
795
771
|
# Text Lines
|
@@ -805,135 +781,133 @@ class Tokenizer(ABC):
|
|
805
781
|
alnRight = False
|
806
782
|
indLeft = False
|
807
783
|
indRight = False
|
808
|
-
if
|
784
|
+
if bLine.startswith(">>"):
|
809
785
|
alnRight = True
|
810
|
-
|
811
|
-
elif
|
786
|
+
bLine = bLine[2:].lstrip(" ")
|
787
|
+
elif bLine.startswith(">"):
|
812
788
|
indLeft = True
|
813
|
-
|
789
|
+
bLine = bLine[1:].lstrip(" ")
|
814
790
|
|
815
|
-
if
|
791
|
+
if bLine.endswith("<<"):
|
816
792
|
alnLeft = True
|
817
|
-
|
818
|
-
elif
|
793
|
+
bLine = bLine[:-2].rstrip(" ")
|
794
|
+
elif bLine.endswith("<"):
|
819
795
|
indRight = True
|
820
|
-
|
796
|
+
bLine = bLine[:-1].rstrip(" ")
|
821
797
|
|
822
798
|
if alnLeft and alnRight:
|
823
|
-
|
799
|
+
tStyle |= BlockFmt.CENTRE
|
824
800
|
elif alnLeft:
|
825
|
-
|
801
|
+
tStyle |= BlockFmt.LEFT
|
826
802
|
elif alnRight:
|
827
|
-
|
803
|
+
tStyle |= BlockFmt.RIGHT
|
828
804
|
|
829
805
|
if indLeft:
|
830
|
-
|
806
|
+
tStyle |= BlockFmt.IND_L
|
831
807
|
if indRight:
|
832
|
-
|
808
|
+
tStyle |= BlockFmt.IND_R
|
833
809
|
|
834
810
|
# Process formats
|
835
|
-
tLine, tFmt = self._extractFormats(
|
836
|
-
|
837
|
-
|
811
|
+
tLine, tFmt = self._extractFormats(bLine, hDialog=isNovel)
|
812
|
+
tBlocks.append((
|
813
|
+
BlockTyp.TEXT, "", tLine, tFmt, tStyle
|
838
814
|
))
|
839
|
-
if self._keepMD:
|
840
|
-
tmpMarkdown.append(f"{aLine}\n")
|
841
815
|
|
842
816
|
# If we have content, turn off the first page flag
|
843
|
-
if self._isFirst and
|
817
|
+
if self._isFirst and len(tBlocks) > 1:
|
844
818
|
self._isFirst = False # First document has been processed
|
845
819
|
|
846
|
-
# Make sure the
|
847
|
-
# on the very first
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
820
|
+
# Make sure the blocks array doesn't start with a page break
|
821
|
+
# on the very first block, adding a blank first page.
|
822
|
+
for n, cBlock in enumerate(tBlocks):
|
823
|
+
if cBlock[0] != BlockTyp.EMPTY:
|
824
|
+
if cBlock[4] & BlockFmt.PBB:
|
825
|
+
tBlocks[n] = (
|
826
|
+
cBlock[0], cBlock[1], cBlock[2], cBlock[3], cBlock[4] & ~BlockFmt.PBB
|
827
|
+
)
|
828
|
+
break
|
853
829
|
|
854
830
|
# Always add an empty line at the end of the file
|
855
|
-
|
856
|
-
self.T_EMPTY, nHead, "", [], self.A_NONE
|
857
|
-
))
|
858
|
-
if self._keepMD:
|
859
|
-
tmpMarkdown.append("\n")
|
860
|
-
self._markdown.append("".join(tmpMarkdown))
|
831
|
+
tBlocks.append(B_EMPTY)
|
861
832
|
|
862
833
|
# Second Pass
|
863
834
|
# ===========
|
864
835
|
# This second pass strips away consecutive blank lines, and
|
865
836
|
# combines consecutive text lines into the same paragraph.
|
866
837
|
# It also ensures that there isn't paragraph spacing between
|
867
|
-
# meta data lines for formats that
|
838
|
+
# meta data lines for formats that have spacing.
|
868
839
|
|
869
|
-
|
870
|
-
pToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
|
871
|
-
nToken: T_Token = (self.T_EMPTY, 0, "", [], self.A_NONE)
|
840
|
+
lineSep = "\n" if keepBreaks else " "
|
872
841
|
|
873
|
-
|
874
|
-
|
842
|
+
pLines: list[T_Block] = []
|
843
|
+
sBlocks: list[T_Block] = []
|
844
|
+
for n, cBlock in enumerate(tBlocks[1:-1], 1):
|
875
845
|
|
876
|
-
|
877
|
-
|
846
|
+
pBlock = tBlocks[n-1] # Look behind
|
847
|
+
nBlock = tBlocks[n+1] # Look ahead
|
878
848
|
|
879
|
-
if
|
880
|
-
pToken = tokens[n-1] # Look behind
|
881
|
-
if n < tCount - 1:
|
882
|
-
nToken = tokens[n+1] # Look ahead
|
883
|
-
|
884
|
-
if cToken[0] in self.L_SKIP_INDENT and not self._indentFirst:
|
849
|
+
if cBlock[0] in SKIP_INDENT and not indentFirst:
|
885
850
|
# Unless the indentFirst flag is set, we set up the next
|
886
851
|
# paragraph to not be indented if we see a block of a
|
887
852
|
# specific type
|
888
853
|
self._noIndent = True
|
889
854
|
|
890
|
-
if
|
855
|
+
if cBlock[0] == BlockTyp.EMPTY:
|
891
856
|
# We don't need to keep the empty lines after this pass
|
892
857
|
pass
|
893
858
|
|
894
|
-
elif
|
859
|
+
elif cBlock[0] == BlockTyp.KEYWORD:
|
895
860
|
# Adjust margins for lines in a list of keyword lines
|
896
|
-
aStyle =
|
897
|
-
if
|
898
|
-
aStyle |=
|
899
|
-
if
|
900
|
-
aStyle |=
|
901
|
-
|
902
|
-
|
861
|
+
aStyle = cBlock[4]
|
862
|
+
if pBlock[0] == BlockTyp.KEYWORD:
|
863
|
+
aStyle |= BlockFmt.Z_TOP
|
864
|
+
if nBlock[0] == BlockTyp.KEYWORD:
|
865
|
+
aStyle |= BlockFmt.Z_BTM
|
866
|
+
sBlocks.append((
|
867
|
+
cBlock[0], cBlock[1], cBlock[2], cBlock[3], aStyle
|
903
868
|
))
|
904
869
|
|
905
|
-
elif
|
870
|
+
elif cBlock[0] == BlockTyp.TEXT:
|
906
871
|
# Combine lines from the same paragraph
|
907
|
-
pLines.append(
|
872
|
+
pLines.append(cBlock)
|
908
873
|
|
909
|
-
if
|
910
|
-
# Next
|
874
|
+
if nBlock[0] != BlockTyp.TEXT:
|
875
|
+
# Next block is not text, so we add the buffer to blocks
|
911
876
|
nLines = len(pLines)
|
912
877
|
cStyle = pLines[0][4]
|
913
|
-
if
|
878
|
+
if firstIndent and not (self._noIndent or cStyle & BlockFmt.ALIGNED):
|
914
879
|
# If paragraph indentation is enabled, not temporarily
|
915
880
|
# turned off, and the block is not aligned, we add the
|
916
881
|
# text indentation flag
|
917
|
-
cStyle |=
|
882
|
+
cStyle |= BlockFmt.IND_T
|
918
883
|
|
919
884
|
if nLines == 1:
|
920
|
-
# The paragraph contains a single line, so we just
|
921
|
-
#
|
922
|
-
|
923
|
-
|
885
|
+
# The paragraph contains a single line, so we just save
|
886
|
+
# that directly to the blocks list. If justify is
|
887
|
+
# enabled, and there is no alignment, we apply it.
|
888
|
+
if doJustify and not cStyle & BlockFmt.ALIGNED:
|
889
|
+
cStyle |= BlockFmt.JUSTIFY
|
890
|
+
|
891
|
+
pTxt = pLines[0][2].translate(transMapB)
|
892
|
+
sBlocks.append((
|
893
|
+
BlockTyp.TEXT, pLines[0][1], pTxt, pLines[0][3], cStyle
|
924
894
|
))
|
895
|
+
|
925
896
|
elif nLines > 1:
|
926
897
|
# The paragraph contains multiple lines, so we need to
|
927
898
|
# join them according to the line break policy, and
|
928
899
|
# recompute all the formatting markers
|
929
900
|
tTxt = ""
|
930
901
|
tFmt: T_Formats = []
|
931
|
-
for
|
902
|
+
for aBlock in pLines:
|
932
903
|
tLen = len(tTxt)
|
933
|
-
tTxt += f"{
|
934
|
-
tFmt.extend((p+tLen, fmt, key) for p, fmt, key in
|
935
|
-
|
936
|
-
|
904
|
+
tTxt += f"{aBlock[2]}{lineSep}"
|
905
|
+
tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aBlock[3])
|
906
|
+
cStyle |= aBlock[4]
|
907
|
+
|
908
|
+
pTxt = tTxt[:-1].translate(transMapB)
|
909
|
+
sBlocks.append((
|
910
|
+
BlockTyp.TEXT, pLines[0][1], pTxt, tFmt, cStyle
|
937
911
|
))
|
938
912
|
|
939
913
|
# Reset buffer and make sure text indent is on for next pass
|
@@ -941,50 +915,50 @@ class Tokenizer(ABC):
|
|
941
915
|
self._noIndent = False
|
942
916
|
|
943
917
|
else:
|
944
|
-
|
918
|
+
sBlocks.append(cBlock)
|
919
|
+
|
920
|
+
self._blocks = sBlocks
|
945
921
|
|
946
922
|
return
|
947
923
|
|
948
924
|
def buildOutline(self) -> None:
|
949
925
|
"""Build an outline of the text up to level 3 headings."""
|
950
|
-
tHandle = self._handle or ""
|
951
926
|
isNovel = self._isNovel
|
952
|
-
for tType,
|
953
|
-
if tType ==
|
927
|
+
for tType, tKey, tText, _, _ in self._blocks:
|
928
|
+
if tType == BlockTyp.TITLE:
|
954
929
|
prefix = "TT"
|
955
|
-
elif tType ==
|
930
|
+
elif tType == BlockTyp.HEAD1:
|
956
931
|
prefix = "PT" if isNovel else "H1"
|
957
|
-
elif tType ==
|
932
|
+
elif tType == BlockTyp.HEAD2:
|
958
933
|
prefix = "CH" if isNovel else "H2"
|
959
|
-
elif tType ==
|
934
|
+
elif tType == BlockTyp.HEAD3:
|
960
935
|
prefix = "SC" if isNovel else "H3"
|
961
936
|
else:
|
962
937
|
continue
|
963
938
|
|
964
|
-
key = f"{tHandle}:T{nHead:04d}"
|
965
939
|
text = tText.replace(nwHeadFmt.BR, " ").replace("&", "&")
|
966
|
-
self._outline[
|
940
|
+
self._outline[tKey] = f"{prefix}|{text}"
|
967
941
|
|
968
942
|
return
|
969
943
|
|
970
944
|
def countStats(self) -> None:
|
971
945
|
"""Count stats on the tokenized text."""
|
972
|
-
titleCount = self._counts.get(
|
973
|
-
paragraphCount = self._counts.get(
|
946
|
+
titleCount = self._counts.get(nwStats.TITLES, 0)
|
947
|
+
paragraphCount = self._counts.get(nwStats.PARAGRAPHS, 0)
|
974
948
|
|
975
|
-
allWords = self._counts.get(
|
976
|
-
textWords = self._counts.get(
|
977
|
-
titleWords = self._counts.get(
|
949
|
+
allWords = self._counts.get(nwStats.WORDS_ALL, 0)
|
950
|
+
textWords = self._counts.get(nwStats.WORDS_TEXT, 0)
|
951
|
+
titleWords = self._counts.get(nwStats.WORDS_TITLE, 0)
|
978
952
|
|
979
|
-
allChars = self._counts.get(
|
980
|
-
textChars = self._counts.get(
|
981
|
-
titleChars = self._counts.get(
|
953
|
+
allChars = self._counts.get(nwStats.CHARS_ALL, 0)
|
954
|
+
textChars = self._counts.get(nwStats.CHARS_TEXT, 0)
|
955
|
+
titleChars = self._counts.get(nwStats.CHARS_TITLE, 0)
|
982
956
|
|
983
|
-
allWordChars = self._counts.get(
|
984
|
-
textWordChars = self._counts.get(
|
985
|
-
titleWordChars = self._counts.get(
|
957
|
+
allWordChars = self._counts.get(nwStats.WCHARS_ALL, 0)
|
958
|
+
textWordChars = self._counts.get(nwStats.WCHARS_TEXT, 0)
|
959
|
+
titleWordChars = self._counts.get(nwStats.WCHARS_TITLE, 0)
|
986
960
|
|
987
|
-
for tType, _, tText, _, _ in self.
|
961
|
+
for tType, _, tText, _, _ in self._blocks:
|
988
962
|
tText = tText.replace(nwUnicode.U_ENDASH, " ")
|
989
963
|
tText = tText.replace(nwUnicode.U_EMDASH, " ")
|
990
964
|
|
@@ -993,7 +967,7 @@ class Tokenizer(ABC):
|
|
993
967
|
nChars = len(tText)
|
994
968
|
nWChars = len("".join(tWords))
|
995
969
|
|
996
|
-
if tType ==
|
970
|
+
if tType == BlockTyp.TEXT:
|
997
971
|
tPWords = tText.split()
|
998
972
|
nPWords = len(tPWords)
|
999
973
|
nPChars = len(tText)
|
@@ -1007,7 +981,7 @@ class Tokenizer(ABC):
|
|
1007
981
|
allWordChars += nPWChars
|
1008
982
|
textWordChars += nPWChars
|
1009
983
|
|
1010
|
-
elif tType in
|
984
|
+
elif tType in HEADINGS:
|
1011
985
|
titleCount += 1
|
1012
986
|
allWords += nWords
|
1013
987
|
titleWords += nWords
|
@@ -1016,88 +990,110 @@ class Tokenizer(ABC):
|
|
1016
990
|
titleChars += nChars
|
1017
991
|
titleWordChars += nWChars
|
1018
992
|
|
1019
|
-
elif tType ==
|
993
|
+
elif tType == BlockTyp.SEP:
|
1020
994
|
allWords += nWords
|
1021
995
|
allChars += nChars
|
1022
996
|
allWordChars += nWChars
|
1023
997
|
|
1024
|
-
elif tType
|
1025
|
-
|
1026
|
-
words = text.split()
|
1027
|
-
allWords += len(words)
|
1028
|
-
allChars += len(text)
|
1029
|
-
allWordChars += len("".join(words))
|
1030
|
-
|
1031
|
-
elif tType == self.T_SHORT and self._doSynopsis:
|
1032
|
-
text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
|
1033
|
-
words = text.split()
|
1034
|
-
allWords += len(words)
|
1035
|
-
allChars += len(text)
|
1036
|
-
allWordChars += len("".join(words))
|
1037
|
-
|
1038
|
-
elif tType == self.T_COMMENT and self._doComments:
|
1039
|
-
text = "{0}: {1}".format(self._localLookup("Comment"), tText)
|
1040
|
-
words = text.split()
|
998
|
+
elif tType in (BlockTyp.COMMENT, BlockTyp.KEYWORD):
|
999
|
+
words = tText.split()
|
1041
1000
|
allWords += len(words)
|
1042
|
-
allChars += len(
|
1001
|
+
allChars += len(tText)
|
1043
1002
|
allWordChars += len("".join(words))
|
1044
1003
|
|
1045
|
-
|
1046
|
-
|
1047
|
-
if valid and bits:
|
1048
|
-
key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
|
1049
|
-
text = "{0}: {1}".format(key, ", ".join(bits[1:]))
|
1050
|
-
words = text.split()
|
1051
|
-
allWords += len(words)
|
1052
|
-
allChars += len(text)
|
1053
|
-
allWordChars += len("".join(words))
|
1004
|
+
self._counts[nwStats.TITLES] = titleCount
|
1005
|
+
self._counts[nwStats.PARAGRAPHS] = paragraphCount
|
1054
1006
|
|
1055
|
-
self._counts[
|
1056
|
-
self._counts[
|
1007
|
+
self._counts[nwStats.WORDS_ALL] = allWords
|
1008
|
+
self._counts[nwStats.WORDS_TEXT] = textWords
|
1009
|
+
self._counts[nwStats.WORDS_TITLE] = titleWords
|
1057
1010
|
|
1058
|
-
self._counts[
|
1059
|
-
self._counts[
|
1060
|
-
self._counts[
|
1011
|
+
self._counts[nwStats.CHARS_ALL] = allChars
|
1012
|
+
self._counts[nwStats.CHARS_TEXT] = textChars
|
1013
|
+
self._counts[nwStats.CHARS_TITLE] = titleChars
|
1061
1014
|
|
1062
|
-
self._counts[
|
1063
|
-
self._counts[
|
1064
|
-
self._counts[
|
1015
|
+
self._counts[nwStats.WCHARS_ALL] = allWordChars
|
1016
|
+
self._counts[nwStats.WCHARS_TEXT] = textWordChars
|
1017
|
+
self._counts[nwStats.WCHARS_TITLE] = titleWordChars
|
1065
1018
|
|
1066
|
-
self._counts["allWordChars"] = allWordChars
|
1067
|
-
self._counts["textWordChars"] = textWordChars
|
1068
|
-
self._counts["titleWordChars"] = titleWordChars
|
1069
|
-
|
1070
|
-
return
|
1071
|
-
|
1072
|
-
def saveRawMarkdown(self, path: str | Path) -> None:
|
1073
|
-
"""Save the raw text to a plain text file."""
|
1074
|
-
with open(path, mode="w", encoding="utf-8") as outFile:
|
1075
|
-
for nwdPage in self._markdown:
|
1076
|
-
outFile.write(nwdPage)
|
1077
|
-
return
|
1078
|
-
|
1079
|
-
def saveRawMarkdownJSON(self, path: str | Path) -> None:
|
1080
|
-
"""Save the raw text to a JSON file."""
|
1081
|
-
timeStamp = time()
|
1082
|
-
data = {
|
1083
|
-
"meta": {
|
1084
|
-
"projectName": self._project.data.name,
|
1085
|
-
"novelAuthor": self._project.data.author,
|
1086
|
-
"buildTime": int(timeStamp),
|
1087
|
-
"buildTimeStr": formatTimeStamp(timeStamp),
|
1088
|
-
},
|
1089
|
-
"text": {
|
1090
|
-
"nwd": [page.rstrip("\n").split("\n") for page in self._markdown],
|
1091
|
-
}
|
1092
|
-
}
|
1093
|
-
with open(path, mode="w", encoding="utf-8") as fObj:
|
1094
|
-
json.dump(data, fObj, indent=2)
|
1095
1019
|
return
|
1096
1020
|
|
1097
1021
|
##
|
1098
1022
|
# Internal Functions
|
1099
1023
|
##
|
1100
1024
|
|
1025
|
+
def _formatInt(self, value: int) -> str:
|
1026
|
+
"""Return a localised integer."""
|
1027
|
+
return self._dLocale.toString(value)
|
1028
|
+
|
1029
|
+
def _formatComment(self, style: ComStyle, key: str, text: str) -> tuple[str, T_Formats]:
|
1030
|
+
"""Apply formatting to comments and notes."""
|
1031
|
+
tTxt, tFmt = self._extractFormats(text)
|
1032
|
+
tFmt.insert(0, (0, TextFmt.COL_B, style.textClass))
|
1033
|
+
tFmt.append((len(tTxt), TextFmt.COL_E, ""))
|
1034
|
+
if label := (self._localLookup(style.label) + (f" ({key})" if key else "")).strip():
|
1035
|
+
shift = len(label) + 2
|
1036
|
+
tTxt = f"{label}: {tTxt}"
|
1037
|
+
rFmt = [(0, TextFmt.B_B, ""), (shift - 1, TextFmt.B_E, "")]
|
1038
|
+
if style.labelClass:
|
1039
|
+
rFmt.insert(1, (0, TextFmt.COL_B, style.labelClass))
|
1040
|
+
rFmt.insert(2, (shift - 1, TextFmt.COL_E, ""))
|
1041
|
+
rFmt.extend((p + shift, f, d) for p, f, d in tFmt)
|
1042
|
+
return tTxt, rFmt
|
1043
|
+
|
1044
|
+
def _formatMeta(self, text: str) -> tuple[str, str, T_Formats]:
|
1045
|
+
"""Apply formatting to a meta data line."""
|
1046
|
+
tag = ""
|
1047
|
+
txt = []
|
1048
|
+
fmt = []
|
1049
|
+
valid, bits, _ = self._project.index.scanThis(text)
|
1050
|
+
if valid and bits and bits[0] in nwLabels.KEY_NAME and bits[0] not in self._skipKeywords:
|
1051
|
+
tag = bits[0]
|
1052
|
+
pos = 0
|
1053
|
+
lbl = f"{self._localLookup(nwLabels.KEY_NAME[tag])}:"
|
1054
|
+
end = len(lbl)
|
1055
|
+
fmt = [
|
1056
|
+
(pos, TextFmt.B_B, ""),
|
1057
|
+
(pos, TextFmt.COL_B, "keyword"),
|
1058
|
+
(end, TextFmt.COL_E, ""),
|
1059
|
+
(end, TextFmt.B_E, ""),
|
1060
|
+
]
|
1061
|
+
txt = [lbl, " "]
|
1062
|
+
pos = end + 1
|
1063
|
+
|
1064
|
+
if (num := len(bits)) > 1:
|
1065
|
+
if bits[0] == nwKeyWords.TAG_KEY:
|
1066
|
+
one, two = self._project.index.parseValue(bits[1])
|
1067
|
+
end = pos + len(one)
|
1068
|
+
fmt.append((pos, TextFmt.COL_B, "tag"))
|
1069
|
+
fmt.append((pos, TextFmt.ANM_B, f"tag_{one}".lower()))
|
1070
|
+
fmt.append((end, TextFmt.ANM_E, ""))
|
1071
|
+
fmt.append((end, TextFmt.COL_E, ""))
|
1072
|
+
txt.append(one)
|
1073
|
+
pos = end
|
1074
|
+
if two:
|
1075
|
+
txt.append(" | ")
|
1076
|
+
pos += 3
|
1077
|
+
end = pos + len(two)
|
1078
|
+
fmt.append((pos, TextFmt.COL_B, "optional"))
|
1079
|
+
fmt.append((end, TextFmt.COL_E, ""))
|
1080
|
+
txt.append(two)
|
1081
|
+
pos = end
|
1082
|
+
else:
|
1083
|
+
for n, bit in enumerate(bits[1:], 2):
|
1084
|
+
end = pos + len(bit)
|
1085
|
+
fmt.append((pos, TextFmt.COL_B, "tag"))
|
1086
|
+
fmt.append((pos, TextFmt.ARF_B, f"#tag_{bit}".lower()))
|
1087
|
+
fmt.append((end, TextFmt.ARF_E, ""))
|
1088
|
+
fmt.append((end, TextFmt.COL_E, ""))
|
1089
|
+
txt.append(bit)
|
1090
|
+
pos = end
|
1091
|
+
if n < num:
|
1092
|
+
txt.append(", ")
|
1093
|
+
pos += 2
|
1094
|
+
|
1095
|
+
return tag, "".join(txt), fmt
|
1096
|
+
|
1101
1097
|
def _extractFormats(
|
1102
1098
|
self, text: str, skip: int = 0, hDialog: bool = False
|
1103
1099
|
) -> tuple[str, T_Formats]:
|
@@ -1109,56 +1105,55 @@ class Tokenizer(ABC):
|
|
1109
1105
|
|
1110
1106
|
# Match Markdown
|
1111
1107
|
for regEx, fmts in self._rxMarkdown:
|
1112
|
-
|
1113
|
-
while rxItt.hasNext():
|
1114
|
-
rxMatch = rxItt.next()
|
1108
|
+
for res in regEx.finditer(text):
|
1115
1109
|
temp.extend(
|
1116
|
-
(
|
1110
|
+
(res.start(n), res.end(n), fmt, "")
|
1117
1111
|
for n, fmt in enumerate(fmts) if fmt > 0
|
1118
1112
|
)
|
1119
1113
|
|
1114
|
+
# Match URLs
|
1115
|
+
for res in REGEX_PATTERNS.url.finditer(text):
|
1116
|
+
temp.append((res.start(0), 0, TextFmt.HRF_B, res.group(0)))
|
1117
|
+
temp.append((res.end(0), 0, TextFmt.HRF_E, ""))
|
1118
|
+
|
1120
1119
|
# Match Shortcodes
|
1121
|
-
|
1122
|
-
while rxItt.hasNext():
|
1123
|
-
rxMatch = rxItt.next()
|
1120
|
+
for res in REGEX_PATTERNS.shortcodePlain.finditer(text):
|
1124
1121
|
temp.append((
|
1125
|
-
|
1126
|
-
|
1127
|
-
self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0),
|
1122
|
+
res.start(1), res.end(1),
|
1123
|
+
self._shortCodeFmt.get(res.group(1).lower(), 0),
|
1128
1124
|
"",
|
1129
1125
|
))
|
1130
1126
|
|
1131
1127
|
# Match Shortcode w/Values
|
1132
|
-
rxItt = self._rxShortCodeVals.globalMatch(text, 0)
|
1133
1128
|
tHandle = self._handle or ""
|
1134
|
-
|
1135
|
-
|
1136
|
-
kind = self._shortCodeVals.get(rxMatch.captured(1).lower(), 0)
|
1129
|
+
for res in REGEX_PATTERNS.shortcodeValue.finditer(text):
|
1130
|
+
kind = self._shortCodeVals.get(res.group(1).lower(), 0)
|
1137
1131
|
temp.append((
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
f"{tHandle}:{rxMatch.captured(2)}",
|
1132
|
+
res.start(0), res.end(0),
|
1133
|
+
TextFmt.STRIP if kind == skip else kind,
|
1134
|
+
f"{tHandle}:{res.group(2)}",
|
1142
1135
|
))
|
1143
1136
|
|
1144
1137
|
# Match Dialogue
|
1145
|
-
if self.
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1138
|
+
if self._hlightDialog and hDialog:
|
1139
|
+
if self._dialogParser.enabled:
|
1140
|
+
for pos, end in self._dialogParser(text):
|
1141
|
+
temp.append((pos, 0, TextFmt.COL_B, "dialog"))
|
1142
|
+
temp.append((end, 0, TextFmt.COL_E, ""))
|
1143
|
+
if self._rxAltDialog:
|
1144
|
+
for res in self._rxAltDialog.finditer(text):
|
1145
|
+
temp.append((res.start(0), 0, TextFmt.COL_B, "altdialog"))
|
1146
|
+
temp.append((res.end(0), 0, TextFmt.COL_E, ""))
|
1152
1147
|
|
1153
1148
|
# Post-process text and format
|
1154
1149
|
result = text
|
1155
1150
|
formats = []
|
1156
|
-
for pos,
|
1151
|
+
for pos, end, fmt, meta in reversed(sorted(temp, key=lambda x: x[0])):
|
1157
1152
|
if fmt > 0:
|
1158
|
-
if
|
1159
|
-
result = result[:pos] + result[
|
1160
|
-
formats = [(p-
|
1161
|
-
formats.insert(0, (pos, fmt,
|
1153
|
+
if end > pos:
|
1154
|
+
result = result[:pos] + result[end:]
|
1155
|
+
formats = [(p+pos-end if p > pos else p, f, m) for p, f, m in formats]
|
1156
|
+
formats.insert(0, (pos, fmt, meta))
|
1162
1157
|
|
1163
1158
|
return result, formats
|
1164
1159
|
|
@@ -1204,6 +1199,7 @@ class HeadingFormatter:
|
|
1204
1199
|
def apply(self, hFormat: str, text: str, nHead: int) -> str:
|
1205
1200
|
"""Apply formatting to a specific heading."""
|
1206
1201
|
hFormat = hFormat.replace(nwHeadFmt.TITLE, text)
|
1202
|
+
hFormat = hFormat.replace(nwHeadFmt.BR, "\n")
|
1207
1203
|
hFormat = hFormat.replace(nwHeadFmt.CH_NUM, str(self._chCount))
|
1208
1204
|
hFormat = hFormat.replace(nwHeadFmt.SC_NUM, str(self._scChCount))
|
1209
1205
|
hFormat = hFormat.replace(nwHeadFmt.SC_ABS, str(self._scAbsCount))
|