novelWriter 2.3.1__py3-none-any.whl → 2.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/METADATA +5 -6
- {novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/RECORD +102 -95
- novelwriter/__init__.py +7 -7
- novelwriter/assets/icons/none.svg +4 -0
- novelwriter/assets/icons/typicons_dark/icons.conf +4 -0
- novelwriter/assets/icons/typicons_dark/nw_tb-mark.svg +7 -0
- novelwriter/assets/icons/typicons_dark/typ_refresh-flipped.svg +1 -1
- novelwriter/assets/icons/typicons_dark/typ_refresh.svg +1 -1
- novelwriter/assets/icons/typicons_dark/typ_search-grey.svg +4 -0
- novelwriter/assets/icons/typicons_dark/typ_times.svg +1 -1
- novelwriter/assets/icons/typicons_dark/typ_unfold-hidden.svg +4 -0
- novelwriter/assets/icons/typicons_dark/typ_unfold-visible.svg +4 -0
- novelwriter/assets/icons/typicons_light/icons.conf +4 -0
- novelwriter/assets/icons/typicons_light/nw_tb-mark.svg +7 -0
- novelwriter/assets/icons/typicons_light/typ_refresh-flipped.svg +1 -1
- novelwriter/assets/icons/typicons_light/typ_refresh.svg +1 -1
- novelwriter/assets/icons/typicons_light/typ_search-grey.svg +4 -0
- novelwriter/assets/icons/typicons_light/typ_times.svg +1 -1
- novelwriter/assets/icons/typicons_light/typ_unfold-hidden.svg +4 -0
- novelwriter/assets/icons/typicons_light/typ_unfold-visible.svg +4 -0
- novelwriter/assets/manual.pdf +0 -0
- novelwriter/assets/sample.zip +0 -0
- novelwriter/assets/syntax/default_dark.conf +1 -0
- novelwriter/assets/syntax/default_light.conf +1 -0
- novelwriter/assets/syntax/grey_dark.conf +1 -0
- novelwriter/assets/syntax/grey_light.conf +1 -0
- novelwriter/assets/syntax/light_owl.conf +1 -0
- novelwriter/assets/syntax/night_owl.conf +1 -0
- novelwriter/assets/syntax/solarized_dark.conf +1 -0
- novelwriter/assets/syntax/solarized_light.conf +1 -0
- novelwriter/assets/syntax/tomorrow.conf +1 -0
- novelwriter/assets/syntax/tomorrow_night.conf +1 -0
- novelwriter/assets/syntax/tomorrow_night_blue.conf +1 -0
- novelwriter/assets/syntax/tomorrow_night_bright.conf +1 -0
- novelwriter/assets/syntax/tomorrow_night_eighties.conf +1 -0
- novelwriter/assets/text/credits_en.htm +25 -23
- novelwriter/common.py +7 -2
- novelwriter/config.py +43 -16
- novelwriter/constants.py +5 -6
- novelwriter/core/buildsettings.py +60 -40
- novelwriter/core/coretools.py +97 -13
- novelwriter/core/docbuild.py +74 -7
- novelwriter/core/document.py +24 -3
- novelwriter/core/index.py +31 -112
- novelwriter/core/project.py +10 -15
- novelwriter/core/sessions.py +2 -2
- novelwriter/core/status.py +6 -5
- novelwriter/core/storage.py +8 -2
- novelwriter/core/tohtml.py +22 -25
- novelwriter/core/tokenizer.py +416 -232
- novelwriter/core/tomd.py +17 -8
- novelwriter/core/toodt.py +385 -350
- novelwriter/core/tree.py +8 -8
- novelwriter/dialogs/about.py +9 -11
- novelwriter/dialogs/docmerge.py +17 -14
- novelwriter/dialogs/docsplit.py +20 -19
- novelwriter/dialogs/editlabel.py +5 -4
- novelwriter/dialogs/preferences.py +31 -39
- novelwriter/dialogs/projectsettings.py +29 -26
- novelwriter/dialogs/quotes.py +10 -9
- novelwriter/dialogs/wordlist.py +15 -12
- novelwriter/enum.py +17 -14
- novelwriter/error.py +13 -11
- novelwriter/extensions/circularprogress.py +12 -8
- novelwriter/extensions/configlayout.py +1 -3
- novelwriter/extensions/modified.py +51 -2
- novelwriter/extensions/pagedsidebar.py +16 -14
- novelwriter/extensions/simpleprogress.py +3 -1
- novelwriter/extensions/statusled.py +3 -1
- novelwriter/extensions/switch.py +10 -9
- novelwriter/extensions/switchbox.py +14 -13
- novelwriter/extensions/versioninfo.py +1 -1
- novelwriter/gui/doceditor.py +413 -478
- novelwriter/gui/dochighlight.py +33 -29
- novelwriter/gui/docviewer.py +162 -175
- novelwriter/gui/docviewerpanel.py +20 -37
- novelwriter/gui/editordocument.py +15 -4
- novelwriter/gui/itemdetails.py +51 -54
- novelwriter/gui/mainmenu.py +37 -16
- novelwriter/gui/noveltree.py +30 -36
- novelwriter/gui/outline.py +114 -92
- novelwriter/gui/projtree.py +60 -66
- novelwriter/gui/search.py +362 -0
- novelwriter/gui/sidebar.py +36 -45
- novelwriter/gui/statusbar.py +14 -14
- novelwriter/gui/theme.py +93 -28
- novelwriter/guimain.py +207 -200
- novelwriter/shared.py +31 -6
- novelwriter/text/counting.py +137 -0
- novelwriter/tools/dictionaries.py +13 -12
- novelwriter/tools/lipsum.py +20 -17
- novelwriter/tools/manusbuild.py +35 -27
- novelwriter/tools/manuscript.py +374 -90
- novelwriter/tools/manussettings.py +261 -124
- novelwriter/tools/noveldetails.py +20 -18
- novelwriter/tools/welcome.py +48 -44
- novelwriter/tools/writingstats.py +61 -55
- novelwriter/types.py +90 -0
- novelwriter/core/__init__.py +0 -3
- novelwriter/dialogs/__init__.py +0 -3
- novelwriter/extensions/__init__.py +0 -3
- novelwriter/gui/__init__.py +0 -3
- novelwriter/tools/__init__.py +0 -3
- {novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/LICENSE.md +0 -0
- {novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/WHEEL +0 -0
- {novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/entry_points.txt +0 -0
- {novelWriter-2.3.1.dist-info → novelWriter-2.4rc1.dist-info}/top_level.txt +0 -0
novelwriter/core/tokenizer.py
CHANGED
@@ -35,13 +35,13 @@ from functools import partial
|
|
35
35
|
|
36
36
|
from PyQt5.QtCore import QCoreApplication, QRegularExpression
|
37
37
|
|
38
|
-
from novelwriter.enum import nwComment, nwItemLayout
|
39
38
|
from novelwriter.common import formatTimeStamp, numberToRoman, checkInt
|
40
39
|
from novelwriter.constants import (
|
41
40
|
nwHeadFmt, nwKeyWords, nwLabels, nwRegEx, nwShortcode, nwUnicode, trConst
|
42
41
|
)
|
43
42
|
from novelwriter.core.index import processComment
|
44
43
|
from novelwriter.core.project import NWProject
|
44
|
+
from novelwriter.enum import nwComment, nwItemLayout
|
45
45
|
|
46
46
|
logger = logging.getLogger(__name__)
|
47
47
|
|
@@ -74,10 +74,12 @@ class Tokenizer(ABC):
|
|
74
74
|
FMT_D_E = 6 # End strikeout
|
75
75
|
FMT_U_B = 7 # Begin underline
|
76
76
|
FMT_U_E = 8 # End underline
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
77
|
+
FMT_M_B = 9 # Begin mark
|
78
|
+
FMT_M_E = 10 # End mark
|
79
|
+
FMT_SUP_B = 11 # Begin superscript
|
80
|
+
FMT_SUP_E = 12 # End superscript
|
81
|
+
FMT_SUB_B = 13 # Begin subscript
|
82
|
+
FMT_SUB_E = 14 # End subscript
|
81
83
|
|
82
84
|
# Block Type
|
83
85
|
T_EMPTY = 1 # Empty line (new paragraph)
|
@@ -86,14 +88,13 @@ class Tokenizer(ABC):
|
|
86
88
|
T_COMMENT = 4 # Comment line
|
87
89
|
T_KEYWORD = 5 # Command line
|
88
90
|
T_TITLE = 6 # Title
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
T_SKIP = 14 # Paragraph break
|
91
|
+
T_HEAD1 = 7 # Heading 1
|
92
|
+
T_HEAD2 = 8 # Heading 2
|
93
|
+
T_HEAD3 = 9 # Heading 3
|
94
|
+
T_HEAD4 = 10 # Heading 4
|
95
|
+
T_TEXT = 11 # Text line
|
96
|
+
T_SEP = 12 # Scene separator
|
97
|
+
T_SKIP = 13 # Paragraph break
|
97
98
|
|
98
99
|
# Block Style
|
99
100
|
A_NONE = 0x0000 # No special style
|
@@ -108,32 +109,38 @@ class Tokenizer(ABC):
|
|
108
109
|
A_IND_L = 0x0100 # Left indentation
|
109
110
|
A_IND_R = 0x0200 # Right indentation
|
110
111
|
|
112
|
+
# Lookups
|
113
|
+
L_HEADINGS = [T_TITLE, T_HEAD1, T_HEAD2, T_HEAD3, T_HEAD4]
|
114
|
+
|
111
115
|
def __init__(self, project: NWProject) -> None:
|
112
116
|
|
113
117
|
self._project = project
|
114
118
|
|
115
119
|
# Data Variables
|
116
120
|
self._text = "" # The raw text to be tokenized
|
117
|
-
self.
|
121
|
+
self._handle = None # The item handle currently being processed
|
118
122
|
self._result = "" # The result of the last document
|
119
123
|
|
120
124
|
self._keepMarkdown = False # Whether to keep the markdown text
|
121
125
|
self._allMarkdown = [] # The result novelWriter markdown of all documents
|
122
126
|
|
123
|
-
# Processed Tokens
|
127
|
+
# Processed Tokens and Meta Data
|
124
128
|
self._tokens: list[tuple[int, int, str, list[tuple[int, int]], int]] = []
|
129
|
+
self._counts: dict[str, int] = {}
|
130
|
+
self._outline: dict[str, str] = {}
|
125
131
|
|
126
132
|
# User Settings
|
127
|
-
self._textFont
|
128
|
-
self._textSize
|
129
|
-
self._textFixed
|
130
|
-
self._lineHeight
|
131
|
-
self._blockIndent
|
132
|
-
self._doJustify
|
133
|
-
self._doBodyText
|
134
|
-
self._doSynopsis
|
135
|
-
self._doComments
|
136
|
-
self._doKeywords
|
133
|
+
self._textFont = "Serif" # Output text font
|
134
|
+
self._textSize = 11 # Output text size
|
135
|
+
self._textFixed = False # Fixed width text
|
136
|
+
self._lineHeight = 1.15 # Line height in units of em
|
137
|
+
self._blockIndent = 4.00 # Block indent in units of em
|
138
|
+
self._doJustify = False # Justify text
|
139
|
+
self._doBodyText = True # Include body text
|
140
|
+
self._doSynopsis = False # Also process synopsis comments
|
141
|
+
self._doComments = False # Also process comments
|
142
|
+
self._doKeywords = False # Also process keywords like tags and references
|
143
|
+
self._skipKeywords = set() # Keywords to ignore
|
137
144
|
|
138
145
|
# Margins
|
139
146
|
self._marginTitle = (1.000, 0.500)
|
@@ -149,21 +156,28 @@ class Tokenizer(ABC):
|
|
149
156
|
self._fmtChapter = nwHeadFmt.TITLE # Formatting for numbered chapters
|
150
157
|
self._fmtUnNum = nwHeadFmt.TITLE # Formatting for unnumbered chapters
|
151
158
|
self._fmtScene = nwHeadFmt.TITLE # Formatting for scenes
|
159
|
+
self._fmtHScene = nwHeadFmt.TITLE # Formatting for hard scenes
|
152
160
|
self._fmtSection = nwHeadFmt.TITLE # Formatting for sections
|
153
161
|
|
154
|
-
self.
|
155
|
-
self.
|
162
|
+
self._hideTitle = False # Do not include title headings
|
163
|
+
self._hideChapter = False # Do not include chapter headings
|
164
|
+
self._hideUnNum = False # Do not include unnumbered headings
|
165
|
+
self._hideScene = False # Do not include scene headings
|
166
|
+
self._hideHScene = False # Do not include hard scene headings
|
167
|
+
self._hideSection = False # Do not include section headings
|
156
168
|
|
157
|
-
self.
|
169
|
+
self._linkHeadings = False # Add an anchor before headings
|
170
|
+
|
171
|
+
self._titleStyle = self.A_CENTRE | self.A_PBB
|
172
|
+
self._chapterStyle = self.A_PBB
|
173
|
+
self._sceneStyle = self.A_NONE
|
158
174
|
|
159
175
|
# Instance Variables
|
160
176
|
self._hFormatter = HeadingFormatter(self._project)
|
161
|
-
self.
|
177
|
+
self._noSep = True # Flag to indicate that we don't want a scene separator
|
162
178
|
|
163
179
|
# This File
|
164
|
-
self._isNone = False # Document has unknown layout
|
165
180
|
self._isNovel = False # Document is a novel document
|
166
|
-
self._isNote = False # Document is a project note
|
167
181
|
self._isFirst = True # Document is the first in a set
|
168
182
|
|
169
183
|
# Error Handling
|
@@ -187,6 +201,7 @@ class Tokenizer(ABC):
|
|
187
201
|
nwShortcode.BOLD_O: self.FMT_B_B, nwShortcode.BOLD_C: self.FMT_B_E,
|
188
202
|
nwShortcode.STRIKE_O: self.FMT_D_B, nwShortcode.STRIKE_C: self.FMT_D_E,
|
189
203
|
nwShortcode.ULINE_O: self.FMT_U_B, nwShortcode.ULINE_C: self.FMT_U_E,
|
204
|
+
nwShortcode.MARK_O: self.FMT_M_B, nwShortcode.MARK_C: self.FMT_M_E,
|
190
205
|
nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
|
191
206
|
nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
|
192
207
|
}
|
@@ -203,12 +218,22 @@ class Tokenizer(ABC):
|
|
203
218
|
return self._result
|
204
219
|
|
205
220
|
@property
|
206
|
-
def allMarkdown(self) -> list:
|
221
|
+
def allMarkdown(self) -> list[str]:
|
207
222
|
"""The combined novelWriter Markdown text."""
|
208
223
|
return self._allMarkdown
|
209
224
|
|
210
225
|
@property
|
211
|
-
def
|
226
|
+
def textStats(self) -> dict[str, int]:
|
227
|
+
"""The collected stats about the text."""
|
228
|
+
return self._counts
|
229
|
+
|
230
|
+
@property
|
231
|
+
def textOutline(self) -> dict[str, str]:
|
232
|
+
"""The generated outline of the text."""
|
233
|
+
return self._outline
|
234
|
+
|
235
|
+
@property
|
236
|
+
def errData(self) -> list[str]:
|
212
237
|
"""The error data."""
|
213
238
|
return self._errData
|
214
239
|
|
@@ -216,33 +241,63 @@ class Tokenizer(ABC):
|
|
216
241
|
# Setters
|
217
242
|
##
|
218
243
|
|
219
|
-
def setTitleFormat(self, hFormat: str) -> None:
|
244
|
+
def setTitleFormat(self, hFormat: str, hide: bool = False) -> None:
|
220
245
|
"""Set the title format pattern."""
|
221
246
|
self._fmtTitle = hFormat.strip()
|
247
|
+
self._hideTitle = hide
|
222
248
|
return
|
223
249
|
|
224
|
-
def setChapterFormat(self, hFormat: str) -> None:
|
250
|
+
def setChapterFormat(self, hFormat: str, hide: bool = False) -> None:
|
225
251
|
"""Set the chapter format pattern."""
|
226
252
|
self._fmtChapter = hFormat.strip()
|
253
|
+
self._hideChapter = hide
|
227
254
|
return
|
228
255
|
|
229
|
-
def setUnNumberedFormat(self, hFormat: str) -> None:
|
256
|
+
def setUnNumberedFormat(self, hFormat: str, hide: bool = False) -> None:
|
230
257
|
"""Set the unnumbered format pattern."""
|
231
258
|
self._fmtUnNum = hFormat.strip()
|
259
|
+
self._hideUnNum = hide
|
232
260
|
return
|
233
261
|
|
234
|
-
def setSceneFormat(self, hFormat: str, hide: bool) -> None:
|
262
|
+
def setSceneFormat(self, hFormat: str, hide: bool = False) -> None:
|
235
263
|
"""Set the scene format pattern and hidden status."""
|
236
264
|
self._fmtScene = hFormat.strip()
|
237
265
|
self._hideScene = hide
|
238
266
|
return
|
239
267
|
|
240
|
-
def
|
268
|
+
def setHardSceneFormat(self, hFormat: str, hide: bool = False) -> None:
|
269
|
+
"""Set the hard scene format pattern and hidden status."""
|
270
|
+
self._fmtHScene = hFormat.strip()
|
271
|
+
self._hideHScene = hide
|
272
|
+
return
|
273
|
+
|
274
|
+
def setSectionFormat(self, hFormat: str, hide: bool = False) -> None:
|
241
275
|
"""Set the section format pattern and hidden status."""
|
242
276
|
self._fmtSection = hFormat.strip()
|
243
277
|
self._hideSection = hide
|
244
278
|
return
|
245
279
|
|
280
|
+
def setTitleStyle(self, center: bool, pageBreak: bool) -> None:
|
281
|
+
"""Set the title heading style."""
|
282
|
+
self._titleStyle = (
|
283
|
+
(self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
|
284
|
+
)
|
285
|
+
return
|
286
|
+
|
287
|
+
def setChapterStyle(self, center: bool, pageBreak: bool) -> None:
|
288
|
+
"""Set the chapter heading style."""
|
289
|
+
self._chapterStyle = (
|
290
|
+
(self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
|
291
|
+
)
|
292
|
+
return
|
293
|
+
|
294
|
+
def setSceneStyle(self, center: bool, pageBreak: bool) -> None:
|
295
|
+
"""Set the scene heading style."""
|
296
|
+
self._sceneStyle = (
|
297
|
+
(self.A_CENTRE if center else self.A_NONE) | (self.A_PBB if pageBreak else self.A_NONE)
|
298
|
+
)
|
299
|
+
return
|
300
|
+
|
246
301
|
def setFont(self, family: str, size: int, isFixed: bool = False) -> None:
|
247
302
|
"""Set the build font."""
|
248
303
|
self._textFont = family
|
@@ -271,22 +326,22 @@ class Tokenizer(ABC):
|
|
271
326
|
return
|
272
327
|
|
273
328
|
def setHead1Margins(self, upper: float, lower: float) -> None:
|
274
|
-
"""Set the upper and lower
|
329
|
+
"""Set the upper and lower heading 1 margin."""
|
275
330
|
self._marginHead1 = (float(upper), float(lower))
|
276
331
|
return
|
277
332
|
|
278
333
|
def setHead2Margins(self, upper: float, lower: float) -> None:
|
279
|
-
"""Set the upper and lower
|
334
|
+
"""Set the upper and lower heading 2 margin."""
|
280
335
|
self._marginHead2 = (float(upper), float(lower))
|
281
336
|
return
|
282
337
|
|
283
338
|
def setHead3Margins(self, upper: float, lower: float) -> None:
|
284
|
-
"""Set the upper and lower
|
339
|
+
"""Set the upper and lower heading 3 margin."""
|
285
340
|
self._marginHead3 = (float(upper), float(lower))
|
286
341
|
return
|
287
342
|
|
288
343
|
def setHead4Margins(self, upper: float, lower: float) -> None:
|
289
|
-
"""Set the upper and lower
|
344
|
+
"""Set the upper and lower heading 4 margin."""
|
290
345
|
self._marginHead4 = (float(upper), float(lower))
|
291
346
|
return
|
292
347
|
|
@@ -300,9 +355,9 @@ class Tokenizer(ABC):
|
|
300
355
|
self._marginMeta = (float(upper), float(lower))
|
301
356
|
return
|
302
357
|
|
303
|
-
def
|
304
|
-
"""Enable or disable adding an anchor before
|
305
|
-
self.
|
358
|
+
def setLinkHeadings(self, state: bool) -> None:
|
359
|
+
"""Enable or disable adding an anchor before headings."""
|
360
|
+
self._linkHeadings = state
|
306
361
|
return
|
307
362
|
|
308
363
|
def setBodyText(self, state: bool) -> None:
|
@@ -325,6 +380,11 @@ class Tokenizer(ABC):
|
|
325
380
|
self._doKeywords = state
|
326
381
|
return
|
327
382
|
|
383
|
+
def setIgnoredKeywords(self, keywords: str) -> None:
|
384
|
+
"""Comma separated string of keywords to ignore."""
|
385
|
+
self._skipKeywords = set(x.lower().strip() for x in keywords.split(","))
|
386
|
+
return
|
387
|
+
|
328
388
|
def setKeepMarkdown(self, state: bool) -> None:
|
329
389
|
"""Keep original markdown during build."""
|
330
390
|
self._keepMarkdown = state
|
@@ -338,47 +398,41 @@ class Tokenizer(ABC):
|
|
338
398
|
def doConvert(self) -> None:
|
339
399
|
raise NotImplementedError
|
340
400
|
|
341
|
-
def addRootHeading(self, tHandle: str) ->
|
401
|
+
def addRootHeading(self, tHandle: str) -> None:
|
342
402
|
"""Add a heading at the start of a new root folder."""
|
343
|
-
|
344
|
-
|
345
|
-
return False
|
346
|
-
|
347
|
-
if self._isFirst:
|
348
|
-
textAlign = self.A_CENTRE
|
349
|
-
self._isFirst = False
|
350
|
-
else:
|
351
|
-
textAlign = self.A_PBB | self.A_CENTRE
|
352
|
-
|
353
|
-
trNotes = self._localLookup("Notes")
|
354
|
-
title = f"{trNotes}: {tItem.itemName}"
|
355
|
-
self._tokens = []
|
356
|
-
self._tokens.append((
|
357
|
-
self.T_TITLE, 0, title, [], textAlign
|
358
|
-
))
|
359
|
-
if self._keepMarkdown:
|
360
|
-
self._allMarkdown.append(f"# {title}\n\n")
|
361
|
-
|
362
|
-
return True
|
363
|
-
|
364
|
-
def setText(self, tHandle: str, text: str | None = None) -> bool:
|
365
|
-
"""Set the text for the tokenizer from a handle. If text is not
|
366
|
-
set, load it from the file.
|
367
|
-
"""
|
368
|
-
self._nwItem = self._project.tree[tHandle]
|
369
|
-
if self._nwItem is None:
|
370
|
-
return False
|
403
|
+
self._text = ""
|
404
|
+
self._handle = None
|
371
405
|
|
372
|
-
if
|
373
|
-
|
406
|
+
if (tItem := self._project.tree[tHandle]) and tItem.isRootType():
|
407
|
+
self._handle = tHandle
|
408
|
+
if self._isFirst:
|
409
|
+
textAlign = self.A_CENTRE
|
410
|
+
self._isFirst = False
|
411
|
+
else:
|
412
|
+
textAlign = self.A_PBB | self.A_CENTRE
|
374
413
|
|
375
|
-
|
414
|
+
trNotes = self._localLookup("Notes")
|
415
|
+
title = f"{trNotes}: {tItem.itemName}"
|
416
|
+
self._tokens = []
|
417
|
+
self._tokens.append((
|
418
|
+
self.T_TITLE, 1, title, [], textAlign
|
419
|
+
))
|
420
|
+
if self._keepMarkdown:
|
421
|
+
self._allMarkdown.append(f"#! {title}\n\n")
|
376
422
|
|
377
|
-
|
378
|
-
self._isNovel = self._nwItem.itemLayout == nwItemLayout.DOCUMENT
|
379
|
-
self._isNote = self._nwItem.itemLayout == nwItemLayout.NOTE
|
423
|
+
return
|
380
424
|
|
381
|
-
|
425
|
+
def setText(self, tHandle: str, text: str | None = None) -> None:
|
426
|
+
"""Set the text for the tokenizer from a handle. If text is not
|
427
|
+
set, it is loaded from the file.
|
428
|
+
"""
|
429
|
+
self._text = ""
|
430
|
+
self._handle = None
|
431
|
+
if nwItem := self._project.tree[tHandle]:
|
432
|
+
self._text = text or self._project.storage.getDocumentText(tHandle)
|
433
|
+
self._handle = tHandle
|
434
|
+
self._isNovel = nwItem.itemLayout == nwItemLayout.DOCUMENT
|
435
|
+
return
|
382
436
|
|
383
437
|
def doPreProcessing(self) -> None:
|
384
438
|
"""Run through the various replace dictionaries."""
|
@@ -399,7 +453,7 @@ class Tokenizer(ABC):
|
|
399
453
|
|
400
454
|
def tokenizeText(self) -> None:
|
401
455
|
"""Scan the text for either lines starting with specific
|
402
|
-
characters that indicate
|
456
|
+
characters that indicate headings, comments, commands etc, or
|
403
457
|
just contain plain text. In the case of plain text, apply the
|
404
458
|
same RegExes that the syntax highlighter uses and save the
|
405
459
|
locations of these formatting tags into the token array.
|
@@ -407,15 +461,18 @@ class Tokenizer(ABC):
|
|
407
461
|
The format of the token list is an entry with a five-tuple for
|
408
462
|
each line in the file. The tuple is as follows:
|
409
463
|
1: The type of the block, self.T_*
|
410
|
-
2: The
|
464
|
+
2: The heading number under which the text is placed
|
411
465
|
3: The text content of the block, without leading tags
|
412
466
|
4: The internal formatting map of the text, self.FMT_*
|
413
467
|
5: The style of the block, self.A_*
|
414
468
|
"""
|
415
469
|
self._tokens = []
|
416
|
-
|
470
|
+
if self._isNovel:
|
471
|
+
self._hFormatter.setHandle(self._handle)
|
472
|
+
|
417
473
|
nHead = 0
|
418
474
|
breakNext = False
|
475
|
+
tmpMarkdown = []
|
419
476
|
for aLine in self._text.splitlines():
|
420
477
|
sLine = aLine.strip().lower()
|
421
478
|
|
@@ -438,11 +495,12 @@ class Tokenizer(ABC):
|
|
438
495
|
# Check Line Format
|
439
496
|
# =================
|
440
497
|
|
441
|
-
if aLine
|
442
|
-
#
|
443
|
-
#
|
444
|
-
#
|
445
|
-
#
|
498
|
+
if aLine.startswith("["):
|
499
|
+
# Special Formats
|
500
|
+
# ===============
|
501
|
+
# Parse special formatting line. This must be a separate if
|
502
|
+
# statement, as it may not reach a continue statement and must
|
503
|
+
# therefore proceed to check other formats.
|
446
504
|
|
447
505
|
if sLine in ("[newpage]", "[new page]"):
|
448
506
|
breakNext = True
|
@@ -466,9 +524,13 @@ class Tokenizer(ABC):
|
|
466
524
|
]
|
467
525
|
continue
|
468
526
|
|
469
|
-
if aLine
|
470
|
-
|
471
|
-
|
527
|
+
if aLine.startswith("%"):
|
528
|
+
# Comments
|
529
|
+
# ========
|
530
|
+
# All style comments are processed and their exact
|
531
|
+
# style extracted. Ignored comments on the '%~' format are
|
532
|
+
# skipped completely.
|
533
|
+
if aLine.startswith("%~"):
|
472
534
|
continue
|
473
535
|
|
474
536
|
cStyle, cText, _ = processComment(aLine)
|
@@ -477,94 +539,181 @@ class Tokenizer(ABC):
|
|
477
539
|
self.T_SYNOPSIS, nHead, cText, [], sAlign
|
478
540
|
))
|
479
541
|
if self._doSynopsis and self._keepMarkdown:
|
480
|
-
tmpMarkdown.append("
|
542
|
+
tmpMarkdown.append(f"{aLine}\n")
|
481
543
|
elif cStyle == nwComment.SHORT:
|
482
544
|
self._tokens.append((
|
483
545
|
self.T_SHORT, nHead, cText, [], sAlign
|
484
546
|
))
|
485
547
|
if self._doSynopsis and self._keepMarkdown:
|
486
|
-
tmpMarkdown.append("
|
548
|
+
tmpMarkdown.append(f"{aLine}\n")
|
487
549
|
else:
|
488
550
|
self._tokens.append((
|
489
551
|
self.T_COMMENT, nHead, cText, [], sAlign
|
490
552
|
))
|
491
553
|
if self._doComments and self._keepMarkdown:
|
492
|
-
tmpMarkdown.append("
|
554
|
+
tmpMarkdown.append(f"{aLine}\n")
|
493
555
|
|
494
|
-
elif aLine
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
tmpMarkdown.append("%s\n" % aLine)
|
556
|
+
elif aLine.startswith("@"):
|
557
|
+
# Keywords
|
558
|
+
# ========
|
559
|
+
# Only valid keyword lines are parsed, and any ignored keywords
|
560
|
+
# are automatically skipped.
|
500
561
|
|
501
|
-
|
502
|
-
if self.
|
503
|
-
|
504
|
-
|
562
|
+
valid, bits, _ = self._project.index.scanThis(aLine)
|
563
|
+
if valid and bits and bits[0] not in self._skipKeywords:
|
564
|
+
self._tokens.append((
|
565
|
+
self.T_KEYWORD, nHead, aLine[1:].strip(), [], sAlign
|
566
|
+
))
|
567
|
+
if self._doKeywords and self._keepMarkdown:
|
568
|
+
tmpMarkdown.append(f"{aLine}\n")
|
569
|
+
|
570
|
+
elif aLine.startswith(("# ", "#! ")):
|
571
|
+
# Title or Partition Headings
|
572
|
+
# ===========================
|
573
|
+
# Main titles are allowed in any document, and they are always
|
574
|
+
# centred and start on a new page. For novel documents, we also
|
575
|
+
# reset all counters when such a title is encountered.
|
576
|
+
# Partition headings are only formatted in novel documents, and
|
577
|
+
# otherwise unchanged. Scene separators are disabled
|
578
|
+
# immediately after partitions, and scene numbers are reset.
|
579
|
+
isPlain = aLine.startswith("# ")
|
505
580
|
|
506
581
|
nHead += 1
|
507
|
-
|
508
|
-
|
509
|
-
)
|
510
|
-
|
511
|
-
tmpMarkdown.append("%s\n" % aLine)
|
512
|
-
|
513
|
-
elif aLine[:3] == "## ":
|
582
|
+
tText = aLine[2:].strip()
|
583
|
+
tType = self.T_HEAD1 if isPlain else self.T_TITLE
|
584
|
+
tStyle = self.A_NONE if isPlain else (self.A_PBB | self.A_CENTRE)
|
585
|
+
sHide = self._hideTitle if isPlain else False
|
514
586
|
if self._isNovel:
|
515
|
-
|
587
|
+
if sHide:
|
588
|
+
tText = ""
|
589
|
+
tType = self.T_EMPTY
|
590
|
+
tStyle = self.A_NONE
|
591
|
+
elif isPlain:
|
592
|
+
tText = self._hFormatter.apply(self._fmtTitle, tText, nHead)
|
593
|
+
tStyle = self._titleStyle
|
594
|
+
if isPlain:
|
595
|
+
self._hFormatter.resetScene()
|
596
|
+
else:
|
597
|
+
self._hFormatter.resetAll()
|
598
|
+
self._noSep = True
|
516
599
|
|
517
|
-
nHead += 1
|
518
600
|
self._tokens.append((
|
519
|
-
|
601
|
+
tType, nHead, tText, [], tStyle
|
520
602
|
))
|
521
603
|
if self._keepMarkdown:
|
522
|
-
tmpMarkdown.append("
|
604
|
+
tmpMarkdown.append(f"{aLine}\n")
|
605
|
+
|
606
|
+
elif aLine.startswith(("## ", "##! ")):
|
607
|
+
# (Unnumbered) Chapter Headings
|
608
|
+
# =============================
|
609
|
+
# Chapter headings are only formatted in novel documents, and
|
610
|
+
# otherwise unchanged. Chapter numbers are bumped before the
|
611
|
+
# heading is formatted. Scene separators are disabled
|
612
|
+
# immediately after chapter headings, and scene numbers are
|
613
|
+
# reset. Unnumbered chapters are only meaningful in Novel docs,
|
614
|
+
# so if we're in a note, we keep them as level 2 headings.
|
615
|
+
isPlain = aLine.startswith("## ")
|
523
616
|
|
524
|
-
elif aLine[:4] == "### ":
|
525
617
|
nHead += 1
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
if self.
|
530
|
-
|
618
|
+
tText = aLine[3:].strip()
|
619
|
+
tType = self.T_HEAD2
|
620
|
+
tStyle = self.A_NONE
|
621
|
+
sHide = self._hideChapter if isPlain else self._hideUnNum
|
622
|
+
tFormat = self._fmtChapter if isPlain else self._fmtUnNum
|
623
|
+
if self._isNovel:
|
624
|
+
if isPlain:
|
625
|
+
self._hFormatter.incChapter()
|
626
|
+
if sHide:
|
627
|
+
tText = ""
|
628
|
+
tType = self.T_EMPTY
|
629
|
+
else:
|
630
|
+
tText = self._hFormatter.apply(tFormat, tText, nHead)
|
631
|
+
tStyle = self._chapterStyle
|
632
|
+
self._hFormatter.resetScene()
|
633
|
+
self._noSep = True
|
531
634
|
|
532
|
-
elif aLine[:5] == "#### ":
|
533
|
-
nHead += 1
|
534
635
|
self._tokens.append((
|
535
|
-
|
636
|
+
tType, nHead, tText, [], tStyle
|
536
637
|
))
|
537
638
|
if self._keepMarkdown:
|
538
|
-
tmpMarkdown.append("
|
639
|
+
tmpMarkdown.append(f"{aLine}\n")
|
640
|
+
|
641
|
+
elif aLine.startswith(("### ", "###! ")):
|
642
|
+
# (Hard) Scene Headings
|
643
|
+
# =====================
|
644
|
+
# Scene headings in novel documents are treated as centred
|
645
|
+
# separators if the formatting does not change the text. If the
|
646
|
+
# format is empty, the scene can be hidden or a blank paragraph
|
647
|
+
# (skip). When the scene title has static text or no text, it
|
648
|
+
# is always ignored if the noSep flag is set. This prevents
|
649
|
+
# separators immediately after other titles. Scene numbers are
|
650
|
+
# always incremented before formatting. For notes, the heading
|
651
|
+
# is unchanged.
|
652
|
+
isPlain = aLine.startswith("### ")
|
539
653
|
|
540
|
-
elif aLine[:3] == "#! ":
|
541
654
|
nHead += 1
|
655
|
+
tText = aLine[4:].strip()
|
656
|
+
tType = self.T_HEAD3
|
657
|
+
tStyle = self.A_NONE
|
658
|
+
sHide = self._hideScene if isPlain else self._hideHScene
|
659
|
+
tFormat = self._fmtScene if isPlain else self._fmtHScene
|
542
660
|
if self._isNovel:
|
543
|
-
|
544
|
-
|
545
|
-
|
661
|
+
self._hFormatter.incScene()
|
662
|
+
if sHide:
|
663
|
+
tText = ""
|
664
|
+
tType = self.T_EMPTY
|
665
|
+
else:
|
666
|
+
tText = self._hFormatter.apply(tFormat, tText, nHead)
|
667
|
+
tStyle = self._sceneStyle
|
668
|
+
if tText == "": # Empty Format
|
669
|
+
tType = self.T_EMPTY if self._noSep else self.T_SKIP
|
670
|
+
elif tText == tFormat: # Static Format
|
671
|
+
tText = "" if self._noSep else tText
|
672
|
+
tType = self.T_EMPTY if self._noSep else self.T_SEP
|
673
|
+
tStyle = self.A_NONE if self._noSep else self.A_CENTRE
|
674
|
+
self._noSep = False
|
546
675
|
|
547
676
|
self._tokens.append((
|
548
|
-
|
677
|
+
tType, nHead, tText, [], tStyle
|
549
678
|
))
|
550
679
|
if self._keepMarkdown:
|
551
|
-
tmpMarkdown.append("
|
680
|
+
tmpMarkdown.append(f"{aLine}\n")
|
681
|
+
|
682
|
+
elif aLine.startswith("#### "):
|
683
|
+
# Section Headings
|
684
|
+
# =================
|
685
|
+
# Section headings in novel docs are treated as centred
|
686
|
+
# separators if the formatting does not change the text. If the
|
687
|
+
# format is empty, the section can be hidden or a blank
|
688
|
+
# paragraph (skip). For notes, the heading is unchanged.
|
552
689
|
|
553
|
-
elif aLine[:4] == "##! ":
|
554
690
|
nHead += 1
|
691
|
+
tText = aLine[5:].strip()
|
692
|
+
tType = self.T_HEAD4
|
693
|
+
tStyle = self.A_NONE
|
555
694
|
if self._isNovel:
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
695
|
+
if self._hideSection:
|
696
|
+
tText = ""
|
697
|
+
tType = self.T_EMPTY
|
698
|
+
else:
|
699
|
+
tText = self._hFormatter.apply(self._fmtSection, tText, nHead)
|
700
|
+
if tText == "": # Empty Format
|
701
|
+
tType = self.T_SKIP
|
702
|
+
elif tText == self._fmtSection: # Static Format
|
703
|
+
tType = self.T_SEP
|
704
|
+
tStyle = self.A_CENTRE
|
560
705
|
|
561
706
|
self._tokens.append((
|
562
|
-
|
707
|
+
tType, nHead, tText, [], tStyle
|
563
708
|
))
|
564
709
|
if self._keepMarkdown:
|
565
|
-
tmpMarkdown.append("
|
710
|
+
tmpMarkdown.append(f"{aLine}\n")
|
566
711
|
|
567
712
|
else:
|
713
|
+
# Text Lines
|
714
|
+
# ==========
|
715
|
+
# Anything remaining at this point is body text. If body text
|
716
|
+
# is not disabled, we proceed to process text formatting.
|
568
717
|
if not self._doBodyText:
|
569
718
|
# Skip all body text
|
570
719
|
continue
|
@@ -606,11 +755,11 @@ class Tokenizer(ABC):
|
|
606
755
|
self.T_TEXT, nHead, tLine, fmtPos, sAlign
|
607
756
|
))
|
608
757
|
if self._keepMarkdown:
|
609
|
-
tmpMarkdown.append("
|
758
|
+
tmpMarkdown.append(f"{aLine}\n")
|
610
759
|
|
611
760
|
# If we have content, turn off the first page flag
|
612
761
|
if self._isFirst and self._tokens:
|
613
|
-
self._isFirst = False
|
762
|
+
self._isFirst = False # First document has been processed
|
614
763
|
|
615
764
|
# Make sure the token array doesn't start with a page break
|
616
765
|
# on the very first page, adding a blank first page.
|
@@ -626,8 +775,6 @@ class Tokenizer(ABC):
|
|
626
775
|
))
|
627
776
|
if self._keepMarkdown:
|
628
777
|
tmpMarkdown.append("\n")
|
629
|
-
|
630
|
-
if self._keepMarkdown:
|
631
778
|
self._allMarkdown.append("".join(tmpMarkdown))
|
632
779
|
|
633
780
|
# Second Pass
|
@@ -654,99 +801,136 @@ class Tokenizer(ABC):
|
|
654
801
|
|
655
802
|
return
|
656
803
|
|
657
|
-
def
|
658
|
-
"""
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
elif token[0] == self.T_HEAD1: # Partition
|
674
|
-
|
675
|
-
tTemp = self._hFormatter.apply(self._fmtTitle, token[2], token[1])
|
676
|
-
self._tokens[n] = (
|
677
|
-
token[0], token[1], tTemp, [], token[4]
|
678
|
-
)
|
679
|
-
|
680
|
-
# Set scene variables
|
681
|
-
self._skipSeparator = True
|
682
|
-
self._hFormatter.resetScene()
|
683
|
-
|
684
|
-
elif token[0] in (self.T_HEAD2, self.T_UNNUM): # Chapter, Unnumbered
|
685
|
-
|
686
|
-
# Numbered or Unnumbered
|
687
|
-
if token[0] == self.T_UNNUM:
|
688
|
-
tTemp = self._hFormatter.apply(self._fmtUnNum, token[2], token[1])
|
689
|
-
else:
|
690
|
-
self._hFormatter.incChapter()
|
691
|
-
tTemp = self._hFormatter.apply(self._fmtChapter, token[2], token[1])
|
692
|
-
|
693
|
-
# Format the chapter header
|
694
|
-
self._tokens[n] = (
|
695
|
-
token[0], token[1], tTemp, [], token[4]
|
696
|
-
)
|
697
|
-
|
698
|
-
# Set scene variables
|
699
|
-
self._skipSeparator = True
|
700
|
-
self._hFormatter.resetScene()
|
701
|
-
|
702
|
-
elif token[0] == self.T_HEAD3: # Scene
|
703
|
-
|
704
|
-
self._hFormatter.incScene()
|
705
|
-
|
706
|
-
tTemp = self._hFormatter.apply(self._fmtScene, token[2], token[1])
|
707
|
-
if tTemp == "" and self._hideScene:
|
708
|
-
self._tokens[n] = (
|
709
|
-
self.T_EMPTY, token[1], "", [], self.A_NONE
|
710
|
-
)
|
711
|
-
elif tTemp == "" and not self._hideScene:
|
712
|
-
self._tokens[n] = (
|
713
|
-
self.T_EMPTY if self._skipSeparator else self.T_SKIP, token[1],
|
714
|
-
"", [], self.A_NONE if self._skipSeparator else token[4]
|
715
|
-
)
|
716
|
-
elif tTemp == self._fmtScene:
|
717
|
-
self._tokens[n] = (
|
718
|
-
self.T_EMPTY if self._skipSeparator else self.T_SEP, token[1],
|
719
|
-
"" if self._skipSeparator else tTemp, [],
|
720
|
-
self.A_NONE if self._skipSeparator else (token[4] | self.A_CENTRE)
|
721
|
-
)
|
722
|
-
else:
|
723
|
-
self._tokens[n] = (
|
724
|
-
token[0], token[1], tTemp, [], token[4]
|
725
|
-
)
|
804
|
+
def buildOutline(self) -> None:
|
805
|
+
"""Build an outline of the text up to level 3 headings."""
|
806
|
+
tHandle = self._handle or ""
|
807
|
+
isNovel = self._isNovel
|
808
|
+
for tType, nHead, tText, _, _ in self._tokens:
|
809
|
+
if tType == self.T_TITLE:
|
810
|
+
prefix = "TT"
|
811
|
+
elif tType == self.T_HEAD1:
|
812
|
+
prefix = "PT" if isNovel else "H1"
|
813
|
+
elif tType == self.T_HEAD2:
|
814
|
+
prefix = "CH" if isNovel else "H2"
|
815
|
+
elif tType == self.T_HEAD3:
|
816
|
+
prefix = "SC" if isNovel else "H3"
|
817
|
+
else:
|
818
|
+
continue
|
726
819
|
|
727
|
-
|
820
|
+
key = f"{tHandle}:T{nHead:04d}"
|
821
|
+
text = tText.replace(nwHeadFmt.BR, " ").replace("&amp;", "&")
|
822
|
+
self._outline[key] = f"{prefix}|{text}"
|
728
823
|
|
729
|
-
|
824
|
+
return
|
730
825
|
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
826
|
+
def countStats(self) -> None:
|
827
|
+
"""Count stats on the tokenized text."""
|
828
|
+
titleCount = self._counts.get("titleCount", 0)
|
829
|
+
paragraphCount = self._counts.get("paragraphCount", 0)
|
830
|
+
|
831
|
+
allWords = self._counts.get("allWords", 0)
|
832
|
+
textWords = self._counts.get("textWords", 0)
|
833
|
+
titleWords = self._counts.get("titleWords", 0)
|
834
|
+
|
835
|
+
allChars = self._counts.get("allChars", 0)
|
836
|
+
textChars = self._counts.get("textChars", 0)
|
837
|
+
titleChars = self._counts.get("titleChars", 0)
|
838
|
+
|
839
|
+
allWordChars = self._counts.get("allWordChars", 0)
|
840
|
+
textWordChars = self._counts.get("textWordChars", 0)
|
841
|
+
titleWordChars = self._counts.get("titleWordChars", 0)
|
842
|
+
|
843
|
+
para = []
|
844
|
+
for tType, _, tText, _, _ in self._tokens:
|
845
|
+
tText = tText.replace(nwUnicode.U_ENDASH, " ")
|
846
|
+
tText = tText.replace(nwUnicode.U_EMDASH, " ")
|
847
|
+
|
848
|
+
tWords = tText.split()
|
849
|
+
nWords = len(tWords)
|
850
|
+
nChars = len(tText)
|
851
|
+
nWChars = len("".join(tWords))
|
852
|
+
|
853
|
+
if tType == self.T_EMPTY:
|
854
|
+
if len(para) > 0:
|
855
|
+
tTemp = "\n".join(para)
|
856
|
+
tPWords = tTemp.split()
|
857
|
+
nPWords = len(tPWords)
|
858
|
+
nPChars = len(tTemp)
|
859
|
+
nPWChars = len("".join(tPWords))
|
860
|
+
|
861
|
+
paragraphCount += 1
|
862
|
+
allWords += nPWords
|
863
|
+
textWords += nPWords
|
864
|
+
allChars += nPChars
|
865
|
+
textChars += nPChars
|
866
|
+
allWordChars += nPWChars
|
867
|
+
textWordChars += nPWChars
|
868
|
+
para = []
|
869
|
+
|
870
|
+
elif tType in self.L_HEADINGS:
|
871
|
+
titleCount += 1
|
872
|
+
allWords += nWords
|
873
|
+
titleWords += nWords
|
874
|
+
allChars += nChars
|
875
|
+
allWordChars += nWChars
|
876
|
+
titleChars += nChars
|
877
|
+
titleWordChars += nWChars
|
878
|
+
|
879
|
+
elif tType == self.T_SEP:
|
880
|
+
allWords += nWords
|
881
|
+
allChars += nChars
|
882
|
+
allWordChars += nWChars
|
883
|
+
|
884
|
+
elif tType == self.T_TEXT:
|
885
|
+
para.append(tText.rstrip())
|
886
|
+
|
887
|
+
elif tType == self.T_SYNOPSIS and self._doSynopsis:
|
888
|
+
text = "{0}: {1}".format(self._localLookup("Synopsis"), tText)
|
889
|
+
words = text.split()
|
890
|
+
allWords += len(words)
|
891
|
+
allChars += len(text)
|
892
|
+
allWordChars += len("".join(words))
|
893
|
+
|
894
|
+
elif tType == self.T_SHORT and self._doSynopsis:
|
895
|
+
text = "{0}: {1}".format(self._localLookup("Short Description"), tText)
|
896
|
+
words = text.split()
|
897
|
+
allWords += len(words)
|
898
|
+
allChars += len(text)
|
899
|
+
allWordChars += len("".join(words))
|
900
|
+
|
901
|
+
elif tType == self.T_COMMENT and self._doComments:
|
902
|
+
text = "{0}: {1}".format(self._localLookup("Comment"), tText)
|
903
|
+
words = text.split()
|
904
|
+
allWords += len(words)
|
905
|
+
allChars += len(text)
|
906
|
+
allWordChars += len("".join(words))
|
907
|
+
|
908
|
+
elif tType == self.T_KEYWORD and self._doKeywords:
|
909
|
+
valid, bits, _ = self._project.index.scanThis("@"+tText)
|
910
|
+
if valid and bits:
|
911
|
+
key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
|
912
|
+
text = "{0}: {1}".format(key, ", ".join(bits[1:]))
|
913
|
+
words = text.split()
|
914
|
+
allWords += len(words)
|
915
|
+
allChars += len(text)
|
916
|
+
allWordChars += len("".join(words))
|
917
|
+
|
918
|
+
self._counts["titleCount"] = titleCount
|
919
|
+
self._counts["paragraphCount"] = paragraphCount
|
920
|
+
|
921
|
+
self._counts["allWords"] = allWords
|
922
|
+
self._counts["textWords"] = textWords
|
923
|
+
self._counts["titleWords"] = titleWords
|
924
|
+
|
925
|
+
self._counts["allChars"] = allChars
|
926
|
+
self._counts["textChars"] = textChars
|
927
|
+
self._counts["titleChars"] = titleChars
|
928
|
+
|
929
|
+
self._counts["allWordChars"] = allWordChars
|
930
|
+
self._counts["textWordChars"] = textWordChars
|
931
|
+
self._counts["titleWordChars"] = titleWordChars
|
748
932
|
|
749
|
-
return
|
933
|
+
return
|
750
934
|
|
751
935
|
def saveRawMarkdown(self, path: str | Path) -> None:
|
752
936
|
"""Save the raw text to a plain text file."""
|