novelWriter 2.1.1__py3-none-any.whl → 2.2rc1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (109)
  1. {novelWriter-2.1.1.dist-info → novelWriter-2.2rc1.dist-info}/METADATA +3 -3
  2. {novelWriter-2.1.1.dist-info → novelWriter-2.2rc1.dist-info}/RECORD +105 -76
  3. novelwriter/__init__.py +6 -24
  4. novelwriter/assets/i18n/project_de_DE.json +10 -0
  5. novelwriter/assets/i18n/project_en_GB.json +11 -0
  6. novelwriter/assets/i18n/project_en_US.json +10 -0
  7. novelwriter/assets/i18n/project_ja_JP.json +11 -1
  8. novelwriter/assets/i18n/project_nb_NO.json +10 -0
  9. novelwriter/assets/i18n/project_nn_NO.json +10 -0
  10. novelwriter/assets/icons/novelwriter.ico +0 -0
  11. novelwriter/assets/icons/novelwriter.svg +8 -183
  12. novelwriter/assets/icons/typicons_dark/icons.conf +17 -2
  13. novelwriter/assets/icons/typicons_dark/nw_deco-h2-narrow.svg +4 -0
  14. novelwriter/assets/icons/typicons_dark/nw_deco-h3-narrow.svg +4 -0
  15. novelwriter/assets/icons/typicons_dark/nw_deco-h4-narrow.svg +4 -0
  16. novelwriter/assets/icons/typicons_dark/nw_deco-note.svg +4 -0
  17. novelwriter/assets/icons/typicons_dark/nw_panel.svg +4 -0
  18. novelwriter/assets/icons/typicons_dark/nw_tb-bold.svg +4 -0
  19. novelwriter/assets/icons/typicons_dark/nw_tb-italic.svg +4 -0
  20. novelwriter/assets/icons/typicons_dark/nw_tb-markdown.svg +8 -0
  21. novelwriter/assets/icons/typicons_dark/nw_tb-shortcode.svg +8 -0
  22. novelwriter/assets/icons/typicons_dark/nw_tb-strike.svg +4 -0
  23. novelwriter/assets/icons/typicons_dark/nw_tb-subscript.svg +5 -0
  24. novelwriter/assets/icons/typicons_dark/nw_tb-superscript.svg +5 -0
  25. novelwriter/assets/icons/typicons_dark/nw_tb-underline.svg +5 -0
  26. novelwriter/assets/icons/typicons_dark/typ_eye.svg +4 -0
  27. novelwriter/assets/icons/typicons_dark/typ_th-dot-menu.svg +4 -0
  28. novelwriter/assets/icons/typicons_light/icons.conf +17 -2
  29. novelwriter/assets/icons/typicons_light/nw_deco-h2-narrow.svg +4 -0
  30. novelwriter/assets/icons/typicons_light/nw_deco-h3-narrow.svg +4 -0
  31. novelwriter/assets/icons/typicons_light/nw_deco-h4-narrow.svg +4 -0
  32. novelwriter/assets/icons/typicons_light/nw_deco-note.svg +4 -0
  33. novelwriter/assets/icons/typicons_light/nw_panel.svg +4 -0
  34. novelwriter/assets/icons/typicons_light/nw_tb-bold.svg +4 -0
  35. novelwriter/assets/icons/typicons_light/nw_tb-italic.svg +4 -0
  36. novelwriter/assets/icons/typicons_light/nw_tb-markdown.svg +8 -0
  37. novelwriter/assets/icons/typicons_light/nw_tb-shortcode.svg +8 -0
  38. novelwriter/assets/icons/typicons_light/nw_tb-strike.svg +4 -0
  39. novelwriter/assets/icons/typicons_light/nw_tb-subscript.svg +5 -0
  40. novelwriter/assets/icons/typicons_light/nw_tb-superscript.svg +5 -0
  41. novelwriter/assets/icons/typicons_light/nw_tb-underline.svg +5 -0
  42. novelwriter/assets/icons/typicons_light/typ_eye.svg +4 -0
  43. novelwriter/assets/icons/typicons_light/typ_th-dot-menu.svg +4 -0
  44. novelwriter/assets/icons/x-novelwriter-project.ico +0 -0
  45. novelwriter/assets/icons/x-novelwriter-project.svg +7 -206
  46. novelwriter/assets/manual.pdf +0 -0
  47. novelwriter/assets/sample.zip +0 -0
  48. novelwriter/assets/syntax/default_dark.conf +1 -0
  49. novelwriter/assets/syntax/default_light.conf +1 -0
  50. novelwriter/assets/syntax/grey_dark.conf +1 -0
  51. novelwriter/assets/syntax/grey_light.conf +1 -0
  52. novelwriter/assets/syntax/light_owl.conf +1 -0
  53. novelwriter/assets/syntax/night_owl.conf +1 -0
  54. novelwriter/assets/syntax/solarized_dark.conf +1 -0
  55. novelwriter/assets/syntax/solarized_light.conf +1 -0
  56. novelwriter/assets/syntax/tomorrow.conf +1 -0
  57. novelwriter/assets/syntax/tomorrow_night.conf +1 -0
  58. novelwriter/assets/syntax/tomorrow_night_blue.conf +1 -0
  59. novelwriter/assets/syntax/tomorrow_night_bright.conf +1 -0
  60. novelwriter/assets/syntax/tomorrow_night_eighties.conf +1 -0
  61. novelwriter/assets/text/credits_en.htm +7 -0
  62. novelwriter/assets/text/release_notes.htm +7 -37
  63. novelwriter/common.py +22 -1
  64. novelwriter/config.py +27 -42
  65. novelwriter/constants.py +45 -7
  66. novelwriter/core/buildsettings.py +40 -24
  67. novelwriter/core/coretools.py +8 -1
  68. novelwriter/core/docbuild.py +2 -6
  69. novelwriter/core/index.py +264 -175
  70. novelwriter/core/options.py +8 -3
  71. novelwriter/core/project.py +2 -2
  72. novelwriter/core/projectdata.py +3 -3
  73. novelwriter/core/tohtml.py +60 -59
  74. novelwriter/core/tokenizer.py +110 -70
  75. novelwriter/core/tomd.py +51 -38
  76. novelwriter/core/toodt.py +184 -147
  77. novelwriter/dialogs/preferences.py +75 -106
  78. novelwriter/dialogs/projsettings.py +101 -110
  79. novelwriter/dialogs/updates.py +25 -14
  80. novelwriter/enum.py +28 -3
  81. novelwriter/extensions/novelselector.py +1 -1
  82. novelwriter/gui/doceditor.py +1345 -1235
  83. novelwriter/gui/dochighlight.py +98 -62
  84. novelwriter/gui/docviewer.py +151 -340
  85. novelwriter/gui/docviewerpanel.py +457 -0
  86. novelwriter/gui/editordocument.py +126 -0
  87. novelwriter/gui/mainmenu.py +350 -300
  88. novelwriter/gui/noveltree.py +101 -125
  89. novelwriter/gui/outline.py +154 -171
  90. novelwriter/gui/projtree.py +480 -380
  91. novelwriter/gui/sidebar.py +106 -75
  92. novelwriter/gui/statusbar.py +1 -1
  93. novelwriter/gui/theme.py +114 -75
  94. novelwriter/guimain.py +353 -254
  95. novelwriter/shared.py +36 -3
  96. novelwriter/tools/dictionaries.py +268 -0
  97. novelwriter/tools/manusbuild.py +17 -6
  98. novelwriter/tools/manuscript.py +11 -3
  99. novelwriter/tools/manussettings.py +0 -14
  100. novelwriter/tools/projwizard.py +16 -2
  101. novelwriter/tools/writingstats.py +1 -1
  102. novelwriter/assets/icons/typicons_dark/typ_at.svg +0 -4
  103. novelwriter/assets/icons/typicons_dark/typ_th-menu.svg +0 -4
  104. novelwriter/assets/icons/typicons_light/typ_at.svg +0 -4
  105. novelwriter/assets/icons/typicons_light/typ_th-menu.svg +0 -4
  106. {novelWriter-2.1.1.dist-info → novelWriter-2.2rc1.dist-info}/LICENSE.md +0 -0
  107. {novelWriter-2.1.1.dist-info → novelWriter-2.2rc1.dist-info}/WHEEL +0 -0
  108. {novelWriter-2.1.1.dist-info → novelWriter-2.2rc1.dist-info}/entry_points.txt +0 -0
  109. {novelWriter-2.1.1.dist-info → novelWriter-2.2rc1.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ import json
28
28
  import logging
29
29
 
30
30
  from enum import Enum
31
- from typing import TYPE_CHECKING, Any
31
+ from typing import TYPE_CHECKING, Any, TypeVar
32
32
  from pathlib import Path
33
33
 
34
34
  from novelwriter.error import logException
@@ -40,6 +40,8 @@ if TYPE_CHECKING: # pragma: no cover
40
40
 
41
41
  logger = logging.getLogger(__name__)
42
42
 
43
+ NWEnum = TypeVar("NWEnum", bound=Enum)
44
+
43
45
  VALID_MAP = {
44
46
  "GuiWritingStats": {
45
47
  "winWidth", "winHeight", "widthCol0", "widthCol1", "widthCol2",
@@ -67,6 +69,9 @@ VALID_MAP = {
67
69
  "GuiManuscriptBuild": {
68
70
  "winWidth", "winHeight", "fmtWidth", "sumWidth",
69
71
  },
72
+ "GuiDocViewerPanel": {
73
+ "colWidths",
74
+ }
70
75
  }
71
76
 
72
77
 
@@ -201,11 +206,11 @@ class OptionState:
201
206
  return checkBool(self._state[group].get(name, default), default)
202
207
  return default
203
208
 
204
- def getEnum(self, group: str, name: str, lookup: type, default: Enum) -> Enum:
209
+ def getEnum(self, group: str, name: str, lookup: type, default: NWEnum) -> NWEnum:
205
210
  """Return the value mapped to an enum. Otherwise return the
206
211
  default value.
207
212
  """
208
- if issubclass(lookup, Enum):
213
+ if issubclass(lookup, type(default)):
209
214
  if group in self._state:
210
215
  if name in self._state[group]:
211
216
  value = self._state[group][name]
@@ -45,7 +45,7 @@ from novelwriter.core.sessions import NWSessionLog
45
45
  from novelwriter.core.projectxml import ProjectXMLReader, ProjectXMLWriter, XMLReadState
46
46
  from novelwriter.core.projectdata import NWProjectData
47
47
  from novelwriter.common import (
48
- checkStringNone, formatInt, formatTimeStamp, hexToInt, makeFileNameSafe, minmax
48
+ checkStringNone, formatInt, formatTimeStamp, getFileSize, hexToInt, makeFileNameSafe, minmax
49
49
  )
50
50
 
51
51
  if TYPE_CHECKING: # pragma: no cover
@@ -420,7 +420,7 @@ class NWProject:
420
420
  timeStamp = formatTimeStamp(time(), fileSafe=True)
421
421
  archName = baseDir / f"{cleanName} {timeStamp}.zip"
422
422
  if self._storage.zipIt(archName, compression=2):
423
- size = formatInt(archName.stat().st_size)
423
+ size = formatInt(getFileSize(archName))
424
424
  if doNotify:
425
425
  SHARED.info(
426
426
  self.tr("Created a backup of your project of size {0}B.").format(size),
@@ -152,14 +152,14 @@ class NWProjectData:
152
152
  """Return the initial count of words for novel and note
153
153
  documents.
154
154
  """
155
- return tuple(self._initCounts)
155
+ return self._initCounts[0], self._initCounts[1]
156
156
 
157
157
  @property
158
158
  def currCounts(self) -> tuple[int, int]:
159
159
  """Return the current count of words for novel and note
160
160
  documents.
161
161
  """
162
- return tuple(self._currCounts)
162
+ return self._currCounts[0], self._currCounts[1]
163
163
 
164
164
  @property
165
165
  def lastHandle(self) -> dict[str, str | None]:
@@ -170,7 +170,7 @@ class NWProjectData:
170
170
 
171
171
  @property
172
172
  def autoReplace(self) -> dict[str, str]:
173
- """Return the autoreplace dictionary."""
173
+ """Return the auto-replace dictionary."""
174
174
  return self._autoReplace
175
175
 
176
176
  @property
@@ -111,7 +111,7 @@ class ToHtml(Tokenizer):
111
111
 
112
112
  def getFullResultSize(self) -> int:
113
113
  """Return the size of the full HTML result."""
114
- return sum([len(x) for x in self._fullHTML])
114
+ return sum(len(x) for x in self._fullHTML)
115
115
 
116
116
  def doPreProcessing(self) -> None:
117
117
  """Extend the auto-replace to also properly encode some unicode
@@ -122,9 +122,7 @@ class ToHtml(Tokenizer):
122
122
  return
123
123
 
124
124
  def doConvert(self) -> None:
125
- """Convert the list of text tokens into a HTML document saved
126
- to _result.
127
- """
125
+ """Convert the list of text tokens into an HTML document."""
128
126
  if self._genMode == self.M_PREVIEW:
129
127
  htmlTags = { # HTML4 + CSS2 (for Qt)
130
128
  self.FMT_B_B: "<b>",
@@ -133,6 +131,8 @@ class ToHtml(Tokenizer):
133
131
  self.FMT_I_E: "</i>",
134
132
  self.FMT_D_B: "<span style='text-decoration: line-through;'>",
135
133
  self.FMT_D_E: "</span>",
134
+ self.FMT_U_B: "<u>",
135
+ self.FMT_U_E: "</u>",
136
136
  }
137
137
  else:
138
138
  htmlTags = { # HTML5 (for export)
@@ -142,8 +142,15 @@ class ToHtml(Tokenizer):
142
142
  self.FMT_I_E: "</em>",
143
143
  self.FMT_D_B: "<del>",
144
144
  self.FMT_D_E: "</del>",
145
+ self.FMT_U_B: "<span style='text-decoration: underline;'>",
146
+ self.FMT_U_E: "</span>",
145
147
  }
146
148
 
149
+ htmlTags[self.FMT_SUP_B] = "<sup>"
150
+ htmlTags[self.FMT_SUP_E] = "</sup>"
151
+ htmlTags[self.FMT_SUB_B] = "<sub>"
152
+ htmlTags[self.FMT_SUB_E] = "</sub>"
153
+
147
154
  if self._isNovel and self._genMode != self.M_PREVIEW:
148
155
  # For story files, we bump the titles one level up
149
156
  h1Cl = " class='title'"
@@ -160,9 +167,9 @@ class ToHtml(Tokenizer):
160
167
 
161
168
  self._result = ""
162
169
 
163
- thisPar = []
164
- parStyle = None
165
- tmpResult = []
170
+ para = []
171
+ pStyle = None
172
+ lines = []
166
173
 
167
174
  for tType, nHead, tText, tFormat, tStyle in self._tokens:
168
175
 
@@ -174,18 +181,16 @@ class ToHtml(Tokenizer):
174
181
  for c in tText:
175
182
  if c == "<":
176
183
  cText.append("&lt;")
177
- tFormat = [[a + 3 if a > i else a, b, c] for a, b, c in tFormat]
184
+ tFormat = [[p + 3 if p > i else p, f] for p, f in tFormat]
178
185
  i += 4
179
186
  elif c == ">":
180
187
  cText.append("&gt;")
181
- tFormat = [[a + 3 if a > i else a, b, c] for a, b, c in tFormat]
188
+ tFormat = [[p + 3 if p > i else p, f] for p, f in tFormat]
182
189
  i += 4
183
190
  else:
184
191
  cText.append(c)
185
192
  i += 1
186
-
187
193
  tText = "".join(cText)
188
-
189
194
  else:
190
195
  # If we don't have formatting, we can do a plain replace
191
196
  tText = tText.replace("<", "&lt;").replace(">", "&gt;")
@@ -231,69 +236,70 @@ class ToHtml(Tokenizer):
231
236
 
232
237
  # Process Text Type
233
238
  if tType == self.T_EMPTY:
234
- if parStyle is None:
235
- parStyle = ""
236
- if len(thisPar) > 1 and self._cssStyles:
237
- parClass = " class='break'"
239
+ if pStyle is None:
240
+ pStyle = ""
241
+ if len(para) > 1 and self._cssStyles:
242
+ pClass = " class='break'"
238
243
  else:
239
- parClass = ""
240
- if len(thisPar) > 0:
241
- tTemp = "<br/>".join(thisPar)
242
- tmpResult.append(f"<p{parClass+parStyle}>{tTemp.rstrip()}</p>\n")
243
- thisPar = []
244
- parStyle = None
244
+ pClass = ""
245
+ if len(para) > 0:
246
+ tTemp = "<br/>".join(para)
247
+ lines.append(f"<p{pClass+pStyle}>{tTemp.rstrip()}</p>\n")
248
+ para = []
249
+ pStyle = None
245
250
 
246
251
  elif tType == self.T_TITLE:
247
252
  tHead = tText.replace(nwHeadFmt.BR, "<br/>")
248
- tmpResult.append(f"<h1 class='title'{hStyle}>{aNm}{tHead}</h1>\n")
253
+ lines.append(f"<h1 class='title'{hStyle}>{aNm}{tHead}</h1>\n")
249
254
 
250
255
  elif tType == self.T_UNNUM:
251
256
  tHead = tText.replace(nwHeadFmt.BR, "<br/>")
252
- tmpResult.append(f"<{h2}{hStyle}>{aNm}{tHead}</{h2}>\n")
257
+ lines.append(f"<{h2}{hStyle}>{aNm}{tHead}</{h2}>\n")
253
258
 
254
259
  elif tType == self.T_HEAD1:
255
260
  tHead = tText.replace(nwHeadFmt.BR, "<br/>")
256
- tmpResult.append(f"<{h1}{h1Cl}{hStyle}>{aNm}{tHead}</{h1}>\n")
261
+ lines.append(f"<{h1}{h1Cl}{hStyle}>{aNm}{tHead}</{h1}>\n")
257
262
 
258
263
  elif tType == self.T_HEAD2:
259
264
  tHead = tText.replace(nwHeadFmt.BR, "<br/>")
260
- tmpResult.append(f"<{h2}{hStyle}>{aNm}{tHead}</{h2}>\n")
265
+ lines.append(f"<{h2}{hStyle}>{aNm}{tHead}</{h2}>\n")
261
266
 
262
267
  elif tType == self.T_HEAD3:
263
268
  tHead = tText.replace(nwHeadFmt.BR, "<br/>")
264
- tmpResult.append(f"<{h3}{hStyle}>{aNm}{tHead}</{h3}>\n")
269
+ lines.append(f"<{h3}{hStyle}>{aNm}{tHead}</{h3}>\n")
265
270
 
266
271
  elif tType == self.T_HEAD4:
267
272
  tHead = tText.replace(nwHeadFmt.BR, "<br/>")
268
- tmpResult.append(f"<{h4}{hStyle}>{aNm}{tHead}</{h4}>\n")
273
+ lines.append(f"<{h4}{hStyle}>{aNm}{tHead}</{h4}>\n")
269
274
 
270
275
  elif tType == self.T_SEP:
271
- tmpResult.append(f"<p class='sep'{hStyle}>{tText}</p>\n")
276
+ lines.append(f"<p class='sep'{hStyle}>{tText}</p>\n")
272
277
 
273
278
  elif tType == self.T_SKIP:
274
- tmpResult.append(f"<p class='skip'{hStyle}>&nbsp;</p>\n")
279
+ lines.append(f"<p class='skip'{hStyle}>&nbsp;</p>\n")
275
280
 
276
281
  elif tType == self.T_TEXT:
277
282
  tTemp = tText
278
- if parStyle is None:
279
- parStyle = hStyle
280
- for xPos, xLen, xFmt in reversed(tFormat):
281
- tTemp = tTemp[:xPos] + htmlTags[xFmt] + tTemp[xPos+xLen:]
282
- thisPar.append(stripEscape(tTemp.rstrip()))
283
+ if pStyle is None:
284
+ pStyle = hStyle
285
+ for pos, fmt in reversed(tFormat):
286
+ tTemp = f"{tTemp[:pos]}{htmlTags[fmt]}{tTemp[pos:]}"
287
+ para.append(stripEscape(tTemp.rstrip()))
283
288
 
284
289
  elif tType == self.T_SYNOPSIS and self._doSynopsis:
285
- tmpResult.append(self._formatSynopsis(tText))
290
+ lines.append(self._formatSynopsis(tText, True))
291
+
292
+ elif tType == self.T_SHORT and self._doSynopsis:
293
+ lines.append(self._formatSynopsis(tText, False))
286
294
 
287
295
  elif tType == self.T_COMMENT and self._doComments:
288
- tmpResult.append(self._formatComments(tText))
296
+ lines.append(self._formatComments(tText))
289
297
 
290
298
  elif tType == self.T_KEYWORD and self._doKeywords:
291
299
  tTemp = f"<p{hStyle}>{self._formatKeywords(tText)}</p>\n"
292
- tmpResult.append(tTemp)
293
-
294
- self._result = "".join(tmpResult)
295
- tmpResult = []
300
+ lines.append(tTemp)
296
301
 
302
+ self._result = "".join(lines)
297
303
  if self._genMode != self.M_PREVIEW:
298
304
  self._fullHTML.append(self._result)
299
305
 
@@ -451,13 +457,15 @@ class ToHtml(Tokenizer):
451
457
  # Internal Functions
452
458
  ##
453
459
 
454
- def _formatSynopsis(self, text: str) -> str:
460
+ def _formatSynopsis(self, text: str, synopsis: bool) -> str:
455
461
  """Apply HTML formatting to synopsis."""
462
+ if synopsis:
463
+ sSynop = self._localLookup("Synopsis")
464
+ else:
465
+ sSynop = self._localLookup("Short Description")
456
466
  if self._genMode == self.M_PREVIEW:
457
- sSynop = self._trSynopsis
458
467
  return f"<p class='comment'><span class='synopsis'>{sSynop}:</span> {text}</p>\n"
459
468
  else:
460
- sSynop = self._localLookup("Synopsis")
461
469
  return f"<p class='synopsis'><strong>{sSynop}:</strong> {text}</p>\n"
462
470
 
463
471
  def _formatComments(self, text: str) -> str:
@@ -471,25 +479,18 @@ class ToHtml(Tokenizer):
471
479
  def _formatKeywords(self, text: str) -> str:
472
480
  """Apply HTML formatting to keywords."""
473
481
  valid, bits, _ = self._project.index.scanThis("@"+text)
474
- if not valid or not bits:
482
+ if not valid or not bits or bits[0] not in nwLabels.KEY_NAME:
475
483
  return ""
476
484
 
477
- result = ""
478
- tags = []
479
- if bits[0] in nwLabels.KEY_NAME:
480
- result += f"<span class='tags'>{nwLabels.KEY_NAME[bits[0]]}:</span> "
481
- if len(bits) > 1:
482
- if bits[0] == nwKeyWords.TAG_KEY:
483
- result += f"<a name='tag_{bits[1]}'>{bits[1]}</a>"
485
+ result = f"<span class='tags'>{self._localLookup(nwLabels.KEY_NAME[bits[0]])}:</span> "
486
+ if len(bits) > 1:
487
+ if bits[0] == nwKeyWords.TAG_KEY:
488
+ result += f"<a name='tag_{bits[1]}'>{bits[1]}</a>"
489
+ else:
490
+ if self._genMode == self.M_PREVIEW:
491
+ result += ", ".join(f"<a href='#{bits[0][1:]}={t}'>{t}</a>" for t in bits[1:])
484
492
  else:
485
- if self._genMode == self.M_PREVIEW:
486
- for tTag in bits[1:]:
487
- tags.append(f"<a href='#{bits[0][1:]}={tTag}'>{tTag}</a>")
488
- result += ", ".join(tags)
489
- else:
490
- for tTag in bits[1:]:
491
- tags.append(f"<a href='#tag_{tTag}'>{tTag}</a>")
492
- result += ", ".join(tags)
493
+ result += ", ".join(f"<a href='#tag_{t}'>{t}</a>" for t in bits[1:])
493
494
 
494
495
  return result
495
496
 
@@ -31,27 +31,26 @@ import logging
31
31
  from abc import ABC, abstractmethod
32
32
  from time import time
33
33
  from pathlib import Path
34
- from operator import itemgetter
35
34
  from functools import partial
36
35
 
37
36
  from PyQt5.QtCore import QCoreApplication, QRegularExpression
37
+ from novelwriter.core.index import processComment
38
38
 
39
- from novelwriter.enum import nwItemLayout
39
+ from novelwriter.enum import nwComment, nwItemLayout
40
40
  from novelwriter.common import formatTimeStamp, numberToRoman, checkInt
41
- from novelwriter.constants import nwConst, nwHeadFmt, nwRegEx, nwUnicode
41
+ from novelwriter.constants import nwHeadFmt, nwRegEx, nwShortcode, nwUnicode
42
42
  from novelwriter.core.project import NWProject
43
43
 
44
44
  logger = logging.getLogger(__name__)
45
45
 
46
+ ESCAPES = {r"\*": "*", r"\~": "~", r"\_": "_", r"\[": "[", r"\]": "]", r"\ ": ""}
47
+ RX_ESC = re.compile("|".join([re.escape(k) for k in ESCAPES.keys()]), flags=re.DOTALL)
48
+
46
49
 
47
50
  def stripEscape(text) -> str:
48
- """Helper function to strip escaped Markdown characters from
49
- paragraph text.
50
- """
51
+ """Strip escaped Markdown characters from paragraph text."""
51
52
  if "\\" in text:
52
- # Checking first is slightly slower when there are escaped
53
- # characters in the text, but significantly faster when not
54
- return text.replace(r"\*", "*").replace(r"\~", "~").replace(r"\_", "_")
53
+ return RX_ESC.sub(lambda x: ESCAPES[x.group(0)], text)
55
54
  return text
56
55
 
57
56
 
@@ -65,27 +64,34 @@ class Tokenizer(ABC):
65
64
  """
66
65
 
67
66
  # In-Text Format
68
- FMT_B_B = 1 # Begin bold
69
- FMT_B_E = 2 # End bold
70
- FMT_I_B = 3 # Begin italics
71
- FMT_I_E = 4 # End italics
72
- FMT_D_B = 5 # Begin strikeout
73
- FMT_D_E = 6 # End strikeout
67
+ FMT_B_B = 1 # Begin bold
68
+ FMT_B_E = 2 # End bold
69
+ FMT_I_B = 3 # Begin italics
70
+ FMT_I_E = 4 # End italics
71
+ FMT_D_B = 5 # Begin strikeout
72
+ FMT_D_E = 6 # End strikeout
73
+ FMT_U_B = 7 # Begin underline
74
+ FMT_U_E = 8 # End underline
75
+ FMT_SUP_B = 9 # Begin superscript
76
+ FMT_SUP_E = 10 # End superscript
77
+ FMT_SUB_B = 11 # Begin subscript
78
+ FMT_SUB_E = 12 # End subscript
74
79
 
75
80
  # Block Type
76
81
  T_EMPTY = 1 # Empty line (new paragraph)
77
82
  T_SYNOPSIS = 2 # Synopsis comment
78
- T_COMMENT = 3 # Comment line
79
- T_KEYWORD = 4 # Command line
80
- T_TITLE = 5 # Title
81
- T_UNNUM = 6 # Unnumbered
82
- T_HEAD1 = 7 # Header 1
83
- T_HEAD2 = 8 # Header 2
84
- T_HEAD3 = 9 # Header 3
85
- T_HEAD4 = 10 # Header 4
86
- T_TEXT = 11 # Text line
87
- T_SEP = 12 # Scene separator
88
- T_SKIP = 13 # Paragraph break
83
+ T_SHORT = 3 # Short description comment
84
+ T_COMMENT = 4 # Comment line
85
+ T_KEYWORD = 5 # Command line
86
+ T_TITLE = 6 # Title
87
+ T_UNNUM = 7 # Unnumbered
88
+ T_HEAD1 = 8 # Header 1
89
+ T_HEAD2 = 9 # Header 2
90
+ T_HEAD3 = 10 # Header 3
91
+ T_HEAD4 = 11 # Header 4
92
+ T_TEXT = 12 # Text line
93
+ T_SEP = 13 # Scene separator
94
+ T_SKIP = 14 # Paragraph break
89
95
 
90
96
  # Block Style
91
97
  A_NONE = 0x0000 # No special style
@@ -163,8 +169,23 @@ class Tokenizer(ABC):
163
169
  self._localLookup = self._project.localLookup
164
170
  self.tr = partial(QCoreApplication.translate, "Tokenizer")
165
171
 
166
- # Cached Translations
167
- self._trSynopsis = self.tr("Synopsis")
172
+ # Format RegEx
173
+ self._rxMarkdown = [
174
+ (QRegularExpression(nwRegEx.FMT_EI), [0, self.FMT_I_B, 0, self.FMT_I_E]),
175
+ (QRegularExpression(nwRegEx.FMT_EB), [0, self.FMT_B_B, 0, self.FMT_B_E]),
176
+ (QRegularExpression(nwRegEx.FMT_ST), [0, self.FMT_D_B, 0, self.FMT_D_E]),
177
+ ]
178
+ self._rxShortCodes = QRegularExpression(nwRegEx.FMT_SC)
179
+ self._rxShortCodeVals = QRegularExpression(nwRegEx.FMT_SV)
180
+
181
+ self._shortCodeFmt = {
182
+ nwShortcode.ITALIC_O: self.FMT_I_B, nwShortcode.ITALIC_C: self.FMT_I_E,
183
+ nwShortcode.BOLD_O: self.FMT_B_B, nwShortcode.BOLD_C: self.FMT_B_E,
184
+ nwShortcode.STRIKE_O: self.FMT_D_B, nwShortcode.STRIKE_C: self.FMT_D_E,
185
+ nwShortcode.ULINE_O: self.FMT_U_B, nwShortcode.ULINE_C: self.FMT_U_E,
186
+ nwShortcode.SUP_O: self.FMT_SUP_B, nwShortcode.SUP_C: self.FMT_SUP_E,
187
+ nwShortcode.SUB_O: self.FMT_SUB_B, nwShortcode.SUB_C: self.FMT_SUB_E,
188
+ }
168
189
 
169
190
  return
170
191
 
@@ -197,7 +218,7 @@ class Tokenizer(ABC):
197
218
  return
198
219
 
199
220
  def setChapterFormat(self, hFormat: str) -> None:
200
- """Set the chapert format pattern."""
221
+ """Set the chapter format pattern."""
201
222
  self._fmtChapter = hFormat.strip()
202
223
  return
203
224
 
@@ -349,14 +370,6 @@ class Tokenizer(ABC):
349
370
 
350
371
  self._text = text
351
372
 
352
- docSize = len(self._text)
353
- if docSize > nwConst.MAX_DOCSIZE:
354
- errVal = self.tr("Document '{0}' is too big ({1} MB). Skipping.").format(
355
- self._nwItem.itemName, f"{docSize/1.0e6:.2f}"
356
- )
357
- self._text = "# {0}\n\n{1}\n\n".format(self.tr("ERROR"), errVal)
358
- self._errData.append(errVal)
359
-
360
373
  self._isNone = self._nwItem.itemLayout == nwItemLayout.NO_LAYOUT
361
374
  self._isNovel = self._nwItem.itemLayout == nwItemLayout.DOCUMENT
362
375
  self._isNote = self._nwItem.itemLayout == nwItemLayout.NOTE
@@ -395,19 +408,12 @@ class Tokenizer(ABC):
395
408
  4: The internal formatting map of the text, self.FMT_*
396
409
  5: The style of the block, self.A_*
397
410
  """
398
- # RegExes for adding formatting tags within text lines
399
- rxFormats = [
400
- (QRegularExpression(nwRegEx.FMT_EI), [None, self.FMT_I_B, None, self.FMT_I_E]),
401
- (QRegularExpression(nwRegEx.FMT_EB), [None, self.FMT_B_B, None, self.FMT_B_E]),
402
- (QRegularExpression(nwRegEx.FMT_ST), [None, self.FMT_D_B, None, self.FMT_D_E]),
403
- ]
404
-
405
411
  self._tokens = []
406
412
  tmpMarkdown = []
407
413
  nHead = 0
408
414
  breakNext = False
409
415
  for aLine in self._text.splitlines():
410
- sLine = aLine.strip()
416
+ sLine = aLine.strip().lower()
411
417
 
412
418
  # Check for blank lines
413
419
  if len(sLine) == 0:
@@ -430,18 +436,21 @@ class Tokenizer(ABC):
430
436
 
431
437
  if aLine[0] == "[":
432
438
  # Parse special formatting line
439
+ # This must be a separate if statement, as it may not
440
+ # reach a continue statement and must therefore proceed
441
+ # to check other formats.
433
442
 
434
- if sLine in ("[NEWPAGE]", "[NEW PAGE]"):
443
+ if sLine in ("[newpage]", "[new page]"):
435
444
  breakNext = True
436
445
  continue
437
446
 
438
- elif sLine == "[VSPACE]":
447
+ elif sLine == "[vspace]":
439
448
  self._tokens.append(
440
449
  (self.T_SKIP, nHead, "", None, sAlign)
441
450
  )
442
451
  continue
443
452
 
444
- elif sLine.startswith("[VSPACE:") and sLine.endswith("]"):
453
+ elif sLine.startswith("[vspace:") and sLine.endswith("]"):
445
454
  nSkip = checkInt(sLine[8:-1], 0)
446
455
  if nSkip >= 1:
447
456
  self._tokens.append(
@@ -453,18 +462,23 @@ class Tokenizer(ABC):
453
462
  ]
454
463
  continue
455
464
 
456
- elif aLine[0] == "%":
457
- cLine = aLine[1:].lstrip()
458
- synTag = cLine[:9].lower()
459
- if synTag == "synopsis:":
465
+ if aLine[0] == "%":
466
+ cStyle, cText, _ = processComment(aLine)
467
+ if cStyle == nwComment.SYNOPSIS:
460
468
  self._tokens.append((
461
- self.T_SYNOPSIS, nHead, cLine[9:].strip(), None, sAlign
469
+ self.T_SYNOPSIS, nHead, cText, None, sAlign
470
+ ))
471
+ if self._doSynopsis and self._keepMarkdown:
472
+ tmpMarkdown.append("%s\n" % aLine)
473
+ elif cStyle == nwComment.SHORT:
474
+ self._tokens.append((
475
+ self.T_SHORT, nHead, cText, None, sAlign
462
476
  ))
463
477
  if self._doSynopsis and self._keepMarkdown:
464
478
  tmpMarkdown.append("%s\n" % aLine)
465
479
  else:
466
480
  self._tokens.append((
467
- self.T_COMMENT, nHead, aLine[1:].strip(), None, sAlign
481
+ self.T_COMMENT, nHead, cText, None, sAlign
468
482
  ))
469
483
  if self._doComments and self._keepMarkdown:
470
484
  tmpMarkdown.append("%s\n" % aLine)
@@ -578,23 +592,10 @@ class Tokenizer(ABC):
578
592
  if indRight:
579
593
  sAlign |= self.A_IND_R
580
594
 
581
- # Otherwise we use RegEx to find formatting tags within a line of text
582
- fmtPos = []
583
- for theRX, theKeys in rxFormats:
584
- rxThis = theRX.globalMatch(aLine, 0)
585
- while rxThis.hasNext():
586
- rxMatch = rxThis.next()
587
- for n in range(1, len(theKeys)):
588
- if theKeys[n] is not None:
589
- xPos = rxMatch.capturedStart(n)
590
- xLen = rxMatch.capturedLength(n)
591
- fmtPos.append([xPos, xLen, theKeys[n]])
592
-
593
- # Save the line as is, but append the array of formatting locations
594
- # sorted by position
595
- fmtPos = sorted(fmtPos, key=itemgetter(0))
595
+ # Process formats
596
+ tLine, fmtPos = self._extractFormats(aLine)
596
597
  self._tokens.append((
597
- self.T_TEXT, nHead, aLine, fmtPos, sAlign
598
+ self.T_TEXT, nHead, tLine, fmtPos, sAlign
598
599
  ))
599
600
  if self._keepMarkdown:
600
601
  tmpMarkdown.append("%s\n" % aLine)
@@ -771,6 +772,45 @@ class Tokenizer(ABC):
771
772
  json.dump(data, fObj, indent=2)
772
773
  return
773
774
 
775
+ ##
776
+ # Internal Functions
777
+ ##
778
+
779
+ def _extractFormats(self, text: str) -> tuple[str, list[tuple[int, int]]]:
780
+ """Extract format markers from a text paragraph."""
781
+ temp = []
782
+
783
+ # Match Markdown
784
+ for regEx, fmts in self._rxMarkdown:
785
+ rxItt = regEx.globalMatch(text, 0)
786
+ while rxItt.hasNext():
787
+ rxMatch = rxItt.next()
788
+ temp.extend(
789
+ [rxMatch.capturedStart(n), rxMatch.capturedLength(n), fmt]
790
+ for n, fmt in enumerate(fmts) if fmt > 0
791
+ )
792
+
793
+ # Match Shortcodes
794
+ rxItt = self._rxShortCodes.globalMatch(text, 0)
795
+ while rxItt.hasNext():
796
+ rxMatch = rxItt.next()
797
+ temp.append([
798
+ rxMatch.capturedStart(1),
799
+ rxMatch.capturedLength(1),
800
+ self._shortCodeFmt.get(rxMatch.captured(1).lower(), 0)
801
+ ])
802
+
803
+ # Post-process text and format markers
804
+ result = text
805
+ formats = []
806
+ for pos, n, fmt in reversed(sorted(temp, key=lambda x: x[0])):
807
+ if fmt > 0:
808
+ result = result[:pos] + result[pos+n:]
809
+ formats = [(p-n, f) for p, f in formats]
810
+ formats.insert(0, (pos, fmt))
811
+
812
+ return result, formats
813
+
774
814
  # END Class Tokenizer
775
815
 
776
816