markdown-to-confluence 0.5.3__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/__main__.py CHANGED
@@ -16,17 +16,22 @@ import sys
16
16
  import typing
17
17
  from io import StringIO
18
18
  from pathlib import Path
19
+ from types import TracebackType
19
20
  from typing import Any, Iterable, Literal, Sequence
20
21
 
22
+ from requests.exceptions import HTTPError, JSONDecodeError
23
+
21
24
  from . import __version__
22
25
  from .compatibility import override
23
26
  from .environment import ArgumentError, ConfluenceSiteProperties, ConnectionProperties
24
27
  from .metadata import ConfluenceSiteMetadata
25
28
  from .options import ConfluencePageID, ConverterOptions, DocumentOptions, ImageLayoutOptions, LayoutOptions
26
29
 
30
+ LOGGER = logging.getLogger(__name__)
31
+
27
32
 
28
33
  class Arguments(argparse.Namespace):
29
- mdpath: Path
34
+ mdpath: list[Path]
30
35
  domain: str | None
31
36
  path: str | None
32
37
  api_url: str | None
@@ -41,6 +46,7 @@ class Arguments(argparse.Namespace):
41
46
  skip_title_heading: bool
42
47
  title_prefix: str | None
43
48
  generated_by: str | None
49
+ skip_update: bool
44
50
  prefer_raster: bool
45
51
  render_drawio: bool
46
52
  render_mermaid: bool
@@ -100,7 +106,7 @@ def get_parser() -> argparse.ArgumentParser:
100
106
  parser = argparse.ArgumentParser(formatter_class=PositionalOnlyHelpFormatter)
101
107
  parser.prog = os.path.basename(os.path.dirname(__file__))
102
108
  parser.add_argument("--version", action="version", version=__version__)
103
- parser.add_argument("mdpath", help="Path to Markdown file or directory to convert and publish.")
109
+ parser.add_argument("mdpath", type=Path, nargs="+", help="Path to Markdown file or directory to convert and publish.")
104
110
  parser.add_argument("-d", "--domain", help="Confluence organization domain.")
105
111
  parser.add_argument("-p", "--path", help="Base path for Confluence (default: '/wiki/').")
106
112
  parser.add_argument(
@@ -166,6 +172,12 @@ def get_parser() -> argparse.ArgumentParser:
166
172
  const=None,
167
173
  help="Do not add 'generated by a tool' prompt to pages.",
168
174
  )
175
+ parser.add_argument(
176
+ "--skip-update",
177
+ action="store_true",
178
+ default=False,
179
+ help="Skip saving Confluence page ID in Markdown files.",
180
+ )
169
181
  parser.add_argument(
170
182
  "--render-drawio",
171
183
  dest="render_drawio",
@@ -324,13 +336,31 @@ def get_help() -> str:
324
336
  return buf.getvalue()
325
337
 
326
338
 
339
+ def _exception_hook(exc_type: type[BaseException], exc_value: BaseException, traceback: TracebackType | None) -> None:
340
+ LOGGER.exception("Exception raised: %s", exc_type.__name__, exc_info=exc_value)
341
+ ex: BaseException | None = exc_value
342
+ while ex is not None:
343
+ print(f"\033[95m{ex.__class__.__name__}\033[0m: {ex}")
344
+
345
+ if isinstance(ex, HTTPError):
346
+ # print details for a response with JSON body
347
+ if ex.response is not None:
348
+ try:
349
+ LOGGER.error(ex.response.json())
350
+ except JSONDecodeError:
351
+ pass
352
+
353
+ ex = ex.__cause__
354
+
355
+
356
+ sys.excepthook = _exception_hook
357
+
358
+
327
359
  def main() -> None:
328
360
  parser = get_parser()
329
361
  args = Arguments()
330
362
  parser.parse_args(namespace=args)
331
363
 
332
- args.mdpath = Path(args.mdpath)
333
-
334
364
  logging.basicConfig(
335
365
  level=getattr(logging, args.loglevel.upper(), logging.INFO),
336
366
  format="%(asctime)s - %(levelname)s - %(funcName)s [%(lineno)d] - %(message)s",
@@ -341,6 +371,7 @@ def main() -> None:
341
371
  keep_hierarchy=args.keep_hierarchy,
342
372
  title_prefix=args.title_prefix,
343
373
  generated_by=args.generated_by,
374
+ skip_update=args.skip_update,
344
375
  converter=ConverterOptions(
345
376
  heading_anchors=args.heading_anchors,
346
377
  ignore_invalid_url=args.ignore_invalid_url,
@@ -377,10 +408,10 @@ def main() -> None:
377
408
  base_path=site_properties.base_path,
378
409
  space_key=site_properties.space_key,
379
410
  )
380
- LocalConverter(options, site_metadata).process(args.mdpath)
411
+ converter = LocalConverter(options, site_metadata)
412
+ for item in args.mdpath:
413
+ converter.process(item)
381
414
  else:
382
- from requests import HTTPError, JSONDecodeError
383
-
384
415
  from .api import ConfluenceAPI
385
416
  from .publisher import Publisher
386
417
 
@@ -396,20 +427,10 @@ def main() -> None:
396
427
  )
397
428
  except ArgumentError as e:
398
429
  parser.error(str(e))
399
- try:
400
- with ConfluenceAPI(properties) as api:
401
- Publisher(api, options).process(args.mdpath)
402
- except HTTPError as err:
403
- logging.error(err)
404
-
405
- # print details for a response with JSON body
406
- if err.response is not None:
407
- try:
408
- logging.error(err.response.json())
409
- except JSONDecodeError:
410
- pass
411
-
412
- sys.exit(1)
430
+ with ConfluenceAPI(properties) as api:
431
+ publisher = Publisher(api, options)
432
+ for item in args.mdpath:
433
+ publisher.process(item)
413
434
 
414
435
 
415
436
  if __name__ == "__main__":
md2conf/api.py CHANGED
@@ -890,10 +890,10 @@ class ConfluenceSession:
890
890
  else:
891
891
  raise
892
892
 
893
- # This should not be reached, but satisfies type checker
893
+ # this should not be reached, but satisfies type checker
894
894
  if last_error is not None:
895
895
  raise last_error
896
- raise ConfluenceError(f"Failed to get page {page_id}")
896
+ raise ConfluenceError(f"failed to get page: {page_id}")
897
897
 
898
898
  def get_page_properties(self, page_id: str) -> ConfluencePageProperties:
899
899
  """
@@ -1058,6 +1058,7 @@ class ConfluenceSession:
1058
1058
 
1059
1059
  :param title: Page title. Pages in the same Confluence space must have a unique title.
1060
1060
  :param parent_id: Identifies the parent page for a new child page.
1061
+ :returns: Confluence page info for the found or newly created page.
1061
1062
  """
1062
1063
 
1063
1064
  parent_page = self.get_page_properties(parent_id)
md2conf/converter.py CHANGED
@@ -633,7 +633,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
633
633
  absolute_path = (self.base_dir / path).resolve()
634
634
 
635
635
  if not absolute_path.exists():
636
- self._warn_or_raise(f"path to image {path} does not exist")
636
+ self._warn_or_raise(f"path to image does not exist: {path}")
637
637
  return None
638
638
 
639
639
  if not is_directory_within(absolute_path, self.root_dir):
@@ -802,7 +802,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
802
802
 
803
803
  content = blockquote[0]
804
804
  if content.text is None:
805
- raise DocumentError("empty content")
805
+ raise DocumentError("empty content for GitHub alert")
806
806
 
807
807
  pattern = re.compile(r"^\[!([A-Z]+)\]\s*")
808
808
  match = pattern.match(content.text)
@@ -839,7 +839,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
839
839
 
840
840
  content = blockquote[0]
841
841
  if content.text is None:
842
- raise DocumentError("empty content")
842
+ raise DocumentError("empty content for GitLab alert")
843
843
 
844
844
  pattern = re.compile(r"^(FLAG|NOTE|WARNING|DISCLAIMER):\s*")
845
845
  match = pattern.match(content.text)
@@ -1324,10 +1324,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1324
1324
  tasks: list[ElementType] = []
1325
1325
  for index, item in enumerate(elem, start=1):
1326
1326
  if item.text is None:
1327
- raise NotImplementedError("pre-condition check not exhaustive")
1327
+ raise NotImplementedError("pre-condition check for tasklist not exhaustive")
1328
1328
  match = re.match(r"^\[([x X])\]", item.text)
1329
1329
  if match is None:
1330
- raise NotImplementedError("pre-condition check not exhaustive")
1330
+ raise NotImplementedError("pre-condition check for tasklist not exhaustive")
1331
1331
 
1332
1332
  status = "incomplete" if match.group(1).isspace() else "complete"
1333
1333
  item.text = item.text[3:]
@@ -1630,7 +1630,7 @@ class ConfluenceDocument:
1630
1630
  try:
1631
1631
  self.root = elements_from_strings(content)
1632
1632
  except ParseError as ex:
1633
- raise ConversionError(path) from ex
1633
+ raise ConversionError(f"failed to convert Markdown file: {path}") from ex
1634
1634
 
1635
1635
  # configure HTML-to-Confluence converter
1636
1636
  converter_options = copy.deepcopy(self.options.converter)
@@ -1641,8 +1641,8 @@ class ConfluenceDocument:
1641
1641
  # execute HTML-to-Confluence converter
1642
1642
  try:
1643
1643
  converter.visit(self.root)
1644
- except DocumentError as ex:
1645
- raise ConversionError(path) from ex
1644
+ except RuntimeError as ex:
1645
+ raise ConversionError(f"failed to convert Markdown file: {path}") from ex
1646
1646
 
1647
1647
  # extract information discovered by converter
1648
1648
  self.links = converter.links
@@ -14,7 +14,7 @@ import lxml.etree as ET
14
14
  from md2conf.attachment import EmbeddedFileData, ImageData, attachment_name
15
15
  from md2conf.compatibility import override, path_relative_to
16
16
  from md2conf.csf import AC_ATTR, AC_ELEM
17
- from md2conf.extension import MarketplaceExtension
17
+ from md2conf.extension import ExtensionError, MarketplaceExtension
18
18
  from md2conf.formatting import ImageAlignment, ImageAttributes
19
19
 
20
20
  from .render import extract_diagram, render_diagram
@@ -38,11 +38,11 @@ class DrawioExtension(MarketplaceExtension):
38
38
  elif absolute_path.name.endswith((".drawio", ".drawio.xml")):
39
39
  return self._transform_drawio(absolute_path, attrs)
40
40
  else:
41
- raise RuntimeError(f"unrecognized image format: {absolute_path.suffix}")
41
+ raise ExtensionError(f"unrecognized image format: {absolute_path.suffix}")
42
42
 
43
43
  @override
44
44
  def transform_fenced(self, content: str) -> ElementType:
45
- raise RuntimeError("draw.io diagrams cannot be defined in fenced code blocks")
45
+ raise ExtensionError("draw.io diagrams cannot be defined in fenced code blocks")
46
46
 
47
47
  def _transform_drawio(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
48
48
  relative_path = path_relative_to(absolute_path, self.base_dir)
md2conf/extension.py CHANGED
@@ -19,6 +19,10 @@ from .image import ImageGenerator
19
19
  ElementType = ET._Element # pyright: ignore [reportPrivateUsage]
20
20
 
21
21
 
22
+ class ExtensionError(RuntimeError):
23
+ pass
24
+
25
+
22
26
  @dataclass
23
27
  class ExtensionOptions:
24
28
  """
md2conf/external.py CHANGED
@@ -7,13 +7,14 @@ Copyright 2022-2026, Levente Hunyadi
7
7
  """
8
8
 
9
9
  import logging
10
+ import re
10
11
  import subprocess
11
12
  from typing import Sequence
12
13
 
13
14
  LOGGER = logging.getLogger(__name__)
14
15
 
15
16
 
16
- def execute_subprocess(command: Sequence[str], data: bytes, *, application: str | None = None) -> bytes:
17
+ def execute_subprocess(command: Sequence[str], data: bytes, *, application: str) -> bytes:
17
18
  """
18
19
  Executes a subprocess, feeding input to stdin, and capturing output from stdout.
19
20
 
@@ -37,13 +38,29 @@ def execute_subprocess(command: Sequence[str], data: bytes, *, application: str
37
38
  stdout, stderr = proc.communicate(input=data)
38
39
 
39
40
  if proc.returncode:
40
- messages = [f"failed to execute {application or 'application'}; exit code: {proc.returncode}"]
41
- console_output = stdout.decode("utf-8")
42
- if console_output:
43
- messages.append(f"output:\n{console_output}")
44
- console_error = stderr.decode("utf-8")
45
- if console_error:
46
- messages.append(f"error:\n{console_error}")
41
+ message = f"failed to execute {application}; exit code: {proc.returncode}"
42
+ LOGGER.error("Failed to execute %s; exit code: %d", application, proc.returncode)
43
+ messages = [message]
44
+ if stdout:
45
+ try:
46
+ console_output = stdout.decode("utf-8")
47
+ LOGGER.error(console_output)
48
+ messages.append(f"output:\n{console_output}")
49
+ except UnicodeDecodeError:
50
+ LOGGER.error("%s returned binary data on stdout", application)
51
+ pass
52
+ if stderr:
53
+ try:
54
+ console_error = stderr.decode("utf-8")
55
+ LOGGER.error(console_error)
56
+
57
+ # omit Node.js exception stack trace
58
+ console_error = re.sub(r"^\s+at.*:\d+:\d+\)$\n", "", console_error, flags=re.MULTILINE).rstrip()
59
+
60
+ messages.append(f"error:\n{console_error}")
61
+ except UnicodeDecodeError:
62
+ LOGGER.error("%s returned binary data on stderr", application)
63
+ pass
47
64
  raise RuntimeError("\n".join(messages))
48
65
 
49
66
  return stdout
md2conf/image.py CHANGED
@@ -58,12 +58,13 @@ class ImageGenerator:
58
58
 
59
59
  # infer SVG dimensions if not already specified
60
60
  if absolute_path.suffix == ".svg" and attrs.width is None and attrs.height is None:
61
- svg_width, svg_height = get_svg_dimensions(absolute_path)
62
- if svg_width is not None:
61
+ dimensions = get_svg_dimensions(absolute_path)
62
+ if dimensions is not None:
63
+ width, height = dimensions
63
64
  attrs = ImageAttributes(
64
65
  context=attrs.context,
65
- width=svg_width,
66
- height=svg_height,
66
+ width=width,
67
+ height=height,
67
68
  alt=attrs.alt,
68
69
  title=attrs.title,
69
70
  caption=attrs.caption,
@@ -78,17 +79,17 @@ class ImageGenerator:
78
79
  "Emits Confluence Storage Format XHTML for an attached raster or vector image."
79
80
 
80
81
  # extract dimensions and update attributes based on format
81
- width: int | None
82
- height: int | None
82
+ dimensions: tuple[int, int] | None
83
83
  match self.options.output_format:
84
84
  case "svg":
85
- image_data, width, height = fix_svg_get_dimensions(image_data)
85
+ image_data, dimensions = fix_svg_get_dimensions(image_data)
86
86
  case "png":
87
- width, height = extract_png_dimensions(data=image_data)
87
+ dimensions = extract_png_dimensions(data=image_data)
88
88
 
89
89
  # only update attributes if we successfully extracted dimensions and the base attributes don't already have explicit dimensions
90
- if (width is not None or height is not None) and (attrs.width is None and attrs.height is None):
90
+ if dimensions is not None and (attrs.width is None and attrs.height is None):
91
91
  # create updated image attributes with extracted dimensions
92
+ width, height = dimensions
92
93
  attrs = ImageAttributes(
93
94
  context=attrs.context,
94
95
  width=width,
md2conf/mermaid/render.py CHANGED
@@ -67,7 +67,7 @@ def render_diagram(source: str, output_format: Literal["png", "svg"] = "png", co
67
67
  str(config.scale or 2),
68
68
  ]
69
69
  if _is_docker():
70
- root = os.path.dirname(os.path.dirname(__file__))
70
+ root = os.path.dirname(__file__)
71
71
  cmd.extend(["-p", os.path.join(root, "puppeteer-config.json")])
72
72
 
73
73
  return execute_subprocess(cmd, source.encode("utf-8"), application="Mermaid")
md2conf/options.py CHANGED
@@ -106,6 +106,7 @@ class DocumentOptions:
106
106
  :param keep_hierarchy: Whether to maintain source directory structure when exporting to Confluence.
107
107
  :param title_prefix: String to prepend to Confluence page title for each published page.
108
108
  :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
109
+ :param skip_update: Whether to skip saving Confluence page ID in Markdown files.
109
110
  :param converter: Options for converting an HTML tree into Confluence Storage Format.
110
111
  """
111
112
 
@@ -113,4 +114,5 @@ class DocumentOptions:
113
114
  keep_hierarchy: bool = False
114
115
  title_prefix: str | None = None
115
116
  generated_by: str | None = "This page has been generated with a tool."
117
+ skip_update: bool = False
116
118
  converter: ConverterOptions = dataclasses.field(default_factory=ConverterOptions)
@@ -19,7 +19,7 @@ from md2conf.compatibility import override, path_relative_to
19
19
  from md2conf.csf import AC_ATTR, AC_ELEM
20
20
  from md2conf.extension import MarketplaceExtension
21
21
  from md2conf.formatting import ImageAttributes
22
- from md2conf.svg import get_svg_dimensions_from_bytes
22
+ from md2conf.svg import get_svg_dimensions
23
23
 
24
24
  from .config import PlantUMLConfigProperties
25
25
  from .render import compress_plantuml_data, has_plantuml, render_diagram
@@ -87,7 +87,7 @@ class PlantUMLExtension(MarketplaceExtension):
87
87
  image_data = render_diagram(content, "svg", config=config)
88
88
 
89
89
  # extract dimensions from SVG
90
- width, height = get_svg_dimensions_from_bytes(image_data)
90
+ dimensions = get_svg_dimensions(image_data)
91
91
 
92
92
  # generate SVG filename and add as attachment
93
93
  if relative_path is not None:
@@ -98,11 +98,11 @@ class PlantUMLExtension(MarketplaceExtension):
98
98
  svg_filename = attachment_name(f"embedded_{plantuml_hash}.svg")
99
99
  self.attachments.add_embed(svg_filename, EmbeddedFileData(image_data))
100
100
 
101
- return self._create_plantuml_macro(content, svg_filename, width, height)
101
+ return self._create_plantuml_macro(content, svg_filename, dimensions)
102
102
  else:
103
103
  return self._create_plantuml_macro(content)
104
104
 
105
- def _create_plantuml_macro(self, source: str, filename: str | None = None, width: int | None = None, height: int | None = None) -> ElementType:
105
+ def _create_plantuml_macro(self, source: str, filename: str | None = None, dimensions: tuple[int, int] | None = None) -> ElementType:
106
106
  """
107
107
  A PlantUML diagram using a `structured-macro` with embedded data.
108
108
 
@@ -128,7 +128,8 @@ class PlantUMLExtension(MarketplaceExtension):
128
128
  parameters.append(AC_ELEM("parameter", {AC_ATTR("name"): "filename"}, filename))
129
129
 
130
130
  # add optional dimension parameters if available
131
- if width is not None:
131
+ if dimensions is not None:
132
+ width, height = dimensions
132
133
  parameters.append(
133
134
  AC_ELEM(
134
135
  "parameter",
@@ -136,7 +137,6 @@ class PlantUMLExtension(MarketplaceExtension):
136
137
  str(width),
137
138
  )
138
139
  )
139
- if height is not None:
140
140
  parameters.append(
141
141
  AC_ELEM(
142
142
  "parameter",
@@ -92,17 +92,16 @@ def render_diagram(
92
92
  if config is None:
93
93
  config = PlantUMLConfigProperties()
94
94
 
95
- # Build command for PlantUML with pipe mode
96
- # -pipe: read from stdin and write to stdout
97
- # -t<format>: output format (png or svg)
98
- # -charset utf-8: ensure UTF-8 encoding
95
+ # command for PlantUML with pipe mode
99
96
  cmd = _get_plantuml_command()
100
97
  cmd.extend(
101
98
  [
102
- "-pipe",
103
- f"-t{output_format}",
104
- "-charset",
99
+ "--charset",
105
100
  "utf-8",
101
+ "--format",
102
+ output_format,
103
+ "--no-error-image",
104
+ "--pipe",
106
105
  ]
107
106
  )
108
107
 
md2conf/png.py CHANGED
@@ -12,6 +12,10 @@ from struct import unpack
12
12
  from typing import BinaryIO, Iterable, overload
13
13
 
14
14
 
15
+ class ImageFormatError(RuntimeError):
16
+ pass
17
+
18
+
15
19
  class _Chunk:
16
20
  "Data chunk in binary data as per the PNG image format."
17
21
 
@@ -34,7 +38,7 @@ def _read_signature(f: BinaryIO) -> None:
34
38
 
35
39
  signature = f.read(8)
36
40
  if signature != b"\x89PNG\r\n\x1a\n":
37
- raise ValueError("not a valid PNG file")
41
+ raise ImageFormatError("not a valid PNG file")
38
42
 
39
43
 
40
44
  def _read_chunk(f: BinaryIO) -> _Chunk | None:
@@ -45,7 +49,7 @@ def _read_chunk(f: BinaryIO) -> _Chunk | None:
45
49
  return None
46
50
 
47
51
  if len(length_bytes) != 4:
48
- raise ValueError("expected: 4 bytes storing chunk length")
52
+ raise ImageFormatError("expected: 4 bytes storing chunk length")
49
53
 
50
54
  length = int.from_bytes(length_bytes, "big")
51
55
 
@@ -53,7 +57,7 @@ def _read_chunk(f: BinaryIO) -> _Chunk | None:
53
57
  data_bytes = f.read(data_length)
54
58
  actual_length = len(data_bytes)
55
59
  if actual_length != data_length:
56
- raise ValueError(f"expected: {length} bytes storing chunk data; got: {actual_length}")
60
+ raise ImageFormatError(f"expected: {length} bytes storing chunk data; got: {actual_length}")
57
61
 
58
62
  chunk_type = data_bytes[0:4]
59
63
  chunk_data = data_bytes[4:-4]
@@ -75,12 +79,12 @@ def _extract_png_dimensions(source_file: BinaryIO) -> tuple[int, int]:
75
79
  # validate IHDR (Image Header) chunk
76
80
  ihdr = _read_chunk(source_file)
77
81
  if ihdr is None:
78
- raise ValueError("missing IHDR chunk")
82
+ raise ImageFormatError("missing IHDR chunk")
79
83
 
80
84
  if ihdr.length != 13:
81
- raise ValueError("invalid chunk length")
85
+ raise ImageFormatError("invalid chunk length")
82
86
  if ihdr.name != b"IHDR":
83
- raise ValueError(f"expected: IHDR chunk; got: {ihdr.name!r}")
87
+ raise ImageFormatError(f"expected: IHDR chunk; got: {ihdr.name!r}")
84
88
 
85
89
  (
86
90
  width,
md2conf/processor.py CHANGED
@@ -15,11 +15,12 @@ from typing import Iterable
15
15
 
16
16
  from .collection import ConfluencePageCollection
17
17
  from .converter import ConfluenceDocument
18
- from .environment import ArgumentError
18
+ from .environment import ArgumentError, PageError
19
19
  from .matcher import DirectoryEntry, FileEntry, Matcher, MatcherOptions
20
20
  from .metadata import ConfluenceSiteMetadata
21
21
  from .options import ConfluencePageID, DocumentOptions
22
22
  from .scanner import Scanner
23
+ from .toc import unique_title
23
24
 
24
25
  LOGGER = logging.getLogger(__name__)
25
26
 
@@ -143,6 +144,22 @@ class Processor:
143
144
  Processes a sub-tree rooted at an ancestor node.
144
145
  """
145
146
 
147
+ # verify if pages have a unique title to avoid overwrites within synchronized set
148
+ title_to_path: dict[str, Path] = {}
149
+ duplicates: set[Path] = set()
150
+ for node in root.all():
151
+ if node.title is not None:
152
+ path = title_to_path.get(node.title)
153
+ if path is not None:
154
+ duplicates.add(path)
155
+ duplicates.add(node.absolute_path)
156
+ else:
157
+ title_to_path[node.title] = node.absolute_path
158
+ if duplicates:
159
+ raise PageError(
160
+ f"expected: each synchronized page to have a unique title but duplicates found in files: {', '.join(str(p) for p in sorted(list(duplicates)))}"
161
+ )
162
+
146
163
  # synchronize directory tree structure with page hierarchy in space (find matching pages in Confluence)
147
164
  self._synchronize_tree(root, self.options.root_page_id)
148
165
 
@@ -246,14 +263,18 @@ class Processor:
246
263
  LOGGER.info("Indexing file: %s", path)
247
264
 
248
265
  # extract information from a Markdown document found in a local directory.
249
- document = Scanner().read(path)
266
+ with open(path, "r", encoding="utf-8") as f:
267
+ text = f.read()
250
268
 
269
+ document = Scanner().parse(text)
251
270
  props = document.properties
271
+ title = props.title or unique_title(text)
272
+
252
273
  return DocumentNode(
253
274
  absolute_path=path,
254
275
  page_id=props.page_id,
255
276
  space_key=props.space_key,
256
- title=props.title,
277
+ title=title,
257
278
  synchronized=props.synchronized if props.synchronized is not None else True,
258
279
  )
259
280