notionary 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- notionary/__init__.py +3 -2
- notionary/blocks/__init__.py +54 -25
- notionary/blocks/audio/__init__.py +7 -0
- notionary/blocks/audio/audio_element.py +152 -0
- notionary/blocks/audio/audio_markdown_node.py +29 -0
- notionary/blocks/audio/audio_models.py +59 -0
- notionary/blocks/bookmark/__init__.py +7 -0
- notionary/blocks/{bookmark_element.py → bookmark/bookmark_element.py} +20 -65
- notionary/blocks/bookmark/bookmark_markdown_node.py +43 -0
- notionary/blocks/bookmark/bookmark_models.py +0 -0
- notionary/blocks/bulleted_list/__init__.py +7 -0
- notionary/blocks/{bulleted_list_element.py → bulleted_list/bulleted_list_element.py} +7 -3
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +33 -0
- notionary/blocks/bulleted_list/bulleted_list_models.py +0 -0
- notionary/blocks/callout/__init__.py +7 -0
- notionary/blocks/callout/callout_element.py +132 -0
- notionary/blocks/callout/callout_markdown_node.py +31 -0
- notionary/blocks/callout/callout_models.py +0 -0
- notionary/blocks/code/__init__.py +7 -0
- notionary/blocks/{code_block_element.py → code/code_element.py} +72 -40
- notionary/blocks/code/code_markdown_node.py +43 -0
- notionary/blocks/code/code_models.py +0 -0
- notionary/blocks/column/__init__.py +5 -0
- notionary/blocks/{column_element.py → column/column_element.py} +24 -55
- notionary/blocks/column/column_models.py +0 -0
- notionary/blocks/divider/__init__.py +7 -0
- notionary/blocks/{divider_element.py → divider/divider_element.py} +11 -3
- notionary/blocks/divider/divider_markdown_node.py +24 -0
- notionary/blocks/divider/divider_models.py +0 -0
- notionary/blocks/document/__init__.py +7 -0
- notionary/blocks/document/document_element.py +102 -0
- notionary/blocks/document/document_markdown_node.py +31 -0
- notionary/blocks/document/document_models.py +0 -0
- notionary/blocks/embed/__init__.py +7 -0
- notionary/blocks/{embed_element.py → embed/embed_element.py} +50 -32
- notionary/blocks/embed/embed_markdown_node.py +30 -0
- notionary/blocks/embed/embed_models.py +0 -0
- notionary/blocks/heading/__init__.py +7 -0
- notionary/blocks/{heading_element.py → heading/heading_element.py} +25 -17
- notionary/blocks/heading/heading_markdown_node.py +29 -0
- notionary/blocks/heading/heading_models.py +0 -0
- notionary/blocks/image/__init__.py +7 -0
- notionary/blocks/{image_element.py → image/image_element.py} +62 -42
- notionary/blocks/image/image_markdown_node.py +33 -0
- notionary/blocks/image/image_models.py +0 -0
- notionary/blocks/markdown_builder.py +356 -0
- notionary/blocks/markdown_node.py +29 -0
- notionary/blocks/mention/__init__.py +7 -0
- notionary/blocks/{mention_element.py → mention/mention_element.py} +6 -2
- notionary/blocks/mention/mention_markdown_node.py +38 -0
- notionary/blocks/mention/mention_models.py +0 -0
- notionary/blocks/numbered_list/__init__.py +7 -0
- notionary/blocks/{numbered_list_element.py → numbered_list/numbered_list_element.py} +10 -6
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +29 -0
- notionary/blocks/numbered_list/numbered_list_models.py +0 -0
- notionary/blocks/paragraph/__init__.py +7 -0
- notionary/blocks/{paragraph_element.py → paragraph/paragraph_element.py} +7 -3
- notionary/blocks/paragraph/paragraph_markdown_node.py +25 -0
- notionary/blocks/paragraph/paragraph_models.py +0 -0
- notionary/blocks/quote/__init__.py +7 -0
- notionary/blocks/quote/quote_element.py +92 -0
- notionary/blocks/quote/quote_markdown_node.py +23 -0
- notionary/blocks/quote/quote_models.py +0 -0
- notionary/blocks/registry/block_registry.py +17 -3
- notionary/blocks/registry/block_registry_builder.py +90 -178
- notionary/blocks/shared/__init__.py +0 -0
- notionary/blocks/shared/block_client.py +256 -0
- notionary/blocks/shared/models.py +713 -0
- notionary/blocks/{notion_block_element.py → shared/notion_block_element.py} +8 -5
- notionary/blocks/{text_inline_formatter.py → shared/text_inline_formatter.py} +14 -14
- notionary/blocks/shared/text_inline_formatter_new.py +139 -0
- notionary/blocks/table/__init__.py +7 -0
- notionary/blocks/{table_element.py → table/table_element.py} +23 -11
- notionary/blocks/table/table_markdown_node.py +40 -0
- notionary/blocks/table/table_models.py +0 -0
- notionary/blocks/todo/__init__.py +7 -0
- notionary/blocks/{todo_element.py → todo/todo_element.py} +8 -4
- notionary/blocks/todo/todo_markdown_node.py +31 -0
- notionary/blocks/todo/todo_models.py +0 -0
- notionary/blocks/toggle/__init__.py +4 -0
- notionary/blocks/{toggle_element.py → toggle/toggle_element.py} +7 -3
- notionary/blocks/toggle/toggle_markdown_node.py +35 -0
- notionary/blocks/toggle/toggle_models.py +0 -0
- notionary/blocks/toggleable_heading/__init__.py +9 -0
- notionary/blocks/{toggleable_heading_element.py → toggleable_heading/toggleable_heading_element.py} +8 -4
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +43 -0
- notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
- notionary/blocks/video/__init__.py +7 -0
- notionary/blocks/{video_element.py → video/video_element.py} +82 -57
- notionary/blocks/video/video_markdown_node.py +30 -0
- notionary/file_upload/notion_file_upload.py +1 -1
- notionary/page/content/markdown_whitespace_processor.py +80 -0
- notionary/page/content/notion_text_length_utils.py +87 -0
- notionary/page/content/page_content_retriever.py +18 -10
- notionary/page/content/page_content_writer.py +97 -148
- notionary/page/formatting/line_processor.py +153 -0
- notionary/page/formatting/markdown_to_notion_converter.py +104 -425
- notionary/page/notion_page.py +9 -11
- notionary/page/notion_to_markdown_converter.py +9 -13
- notionary/util/factory_decorator.py +0 -0
- notionary/workspace.py +0 -1
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/METADATA +1 -1
- notionary-0.2.19.dist-info/RECORD +150 -0
- notionary/blocks/audio_element.py +0 -144
- notionary/blocks/callout_element.py +0 -122
- notionary/blocks/document_element.py +0 -194
- notionary/blocks/notion_block_client.py +0 -26
- notionary/blocks/qoute_element.py +0 -169
- notionary/page/content/notion_page_content_chunker.py +0 -84
- notionary/page/formatting/spacer_rules.py +0 -483
- notionary-0.2.17.dist-info/RECORD +0 -85
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/LICENSE +0 -0
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/WHEEL +0 -0
@@ -1,474 +1,153 @@
|
|
1
|
-
import
|
2
|
-
from
|
3
|
-
|
4
|
-
from notionary.blocks import ColumnElement, BlockRegistry, BlockRegistryBuilder
|
5
|
-
from notionary.page.formatting.spacer_rules import SpacerRule, SpacerRuleEngine
|
6
|
-
|
1
|
+
from notionary.blocks import ColumnElement, BlockRegistry
|
2
|
+
from notionary.page.formatting.line_processor import LineProcessor
|
7
3
|
|
4
|
+
# TODO: Hier rekursiven Baum Parser verwenden!
|
8
5
|
class MarkdownToNotionConverter:
|
9
|
-
"""
|
10
|
-
|
11
|
-
def __init__(self, block_registry: Optional[BlockRegistry] = None):
|
12
|
-
"""Initialize the converter with an optional custom block registry."""
|
13
|
-
self._block_registry = (
|
14
|
-
block_registry or BlockRegistryBuilder().create_full_registry()
|
15
|
-
)
|
16
|
-
|
17
|
-
# Spacer-Engine mit konfigurierbaren Regeln
|
18
|
-
self._spacer_engine = SpacerRuleEngine()
|
6
|
+
"""Clean converter focused on block identification and conversion"""
|
19
7
|
|
20
|
-
|
21
|
-
self.
|
22
|
-
self.
|
8
|
+
def __init__(self, block_registry: BlockRegistry):
|
9
|
+
self._block_registry = block_registry
|
10
|
+
self._pipe_content_pattern = r"^\|\s?(.*)$"
|
11
|
+
self._toggle_element_types = ["ToggleElement", "ToggleableHeadingElement"]
|
23
12
|
|
13
|
+
# Setup column element callback if available
|
24
14
|
if self._block_registry.contains(ColumnElement):
|
25
15
|
ColumnElement.set_converter_callback(self.convert)
|
26
16
|
|
27
|
-
def convert(self, markdown_text: str) ->
|
28
|
-
"""Convert markdown text to Notion API block format
|
17
|
+
def convert(self, markdown_text: str) -> list[dict[str, any]]:
|
18
|
+
"""Convert markdown text to Notion API block format"""
|
29
19
|
if not markdown_text:
|
30
20
|
return []
|
31
21
|
|
32
|
-
#
|
33
|
-
|
34
|
-
|
35
|
-
# Rest der Pipeline bleibt gleich
|
36
|
-
all_blocks_with_positions = self._collect_all_blocks_with_positions(
|
37
|
-
processed_markdown
|
38
|
-
)
|
39
|
-
all_blocks_with_positions.sort(key=lambda x: x[0])
|
40
|
-
blocks = [block for _, _, block in all_blocks_with_positions]
|
41
|
-
|
42
|
-
return self._process_block_spacing(blocks)
|
43
|
-
|
44
|
-
def _add_spacers_with_rules(self, markdown_text: str) -> str:
|
45
|
-
"""Fügt Spacer mit expliziten Regeln hinzu"""
|
46
|
-
lines = markdown_text.split("\n")
|
47
|
-
processed_lines = []
|
48
|
-
|
49
|
-
# Initialer State
|
50
|
-
state = {
|
51
|
-
"in_code_block": False,
|
52
|
-
"last_line_was_spacer": False,
|
53
|
-
"last_non_empty_was_heading": False,
|
54
|
-
"has_content_before": False,
|
55
|
-
"processed_lines": processed_lines,
|
56
|
-
}
|
57
|
-
|
58
|
-
for line_number, line in enumerate(lines):
|
59
|
-
result_lines, state = self._spacer_engine.process_line(
|
60
|
-
line, line_number, state
|
61
|
-
)
|
62
|
-
processed_lines.extend(result_lines)
|
63
|
-
state["processed_lines"] = processed_lines
|
64
|
-
|
65
|
-
return "\n".join(processed_lines)
|
22
|
+
# Main conversion pipeline
|
23
|
+
blocks_with_positions = self._identify_all_blocks(markdown_text)
|
24
|
+
blocks_with_positions.sort(key=lambda x: x[0]) # Sort by position
|
66
25
|
|
67
|
-
|
68
|
-
|
26
|
+
# Flatten blocks (some elements return lists of blocks)
|
27
|
+
result = []
|
28
|
+
for _, _, block in blocks_with_positions:
|
29
|
+
if isinstance(block, list):
|
30
|
+
result.extend(block)
|
31
|
+
else:
|
32
|
+
result.append(block)
|
33
|
+
return result
|
69
34
|
|
70
|
-
|
71
|
-
rule: Die hinzuzufügende Regel
|
72
|
-
priority: Position in der Regelliste (-1 = am Ende)
|
73
|
-
"""
|
74
|
-
if priority == -1:
|
75
|
-
self._spacer_engine.rules.append(rule)
|
76
|
-
else:
|
77
|
-
self._spacer_engine.rules.insert(priority, rule)
|
78
|
-
|
79
|
-
def get_spacer_rules_info(self) -> List[Dict[str, str]]:
|
80
|
-
"""Gibt Informationen über alle aktiven Spacer-Regeln zurück"""
|
81
|
-
return [
|
82
|
-
{"name": rule.name, "description": rule.description}
|
83
|
-
for rule in self._spacer_engine.rules
|
84
|
-
]
|
85
|
-
|
86
|
-
# Alle anderen Methoden bleiben unverändert...
|
87
|
-
def _collect_all_blocks_with_positions(
|
35
|
+
def _identify_all_blocks(
|
88
36
|
self, markdown_text: str
|
89
|
-
) ->
|
90
|
-
"""
|
37
|
+
) -> list[tuple[int, int, dict[str, any]]]:
|
38
|
+
"""Main block identification pipeline"""
|
91
39
|
all_blocks = []
|
92
40
|
|
93
|
-
# Process
|
94
|
-
toggleable_blocks = self.
|
41
|
+
# 1. Process complex multiline blocks first (toggles, etc.)
|
42
|
+
toggleable_blocks = self._find_toggleable_blocks(markdown_text)
|
43
|
+
all_blocks.extend(toggleable_blocks)
|
95
44
|
|
96
|
-
# Process other multiline
|
97
|
-
multiline_blocks = self.
|
98
|
-
|
99
|
-
)
|
45
|
+
# 2. Process other multiline blocks
|
46
|
+
multiline_blocks = self._find_multiline_blocks(markdown_text, toggleable_blocks)
|
47
|
+
all_blocks.extend(multiline_blocks)
|
100
48
|
|
101
|
-
# Process remaining text line by line
|
49
|
+
# 3. Process remaining text line by line
|
102
50
|
processed_blocks = toggleable_blocks + multiline_blocks
|
103
|
-
line_blocks = self.
|
104
|
-
|
105
|
-
# Combine all blocks
|
106
|
-
all_blocks.extend(toggleable_blocks)
|
107
|
-
all_blocks.extend(multiline_blocks)
|
51
|
+
line_blocks = self._process_remaining_lines(markdown_text, processed_blocks)
|
108
52
|
all_blocks.extend(line_blocks)
|
109
53
|
|
110
54
|
return all_blocks
|
111
55
|
|
112
|
-
def
|
56
|
+
def _find_toggleable_blocks(
|
113
57
|
self, text: str
|
114
|
-
) ->
|
115
|
-
"""
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
toggleable_elements = self._get_toggleable_elements()
|
120
|
-
|
121
|
-
if not toggleable_elements:
|
122
|
-
return []
|
58
|
+
) -> list[tuple[int, int, dict[str, any]]]:
|
59
|
+
"""Find all toggleable blocks (Toggle and ToggleableHeading)"""
|
60
|
+
toggleable_elements = self._get_elements_by_type(
|
61
|
+
self._toggle_element_types, multiline_only=True
|
62
|
+
)
|
123
63
|
|
64
|
+
blocks = []
|
124
65
|
for element in toggleable_elements:
|
125
66
|
matches = element.find_matches(text, self.convert, context_aware=True)
|
126
67
|
if matches:
|
127
|
-
|
128
|
-
|
129
|
-
return toggleable_blocks
|
130
|
-
|
131
|
-
def _get_toggleable_elements(self):
|
132
|
-
"""Return all toggleable elements from the registry."""
|
133
|
-
toggleable_elements = []
|
134
|
-
for element in self._block_registry.get_elements():
|
135
|
-
if (
|
136
|
-
element.is_multiline()
|
137
|
-
and hasattr(element, "match_markdown")
|
138
|
-
and element.__name__ in self.TOGGLE_ELEMENT_TYPES
|
139
|
-
):
|
140
|
-
toggleable_elements.append(element)
|
141
|
-
return toggleable_elements
|
142
|
-
|
143
|
-
def _identify_multiline_blocks(
|
144
|
-
self, text: str, exclude_blocks: List[Tuple[int, int, Dict[str, Any]]]
|
145
|
-
) -> List[Tuple[int, int, Dict[str, Any]]]:
|
146
|
-
"""Identify all multiline blocks (except toggleable blocks)."""
|
147
|
-
# Get all multiline elements except toggleable ones
|
148
|
-
multiline_elements = self._get_non_toggleable_multiline_elements()
|
149
|
-
|
150
|
-
if not multiline_elements:
|
151
|
-
return []
|
152
|
-
|
153
|
-
# Create set of positions to exclude
|
154
|
-
excluded_ranges = self._create_excluded_position_set(exclude_blocks)
|
155
|
-
|
156
|
-
multiline_blocks = []
|
157
|
-
for element in multiline_elements:
|
158
|
-
matches = element.find_matches(text)
|
159
|
-
|
160
|
-
if not matches:
|
161
|
-
continue
|
68
|
+
blocks.extend(matches)
|
162
69
|
|
163
|
-
|
164
|
-
for start_pos, end_pos, block in matches:
|
165
|
-
if self._overlaps_with_excluded_positions(
|
166
|
-
start_pos, end_pos, excluded_ranges
|
167
|
-
):
|
168
|
-
continue
|
169
|
-
multiline_blocks.append((start_pos, end_pos, block))
|
170
|
-
|
171
|
-
return multiline_blocks
|
70
|
+
return blocks
|
172
71
|
|
173
|
-
def
|
174
|
-
|
175
|
-
|
72
|
+
def _find_multiline_blocks(
|
73
|
+
self, text: str, exclude_blocks: list[tuple[int, int, dict[str, any]]]
|
74
|
+
) -> list[tuple[int, int, dict[str, any]]]:
|
75
|
+
"""Find all multiline blocks except toggleable ones"""
|
76
|
+
multiline_elements = [
|
176
77
|
element
|
177
78
|
for element in self._block_registry.get_multiline_elements()
|
178
|
-
if element.__name__ not in self.
|
79
|
+
if element.__name__ not in self._toggle_element_types
|
179
80
|
]
|
180
81
|
|
181
|
-
|
182
|
-
"""Create a set of positions to exclude based on block ranges."""
|
183
|
-
excluded_positions = set()
|
184
|
-
for start_pos, end_pos, _ in exclude_blocks:
|
185
|
-
excluded_positions.update(range(start_pos, end_pos + 1))
|
186
|
-
return excluded_positions
|
82
|
+
excluded_ranges = self._create_excluded_ranges(exclude_blocks)
|
187
83
|
|
188
|
-
|
189
|
-
|
190
|
-
|
84
|
+
blocks = []
|
85
|
+
for element in multiline_elements:
|
86
|
+
matches = element.find_matches(text)
|
191
87
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
88
|
+
for start_pos, end_pos, block in matches:
|
89
|
+
if not self._overlaps_with_ranges(start_pos, end_pos, excluded_ranges):
|
90
|
+
# Handle multiple blocks from single element
|
91
|
+
element_blocks = self._normalize_to_list(block)
|
92
|
+
|
93
|
+
current_pos = start_pos
|
94
|
+
for i, single_block in enumerate(element_blocks):
|
95
|
+
blocks.append((current_pos, end_pos, single_block))
|
96
|
+
# Increment position for subsequent blocks
|
97
|
+
current_pos = end_pos + i + 1
|
98
|
+
|
99
|
+
return blocks
|
100
|
+
|
101
|
+
def _process_remaining_lines(
|
102
|
+
self, text: str, exclude_blocks: list[tuple[int, int, dict[str, any]]]
|
103
|
+
) -> list[tuple[int, int, dict[str, any]]]:
|
104
|
+
"""Process text line by line, excluding already processed ranges"""
|
196
105
|
if not text:
|
197
106
|
return []
|
198
107
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
current_pos = 0
|
206
|
-
current_paragraph = []
|
207
|
-
paragraph_start = 0
|
208
|
-
in_todo_sequence = False
|
209
|
-
|
210
|
-
for line in lines:
|
211
|
-
line_length = len(line) + 1 # +1 for newline
|
212
|
-
line_end = current_pos + line_length - 1
|
213
|
-
|
214
|
-
# Skip excluded lines and pipe syntax lines (they're part of toggleable content)
|
215
|
-
if self._overlaps_with_excluded_positions(
|
216
|
-
current_pos, line_end, excluded_positions
|
217
|
-
) or self._is_pipe_syntax_line(line):
|
218
|
-
current_pos += line_length
|
219
|
-
continue
|
220
|
-
|
221
|
-
processed = self._process_line(
|
222
|
-
line,
|
223
|
-
current_pos,
|
224
|
-
line_end,
|
225
|
-
line_blocks,
|
226
|
-
current_paragraph,
|
227
|
-
paragraph_start,
|
228
|
-
in_todo_sequence,
|
229
|
-
)
|
230
|
-
|
231
|
-
current_pos = processed["current_pos"]
|
232
|
-
current_paragraph = processed["current_paragraph"]
|
233
|
-
paragraph_start = processed["paragraph_start"]
|
234
|
-
in_todo_sequence = processed["in_todo_sequence"]
|
235
|
-
|
236
|
-
# Process remaining paragraph
|
237
|
-
self._process_paragraph(
|
238
|
-
current_paragraph, paragraph_start, current_pos, line_blocks
|
239
|
-
)
|
240
|
-
|
241
|
-
return line_blocks
|
242
|
-
|
243
|
-
def _is_pipe_syntax_line(self, line: str) -> bool:
|
244
|
-
"""Check if a line uses pipe syntax (for nested content)."""
|
245
|
-
return bool(re.match(self.PIPE_CONTENT_PATTERN, line))
|
246
|
-
|
247
|
-
def _process_line(
|
248
|
-
self,
|
249
|
-
line: str,
|
250
|
-
current_pos: int,
|
251
|
-
line_end: int,
|
252
|
-
line_blocks: List[Tuple[int, int, Dict[str, Any]]],
|
253
|
-
current_paragraph: List[str],
|
254
|
-
paragraph_start: int,
|
255
|
-
in_todo_sequence: bool,
|
256
|
-
) -> Dict[str, Any]:
|
257
|
-
"""Process a single line of text."""
|
258
|
-
line_length = len(line) + 1 # +1 for newline
|
259
|
-
|
260
|
-
# Check for spacer
|
261
|
-
if self._is_spacer_line(line):
|
262
|
-
line_blocks.append((current_pos, line_end, self._create_empty_paragraph()))
|
263
|
-
return self._update_line_state(
|
264
|
-
current_pos + line_length,
|
265
|
-
current_paragraph,
|
266
|
-
paragraph_start,
|
267
|
-
in_todo_sequence,
|
268
|
-
)
|
269
|
-
|
270
|
-
# Handle todo items
|
271
|
-
todo_block = self._extract_todo_item(line)
|
272
|
-
if todo_block:
|
273
|
-
return self._process_todo_line(
|
274
|
-
todo_block,
|
275
|
-
current_pos,
|
276
|
-
line_end,
|
277
|
-
line_blocks,
|
278
|
-
current_paragraph,
|
279
|
-
paragraph_start,
|
280
|
-
in_todo_sequence,
|
281
|
-
line_length,
|
282
|
-
)
|
283
|
-
|
284
|
-
if in_todo_sequence:
|
285
|
-
in_todo_sequence = False
|
286
|
-
|
287
|
-
# Handle empty lines
|
288
|
-
if not line.strip():
|
289
|
-
self._process_paragraph(
|
290
|
-
current_paragraph, paragraph_start, current_pos, line_blocks
|
291
|
-
)
|
292
|
-
return self._update_line_state(
|
293
|
-
current_pos + line_length, [], paragraph_start, False
|
294
|
-
)
|
295
|
-
|
296
|
-
# Handle special blocks
|
297
|
-
special_block = self._extract_special_block(line)
|
298
|
-
if special_block:
|
299
|
-
self._process_paragraph(
|
300
|
-
current_paragraph, paragraph_start, current_pos, line_blocks
|
301
|
-
)
|
302
|
-
line_blocks.append((current_pos, line_end, special_block))
|
303
|
-
return self._update_line_state(
|
304
|
-
current_pos + line_length, [], paragraph_start, False
|
305
|
-
)
|
306
|
-
|
307
|
-
# Handle as paragraph
|
308
|
-
if not current_paragraph:
|
309
|
-
paragraph_start = current_pos
|
310
|
-
current_paragraph.append(line)
|
311
|
-
|
312
|
-
return self._update_line_state(
|
313
|
-
current_pos + line_length,
|
314
|
-
current_paragraph,
|
315
|
-
paragraph_start,
|
316
|
-
in_todo_sequence,
|
108
|
+
excluded_ranges = self._create_excluded_ranges(exclude_blocks)
|
109
|
+
processor = LineProcessor(
|
110
|
+
block_registry=self._block_registry,
|
111
|
+
excluded_ranges=excluded_ranges,
|
112
|
+
pipe_pattern=self._pipe_content_pattern,
|
317
113
|
)
|
318
114
|
|
319
|
-
|
320
|
-
"""Check if a line is a spacer marker."""
|
321
|
-
return line.strip() == self._spacer_engine.SPACER_MARKER
|
322
|
-
|
323
|
-
def _process_todo_line(
|
324
|
-
self,
|
325
|
-
todo_block: Dict[str, Any],
|
326
|
-
current_pos: int,
|
327
|
-
line_end: int,
|
328
|
-
line_blocks: List[Tuple[int, int, Dict[str, Any]]],
|
329
|
-
current_paragraph: List[str],
|
330
|
-
paragraph_start: int,
|
331
|
-
in_todo_sequence: bool,
|
332
|
-
line_length: int,
|
333
|
-
) -> Dict[str, Any]:
|
334
|
-
"""Process a line that contains a todo item."""
|
335
|
-
# Finish paragraph if needed
|
336
|
-
if not in_todo_sequence and current_paragraph:
|
337
|
-
self._process_paragraph(
|
338
|
-
current_paragraph, paragraph_start, current_pos, line_blocks
|
339
|
-
)
|
340
|
-
|
341
|
-
line_blocks.append((current_pos, line_end, todo_block))
|
115
|
+
return processor.process_lines(text)
|
342
116
|
|
343
|
-
|
344
|
-
|
117
|
+
def _get_elements_by_type(
|
118
|
+
self, type_names: list[str], multiline_only: bool = False
|
119
|
+
) -> list[any]:
|
120
|
+
"""Get elements from registry by type names"""
|
121
|
+
elements = (
|
122
|
+
self._block_registry.get_multiline_elements()
|
123
|
+
if multiline_only
|
124
|
+
else self._block_registry.get_elements()
|
345
125
|
)
|
346
126
|
|
347
|
-
|
348
|
-
self,
|
349
|
-
current_pos: int,
|
350
|
-
current_paragraph: List[str],
|
351
|
-
paragraph_start: int,
|
352
|
-
in_todo_sequence: bool,
|
353
|
-
) -> Dict[str, Any]:
|
354
|
-
"""Update and return the state after processing a line."""
|
355
|
-
return {
|
356
|
-
"current_pos": current_pos,
|
357
|
-
"current_paragraph": current_paragraph,
|
358
|
-
"paragraph_start": paragraph_start,
|
359
|
-
"in_todo_sequence": in_todo_sequence,
|
360
|
-
}
|
361
|
-
|
362
|
-
def _extract_todo_item(self, line: str) -> Optional[Dict[str, Any]]:
|
363
|
-
"""Extract a todo item from a line if possible."""
|
364
|
-
todo_elements = [
|
365
|
-
element
|
366
|
-
for element in self._block_registry.get_elements()
|
367
|
-
if not element.is_multiline() and element.__name__ == "TodoElement"
|
368
|
-
]
|
369
|
-
|
370
|
-
for element in todo_elements:
|
371
|
-
if element.match_markdown(line):
|
372
|
-
return element.markdown_to_notion(line)
|
373
|
-
return None
|
374
|
-
|
375
|
-
def _extract_special_block(self, line: str) -> Optional[Dict[str, Any]]:
|
376
|
-
"""Extract a special block (not paragraph) from a line if possible."""
|
377
|
-
non_multiline_elements = [
|
127
|
+
return [
|
378
128
|
element
|
379
|
-
for element in
|
380
|
-
if
|
129
|
+
for element in elements
|
130
|
+
if element.__name__ in type_names and hasattr(element, "match_markdown")
|
381
131
|
]
|
382
132
|
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
self,
|
392
|
-
paragraph_lines: List[str],
|
393
|
-
start_pos: int,
|
394
|
-
end_pos: int,
|
395
|
-
blocks: List[Tuple[int, int, Dict[str, Any]]],
|
396
|
-
) -> None:
|
397
|
-
"""Process a paragraph and add it to blocks if valid."""
|
398
|
-
if not paragraph_lines:
|
399
|
-
return
|
400
|
-
|
401
|
-
paragraph_text = "\n".join(paragraph_lines)
|
402
|
-
block = self._block_registry.markdown_to_notion(paragraph_text)
|
403
|
-
|
404
|
-
if block:
|
405
|
-
blocks.append((start_pos, end_pos, block))
|
406
|
-
|
407
|
-
def _process_block_spacing(
|
408
|
-
self, blocks: List[Dict[str, Any]]
|
409
|
-
) -> List[Dict[str, Any]]:
|
410
|
-
"""Add spacing between blocks where needed."""
|
411
|
-
if not blocks:
|
412
|
-
return blocks
|
413
|
-
|
414
|
-
final_blocks = []
|
415
|
-
|
416
|
-
for block_index, current_block in enumerate(blocks):
|
417
|
-
final_blocks.append(current_block)
|
418
|
-
|
419
|
-
# Only add spacing after multiline blocks
|
420
|
-
if not self._is_multiline_block_type(current_block.get("type")):
|
421
|
-
continue
|
422
|
-
|
423
|
-
# Check if we need to add a spacer
|
424
|
-
if self._needs_spacer_after_block(blocks, block_index):
|
425
|
-
final_blocks.append(self._create_empty_paragraph())
|
426
|
-
|
427
|
-
return final_blocks
|
133
|
+
def _create_excluded_ranges(
|
134
|
+
self, exclude_blocks: list[tuple[int, int, dict[str, any]]]
|
135
|
+
) -> set[int]:
|
136
|
+
"""Create set of excluded positions from block ranges"""
|
137
|
+
excluded_positions = set()
|
138
|
+
for start_pos, end_pos, _ in exclude_blocks:
|
139
|
+
excluded_positions.update(range(start_pos, end_pos + 1))
|
140
|
+
return excluded_positions
|
428
141
|
|
429
|
-
def
|
430
|
-
self,
|
142
|
+
def _overlaps_with_ranges(
|
143
|
+
self, start_pos: int, end_pos: int, excluded_ranges: set[int]
|
431
144
|
) -> bool:
|
432
|
-
"""
|
433
|
-
|
434
|
-
if block_index + 1 >= len(blocks):
|
435
|
-
return False
|
436
|
-
|
437
|
-
# Check if next block is already a spacer
|
438
|
-
next_block = blocks[block_index + 1]
|
439
|
-
if self._is_empty_paragraph(next_block):
|
440
|
-
return False
|
441
|
-
|
442
|
-
# No spacer needed
|
443
|
-
return True
|
444
|
-
|
445
|
-
def _create_empty_paragraph(self):
|
446
|
-
"""Create an empty paragraph block."""
|
447
|
-
return {"type": "paragraph", "paragraph": {"rich_text": []}}
|
448
|
-
|
449
|
-
def _is_multiline_block_type(self, block_type: str) -> bool:
|
450
|
-
"""Check if a block type corresponds to a multiline element."""
|
451
|
-
if not block_type:
|
452
|
-
return False
|
453
|
-
|
454
|
-
multiline_elements = self._block_registry.get_multiline_elements()
|
145
|
+
"""Check if a range overlaps with excluded positions"""
|
146
|
+
return any(pos in excluded_ranges for pos in range(start_pos, end_pos + 1))
|
455
147
|
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
dummy_block = {"type": block_type}
|
463
|
-
if element.match_notion(dummy_block):
|
464
|
-
return True
|
465
|
-
|
466
|
-
return False
|
467
|
-
|
468
|
-
def _is_empty_paragraph(self, block: Dict[str, Any]) -> bool:
|
469
|
-
"""Check if a block is an empty paragraph."""
|
470
|
-
if block.get("type") != "paragraph":
|
471
|
-
return False
|
472
|
-
|
473
|
-
rich_text = block.get("paragraph", {}).get("rich_text", [])
|
474
|
-
return not rich_text or len(rich_text) == 0
|
148
|
+
@staticmethod
|
149
|
+
def _normalize_to_list(result) -> list[dict[str, any]]:
|
150
|
+
"""Normalize Union[list[dict], dict] to list[dict]"""
|
151
|
+
if result is None:
|
152
|
+
return []
|
153
|
+
return result if isinstance(result, list) else [result]
|
notionary/page/notion_page.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
import asyncio
|
3
|
-
from typing import Any, Dict,
|
3
|
+
from typing import Any, Dict, Optional, TYPE_CHECKING
|
4
4
|
import random
|
5
5
|
|
6
|
-
from notionary.blocks import BlockRegistry
|
6
|
+
from notionary.blocks import BlockRegistry
|
7
7
|
from notionary.models.notion_database_response import NotionPageResponse
|
8
8
|
from notionary.models.notion_page_response import DatabaseParent
|
9
9
|
from notionary.page.client import NotionPageClient
|
@@ -51,11 +51,10 @@ class NotionPage(LoggingMixin):
|
|
51
51
|
self._client = NotionPageClient(token=token)
|
52
52
|
self._page_data = None
|
53
53
|
|
54
|
-
self._block_element_registry =
|
54
|
+
self._block_element_registry = BlockRegistry.create_registry()
|
55
55
|
|
56
56
|
self._page_content_writer = PageContentWriter(
|
57
57
|
page_id=self._page_id,
|
58
|
-
client=self._client,
|
59
58
|
block_registry=self._block_element_registry,
|
60
59
|
)
|
61
60
|
|
@@ -94,8 +93,8 @@ class NotionPage(LoggingMixin):
|
|
94
93
|
workspace = NotionWorkspace()
|
95
94
|
|
96
95
|
try:
|
97
|
-
search_results:
|
98
|
-
page_name, limit=
|
96
|
+
search_results: list[NotionPage] = await workspace.search_pages(
|
97
|
+
page_name, limit=5
|
99
98
|
)
|
100
99
|
|
101
100
|
if not search_results:
|
@@ -205,7 +204,6 @@ class NotionPage(LoggingMixin):
|
|
205
204
|
|
206
205
|
except Exception as e:
|
207
206
|
self.logger.error("Error setting page title: %s", str(e))
|
208
|
-
return None
|
209
207
|
|
210
208
|
async def append_markdown(self, markdown: str, append_divider=False) -> bool:
|
211
209
|
"""
|
@@ -353,7 +351,7 @@ class NotionPage(LoggingMixin):
|
|
353
351
|
|
354
352
|
async def _get_relation_property_values_by_name(
|
355
353
|
self, property_name: str
|
356
|
-
) ->
|
354
|
+
) -> list[str]:
|
357
355
|
"""
|
358
356
|
Retrieve the titles of all related pages for a relation property.
|
359
357
|
"""
|
@@ -366,7 +364,7 @@ class NotionPage(LoggingMixin):
|
|
366
364
|
]
|
367
365
|
return [page.title for page in notion_pages if page]
|
368
366
|
|
369
|
-
async def get_options_for_property_by_name(self, property_name: str) ->
|
367
|
+
async def get_options_for_property_by_name(self, property_name: str) -> list[str]:
|
370
368
|
"""
|
371
369
|
Get the available options for a property (select, multi_select, status, relation).
|
372
370
|
"""
|
@@ -426,8 +424,8 @@ class NotionPage(LoggingMixin):
|
|
426
424
|
return None
|
427
425
|
|
428
426
|
async def set_relation_property_values_by_name(
|
429
|
-
self, property_name: str, page_titles:
|
430
|
-
) ->
|
427
|
+
self, property_name: str, page_titles: list[str]
|
428
|
+
) -> list[str]:
|
431
429
|
"""
|
432
430
|
Add one or more relations to a relation property.
|
433
431
|
"""
|