logseq-matryca-parser 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logseq_matryca_parser/.gitignore +0 -0
- logseq_matryca_parser/NOTICE +7 -0
- logseq_matryca_parser/__init__.py +61 -0
- logseq_matryca_parser/__main__.py +8 -0
- logseq_matryca_parser/agent_press.py +99 -0
- logseq_matryca_parser/agent_writer.py +250 -0
- logseq_matryca_parser/exceptions.py +13 -0
- logseq_matryca_parser/forge.py +399 -0
- logseq_matryca_parser/graph.py +493 -0
- logseq_matryca_parser/kinetic.py +531 -0
- logseq_matryca_parser/lens.py +427 -0
- logseq_matryca_parser/logos_core.py +171 -0
- logseq_matryca_parser/logos_parser.py +1047 -0
- logseq_matryca_parser/pyproject.toml +0 -0
- logseq_matryca_parser/synapse.py +329 -0
- logseq_matryca_parser-0.3.0.dist-info/METADATA +279 -0
- logseq_matryca_parser-0.3.0.dist-info/RECORD +21 -0
- logseq_matryca_parser-0.3.0.dist-info/WHEEL +4 -0
- logseq_matryca_parser-0.3.0.dist-info/entry_points.txt +2 -0
- logseq_matryca_parser-0.3.0.dist-info/licenses/LICENSE +201 -0
- logseq_matryca_parser-0.3.0.dist-info/licenses/NOTICE +7 -0
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
"""FORGE exporters implemented with AST visitors."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
from collections.abc import Callable
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .logos_core import ASTVisitor, LogseqNode, LogseqPage
|
|
12
|
+
from .logos_parser import LOGSEQ_PATTERNS
|
|
13
|
+
|
|
14
|
+
# Module-level logger named after this module; the exporters below use it for debug traces.
logger = logging.getLogger(__name__)

# Callback signature accepted by the Obsidian export helpers: it is called with a block id
# and returns either a ``(title, anchor)`` pair or ``None``.
EmbedResolver = Callable[[str], tuple[str, str] | None]
"""Maps a Logseq block id string to (Obsidian page title, anchor without leading ^)."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _flatten_nodes_preorder(nodes: list[LogseqNode]) -> list[LogseqNode]:
    """Return every node reachable from ``nodes`` in depth-first preorder.

    Each node is emitted before its ``children``, and siblings keep their
    original order. The walk uses an explicit stack rather than recursion so
    deeply nested outlines cannot hit the interpreter's recursion limit, and
    every node is appended exactly once instead of being re-copied at each
    nesting level.

    Args:
        nodes: Root nodes to walk; the list itself is not modified.

    Returns:
        A new flat list holding the same node objects.
    """
    ordered: list[LogseqNode] = []
    # Push in reverse so that popping from the end yields the leftmost node first.
    pending: list[LogseqNode] = list(reversed(nodes))
    while pending:
        current = pending.pop()
        ordered.append(current)
        if current.children:
            pending.extend(reversed(current.children))
    return ordered
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _node_identity_keys(node: LogseqNode) -> set[str]:
    """Collect the lowercased ids under which ``node`` can be referenced.

    The set always contains the node's ``uuid``. It also contains
    ``source_uuid`` when that is truthy, and the ``id`` property when it is a
    36-character string.
    """
    aliases = [node.uuid]
    if node.source_uuid:
        aliases.append(node.source_uuid)
    declared_id = node.properties.get("id")
    # Only a 36-character string is accepted as an explicit block id.
    if isinstance(declared_id, str) and len(declared_id) == 36:
        aliases.append(declared_id)
    return {alias.lower() for alias in aliases}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _outgoing_embed_ids(node: LogseqNode) -> set[str]:
    """Return the lowercased block ids that ``node`` points at.

    Ids come from two places: the node's ``block_refs`` collection, and every
    match of the ``block_ref`` pattern in ``clean_text`` (taking capture
    group 1, or group 2 when group 1 is empty).
    """
    referenced: set[str] = set()
    for ref in node.block_refs:
        referenced.add(ref.lower())
    inline_ids = (
        hit.group(1) or hit.group(2)
        for hit in LOGSEQ_PATTERNS["block_ref"].finditer(node.clean_text)
    )
    referenced.update(found.lower() for found in inline_ids if found)
    return referenced
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _nodes_needing_trailing_anchor(
    flat: list[LogseqNode],
    *,
    vault_wide_ref_targets: set[str] | None = None,
) -> set[str]:
    """Synthetic UUIDs of nodes that should receive a trailing Obsidian block id.

    When ``vault_wide_ref_targets`` is provided (embed ids used anywhere in the
    vault), any node whose identity keys intersect that set is marked — so
    cross-page ``((uuid))`` references still get a stable ``^`` anchor on the
    target block's line. Targets are compared case-insensitively.

    When it is ``None``, only references between nodes of ``flat`` are
    considered: a node is marked when some *other* node (one with a different
    ``uuid``) references one of its identity keys. A node that only references
    itself is not marked.

    Args:
        flat: Nodes of one page, already flattened.
        vault_wide_ref_targets: Optional set of referenced block ids.

    Returns:
        The ``uuid`` values of the nodes that need an anchor.
    """
    if vault_wide_ref_targets is not None:
        # Normalise once instead of rebuilding the set for every node.
        wanted = {target.lower() for target in vault_wide_ref_targets}
        return {n.uuid for n in flat if _node_identity_keys(n) & wanted}

    # Single pass over the page: embed id -> uuids of the nodes that reference it.
    # This avoids re-scanning every referrer's text once per candidate target.
    referrers_by_id: dict[str, set[str]] = {}
    for referrer in flat:
        for embed_id in _outgoing_embed_ids(referrer):
            referrers_by_id.setdefault(embed_id, set()).add(referrer.uuid)

    no_referrers: frozenset[str] = frozenset()
    need: set[str] = set()
    for target in flat:
        for key in _node_identity_keys(target):
            # Discard the target's own uuid so self-references do not count.
            if referrers_by_id.get(key, no_referrers) - {target.uuid}:
                need.add(target.uuid)
                break
    return need
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _allocate_obsidian_suffixes(flat: list[LogseqNode], need_anchor: set[str]) -> dict[str, str]:
    """Assign each anchored node a suffix (no leading ``^``) unique within ``flat``.

    Nodes whose ``uuid`` is not in ``need_anchor`` are skipped. For the rest,
    candidates are tried in this order until one is unused: the first 8
    characters of the dash-less uuid, the full dash-less uuid, the uuid as-is,
    then the 8-character prefix followed by 1, 2, 3, ...

    Returns:
        Mapping of node ``uuid`` to the chosen suffix.
    """
    taken: set[str] = set()
    allocation: dict[str, str] = {}
    for node in flat:
        if node.uuid not in need_anchor:
            continue
        compact = node.uuid.replace("-", "")
        short = compact[:8]
        # Prefer the shortest form that is still free on this page.
        chosen = next(
            (option for option in (short, compact, node.uuid) if option not in taken),
            None,
        )
        counter = 0
        while chosen is None:
            counter += 1
            attempt = f"{short}{counter}"
            if attempt not in taken:
                chosen = attempt
        taken.add(chosen)
        allocation[node.uuid] = chosen
    return allocation
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _yaml_quote_key(key: str) -> str:
    """Return ``key`` in a form that is safe to use as a YAML mapping key.

    Keys made only of ASCII letters, digits, ``_`` and ``-`` (not starting with
    a digit or ``-``) are returned unchanged. Everything else is emitted as a
    JSON double-quoted string.

    Identifier-shaped keys that YAML parsers read as booleans or null
    (``true``, ``null``, ``yes``, ... in any letter case) are quoted as well,
    so they stay string keys after a round trip.
    """
    reserved = {"true", "false", "null", "yes", "no", "on", "off"}
    is_plain = re.fullmatch(r"[A-Za-z_][A-Za-z0-9_-]*", key) is not None
    if is_plain and key.lower() not in reserved:
        return key
    return json.dumps(key, ensure_ascii=False)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _yaml_quote_value(value: Any) -> str:
    """Render ``value`` as the right-hand side of a ``key: value`` YAML line.

    * ``None`` -> ``null``; booleans -> ``true`` / ``false``; ints -> ``str``.
    * Floats -> ``repr``, except non-finite values, which use the YAML
      spellings ``.nan``, ``.inf`` and ``-.inf``.
    * Strings containing a newline become a literal block scalar (``|``) with
      each line prefixed by the same indent as before.
    * Other strings are returned bare only when a YAML parser would read them
      back as the same string; otherwise they are emitted as a JSON
      double-quoted string. Quoting is applied to: the empty string, words read
      as booleans/null (any letter case, plus ``~``), strings containing
      ``: # " [ ] { }``, strings with leading/trailing whitespace, and strings
      that start with a YAML indicator character.
    * Any other type is serialised with ``json.dumps`` (``default=str``).
    """
    if value is None:
        return "null"
    # bool is checked before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, int):
        return str(value)
    if isinstance(value, float):
        if value != value:  # NaN is the only float that is not equal to itself
            return ".nan"
        if value in (float("inf"), float("-inf")):
            return ".inf" if value > 0 else "-.inf"
        return repr(value)
    if isinstance(value, str):
        if "\n" in value:
            body = "\n".join(" " + line for line in value.split("\n"))
            return "|\n" + body
        reserved_words = {"true", "false", "null", "~", "yes", "no", "on", "off"}
        needs_quotes = (
            value == ""  # a bare empty value would be read back as null
            or value.lower() in reserved_words
            or re.search(r'[:#"\[\]{}]|^\s|\s$', value) is not None
            # "-" / "?" are indicators only when followed by whitespace or end of
            # text; the remaining characters cannot start a plain scalar at all.
            or re.search(r"^(?:[-?](?:\s|$)|[*&!|>'%@`,])", value) is not None
        )
        if needs_quotes:
            return json.dumps(value, ensure_ascii=False)
        return value
    return json.dumps(value, default=str, ensure_ascii=False)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _page_properties_to_yaml_frontmatter(properties: dict[str, Any]) -> str:
    """Render ``properties`` as a ``---`` delimited frontmatter block.

    Returns an empty string when there are no properties. Otherwise the result
    is one ``key: value`` line per property, in iteration order, wrapped in
    ``---`` fences and followed by a blank line.
    """
    if not properties:
        return ""
    entries: list[str] = []
    for name, raw in properties.items():
        entries.append(f"{_yaml_quote_key(str(name))}: {_yaml_quote_value(raw)}")
    return "\n".join(["---"] + entries + ["---"]) + "\n\n"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _build_local_embed_index(flat: list[LogseqNode]) -> dict[str, LogseqNode]:
    """Build a lowercased-id lookup for the nodes in ``flat``.

    Every node is registered under its ``uuid``, under ``source_uuid`` when
    that is truthy, and under its ``id`` property when that is a 36-character
    string. When two nodes share a key, the one later in ``flat`` wins.
    """
    lookup: dict[str, LogseqNode] = {}
    for node in flat:
        aliases = [node.uuid]
        if node.source_uuid:
            aliases.append(node.source_uuid)
        declared_id = node.properties.get("id")
        if isinstance(declared_id, str) and len(declared_id) == 36:
            aliases.append(declared_id)
        for alias in aliases:
            lookup[alias.lower()] = node
    return lookup
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _replace_block_refs_in_text(
    text: str,
    page_title: str,
    local_index: dict[str, LogseqNode],
    suffix_map: dict[str, str],
    embed_resolver: EmbedResolver | None,
) -> str:
    """Rewrite block references in ``text`` as ``[[Title#^anchor]]`` wikilinks.

    For each match of the ``block_ref`` pattern:

    1. If ``embed_resolver`` is given and returns a ``(title, anchor)`` pair,
       that pair is used.
    2. Otherwise the id is looked up (lowercased) in ``local_index``; a hit
       links to ``page_title`` using the anchor from ``suffix_map``, falling
       back to the first 8 characters of the target's dash-less uuid.
    3. If neither resolves, the original text of the match is kept.
    """

    def _to_wikilink(found: re.Match[str]) -> str:
        ref_id = found.group(1) or found.group(2)
        if not ref_id:
            return found.group(0)

        # The resolver gets the id exactly as written; only the local lookup is lowercased.
        external = embed_resolver(ref_id) if embed_resolver is not None else None
        if external is not None:
            other_title, resolved_anchor = external
            logger.debug(
                "embed_resolver mapped ref=%s -> [[%s#^%s]]",
                ref_id,
                other_title,
                resolved_anchor,
            )
            return f"[[{other_title}#^{resolved_anchor}]]"

        local_target = local_index.get(ref_id.lower())
        if local_target is None:
            logger.debug("Unresolved block ref %s in Obsidian export (same-page index miss)", ref_id)
            return found.group(0)

        block_anchor = suffix_map.get(local_target.uuid)
        if block_anchor is None:
            block_anchor = local_target.uuid.replace("-", "")[:8]
        logger.debug("Same-page block ref %s -> [[%s#^%s]]", ref_id, page_title, block_anchor)
        return f"[[{page_title}#^{block_anchor}]]"

    return LOGSEQ_PATTERNS["block_ref"].sub(_to_wikilink, text)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _obsidian_line_source(node: LogseqNode) -> str:
    """Pick the single line of text that represents ``node`` in the Obsidian export.

    The first line of ``content`` is preferred so ``((uuid))`` survives when
    stripped from ``clean_text``; ``clean_text`` is used only when ``content``
    is empty. Matches of the ``inline_uuid_prop`` pattern are removed, any
    remaining newlines become spaces, and surrounding whitespace is trimmed.
    """
    raw = node.content.partition("\n")[0] if node.content else node.clean_text
    without_inline_id = LOGSEQ_PATTERNS["inline_uuid_prop"].sub("", raw)
    single_line = without_inline_id.replace("\n", " ")
    return single_line.strip()
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class JSONForgeVisitor(ASTVisitor):
    """Visitor that mirrors the visited tree as nested, JSON-ready dictionaries."""

    def __init__(self) -> None:
        # Payloads of nodes visited while no other node was open.
        self._roots: list[dict[str, Any]] = []
        # Payloads of the nodes currently open; the last entry is the current parent.
        self._stack: list[dict[str, Any]] = []

    def visit_node(self, node: LogseqNode) -> None:
        """Dump ``node`` (without its children) and attach it under the open parent."""
        payload = {**node.model_dump(exclude={"children"}), "children": []}
        siblings = self._stack[-1]["children"] if self._stack else self._roots
        siblings.append(payload)
        self._stack.append(payload)

    def depart_node(self, node: LogseqNode) -> None:
        """Close the most recently opened node."""
        del node  # not needed; the stack already tracks which node is open
        self._stack.pop()

    def get_data(self) -> list[dict[str, Any]]:
        """Return the collected root payloads (the live list, not a copy)."""
        return self._roots

    def get_json(self, indent: int = 2) -> str:
        """Serialise the collected roots with ``json.dumps`` using ``indent``."""
        return json.dumps(self._roots, indent=indent)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class FlatListForgeVisitor(ASTVisitor):
    """Visitor that records one dictionary per visited node, in visit order."""

    def __init__(self) -> None:
        self._flat_items: list[dict[str, Any]] = []

    def visit_node(self, node: LogseqNode) -> None:
        """Append the node's dump, leaving out its ``children`` field."""
        dumped = node.model_dump(exclude={"children"})
        self._flat_items.append(dumped)

    def depart_node(self, node: LogseqNode) -> None:
        """Nothing to do when leaving a node; the list carries no nesting state."""
        del node

    def get_data(self) -> list[dict[str, Any]]:
        """Return the collected dictionaries (the live list, not a copy)."""
        return self._flat_items
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class MarkdownForgeVisitor(ASTVisitor):
    """Visitor that renders the tree as an indented Markdown bullet list."""

    def __init__(self) -> None:
        # Output lines, in the order they were produced.
        self._lines: list[str] = []
        # uuids of the currently open ancestors; its length is the nesting depth.
        self._stack: list[str] = []

    def visit_node(self, node: LogseqNode) -> None:
        """Emit a bullet for ``node`` followed by one line per property other than ``id``."""
        depth = len(self._stack)
        bullet_text = node.clean_text.replace("\n", " ")
        self._lines.append(" " * depth + "- " + bullet_text)
        if node.properties:
            self._lines.extend(
                f" {' ' * depth} [:{key} {value}]"
                for key, value in node.properties.items()
                if key != "id"
            )
        self._stack.append(node.uuid)

    def depart_node(self, node: LogseqNode) -> None:
        """Step back out to the parent's depth."""
        del node
        self._stack.pop()

    def get_markdown(self) -> str:
        """Join all emitted lines with newlines."""
        return "\n".join(self._lines)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class ObsidianForgeVisitor(ASTVisitor):
    """Visitor that renders a page as an Obsidian-style bullet list.

    Block references in each line are rewritten to wikilinks, and nodes listed
    in ``needs_suffix`` get a trailing ``^anchor``. The pre-rendered ``header``
    is prepended to the final output.
    """

    def __init__(
        self,
        *,
        page_title: str,
        suffix_map: dict[str, str],
        needs_suffix: set[str],
        local_index: dict[str, LogseqNode],
        embed_resolver: EmbedResolver | None,
        header: str,
    ) -> None:
        # Rendering state.
        self._lines: list[str] = []
        self._stack: list[str] = []
        # Configuration, stored exactly as passed in.
        self._header = header
        self._embed_resolver = embed_resolver
        self._local_index = local_index
        self._needs_suffix = needs_suffix
        self._suffix_map = suffix_map
        self._page_title = page_title

    def visit_node(self, node: LogseqNode) -> None:
        """Emit the bullet line for ``node`` at the current nesting depth."""
        level = len(self._stack)
        rendered = _replace_block_refs_in_text(
            _obsidian_line_source(node),
            self._page_title,
            self._local_index,
            self._suffix_map,
            self._embed_resolver,
        )
        if node.uuid in self._needs_suffix:
            # Fall back to the short dash-less uuid when no suffix was allocated.
            anchor = self._suffix_map.get(node.uuid, node.uuid.replace("-", "")[:8])
            rendered = f"{rendered.rstrip()} ^{anchor}"
            logger.debug("Obsidian trailing block id uuid=%s ^%s", node.uuid, anchor)
        self._lines.append(" " * level + "- " + rendered)
        self._stack.append(node.uuid)

    def depart_node(self, node: LogseqNode) -> None:
        """Step back out to the parent's depth."""
        del node
        self._stack.pop()

    def get_markdown(self) -> str:
        """Return the header followed by the newline-joined body lines."""
        body = "\n".join(self._lines)
        return self._header + body
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class ForgeExporter:
    """Static entry points that turn Logseq nodes into export artifacts."""

    @staticmethod
    def to_json(nodes: list[LogseqNode], indent: int = 2) -> str:
        """Serialise the whole tree under ``nodes`` as nested JSON text."""
        json_visitor = JSONForgeVisitor()
        for root in nodes:
            root.accept(json_visitor)
        return json_visitor.get_json(indent=indent)

    @staticmethod
    def to_flat_list(nodes: list[LogseqNode]) -> list[dict[str, Any]]:
        """Return one dictionary per node, in the order the visitor reaches them."""
        flat_visitor = FlatListForgeVisitor()
        for root in nodes:
            root.accept(flat_visitor)
        return flat_visitor.get_data()

    @staticmethod
    def to_clean_markdown(nodes: list[LogseqNode]) -> str:
        """Render the tree as an indented Markdown bullet list."""
        markdown_visitor = MarkdownForgeVisitor()
        for root in nodes:
            root.accept(markdown_visitor)
        return markdown_visitor.get_markdown()

    @staticmethod
    def vault_wide_embed_targets(pages: list[LogseqPage]) -> set[str]:
        """Collect the lowercased block ids referenced by any node of any page in ``pages``."""
        collected: set[str] = set()
        for page in pages:
            for node in _flatten_nodes_preorder(page.root_nodes):
                collected.update(_outgoing_embed_ids(node))
        return collected

    @staticmethod
    def build_vault_obsidian_suffix_map(
        pages: list[LogseqPage],
        *,
        vault_wide_ref_targets: set[str] | None = None,
    ) -> dict[str, str]:
        """Map the uuid of each referenced block in ``pages`` to its ``^`` anchor suffix.

        Reference targets are computed from ``pages`` when
        ``vault_wide_ref_targets`` is not supplied. Suffixes are allocated page
        by page and merged into one dictionary.
        """
        if vault_wide_ref_targets is None:
            targets = ForgeExporter.vault_wide_embed_targets(pages)
        else:
            targets = vault_wide_ref_targets
        combined: dict[str, str] = {}
        for page in pages:
            page_nodes = _flatten_nodes_preorder(page.root_nodes)
            anchored = _nodes_needing_trailing_anchor(page_nodes, vault_wide_ref_targets=targets)
            combined.update(_allocate_obsidian_suffixes(page_nodes, anchored))
        return combined

    @staticmethod
    def to_obsidian_markdown(
        nodes: list[LogseqNode],
        page_properties: dict[str, Any],
        *,
        embed_resolver: EmbedResolver | None = None,
        global_suffix_map: dict[str, str] | None = None,
        vault_wide_ref_targets: set[str] | None = None,
    ) -> str:
        """Render one page as Obsidian Markdown: frontmatter, bullet body, ``^`` block ids.

        The page title used for same-page links is the ``title`` property, or
        ``"Untitled"`` when that is missing or falsy. For each node that needs
        an anchor, the suffix from ``global_suffix_map`` is used when present;
        otherwise a suffix allocated for this page alone is used.
        """
        props = dict(page_properties)
        page_title = str(props.get("title") or "Untitled")

        flat = _flatten_nodes_preorder(nodes)
        need_suffix = _nodes_needing_trailing_anchor(
            flat,
            vault_wide_ref_targets=vault_wide_ref_targets,
        )
        page_local = _allocate_obsidian_suffixes(flat, need_suffix)
        # A vault-level suffix takes precedence over the page-local allocation.
        suffix_map: dict[str, str] = {
            item.uuid: (
                global_suffix_map[item.uuid]
                if global_suffix_map is not None and item.uuid in global_suffix_map
                else page_local[item.uuid]
            )
            for item in flat
            if item.uuid in need_suffix
        }

        obsidian_visitor = ObsidianForgeVisitor(
            page_title=page_title,
            suffix_map=suffix_map,
            needs_suffix=need_suffix,
            local_index=_build_local_embed_index(flat),
            embed_resolver=embed_resolver,
            header=_page_properties_to_yaml_frontmatter(props),
        )
        for root in nodes:
            root.accept(obsidian_visitor)
        return obsidian_visitor.get_markdown()
|