logseq-matryca-parser 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,427 @@
1
+ """LENS topology extraction and interactive graph visualization."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import networkx as nx # type: ignore[import-untyped]
11
+ from pyvis.network import Network # type: ignore[import-untyped]
12
+
13
+ from logseq_matryca_parser.logos_core import ASTVisitor, LogseqNode, LogseqPage
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class NetworkXVisitor(ASTVisitor):
    """AST visitor that records one page's outgoing references in a graph.

    Each reference carried by a visited block becomes an edge between the
    page being walked and the referenced name. Nodes are created on demand
    and tagged with a ``group`` attribute of ``"page"`` or ``"tag"``.
    """

    def __init__(self, graph: nx.Graph, page_title: str) -> None:
        """Bind the visitor to the graph it fills and the page it walks."""
        self._page_title = page_title
        self._graph = graph

    def visit_node(self, node: LogseqNode) -> None:
        """Connect the owning page to every reference listed on ``node``."""
        graph, source = self._graph, self._page_title

        # The page node normally exists already; create it if the caller
        # did not register it up front.
        if not graph.has_node(source):
            graph.add_node(source, group="page")

        for target in node.refs:
            if not graph.has_node(target):
                # Only a first sighting decides the group; existing nodes
                # keep whatever group they were created with.
                is_tag = target in node.tags or target.startswith("#")
                graph.add_node(target, group="tag" if is_tag else "page")
            graph.add_edge(source, target)

        logger.debug(
            "LENS visit_node page=%s refs=%d cumulative_edges=%d",
            source,
            len(node.refs),
            graph.number_of_edges(),
        )

    def depart_node(self, node: LogseqNode) -> None:
        """Do nothing on exit; present only to satisfy the visitor interface."""
        del node
44
+
45
+
46
class GraphVisualizer:
    """Build and visualize a Logseq topology graph.

    Typical flow: construct with the parsed pages, call
    :meth:`build_network` to populate :attr:`graph`, then read
    :meth:`get_deep_statistics` and/or write the interactive page with
    :meth:`export_html`.
    """

    # Name shapes treated as daily journals: ``2024_01_31``, ``2024-01-31``
    # and the bracketed ``[[Jan 1st, 2024]]`` form. Compiled once instead of
    # on every classification call.
    _JOURNAL_PATTERNS = (
        re.compile(r"^\d{4}_\d{2}_\d{2}$"),
        re.compile(r"^\d{4}-\d{2}-\d{2}$"),
        re.compile(r"^\[\[[A-Za-z]{3} \d{1,2}(st|nd|rd|th), \d{4}\]\]$"),
    )

    # Injected right after the opening <head> tag when the page has no
    # viewport declaration of its own.
    _VIEWPORT_META = (
        '<meta name="viewport" content="width=device-width, initial-scale=1.0, '
        'maximum-scale=1.0, user-scalable=no">'
    )

    # Stylesheet for the full-screen canvas and the floating control sidebar.
    _HUD_CSS = """
    html, body, #mynetwork {
        margin: 0;
        padding: 0;
        width: 100vw;
        height: 100vh;
        overflow: hidden;
        background-color: #111827;
    }
    /* Responsive HUD */
    @media (max-width: 600px) {
        #hud-sidebar {
            width: 90% !important;
            left: 5% !important;
            right: 5% !important;
            top: 10px !important;
            max-height: 80vh !important;
        }
        #hud-toggle {
            right: 16px !important;
            top: 10px !important;
        }
    }
    #hud-sidebar {
        position: fixed;
        top: 16px;
        right: 16px;
        width: 350px;
        background: rgba(255, 255, 255, 0.04) !important;
        -webkit-backdrop-filter: blur(10px) !important;
        backdrop-filter: blur(10px) !important;
        border: 1px solid rgba(255, 255, 255, 0.15) !important;
        border-bottom: 1px solid rgba(255, 255, 255, 0.05) !important;
        box-shadow: 0 4px 30px rgba(0, 0, 0, 0.5) !important;
        border-radius: 12px !important;
        color: white;
        padding: 15px;
        z-index: 9999;
        max-height: 90vh;
        overflow-y: auto;
        overflow-x: hidden !important;
        transform: translateX(0);
        transition: transform 220ms ease-in-out, opacity 220ms ease-in-out;
        opacity: 1;
    }
    #hud-sidebar.hud-hidden {
        transform: translateX(calc(100% + 28px));
        opacity: 0;
        pointer-events: none;
    }
    #hud-toggle {
        position: fixed;
        top: 16px;
        right: 388px;
        z-index: 10000;
        border: 1px solid rgba(148, 163, 184, 0.35);
        background: rgba(15, 23, 42, 0.92);
        color: #ffffff;
        border-radius: 8px;
        padding: 8px 12px;
        font-family: sans-serif;
        font-size: 13px;
        cursor: pointer;
        box-shadow: 0 8px 20px rgba(2, 6, 23, 0.35);
    }
    #hud-toggle:hover {
        background: rgba(30, 41, 59, 0.95);
    }
    #hud-sidebar #config {
        width: 100%;
        max-width: 100%;
        background: transparent !important;
        background-color: transparent !important;
    }
    /* Nuke all vis-configuration backgrounds except color preview blocks */
    #hud-sidebar .vis-configuration:not(.vis-config-colorBlock):not(.vis-color-picker) {
        background: transparent !important;
        background-color: transparent !important;
        border: none !important;
        color: #e5e7eb !important;
    }

    /* Clean up the labels */
    #hud-sidebar .vis-config-label {
        color: #f9fafb !important;
        font-weight: 500 !important;
        text-shadow: none !important;
        margin-bottom: 2px !important;
    }

    /* Style text/number inputs without breaking native sliders */
    #hud-sidebar input[type="text"],
    #hud-sidebar input[type="number"],
    #hud-sidebar select {
        background-color: #374151 !important;
        color: white !important;
        border: 1px solid #4b5563 !important;
        border-radius: 4px;
        padding: 2px 4px;
        outline: none;
    }

    /* Ensure range sliders and checkboxes remain native and readable */
    #hud-sidebar input[type="range"],
    #hud-sidebar input[type="checkbox"] {
        background: transparent !important;
        margin: 0 5px;
    }

    /* Ensure color blocks have a visible border on dark glass */
    #hud-sidebar .vis-config-colorBlock {
        border: 1px solid #9ca3af !important;
        border-radius: 3px !important;
    }

    /* Prevent horizontal overflow and shrink inputs */
    #hud-sidebar * {
        box-sizing: border-box !important;
    }
    #hud-sidebar .vis-configuration-wrapper {
        width: 100% !important;
        max-width: 100% !important;
    }
    #hud-sidebar .vis-config-item {
        max-width: 100% !important;
        white-space: normal !important;
    }

    /* Shrink the range sliders so they fit next to labels */
    #hud-sidebar input[type="range"] {
        max-width: 100px !important;
        width: 100px !important;
        min-width: 50px !important;
    }

    /* Shrink the number/text input boxes */
    #hud-sidebar input[type="number"],
    #hud-sidebar input[type="text"] {
        max-width: 60px !important;
    }
    """

    # Script that moves the generated ``#config`` panel into a collapsible
    # sidebar and adds journal/tag filters plus a reset button.
    _HUD_JS = """
    (function() {
        const configPanel = document.getElementById('config');
        if (!configPanel) {
            return;
        }

        const hudSidebar = document.createElement('div');
        hudSidebar.id = 'hud-sidebar';

        const hudToggle = document.createElement('button');
        hudToggle.id = 'hud-toggle';
        hudToggle.type = 'button';
        hudToggle.textContent = 'Hide Controls';

        document.body.appendChild(hudSidebar);
        document.body.appendChild(hudToggle);

        // 1. Create the Custom Controls HTML
        const customControls = document.createElement('div');
        customControls.innerHTML = `
            <div style="margin-bottom: 15px; padding-bottom: 15px; border-bottom: 1px solid rgba(255,255,255,0.1);">
                <h3 style="margin-top:0; color: #F9FAFB; font-size: 14px; text-transform: uppercase; letter-spacing: 1px;">Logseq Filters</h3>
                <label style="display:flex; align-items:center; gap:8px; font-size: 13px; color: #E5E7EB; cursor:pointer; margin-bottom: 8px;">
                    <input type="checkbox" id="filter-journals" checked style="accent-color: #3B82F6; cursor:pointer;"> Show Daily Journals
                </label>
                <label style="display:flex; align-items:center; gap:8px; font-size: 13px; color: #E5E7EB; cursor:pointer; margin-bottom: 12px;">
                    <input type="checkbox" id="filter-tags" checked style="accent-color: #3B82F6; cursor:pointer;"> Show Tags
                </label>
                <button id="btn-reset" style="width: 100%; padding: 6px; background: #374151; color: white; border: 1px solid #4B5563; border-radius: 4px; cursor: pointer; font-size: 13px; font-weight: 500; transition: background 0.2s;">
                    🔄 Reset Graph & Physics
                </button>
            </div>
        `;
        hudSidebar.appendChild(customControls);

        // 2. Append the original config div below our filters
        hudSidebar.appendChild(configPanel);

        // 3. Add Event Listeners for Hover Effects
        const resetButton = document.getElementById('btn-reset');
        const filterJournals = document.getElementById('filter-journals');
        const filterTags = document.getElementById('filter-tags');
        if (!resetButton || !filterJournals || !filterTags) {
            return;
        }
        resetButton.onmouseover = function() { this.style.background = '#4B5563'; };
        resetButton.onmouseout = function() { this.style.background = '#374151'; };

        // 4. Implement Reset Logic
        resetButton.addEventListener('click', () => {
            window.location.reload();
        });

        // 5. Implement Dynamic Filtering using PyVis global 'nodes' DataSet
        function applyFilters() {
            if (typeof nodes !== 'undefined') {
                const updates = [];
                nodes.get().forEach(node => {
                    let isHidden = false;
                    if (!filterJournals.checked && node.group === 'journal') isHidden = true;
                    if (!filterTags.checked && node.group === 'tag') isHidden = true;
                    updates.push({ id: node.id, hidden: isHidden });
                });
                nodes.update(updates);
            }
        }
        filterJournals.addEventListener('change', applyFilters);
        filterTags.addEventListener('change', applyFilters);

        hudToggle.onclick = function() {
            const isHidden = hudSidebar.classList.toggle('hud-hidden');
            hudToggle.textContent = isHidden ? 'Show Controls' : 'Hide Controls';
        };

        setTimeout(() => {
            const sidebar = document.getElementById('hud-sidebar');
            const configDiv = document.getElementById('config');
            if (sidebar && configDiv) {
                let lastScroll = 0;

                // Record scroll position continuously
                sidebar.addEventListener('scroll', () => { lastScroll = sidebar.scrollTop; }, { passive: true });
                sidebar.addEventListener('mousedown', () => { lastScroll = sidebar.scrollTop; });

                // Watch for vis.js rebuilding the config panel
                const observer = new MutationObserver(() => {
                    sidebar.scrollTop = lastScroll;
                });

                // Observe changes to the children of the config div
                observer.observe(configDiv, { childList: true, subtree: true });
            }
        }, 500);
    })();
    """

    def __init__(self, pages: list[LogseqPage]) -> None:
        """Store the pages to chart and start with an empty graph."""
        self._pages = pages
        self._graph: nx.Graph = nx.Graph()

    @property
    def graph(self) -> nx.Graph:
        """The graph produced by the most recent :meth:`build_network` call."""
        return self._graph

    def build_network(self) -> None:
        """Rebuild the graph from scratch and annotate every node.

        Each page becomes a node, and every reference found while walking
        its blocks becomes an edge. Afterwards each node receives its final
        ``group``, a ``value`` of ``degree + 1`` and a ``title`` string
        summarising it (including the block count for known pages).
        """
        self._graph = nx.Graph()
        page_block_counts = {page.title: self._count_page_blocks(page) for page in self._pages}

        for page in self._pages:
            # Register the page even when it has no references at all.
            self._graph.add_node(page.title, group="page")
            visitor = NetworkXVisitor(graph=self._graph, page_title=page.title)
            for root_node in page.root_nodes:
                root_node.accept(visitor)

        degree_by_node = dict(self._graph.degree())
        for node_name in self._graph.nodes:
            attributes = self._graph.nodes[node_name]
            group = self._classify_node_group(
                node_name=node_name,
                current_group=attributes.get("group"),
            )
            degree = int(degree_by_node.get(node_name, 0))

            # NOTE(review): node names are interpolated into this markup
            # unescaped; confirm how the renderer treats ``title`` before
            # relying on it for untrusted graphs.
            title = (
                f"<b>{node_name}</b><br>"
                f"Group: {group}<br>"
                f"Connections: {degree}"
            )
            page_block_count = page_block_counts.get(node_name)
            if page_block_count is not None:
                title = f"{title}<br>Blocks: {page_block_count}"

            attributes.update(
                {
                    "group": group,
                    "value": degree + 1,
                    "title": title,
                }
            )

        logger.debug(
            "LENS build_network completed nodes=%d edges=%d",
            self._graph.number_of_nodes(),
            self._graph.number_of_edges(),
        )

    def get_deep_statistics(self) -> dict[str, Any]:
        """Summarise the current graph.

        Returns:
            A dict with ``total_nodes``, ``total_edges``,
            ``top_connected_nodes`` (up to ten entries, highest degree
            first) and ``largest_pages`` (up to five entries, highest block
            count first).
        """
        degree_items = sorted(
            self._graph.degree(),
            key=lambda item: item[1],
            reverse=True,
        )
        top_connected = [
            {
                "node": node_name,
                "degree": degree,
                "group": str(self._graph.nodes[node_name].get("group", "unknown")),
            }
            for node_name, degree in degree_items[:10]
        ]

        largest_pages: list[dict[str, str | int]] = [
            {"page": page.title, "block_count": self._count_page_blocks(page)}
            for page in self._pages
        ]
        largest_pages = sorted(
            largest_pages,
            key=lambda item: int(item["block_count"]),
            reverse=True,
        )[:5]

        return {
            "total_nodes": self._graph.number_of_nodes(),
            "total_edges": self._graph.number_of_edges(),
            "top_connected_nodes": top_connected,
            "largest_pages": largest_pages,
        }

    @staticmethod
    def _count_page_blocks(page: LogseqPage) -> int:
        """Count every block under ``page``, at any depth.

        Uses an explicit stack rather than recursion, so deep outlines
        cannot exhaust the interpreter's recursion limit.
        """
        total_blocks = 0
        stack = list(page.root_nodes)
        while stack:
            current_node = stack.pop()
            total_blocks += 1
            stack.extend(current_node.children)
        return total_blocks

    @classmethod
    def _classify_node_group(cls, node_name: str, current_group: Any) -> str:
        """Return the display group for a node.

        Checks run in priority order: project prefix, journal-shaped name,
        tag (by existing group or ``#`` prefix), then plain page.
        """
        normalized_name = node_name.strip()
        if normalized_name.lower().startswith("progetti___"):
            return "project"
        if cls._looks_like_journal(normalized_name):
            return "journal"
        if current_group == "tag" or normalized_name.startswith("#"):
            return "tag"
        return "page"

    @staticmethod
    def _looks_like_journal(node_name: str) -> bool:
        """Return True when ``node_name`` matches a known journal name shape."""
        return any(
            pattern.match(node_name) for pattern in GraphVisualizer._JOURNAL_PATTERNS
        )

    def export_html(self, output_path: Path) -> None:
        """Render the graph to a standalone interactive HTML file.

        The page is generated by pyvis, then rewritten in place with the
        viewport tag, hidden loading bar and HUD assets.

        Args:
            output_path: Destination file; missing parent directories are
                created.
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)

        network = Network(height="100vh", width="100%", bgcolor="#111827", font_color="white")
        network.from_nx(self._graph)
        network.force_atlas_2based(
            gravity=-50,
            central_gravity=0.01,
            spring_length=100,
            spring_strength=0.08,
            damping=0.4,
            overlap=0,
        )
        network.toggle_stabilization(False)
        network.options.edges.smooth = False
        network.show_buttons(filter_=["physics", "nodes"])
        network.save_graph(str(output_path))

        generated_html = output_path.read_text(encoding="utf-8")
        output_path.write_text(self._postprocess_html(generated_html), encoding="utf-8")
        logger.debug("LENS HTML graph exported to %s", output_path)

    @classmethod
    def _postprocess_html(cls, page_html: str) -> str:
        """Return ``page_html`` with the viewport tag, hidden loader and HUD.

        The style and script are inserted before the LAST ``</body>`` only.
        Replacing every occurrence would also rewrite any ``</body>`` text
        embedded in the graph data and corrupt the page. Input without a
        closing body tag gets no HUD injection.
        """
        if 'name="viewport"' not in page_html:
            page_html = re.sub(
                r"<head([^>]*)>",
                r"<head\1>" + cls._VIEWPORT_META,
                page_html,
                count=1,
            )

        page_html = page_html.replace(
            '<div id="loadingBar">',
            '<div id="loadingBar" style="display: none !important;">',
        )

        before, closing_tag, after = page_html.rpartition("</body>")
        if not closing_tag:
            return page_html
        return (
            f"{before}<style>{cls._HUD_CSS}</style>"
            f"<script>{cls._HUD_JS}</script>{closing_tag}{after}"
        )
@@ -0,0 +1,171 @@
1
+ """Core models and interfaces for the LOGOS parser module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from abc import ABC, abstractmethod
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
11
+
12
+
13
class ASTVisitor(ABC):
    """Abstract visitor for walking a Logseq block tree.

    Subclasses must implement both hooks; the class cannot be instantiated
    until they do.
    """

    @abstractmethod
    def visit_node(self, node: "LogseqNode") -> None:
        """Handle ``node`` on entry."""

    @abstractmethod
    def depart_node(self, node: "LogseqNode") -> None:
        """Handle ``node`` on exit."""
23
+
24
+
25
class LogseqNode(BaseModel):
    """Single Logseq AST node.

    ``path`` lists synthetic block UUIDs from the outline root to this node; LogseqGraph uses it
    with ``properties`` to compute inherited metadata (see ``get_effective_properties`` on
    ``LogseqGraph``).

    Instances are frozen: :meth:`add_child` returns a modified copy instead
    of mutating in place.
    """

    model_config = ConfigDict(strict=True, frozen=True)

    uuid: str
    source_uuid: str | None = None
    synthetic_id: bool = False
    content: str
    clean_text: str = ""
    indent_level: int
    properties: dict[str, Any] = Field(default_factory=dict)
    wikilinks: list[str] = Field(default_factory=list)
    tags: list[str] = Field(default_factory=list)
    block_refs: list[str] = Field(default_factory=list)
    refs: list[str] = Field(default_factory=list)
    task_status: str | None = None
    task_priority: str | None = None
    scheduled_at: int | None = None
    deadline_at: int | None = None
    repeater: str | None = None
    parent_id: str | None = None
    left_id: str | None = None
    path: list[str] = Field(default_factory=list)
    source_path: str | None = None
    line_start: int | None = None
    line_end: int | None = None
    outline_path: list[int] = Field(default_factory=list)
    properties_order: list[str] = Field(default_factory=list)
    created_at: int | None = None
    updated_at: int | None = None
    children: list["LogseqNode"] = Field(default_factory=list)

    @model_validator(mode="before")
    @classmethod
    def _derive_clean_text(cls, data: Any) -> Any:
        """Default ``clean_text`` to ``content`` when the caller omitted it.

        Only dict input with a string ``content`` is touched. The input is
        copied rather than modified, so a dict handed to ``model_validate``
        is not changed as a side effect of validation.
        """
        if not isinstance(data, dict) or "clean_text" in data:
            return data
        content = data.get("content", "")
        if not isinstance(content, str):
            return data
        return {**data, "clean_text": content}

    def accept(self, visitor: ASTVisitor) -> None:
        """Traverse this node with a visitor.

        Calls ``visit_node`` on entry, recurses into the children in order,
        then calls ``depart_node``.
        """
        visitor.visit_node(self)
        for child in self.children:
            child.accept(visitor)
        visitor.depart_node(self)

    def add_child(self, node: "LogseqNode") -> "LogseqNode":
        """Return a copy with one additional child."""
        return self.model_copy(update={"children": [*self.children, node]})
82
+
83
+
84
class LogseqPage(BaseModel):
    """Container model for a parsed Logseq page."""

    model_config = ConfigDict(strict=True, frozen=True)

    title: str
    raw_content: str
    properties: dict[str, Any] = Field(default_factory=dict)
    refs: list[str] = Field(default_factory=list)
    created_at: int | None = None
    updated_at: int | None = None
    namespace_chain: list[str] = Field(default_factory=list)
    source_path: str | None = None
    graph_root: str | None = None
    root_nodes: list[LogseqNode] = Field(default_factory=list)

    def resolve_asset_path(self, asset_link: str) -> str | None:
        """Resolve a Logseq asset link to an absolute filesystem path.

        Resolution order:

        1. ``file://`` URLs are percent-decoded and resolved directly.
        2. ``assets/...`` and ``../assets/...`` links resolve against the
           graph root, when one is known.
        3. Otherwise the link is tried relative to this page's directory,
           and accepted only if that file exists.
        4. Finally the bare file name is looked up in ``<root>/assets``,
           and accepted only if that file exists.

        Args:
            asset_link: Link text as written in the page; backslashes are
                treated as path separators.

        Returns:
            The absolute path as a string, or ``None`` for a blank link or
            when no rule produces a path.
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import unquote

        normalized_link = asset_link.strip().replace("\\", "/")
        if not normalized_link:
            return None

        if normalized_link.startswith("file://"):
            # File URLs are percent-encoded ("My%20Graph"); decode before
            # treating the remainder as a filesystem path.
            filesystem_path = unquote(normalized_link[len("file://"):])
            # "file:///C:/x" leaves "/C:/x"; drop the slash on Windows.
            if os.name == "nt" and filesystem_path.startswith("/"):
                filesystem_path = filesystem_path[1:]
            return str(Path(filesystem_path).resolve())

        graph_root = self._infer_graph_root()
        if graph_root is not None and normalized_link.startswith(("../assets/", "assets/")):
            root_relative = normalized_link
            while root_relative.startswith("../"):
                root_relative = root_relative[3:]
            # NOTE(review): ".." segments later in the link are resolved as
            # written, so the result is not confined to the graph root.
            return str((graph_root / Path(root_relative)).resolve())

        if self.source_path:
            local_candidate = (Path(self.source_path).parent / normalized_link).resolve()
            if local_candidate.exists():
                return str(local_candidate)

        if graph_root is not None:
            fallback_asset = (graph_root / "assets" / Path(normalized_link).name).resolve()
            if fallback_asset.exists():
                return str(fallback_asset)

        return None

    def _infer_graph_root(self) -> Path | None:
        """Return the graph root directory, or ``None`` if it cannot be found.

        An explicit ``graph_root`` wins. Otherwise the ancestors of
        ``source_path`` are searched, nearest first, for a directory named
        ``pages``, ``journals``, ``assets`` or ``logseq``; the parent of the
        first match is returned.
        """
        if self.graph_root:
            return Path(self.graph_root).resolve()
        if not self.source_path:
            return None

        source_path = Path(self.source_path).resolve()
        marker_dirs = {"pages", "journals", "assets", "logseq"}
        for parent in source_path.parents:
            if parent.name in marker_dirs:
                return parent.parent.resolve()
        return None
146
+
147
+
148
class SovereignNotePackage(BaseModel):
    """Immutable export payload bundling a note's source text with its AST.

    ``slug``, ``raw_content``, ``parsed_ast`` and ``checksum`` are required;
    ``metadata`` defaults to an empty dict and ``version`` to ``"1.0.0"``.
    """

    # Strict validation; instances cannot be modified after creation.
    model_config = ConfigDict(strict=True, frozen=True)

    slug: str
    raw_content: str
    parsed_ast: LogseqNode
    metadata: dict[str, Any] = Field(default_factory=dict)
    checksum: str
    version: str = "1.0.0"
159
+
160
+
161
class LogosNode(LogseqNode):
    """Mutable variant of ``LogseqNode`` kept for legacy callers and tests.

    Unlike the frozen base class, :meth:`add_child` changes this instance
    in place.
    """

    # Same strict validation as the base model, with freezing switched off.
    model_config = ConfigDict(strict=True, frozen=False)

    def add_child(self, node: "LogseqNode") -> "LogseqNode":
        """Append ``node`` to this instance's children and return ``self``."""
        self.children.append(node)
        return self
169
+
170
+ # Explicit model rebuild for recursive fields (Nuitka/AOT compatibility).
171
+ LogseqNode.model_rebuild()