logseq-matryca-parser 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,531 @@
1
+ """KINETIC command line interface for Logseq graph parsing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import re
8
+ import sys
9
+ from collections.abc import Iterable
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ if TYPE_CHECKING:
15
+ from logseq_matryca_parser.graph import LogseqGraph
16
+
17
+ import typer
18
+ from rich.console import Console
19
+ from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
20
+ from rich.table import Table
21
+
22
+ from logseq_matryca_parser import logseq_agent_write
23
+ from logseq_matryca_parser.forge import ForgeExporter
24
+ from logseq_matryca_parser.logos_core import LogseqNode, LogseqPage
25
+ from logseq_matryca_parser.logos_parser import LogosParser
26
+ from logseq_matryca_parser.synapse import SynapseAdapter
27
+
28
# Module-level logger; the helpers in this file emit their debug records through it.
logger = logging.getLogger(__name__)
# Root Typer application; each CLI command registers itself with @app.command().
app = typer.Typer(help="KINETIC CLI for parsing and exporting Logseq graphs.", no_args_is_help=True)
# Shared Rich console used by the commands for styled terminal output.
console = Console()
31
+
32
+
33
class ExportFormat(str, Enum):
    """Output formats accepted by the ``export`` command's ``--format`` option.

    Members are also ``str`` instances; each value is the literal spelling of
    the corresponding command-line choice.
    """

    # Handled by _export_json (this is the default of the export command).
    JSON = "json"
    # Handled by _export_markdown.
    MARKDOWN = "markdown"
    # Handled by _export_langchain.
    LANGCHAIN = "langchain"
    # Handled by _export_langchain_enriched.
    LANGCHAIN_ENRICHED = "langchain-enriched"
    # Handled by _export_obsidian.
    OBSIDIAN = "obsidian"
39
+
40
+
41
def _discover_graph_files(graph_path: Path) -> list[Path]:
    """Collect the Markdown files stored under a graph's content folders.

    Only the ``pages`` and ``journals`` subdirectories of ``graph_path`` are
    searched (recursively, for ``*.md``). Files are sorted within each folder
    and all ``pages`` entries come before the ``journals`` entries. A folder
    that does not exist is skipped with a debug log record.
    """
    discovered: list[Path] = []
    for subdir in ("pages", "journals"):
        folder = graph_path / subdir
        if folder.exists():
            discovered += sorted(folder.rglob("*.md"))
        else:
            logger.debug("Skipping missing graph subdirectory: %s", folder)
    logger.debug("Discovered %d markdown files in graph %s", len(discovered), graph_path)
    return discovered
51
+
52
+
53
+ def _iter_nodes(nodes: Iterable[LogseqNode]) -> Iterable[LogseqNode]:
54
+ for node in nodes:
55
+ yield node
56
+ if node.children:
57
+ yield from _iter_nodes(node.children)
58
+
59
+
60
def _parse_graph(graph_path: Path) -> list[LogseqPage]:
    """Parse every discovered Markdown file of a graph into a ``LogseqPage``.

    Files come from ``_discover_graph_files``; an empty list is returned right
    away when none are found. Otherwise each file is parsed in discovery order
    with a single ``LogosParser`` while a Rich progress bar is shown on the
    shared console.
    """
    parser = LogosParser()
    markdown_files = _discover_graph_files(graph_path)
    if not markdown_files:
        return []

    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("{task.completed}/{task.total}"),
        TimeElapsedColumn(),
    )
    parsed: list[LogseqPage] = []
    with Progress(*columns, console=console) as bar:
        task = bar.add_task("Parsing Logseq graph", total=len(markdown_files))
        for md_file in markdown_files:
            logger.debug("Parsing graph file: %s", md_file)
            parsed.append(parser.parse_page_file(md_file))
            bar.advance(task)
    return parsed
81
+
82
+
83
def _build_stats_table(pages: list[LogseqPage]) -> Table:
    """Summarise parsed pages as a two-column Rich table.

    The table reports the number of pages, the number of blocks (every node in
    every page tree), the summed length of each block's ``tags`` and the
    number of blocks whose ``task_status`` is not ``None``.
    """
    blocks = [block for page in pages for block in _iter_nodes(page.root_nodes)]
    tag_total = sum(len(block.tags) for block in blocks)
    task_total = sum(1 for block in blocks if block.task_status is not None)

    summary = Table(title="Graph Scan Statistics")
    summary.add_column("Metric", style="cyan")
    summary.add_column("Value", justify="right", style="bold green")
    rows = (
        ("Total Pages", len(pages)),
        ("Total Blocks", len(blocks)),
        ("Total Tags extracted", tag_total),
        ("Total Tasks found", task_total),
    )
    for label, value in rows:
        summary.add_row(label, str(value))
    return summary
103
+
104
+
105
def _build_deep_stats_tables(stats: dict[str, Any]) -> tuple[Table, Table, Table]:
    """Render a deep-statistics mapping as three Rich tables.

    ``stats`` must provide ``total_nodes``, ``total_edges``,
    ``top_connected_nodes`` (entries with ``node``, ``group`` and ``degree``)
    and ``largest_pages`` (entries with ``page`` and ``block_count``); a
    missing key raises ``KeyError``. Every value is shown via ``str()``.

    Returns the overview, connectivity and largest-pages tables, in that order.
    """
    # Shared styling of the right-aligned numeric column in all three tables.
    numeric = {"justify": "right", "style": "bold green"}

    overview = Table(title="LENS Deep Statistics")
    overview.add_column("Metric", style="cyan")
    overview.add_column("Value", **numeric)
    for label, key in (("Total Nodes", "total_nodes"), ("Total Edges", "total_edges")):
        overview.add_row(label, str(stats[key]))

    connectivity = Table(title="Top 10 Most Connected Nodes")
    connectivity.add_column("Node", style="cyan")
    connectivity.add_column("Group", style="magenta")
    connectivity.add_column("Degree", **numeric)
    for item in stats["top_connected_nodes"]:
        connectivity.add_row(*(str(item[field]) for field in ("node", "group", "degree")))

    largest = Table(title="Top 5 Largest Pages")
    largest.add_column("Page", style="cyan")
    largest.add_column("Block Count", **numeric)
    for item in stats["largest_pages"]:
        largest.add_row(str(item["page"]), str(item["block_count"]))

    return overview, connectivity, largest
126
+
127
+
128
def _build_official_logseq_demo_pages() -> list[LogseqPage]:
    """Return a single in-memory "Logseq" page for the showcase graph.

    Nothing is read from disk. The page holds one hub block whose refs fan out
    to core concept pages, two journal-style names and a tag, plus one nested
    child block that links back to ``[[Block]]`` and ``[[Page]]``.
    """
    hub_text = "Logseq is a local-first, privacy-focused outliner and graph for knowledge work."
    branch_text = "A block is a node in a tree. Blocks can nest and reference others."

    # Centre of the star: every spoke of the demo graph starts here.
    hub = LogseqNode(
        uuid="showcase-hub",
        content=hub_text,
        clean_text=hub_text,
        indent_level=0,
        refs=[
            "[[Contents]]",
            "[[Graph]]",
            "[[Page]]",
            "[[Block]]",
            "[[Journal]]",
            "2023_01_01",
            "2024_06_15",
            "logseq",
        ],
        tags=["logseq", "outliner"],
    )
    # Child block that adds one level of depth and a couple of cross-links.
    branch = LogseqNode(
        uuid="showcase-branch",
        content=branch_text,
        clean_text=branch_text,
        indent_level=1,
        parent_id=hub.uuid,
        refs=["[[Block]]", "[[Page]]", "logseq"],
        tags=["logseq"],
    )
    # The child is attached through a copy of the hub rather than by mutating it.
    hub_with_child = hub.model_copy(update={"children": [branch]})
    showcase_page = LogseqPage(
        title="Logseq",
        raw_content="",
        source_path=None,
        graph_root=None,
        root_nodes=[hub_with_child],
    )
    return [showcase_page]
169
+
170
+
171
@app.command()
def scan(graph_path: Path = typer.Argument(..., help="Path to the Logseq graph root.")) -> None:
    """Scan a graph and print aggregate parsing statistics."""
    # Exit code 1: the argument is not an existing directory.
    if not (graph_path.exists() and graph_path.is_dir()):
        console.print(f"[bold red]Invalid graph path:[/] {graph_path}")
        raise typer.Exit(code=1)

    parsed_pages = _parse_graph(graph_path.resolve())
    if parsed_pages:
        console.print(_build_stats_table(parsed_pages))
        return

    # A graph without Markdown files is reported but is not an error (exit code 0).
    console.print("[yellow]No Markdown files found under pages/ or journals/.[/]")
    raise typer.Exit(code=0)
184
+
185
+
186
@app.command()
def visualize(
    graph_path: Path = typer.Argument(..., help="Path to the Logseq graph root."),
    output_html: Path = typer.Argument(..., help="Output HTML path for network visualization."),
) -> None:
    """Parse a graph, compute deep topology stats, and export an interactive HTML network."""
    # Exit code 1: the argument is not an existing directory.
    if not (graph_path.exists() and graph_path.is_dir()):
        console.print(f"[bold red]Invalid graph path:[/] {graph_path}")
        raise typer.Exit(code=1)

    parsed_pages = _parse_graph(graph_path.resolve())
    if not parsed_pages:
        # Nothing to draw; reported as a warning with exit code 0.
        console.print("[yellow]No Markdown files found under pages/ or journals/.[/]")
        raise typer.Exit(code=0)

    # The lens module is imported lazily; an ImportError from the import or
    # from constructing the visualizer is turned into an install hint.
    try:
        from logseq_matryca_parser.lens import GraphVisualizer

        viz = GraphVisualizer(pages=parsed_pages)
    except ImportError:
        console.print(
            "[bold red]Missing visualization dependencies.[/] Please install them using: "
            "[cyan]pip install 'logseq-matryca-parser[viz]'[/]"
        )
        raise typer.Exit(1) from None

    viz.build_network()
    deep_stats = viz.get_deep_statistics()

    # Overview, connectivity and largest-pages tables are printed in that order.
    for stats_table in _build_deep_stats_tables(deep_stats):
        console.print(stats_table)

    viz.export_html(output_html)
    console.print(f"[bold green]Visualization HTML written:[/] {output_html}")
222
+
223
+
224
@app.command()
def demo(
    output_html: Path = typer.Argument(
        Path("showcase.html"),
        help="Path for the standalone showcase HTML (default: showcase.html in cwd).",
    ),
) -> None:
    """Build a sample graph from the official Logseq demo topology and write showcase HTML (no graph files read)."""
    showcase_pages = _build_official_logseq_demo_pages()

    # The lens module is imported lazily; an ImportError from the import or
    # from constructing the visualizer is turned into an install hint.
    try:
        from logseq_matryca_parser.lens import GraphVisualizer

        viz = GraphVisualizer(pages=showcase_pages)
    except ImportError:
        console.print(
            "[bold red]Missing visualization dependencies.[/] Please install them using: "
            "[cyan]pip install 'logseq-matryca-parser[viz]'[/]"
        )
        raise typer.Exit(1) from None

    # The same absolute path is used for writing and for the confirmation message.
    target = output_html.resolve()
    viz.build_network()
    viz.export_html(target)
    console.print(
        f"[bold green]Showcase example written:[/] {target} "
        f"(open in a browser to preview the LENS graph)."
    )
250
+
251
+
252
def _export_json(pages: list[LogseqPage], output_path: Path) -> Path:
    """Write all pages to ``graph.json`` inside ``output_path`` and return that file's path.

    Each page becomes one object holding its metadata fields plus an ``ast``
    entry obtained by decoding the JSON text that ``ForgeExporter.to_json``
    produces for the page's root nodes.
    """
    # NOTE(review): json.dumps runs without a default= hook, so source_path,
    # graph_root, created_at and updated_at must already be JSON-serializable
    # -- confirm against the LogseqPage field types.
    records: list[dict[str, Any]] = [
        {
            "title": page.title,
            "source_path": page.source_path,
            "graph_root": page.graph_root,
            "properties": page.properties,
            "refs": page.refs,
            "created_at": page.created_at,
            "updated_at": page.updated_at,
            "ast": json.loads(ForgeExporter.to_json(page.root_nodes)),
        }
        for page in pages
    ]
    target = output_path / "graph.json"
    target.write_text(json.dumps(records, indent=2), encoding="utf-8")
    return target
269
+
270
+
271
def _export_markdown(pages: list[LogseqPage], output_path: Path) -> Path:
    """Write all pages to a single ``graph.md`` in ``output_path`` and return its path.

    Every page contributes a ``# <title>`` heading, the clean Markdown that
    ``ForgeExporter.to_clean_markdown`` renders for its root nodes, and a blank
    separator line. Trailing whitespace of the whole document is stripped and
    exactly one final newline is written.
    """
    lines: list[str] = []
    for page in pages:
        lines += [
            f"# {page.title}",
            ForgeExporter.to_clean_markdown(page.root_nodes),
            "",
        ]
    document = "\n".join(lines).rstrip() + "\n"
    target = output_path / "graph.md"
    target.write_text(document, encoding="utf-8")
    return target
280
+
281
+
282
def _export_langchain(pages: list[LogseqPage], output_path: Path) -> Path:
    """Write LangChain-style documents for all pages to ``langchain.json``.

    Documents come from ``SynapseAdapter.to_langchain_documents`` (one call per
    page, with the page title as ``source_name``) and are stored as
    ``{"page_content", "metadata"}`` objects in page order. Returns the path of
    the written file.
    """
    records: list[dict[str, Any]] = []
    for page in pages:
        documents = SynapseAdapter.to_langchain_documents(page.root_nodes, source_name=page.title)
        for document in documents:
            records.append(
                {
                    "page_content": document.page_content,
                    "metadata": document.metadata,
                }
            )
    target = output_path / "langchain.json"
    target.write_text(json.dumps(records, indent=2), encoding="utf-8")
    return target
296
+
297
+
298
def _export_langchain_enriched(graph: LogseqGraph, output_path: Path) -> tuple[Path, int]:
    """Write context-enriched LangChain documents for a loaded graph.

    The root nodes of every page in ``graph.pages`` are gathered and handed to
    ``SynapseAdapter.to_context_enriched_chunks`` together with the graph. The
    resulting documents are stored as ``{"page_content", "metadata"}`` objects
    in ``langchain_enriched.json`` under ``output_path``.

    Returns the written file's path and the number of documents it contains.
    """
    every_root: list[LogseqNode] = [
        root for page in graph.pages.values() for root in page.root_nodes
    ]
    records: list[dict[str, Any]] = []
    for chunk in SynapseAdapter.to_context_enriched_chunks(every_root, graph):
        records.append({"page_content": chunk.page_content, "metadata": chunk.metadata})
    target = output_path / "langchain_enriched.json"
    target.write_text(json.dumps(records, indent=2), encoding="utf-8")
    return target, len(records)
310
+
311
+
312
+ def _page_tree_contains_node_uuid(roots: list[LogseqNode], needle_uuid: str) -> bool:
313
+ for node in roots:
314
+ if node.uuid == needle_uuid:
315
+ return True
316
+ if node.children and _page_tree_contains_node_uuid(node.children, needle_uuid):
317
+ return True
318
+ return False
319
+
320
+
321
+ def _safe_obsidian_vault_relative_path(page_title: str) -> Path:
322
+ segments = [
323
+ re.sub(r'[<>:"|?*\\]', "_", segment) for segment in page_title.split("/") if segment
324
+ ]
325
+ if not segments:
326
+ return Path("untitled.md")
327
+ *parents, leaf = segments
328
+ if not parents:
329
+ return Path(f"{leaf}.md")
330
+ return Path(*parents) / f"{leaf}.md"
331
+
332
+
333
def _export_obsidian(graph: LogseqGraph, output_path: Path) -> int:
    """Write one Obsidian-compatible Markdown file per page (namespace folders).

    Every page of ``graph.pages`` is rendered with
    ``ForgeExporter.to_obsidian_markdown`` and written below ``output_path`` at
    the relative location computed by ``_safe_obsidian_vault_relative_path``;
    missing parent folders are created.

    Returns the number of files written.
    """
    pages_list = list(graph.pages.values())
    targets = ForgeExporter.vault_wide_embed_targets(pages_list)
    suffix_map = ForgeExporter.build_vault_obsidian_suffix_map(
        pages_list,
        vault_wide_ref_targets=targets,
    )

    # Index "block uuid -> title of the first page containing it" once, so that
    # resolving an embed is a dictionary lookup instead of a walk over every
    # page tree for each reference.
    owner_title_by_uuid: dict[str, str] = {}
    for title, page in graph.pages.items():
        pending = list(page.root_nodes)
        while pending:
            node = pending.pop()
            # setdefault keeps the earliest page when a uuid occurs on several pages.
            owner_title_by_uuid.setdefault(node.uuid, title)
            if node.children:
                pending.extend(node.children)

    def embed_resolver(ref: str) -> tuple[str, str] | None:
        """Resolve an embed reference to ``(page title, block anchor)`` or ``None``."""
        node = graph.get_node_by_embed_ref(ref)
        if node is None or node.uuid not in owner_title_by_uuid:
            return None
        # Fall back to the first 8 characters of the dash-less uuid when the
        # vault-wide suffix map has no entry for this block.
        anchor = suffix_map.get(node.uuid, node.uuid.replace("-", "")[:8])
        return owner_title_by_uuid[node.uuid], anchor

    count = 0
    for page in pages_list:
        # The page title is always exported as a property, overriding any
        # existing "title" entry.
        props = {**page.properties, "title": page.title}
        md = ForgeExporter.to_obsidian_markdown(
            page.root_nodes,
            props,
            embed_resolver=embed_resolver,
            global_suffix_map=suffix_map,
            vault_wide_ref_targets=targets,
        )
        out_file = output_path / _safe_obsidian_vault_relative_path(page.title)
        out_file.parent.mkdir(parents=True, exist_ok=True)
        out_file.write_text(md, encoding="utf-8")
        count += 1
    return count
368
+
369
+
370
@app.command()
def export(
    graph_path: Path = typer.Argument(..., help="Path to the Logseq graph root."),
    output_path: Path = typer.Argument(..., help="Output directory for exported artifacts."),
    format: ExportFormat = typer.Option(ExportFormat.JSON, "--format", "-f", help="Export format."),
) -> None:
    """Parse an entire graph and export it to the selected format."""
    # Exit code 1: the argument is not an existing directory.
    if not (graph_path.exists() and graph_path.is_dir()):
        console.print(f"[bold red]Invalid graph path:[/] {graph_path}")
        raise typer.Exit(code=1)

    resolved_graph = graph_path.resolve()
    empty_notice = "[yellow]No Markdown files found under pages/ or journals/.[/]"
    missing_ai_notice = (
        "[bold red]Missing AI export dependencies.[/] Please install them using: "
        "[cyan]pip install 'logseq-matryca-parser[ai]'[/]"
    )

    # Enriched LangChain and Obsidian exports work on a fully loaded graph;
    # the remaining formats only need the list of parsed pages.
    if format is ExportFormat.LANGCHAIN_ENRICHED or format is ExportFormat.OBSIDIAN:
        from logseq_matryca_parser.graph import LogseqGraph

        graph = LogseqGraph.load_directory(resolved_graph)
        if not graph.pages:
            console.print(empty_notice)
            raise typer.Exit(code=0)
        output_path.mkdir(parents=True, exist_ok=True)

        if format is ExportFormat.OBSIDIAN:
            file_count = _export_obsidian(graph, output_path)
            console.print(
                f"[bold green]Obsidian vault export completed:[/] [cyan]{file_count}[/] markdown "
                f"files under {output_path.resolve()}"
            )
            return

        try:
            destination, chunk_count = _export_langchain_enriched(graph, output_path)
        except ImportError:
            console.print(missing_ai_notice)
            raise typer.Exit(1) from None
        console.print(
            f"[bold green]Synthesized[/] [cyan]{chunk_count}[/] contextual chunks; "
            f"[bold green]written to[/] {destination}"
        )
        return

    pages = _parse_graph(resolved_graph)
    if not pages:
        console.print(empty_notice)
        raise typer.Exit(code=0)

    output_path.mkdir(parents=True, exist_ok=True)

    if format is ExportFormat.JSON:
        destination = _export_json(pages, output_path)
    elif format is ExportFormat.MARKDOWN:
        destination = _export_markdown(pages, output_path)
    else:
        # Any format not handled above is exported as plain LangChain documents.
        try:
            destination = _export_langchain(pages, output_path)
        except ImportError:
            console.print(missing_ai_notice)
            raise typer.Exit(1) from None

    console.print(f"[bold green]Export completed:[/] {destination}")
442
+
443
+
444
def _require_absolute_path(path: Path, label: str) -> Path:
    """Return the resolved form of ``path``, or abort when it is not absolute.

    ``~`` is expanded before the check. For a relative path an error naming
    ``label`` is printed to the console and ``typer.Exit`` with code 1 is raised.
    """
    candidate = path.expanduser()
    if candidate.is_absolute():
        return candidate.resolve()
    # The message shows the path exactly as the user supplied it.
    console.print(f"[bold red]{label} must be an absolute path:[/] {path}")
    raise typer.Exit(code=1)
450
+
451
+
452
@app.command()
def append(
    content: str = typer.Argument(..., help="Markdown text to append to the agent file."),
    config: Path = typer.Option(
        ...,
        "--config",
        help="Absolute path to the Logseq config.edn file.",
        metavar="PATH",
    ),
    pages: Path = typer.Option(
        ...,
        "--pages",
        help="Absolute path to the Logseq pages directory.",
        metavar="PATH",
    ),
    tags: list[str] = typer.Option(
        [],
        "--tags",
        help="Optional context tags for the block (repeat for multiple).",
    ),
) -> None:
    """Append a block to the weekly agent page via logseq_agent_write."""
    # Both locations must be absolute; either check exits with code 1 otherwise.
    config_path = _require_absolute_path(config, "--config")
    pages_dir = _require_absolute_path(pages, "--pages")

    # An empty tag list is passed on as None.
    outcome = logseq_agent_write(
        content,
        str(config_path),
        str(pages_dir),
        context_tags=tags or None,
    )

    if outcome.get("status") != "success":
        message = outcome.get("message", "Unknown error.")
        console.print(f"[bold red]Append failed:[/] {message}")
        raise typer.Exit(code=1)

    path_str = outcome.get("path", "")
    # Wrapping and cropping are disabled so the path is printed on one line, in full.
    console.print(
        f"[bold green]Appended to agent page:[/] {path_str}",
        no_wrap=True,
        overflow="ignore",
        crop=False,
    )
497
+
498
+
499
@app.command()
def agent_read(
    graph_path: Path = typer.Argument(..., help="Path to the Logseq graph root."),
    tag: str | None = typer.Option(None, "--tag", help="Filter nodes by tag."),
    query: str | None = typer.Option(None, "--query", help="Substring search on clean_text."),
) -> None:
    """Load a graph, filter nodes, and print ultra-dense X-Ray text to stdout (no Rich)."""
    from logseq_matryca_parser.agent_press import SessionAliasRegistry, to_xray_markdown
    from logseq_matryca_parser.graph import LogseqGraph

    # Errors go to stderr as plain text, so stdout carries only the X-Ray output.
    if not (graph_path.exists() and graph_path.is_dir()):
        print(f"Invalid graph path: {graph_path}", file=sys.stderr)
        raise typer.Exit(code=1)

    graph = LogseqGraph.load_directory(graph_path.resolve())

    # --tag takes precedence: when both options are given, --query is not applied.
    if tag is not None:
        selected = graph.query().has_tag(tag).execute()
    elif query is not None:
        selected = graph.search_content(query)
    else:
        selected = graph.query().execute()

    aliases = SessionAliasRegistry()
    aliases.generate_aliases(selected)
    rendered = to_xray_markdown(selected, aliases)
    if rendered:
        # Non-empty output always ends with exactly the newline it needs.
        sys.stdout.write(rendered if rendered.endswith("\n") else rendered + "\n")
528
+
529
+
530
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()