logseq-matryca-parser 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,7 @@
1
+ Logseq Matryca Parser (Logos Protocol)
2
+ Copyright 2026 Marco Porcellato & Matryca.ai
3
+
4
+ Architect: Marco Porcellato
5
+ Protocol Name: Logos
6
+ This module provides deterministic parsing for the Matryca ecosystem.
7
+ Released under Apache 2.0.
@@ -0,0 +1,61 @@
1
+ """Public package exports for logseq_matryca_parser."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ __version__ = "0.3.0"
8
+
9
+ from .agent_writer import LogseqConfigReader, logseq_agent_write
10
+ from .exceptions import BlockReferenceError, LogseqIndentationError, LogseqParserError
11
+ from .forge import (
12
+ FlatListForgeVisitor,
13
+ ForgeExporter,
14
+ JSONForgeVisitor,
15
+ MarkdownForgeVisitor,
16
+ ObsidianForgeVisitor,
17
+ )
18
+ from .graph import LogseqGraph
19
+ from .logos_core import ASTVisitor, LogseqNode, LogseqPage, LogosNode, SovereignNotePackage
20
+ from .logos_parser import (
21
+ LOGSEQ_PATTERNS,
22
+ LogosParser,
23
+ PageRegistry,
24
+ StackMachineParser,
25
+ clean_node_content,
26
+ is_system_block,
27
+ )
28
+
29
+
30
def ensure_aot_compatibility() -> None:
    """Probe whether ``importlib.metadata`` has already been imported.

    The function only inspects ``sys.modules``. It returns ``None`` whether or
    not the module is present and performs no other action in either case.
    """
    metadata_already_loaded = "importlib.metadata" in sys.modules
    if not metadata_already_loaded:
        return None
    return None
34
+
35
+
36
+ __all__ = [
37
+ "__version__",
38
+ "ASTVisitor",
39
+ "BlockReferenceError",
40
+ "FlatListForgeVisitor",
41
+ "ForgeExporter",
42
+ "JSONForgeVisitor",
43
+ "LOGSEQ_PATTERNS",
44
+ "LogosNode",
45
+ "LogosParser",
46
+ "LogseqConfigReader",
47
+ "LogseqGraph",
48
+ "LogseqIndentationError",
49
+ "LogseqNode",
50
+ "LogseqPage",
51
+ "LogseqParserError",
52
+ "PageRegistry",
53
+ "SovereignNotePackage",
54
+ "StackMachineParser",
55
+ "clean_node_content",
56
+ "ensure_aot_compatibility",
57
+ "is_system_block",
58
+ "logseq_agent_write",
59
+ "MarkdownForgeVisitor",
60
+ "ObsidianForgeVisitor",
61
+ ]
@@ -0,0 +1,8 @@
1
+ """Allow running the CLI via ``python -m logseq_matryca_parser``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from logseq_matryca_parser.kinetic import app
6
+
7
+ if __name__ == "__main__":
8
+ app(prog_name="matryca-parse")
@@ -0,0 +1,99 @@
1
+ """Agent-native "Printing Press" exports: UUID aliases and ultra-dense X-Ray AST text."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+
9
+ from logseq_matryca_parser.logos_core import LogseqNode
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ def _flatten_subtrees(nodes: list[LogseqNode]) -> list[LogseqNode]:
15
+ """Depth-first list of nodes under each root, preserving outline order."""
16
+ flat: list[LogseqNode] = []
17
+ seen: set[str] = set()
18
+
19
+ def walk(node: LogseqNode) -> None:
20
+ if node.uuid in seen:
21
+ return
22
+ seen.add(node.uuid)
23
+ flat.append(node)
24
+ for child in node.children:
25
+ walk(child)
26
+
27
+ for root in nodes:
28
+ walk(root)
29
+ return flat
30
+
31
+
32
class SessionAliasRegistry:
    """Two-way lookup between small integer aliases and Logseq block UUIDs.

    Both directions are kept in plain dictionaries. The mapping can be written
    to and restored from a JSON file so that aliases survive between separate
    invocations.
    """

    def __init__(self) -> None:
        self._alias_to_uuid: dict[int, str] = {}
        self._uuid_to_alias: dict[str, int] = {}

    def generate_aliases(self, nodes: list[LogseqNode]) -> dict[int, str]:
        """Rebuild the registry from ``nodes`` and return a copy of ``alias -> uuid``.

        Any previous mapping is discarded. Nodes are numbered from ``0`` in the
        order produced by ``_flatten_subtrees``; a UUID is numbered only once.
        """
        forward = self._alias_to_uuid
        backward = self._uuid_to_alias
        forward.clear()
        backward.clear()
        for node in _flatten_subtrees(nodes):
            node_uuid = node.uuid
            if node_uuid in backward:
                continue
            # The next free alias always equals the number of entries so far.
            alias = len(forward)
            forward[alias] = node_uuid
            backward[node_uuid] = alias
            logger.debug("SessionAliasRegistry alias=%s uuid=%s", alias, node_uuid)
        return dict(forward)

    def resolve_alias(self, alias: int) -> str | None:
        """Look up the UUID registered for ``alias``; ``None`` when there is none."""
        return self._alias_to_uuid.get(alias)

    def alias_for_uuid(self, node_uuid: str) -> int | None:
        """Look up the alias registered for ``node_uuid``; ``None`` when there is none."""
        return self._uuid_to_alias.get(node_uuid)

    def save_to_disk(self, filepath: Path) -> None:
        """Write the ``alias -> uuid`` mapping to ``filepath`` as UTF-8 JSON.

        Aliases become string keys because JSON object keys must be strings.
        """
        payload = {str(alias): node_uuid for alias, node_uuid in self._alias_to_uuid.items()}
        serialized = json.dumps(payload, indent=2, sort_keys=True)
        filepath.write_text(serialized, encoding="utf-8")
        logger.debug("SessionAliasRegistry saved %s aliases to %s", len(payload), filepath)

    @classmethod
    def load_from_disk(cls, filepath: Path) -> SessionAliasRegistry:
        """Build a registry from a JSON file produced by :meth:`save_to_disk`."""
        stored = json.loads(filepath.read_text(encoding="utf-8"))
        registry = cls()
        for alias_text, node_uuid in stored.items():
            alias = int(alias_text)  # keys were stringified on save
            registry._alias_to_uuid[alias] = node_uuid
            registry._uuid_to_alias[node_uuid] = alias
        logger.debug("SessionAliasRegistry loaded %s aliases from %s", len(registry._alias_to_uuid), filepath)
        return registry
78
+
79
+
80
def to_xray_markdown(nodes: list[LogseqNode], registry: SessionAliasRegistry) -> str:
    """Render the outline as newline-joined ``{indent}[{alias}] {clean_text}`` lines.

    Nodes are visited parent-first in stored child order. A node without a
    registered alias is skipped along with its whole subtree. A node whose
    stripped text is empty produces no line, but its children are still
    rendered.
    """
    rendered: list[str] = []
    for root in nodes:
        # Explicit stack; children are pushed reversed to keep outline order.
        stack = [root]
        while stack:
            current = stack.pop()
            alias = registry.alias_for_uuid(current.uuid)
            if alias is None:
                logger.debug("to_xray_markdown skip unregistered uuid=%s", current.uuid)
                continue
            padding = " " * current.indent_level
            body = current.clean_text.strip()
            if body:
                rendered.append(f"{padding}[{alias}] {body}")
            stack.extend(reversed(current.children))
    return "\n".join(rendered)
@@ -0,0 +1,250 @@
1
+ """Agent write helpers: weekly append-only logging and headless AST markdown splicing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import re
8
+ import tempfile
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING, NotRequired, TypedDict
12
+
13
+ if TYPE_CHECKING:
14
+ from logseq_matryca_parser.graph import LogseqGraph
15
+
16
+ from logseq_matryca_parser.logos_core import LogseqNode
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class AgentWriteResult(TypedDict):
22
+ """Structured result from :func:`logseq_agent_write`."""
23
+
24
+ status: str
25
+ path: NotRequired[str]
26
+ entry: NotRequired[str]
27
+ message: NotRequired[str]
28
+
29
+ _DEFAULT_JOURNAL_FORMAT = "%Y-%m-%d"
30
+ _JOURNAL_PAGE_TITLE_FORMAT_RE = re.compile(
31
+ r':journal/page-title-format\s+"([^"]*)"',
32
+ )
33
+
34
+ _ENGLISH_MONTH_ABBR: tuple[str, ...] = (
35
+ "Jan",
36
+ "Feb",
37
+ "Mar",
38
+ "Apr",
39
+ "May",
40
+ "Jun",
41
+ "Jul",
42
+ "Aug",
43
+ "Sep",
44
+ "Oct",
45
+ "Nov",
46
+ "Dec",
47
+ )
48
+ _ENGLISH_MONTH_FULL: tuple[str, ...] = (
49
+ "January",
50
+ "February",
51
+ "March",
52
+ "April",
53
+ "May",
54
+ "June",
55
+ "July",
56
+ "August",
57
+ "September",
58
+ "October",
59
+ "November",
60
+ "December",
61
+ )
62
+
63
+
64
class LogseqConfigReader:
    """Read the journal title format from ``config.edn`` and render dates with it.

    The format stored in the config uses Java/Clojure-style pattern letters
    (``yyyy``, ``MMM``, ``do``, ``EEE`` ...). This class maps the supported
    tokens to Python ``strftime`` directives and renders month and weekday
    names in English regardless of the process locale.
    """

    # Supported pattern tokens and their ``strftime`` equivalents. ``do`` has no
    # strftime counterpart, so it maps to a placeholder that
    # ``format_timestamp`` fills in after formatting. Matching is longest-first
    # (see ``translate_to_python``), so declaration order does not matter.
    TOKEN_MAP: dict[str, str] = {
        "yyyy": "%Y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "dd": "%d",
        "HH": "%H",
        "mm": "%M",
        "ss": "%S",
        "do": "{day_ordinal}",
        # Day-of-week text: four letters select the full name, fewer the
        # abbreviated one.
        "EEEE": "%A",
        "EEE": "%a",
        "EE": "%a",
        "E": "%a",
    }

    # English weekday names indexed by ``datetime.weekday()`` (Monday == 0).
    _ENGLISH_DAY_FULL: tuple[str, ...] = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )

    def __init__(self, config_path: str) -> None:
        self.config_path = config_path

    def load_journal_format(self) -> str:
        """Return ``:journal/page-title-format`` from ``config.edn``.

        Falls back to ``_DEFAULT_JOURNAL_FORMAT`` when the file cannot be read,
        is not valid UTF-8, or does not define the key.
        """
        try:
            with open(self.config_path, encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as exc:
            # An undecodable config is treated like an unreadable one instead
            # of aborting the caller.
            logger.warning("Could not read Logseq config at %s: %s", self.config_path, exc)
            return _DEFAULT_JOURNAL_FORMAT

        match = _JOURNAL_PAGE_TITLE_FORMAT_RE.search(content)
        if match:
            return match.group(1)
        return _DEFAULT_JOURNAL_FORMAT

    @staticmethod
    def get_day_ordinal(day: int) -> str:
        """Return the English ordinal suffix (``st``, ``nd``, ``rd``, ``th``) for ``day``."""
        # 11, 12 and 13 (also 111, 112 ...) take "th" despite ending in 1/2/3.
        if 11 <= day % 100 <= 13:
            return "th"
        suffixes = {1: "st", 2: "nd", 3: "rd"}
        return suffixes.get(day % 10, "th")

    def translate_to_python(self, clojure_format: str) -> str:
        """Map Logseq/Java-style pattern letters to a ``strftime``-compatible string.

        All tokens are substituted in a single left-to-right pass, trying the
        longest token first. Text produced by one substitution is therefore
        never re-scanned and mistaken for another token.
        """
        token_map = self.TOKEN_MAP
        if not token_map:
            return clojure_format
        alternation = "|".join(
            re.escape(token) for token in sorted(token_map, key=len, reverse=True)
        )
        return re.sub(alternation, lambda found: token_map[found.group(0)], clojure_format)

    def format_timestamp(self, dt: datetime) -> str:
        """Format ``dt`` using the journal title format from config (including ordinal days)."""
        py_format = self.translate_to_python(self.load_journal_format())
        # Substitute English names before strftime so the output does not
        # follow the process locale (the title is used as a Logseq link).
        if "%B" in py_format:
            py_format = py_format.replace("%B", _ENGLISH_MONTH_FULL[dt.month - 1])
        if "%b" in py_format:
            py_format = py_format.replace("%b", _ENGLISH_MONTH_ABBR[dt.month - 1])
        day_name = self._ENGLISH_DAY_FULL[dt.weekday()]
        if "%A" in py_format:
            py_format = py_format.replace("%A", day_name)
        if "%a" in py_format:
            py_format = py_format.replace("%a", day_name[:3])
        rendered = dt.strftime(py_format)
        if "{day_ordinal}" in rendered:
            ordinal = f"{dt.day}{self.get_day_ordinal(dt.day)}"
            rendered = rendered.replace("{day_ordinal}", ordinal)
        return rendered
131
+
132
+
133
def _file_lacks_trailing_newline(path: str) -> bool:
    """Return True when ``path`` exists, is non-empty and does not end with a line feed."""
    try:
        with open(path, "rb") as existing:
            existing.seek(0, os.SEEK_END)
            if existing.tell() == 0:
                return False
            existing.seek(-1, os.SEEK_END)
            return existing.read(1) != b"\n"
    except FileNotFoundError:
        return False


def logseq_agent_write(
    content: str,
    config_path: str,
    pages_dir: str,
    context_tags: list[str] | None = None,
) -> AgentWriteResult:
    """Append a single Logseq-style block to the weekly agent page (append-only).

    The block starts with a bullet holding today's journal title as a page
    link, followed by one ``[[tag]]`` link per entry in ``context_tags``.
    Non-blank ``content`` follows on the next line(s). The target file is
    ``<pages_dir>/<year>-W<week>-agent.md`` and is created when missing.

    Returns:
        ``{"status": "success", "path": ...}`` after a successful write, or
        ``{"status": "error", "message": ...}`` when an ``OSError`` occurs.
    """
    now = datetime.now()
    reader = LogseqConfigReader(config_path)
    title = reader.format_timestamp(now)
    timestamp_tag = f"[[{title}]]"
    # NOTE(review): ``%W`` is not the ISO week number, so a week spanning New
    # Year is split across two files. Left as is, since existing files depend
    # on this naming.
    week_id = now.strftime("%Y-W%W")
    filename = f"{week_id}-agent.md"
    file_path = os.path.join(pages_dir, filename)

    tag_links = "".join(f" [[{tag}]]" for tag in (context_tags or []))
    block_lines = [f"- {timestamp_tag}{tag_links}"]
    if content.strip():
        block_lines.append(content.rstrip("\n"))
    block_text = "\n".join(block_lines) + "\n"

    try:
        os.makedirs(pages_dir, exist_ok=True)
        # If the page was saved without a final newline, start on a fresh line;
        # otherwise the new bullet would be glued to the previous block's text.
        separator = "\n" if _file_lacks_trailing_newline(file_path) else ""
        with open(file_path, mode="a", encoding="utf-8") as out:
            out.write(separator + block_text)
    except OSError as exc:
        logger.exception("logseq_agent_write failed for path %s", file_path)
        return {"status": "error", "message": str(exc)}
    return {"status": "success", "path": file_path}
162
+
163
+
164
+ def _deepest_line_end(node: LogseqNode) -> int:
165
+ """Return the 1-based ``line_end`` of the deepest last descendant (or ``node`` itself)."""
166
+ cursor = node
167
+ while cursor.children:
168
+ cursor = cursor.children[-1]
169
+ if cursor.line_end is None:
170
+ msg = f"Node uuid={node.uuid} has no line_end for markdown splice"
171
+ raise ValueError(msg)
172
+ return cursor.line_end
173
+
174
+
175
def append_child_to_node(graph: LogseqGraph, target_uuid: str, content: str) -> None:
    """Insert ``content`` as a new child bullet of ``target_uuid`` in its source file.

    The bullet is placed directly after the last line of the target's deepest
    last descendant, indented one level below the target (``graph.tab_size``
    spaces per level). The file is rewritten through a temporary sibling file
    and ``os.replace``; existing line endings and the file's permission bits
    are preserved.

    Raises:
        ValueError: if the UUID is unknown, the node has no ``source_path`` or
            ``line_end``, or the insertion index falls outside the file.
        OSError: if reading or rewriting the file fails.
    """
    target_node = graph.get_node_by_uuid(target_uuid)
    if target_node is None:
        msg = f"No node registered for uuid={target_uuid}"
        raise ValueError(msg)
    if not target_node.source_path:
        msg = f"Node uuid={target_uuid} has no source_path"
        raise ValueError(msg)

    source_path = Path(target_node.source_path)
    insert_after_line = _deepest_line_end(target_node)
    child_level = target_node.indent_level + 1
    indent = " " * (child_level * graph.tab_size)
    new_line = f"{indent}- {content.rstrip()}"

    # newline="" turns off newline translation, so CRLF/LF endings are read
    # (and later written) exactly as stored instead of being normalised.
    with open(source_path, encoding="utf-8", newline="") as reader:
        raw_text = reader.read()
    lines = raw_text.splitlines(keepends=True)
    # ``line_end`` is 1-based, so it doubles as the 0-based index of the slot
    # immediately after that line.
    insert_index = insert_after_line
    if insert_index < 0 or insert_index > len(lines):
        msg = (
            f"Insertion index {insert_index} out of range for {source_path} "
            f"(lines={len(lines)}, target line_end={insert_after_line})"
        )
        raise ValueError(msg)

    # Reuse the file's own line ending for the inserted bullet.
    line_ending = "\n"
    for existing in lines:
        if existing.endswith("\r\n"):
            line_ending = "\r\n"
            break
        if existing.endswith(("\n", "\r")):
            line_ending = existing[-1]
            break

    if insert_index > 0:
        previous = lines[insert_index - 1]
        # A final line saved without a terminator must be closed first;
        # otherwise the new bullet would be appended to that line's text.
        if previous.splitlines()[0] == previous:
            lines[insert_index - 1] = previous + line_ending

    lines.insert(insert_index, f"{new_line}{line_ending}")
    updated = "".join(lines)
    logger.debug(
        "append_child_to_node target=%s path=%s insert_index=%s indent_level=%s",
        target_uuid,
        source_path,
        insert_index,
        child_level,
    )

    fd, temp_path = tempfile.mkstemp(
        dir=source_path.parent,
        prefix=f".{source_path.name}.",
        suffix=".tmp",
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8", newline="") as handle:
            handle.write(updated)
        # mkstemp creates the file with mode 0600; copy the page's permission
        # bits so the replace does not silently tighten them.
        os.chmod(temp_path, os.stat(source_path).st_mode & 0o7777)
        os.replace(temp_path, source_path)
    except BaseException:
        # Remove the temp file on any failure, not only on OSError.
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        raise
224
+
225
+
226
def _demo() -> None:
    """Print sample dates rendered with a fixed ``MMM do, yyyy`` journal format.

    Intended for manual inspection; nothing is read from disk because the
    reader's ``load_journal_format`` is overridden.
    """

    class _FixedFormatReader(LogseqConfigReader):
        def load_journal_format(self) -> str:
            return "MMM do, yyyy"

    reader = _FixedFormatReader("/path/to/config.edn")
    header = f"{'Data Originale':<20} | {'Formato Logseq':<15} | {'Output Finale'}"
    print(header)
    print("-" * 60)
    # Days picked to show each ordinal suffix: 1st, 2nd, 3rd, 11th, 22nd.
    for day in (1, 2, 3, 11, 22):
        sample = datetime(2026, 5, day)
        fmt = reader.load_journal_format()
        rendered = reader.format_timestamp(sample)
        print(f"{sample.strftime('%Y-%m-%d'):<20} | {fmt:<15} | {rendered}")
247
+
248
+
249
+ if __name__ == "__main__":
250
+ _demo()
@@ -0,0 +1,13 @@
1
+ """Domain-specific parser exceptions."""
2
+
3
+
4
class LogseqParserError(Exception):
    """Common base class for errors raised by the parser.

    Catching this type also catches the more specific parser exceptions that
    derive from it.
    """
6
+
7
+
8
class LogseqIndentationError(LogseqParserError):
    """Signals an indentation jump that breaks the stack-machine parser's constraints."""
10
+
11
+
12
class BlockReferenceError(LogseqParserError):
    """Signals a block reference that could not be resolved."""