logseq-matryca-parser 1.0.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.cursorignore +28 -3
- logseq_matryca_parser-1.2.0/.github/workflows/github_release.yml +53 -0
- logseq_matryca_parser-1.2.0/CHANGELOG.md +67 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/CONTRIBUTING.md +14 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/PKG-INFO +75 -11
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/README.md +74 -10
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/ARCHITECTURE.md +73 -10
- logseq_matryca_parser-1.2.0/docs/RELEASE_PROCESS.md +84 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/design-docs/LOGSEQ_ASSET_RESOLUTION_SPEC.md +2 -0
- logseq_matryca_parser-1.2.0/docs/logseq_ast_primer.md +217 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/pyproject.toml +1 -1
- logseq_matryca_parser-1.2.0/scripts/debug_pre_release.py +170 -0
- logseq_matryca_parser-1.2.0/scripts/extract_changelog.py +138 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/__init__.py +1 -1
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/agent_writer.py +1 -1
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/graph.py +195 -22
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/logos_core.py +7 -6
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/logos_parser.py +438 -88
- logseq_matryca_parser-1.2.0/src/logseq_matryca_parser/logseq_markdown.py +241 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/logseq_paths.py +24 -2
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_graph.py +79 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_logos_parser.py +449 -15
- logseq_matryca_parser-1.2.0/tests/test_logseq_markdown.py +203 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_logseq_paths.py +23 -0
- logseq_matryca_parser-1.2.0/tests/test_pre_release_roundtrip.py +79 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/uv.lock +4 -4
- logseq_matryca_parser-1.0.0/.cursorrules +0 -5
- logseq_matryca_parser-1.0.0/docs/logseq_ast_primer.md +0 -82
- logseq_matryca_parser-1.0.0/src/logseq_matryca_parser/logseq_markdown.py +0 -111
- logseq_matryca_parser-1.0.0/tests/test_logseq_markdown.py +0 -98
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.github/FUNDING.yml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.github/dependabot.yml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.github/workflows/ci.yml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.github/workflows/pypi_publish.yml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.gitignore +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.pre-commit-config.yaml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/.repomixignore +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/LICENSE +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/Makefile +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/NOTICE +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/ROADMAP_AGENT_NATIVE_XRAY.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/ROADMAP_HEADLESS_WRITER.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/ROADMAP_OBSIDIAN_ADAPTER.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/SECURITY.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/claude-skill-logseq-read/SKILL.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/claude-skill-logseq-read/scripts/parse_logseq.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/design-docs/ARCHITECTURE_BLUEPRINT.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/design-docs/CODE_SCAFFOLD.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/design-docs/LOGSEQ_DATASCRIPT_MAPPING.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/design-docs/LOGSEQ_TEMPORAL_ONTOLOGY.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/design-docs/OFFICIAL_MLDOC_SPECS.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/design-docs/REFERENCE_SPEC.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/error_log.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_CLI_HYDRATION_AND_ENRICHMENT.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_CONTEXT_SYNTHESIS_AND_SCOPING.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_EMBED_EXPANSION_AND_FLUENT_QUERIES.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_GRAPH_RAG_SEMANTICS.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_INCREMENTAL_WATCHER.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_INLINE_SHIELD_AND_NAMESPACES.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_ROBUSTNESS_AND_SOFT_BREAKS.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_TOML_FIX_AND_PYPI_DISTRIBUTION.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/docs/roadmaps/ROADMAP_UUID_AND_GRAPH_SUPERPOWERS.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/examples/demo_logseq_journal.md +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/examples/run_demo.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/legacy/local_digestor.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/lib/bindings/utils.js +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/lib/tom-select/tom-select.complete.min.js +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/lib/tom-select/tom-select.css +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/lib/vis-9.1.2/vis-network.css +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/lib/vis-9.1.2/vis-network.min.js +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/repomix-output-parser.xml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/.gitignore +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/NOTICE +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/__main__.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/agent_press.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/exceptions.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/forge.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/kinetic.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/lens.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/pyproject.toml +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/src/logseq_matryca_parser/synapse.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_agent_press.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_agent_writer.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_forge.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_kinetic.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_lens.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_package_version.py +0 -0
- {logseq_matryca_parser-1.0.0 → logseq_matryca_parser-1.2.0}/tests/test_synapse.py +0 -0
|
@@ -1,10 +1,35 @@
|
|
|
1
1
|
# .cursorignore
|
|
2
2
|
# (Nota: Cursor ignora già automaticamente tutto ciò che è nel .gitignore)
|
|
3
3
|
|
|
4
|
+
# =========================
|
|
5
|
+
# Python & Virtual Environments
|
|
6
|
+
# =========================
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
env/
|
|
10
|
+
__pycache__/
|
|
11
|
+
*.pyc
|
|
12
|
+
|
|
13
|
+
# =========================
|
|
14
|
+
# Linter & Test Caches
|
|
15
|
+
# =========================
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.mypy_cache/
|
|
18
|
+
.pytest_cache/
|
|
19
|
+
.coverage
|
|
20
|
+
htmlcov/
|
|
21
|
+
|
|
22
|
+
# =========================
|
|
23
|
+
# Build Artifacts
|
|
24
|
+
# =========================
|
|
25
|
+
dist/
|
|
26
|
+
build/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
|
|
4
29
|
# =========================
|
|
5
30
|
# Lockfiles (Letali per il Codebase Indexing)
|
|
6
31
|
# =========================
|
|
7
|
-
# I lockfile devono stare su Git, ma l'IA non deve MAI leggerli,
|
|
32
|
+
# I lockfile devono stare su Git, ma l'IA non deve MAI leggerli,
|
|
8
33
|
# sono solo un muro di versioni incomprensibili.
|
|
9
34
|
poetry.lock
|
|
10
35
|
uv.lock
|
|
@@ -33,7 +58,7 @@ tests/fixtures/*.md
|
|
|
33
58
|
# =========================
|
|
34
59
|
# Assets Vettoriali
|
|
35
60
|
# =========================
|
|
36
|
-
# Le immagini PNG/JPG Cursor le ignora da solo, ma gli SVG sono file di testo!
|
|
61
|
+
# Le immagini PNG/JPG Cursor le ignora da solo, ma gli SVG sono file di testo!
|
|
37
62
|
# Se l'IA legge un SVG, legge migliaia di coordinate matematiche inutili.
|
|
38
63
|
*.svg
|
|
39
64
|
|
|
@@ -42,4 +67,4 @@ tests/fixtures/*.md
|
|
|
42
67
|
# =========================
|
|
43
68
|
.vscode/
|
|
44
69
|
.idea/
|
|
45
|
-
.clinerules
|
|
70
|
+
.clinerules
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
name: GitHub Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
tag:
|
|
10
|
+
description: "Existing tag to publish (e.g. v1.1.1)"
|
|
11
|
+
required: true
|
|
12
|
+
type: string
|
|
13
|
+
|
|
14
|
+
permissions:
|
|
15
|
+
contents: write
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
release:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
steps:
|
|
21
|
+
- name: Checkout
|
|
22
|
+
uses: actions/checkout@v6
|
|
23
|
+
with:
|
|
24
|
+
fetch-depth: 0
|
|
25
|
+
|
|
26
|
+
- name: Resolve tag name
|
|
27
|
+
id: tag
|
|
28
|
+
run: |
|
|
29
|
+
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
|
30
|
+
echo "name=${{ inputs.tag }}" >> "$GITHUB_OUTPUT"
|
|
31
|
+
else
|
|
32
|
+
echo "name=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
|
|
33
|
+
fi
|
|
34
|
+
|
|
35
|
+
- name: Build release notes from CHANGELOG
|
|
36
|
+
id: notes
|
|
37
|
+
run: |
|
|
38
|
+
BODY_FILE="$(mktemp)"
|
|
39
|
+
python scripts/extract_changelog.py "${{ steps.tag.outputs.name }}" > "$BODY_FILE"
|
|
40
|
+
{
|
|
41
|
+
echo "body<<CHANGELOG_EOF"
|
|
42
|
+
cat "$BODY_FILE"
|
|
43
|
+
echo "CHANGELOG_EOF"
|
|
44
|
+
} >> "$GITHUB_OUTPUT"
|
|
45
|
+
|
|
46
|
+
- name: Create or update GitHub Release
|
|
47
|
+
uses: softprops/action-gh-release@v2
|
|
48
|
+
with:
|
|
49
|
+
tag_name: ${{ steps.tag.outputs.name }}
|
|
50
|
+
name: ${{ steps.tag.outputs.name }}
|
|
51
|
+
body: ${{ steps.notes.outputs.body }}
|
|
52
|
+
generate_release_notes: false
|
|
53
|
+
make_latest: true
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to **logseq-matryca-parser** (The Logos Protocol) are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [1.2.0] - 2026-05-29
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Asset extraction** — `LogseqNode.assets` collects markdown images, `{{pdf}}` macros, and local `[label](path)` attachments; `resolve_asset_path` decodes percent-encoded paths (`%20`).
|
|
15
|
+
- **YAML frontmatter** — `---` blocks at file start populate `LogseqPage.properties` like native Logseq page properties.
|
|
16
|
+
- **`page-tags::`** — block and page properties named `page-tags` inject implicit graph tokens like `tags::`.
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
- **Round-trip serialization** — soft-break continuations no longer double-indent; list-shaped block properties (`tags::` + bullets) serialize as Logseq bullet lists instead of Python repr; `:LOGBOOK:` drawers and derived temporal fields (`scheduled::`, `repeater::`, …) are not emitted as bogus `key::` lines; YAML frontmatter pages round-trip with `---` fences and stable block UUIDs; `title` from YAML or `title::` frontmatter overrides the graph page title for deterministic IDs.
|
|
21
|
+
- **Property comma-split in wikilinks** — `tags::` / `alias::` comma separation ignores commas inside `[[...]]` tokens.
|
|
22
|
+
- **Properties after code fences** — `key::` lines immediately following a closing fence are parsed into block properties (Logseq contiguity exception).
|
|
23
|
+
- **Org warning periods** — `DEADLINE` / `SCHEDULED` payloads with `-3d`-style warning periods parse without datetime failures.
|
|
24
|
+
- **Quoted property values** — outer `"` / `'` are stripped from block property values in the AST.
|
|
25
|
+
- **Query macro shielding** — `{{query}}` / `{{advancedquery}}` inline macros do not emit false wikilink graph tokens (embed macros still do).
|
|
26
|
+
- **Case-insensitive page routing** — `LogseqGraph.get_page` and `resolve_relative_page_link` resolve titles via a lowercase index (Datomic / Logseq parity).
|
|
27
|
+
- **HTML comment shielding** — wikilinks and tags inside `<!-- ... -->` are masked before entity extraction (no ghost graph links).
|
|
28
|
+
- **Graph token parity** — list-shaped block properties (`tags::` with bullets) feed wikilinks/tags into the AST; page-level properties (YAML and `key::` frontmatter) merge into `page.refs`.
|
|
29
|
+
- **Temporal ranges and repeaters** — `SCHEDULED` / `DEADLINE` markers with `HH:MM - HH:MM` ranges parse using the start time; repeater tokens (`.+1w`, `++1d`) are stripped before datetime parsing.
|
|
30
|
+
- **Legacy namespace filenames** — `filename_to_page_title` decodes `___`, `%2F`, and Dendron-style `.` separators.
|
|
31
|
+
- **BOM-prefixed graph files** — `parse_page_file` reads with `utf-8-sig` so Windows-synced BOM bytes do not break the first bullet.
|
|
32
|
+
- **Markdown escape shielding** — `\#` and `\[\[` no longer yield tags or wikilinks in graph metadata.
|
|
33
|
+
- **Empty bullets** — bare `-` / `*` lines parse as empty blocks instead of failing `BULLET_PATTERN`.
|
|
34
|
+
- **Wikilink header anchors** — `[[Page#Section]]` resolves to the page name only for graph routing.
|
|
35
|
+
- **Hybrid alias links** — `[Alias]([[Page]])` is no longer treated as a file asset.
|
|
36
|
+
|
|
37
|
+
## [1.1.1] - 2026-05-28
|
|
38
|
+
|
|
39
|
+
### Added
|
|
40
|
+
|
|
41
|
+
- **Graph page aliases** — `LogseqGraph.load_directory` honors `title::`, `alias::` / `aliases::` for `pages` lookup and backlinks; incremental reload re-applies enrichment after watcher edits.
|
|
42
|
+
- **LaTeX math shielding** — `_shield_inline_code` masks `$$...$$` and `$...$` spans so wikilinks/tags inside equations are not extracted.
|
|
43
|
+
- **Datalog query dead zones** — `#+BEGIN_QUERY` … `#+END_QUERY` blocks are ignored for entity extraction (parse-loop state plus shielding).
|
|
44
|
+
- **Numbered list blocks** — `logos_parser.py` recognizes ordered-list markers (`1. `, `12. `, etc.) as outliner bullets alongside `-` and `*`.
|
|
45
|
+
- **Markdown task checkboxes** — `[ ]`, `[-]`, and `[x]`/`[X]` on block text map to `TODO`, `DOING`, and `DONE` before Org-mode prefix fallback.
|
|
46
|
+
|
|
47
|
+
### Fixed
|
|
48
|
+
|
|
49
|
+
- **Logseq OG parity (parser)** — `{{embed [[Page]]}}` and similar macros expose nested wikilinks; Unicode tags and markdown boundaries (`**#tag**`, `==#tag==`); comma-separated `tags::` / `alias::` / `aliases::` inject implicit graph tokens; `~~~` fences share code-block immunity with ` ``` ` fences.
|
|
50
|
+
- **Property contiguity** — block `key:: value` lines apply only while contiguous below the bullet; after a soft-break, later `key::` lines stay in `content` / `clean_text` (Logseq-native behavior).
|
|
51
|
+
- **Property bullet lists** — empty `alias::` / `tags::` followed by indented `-` bullets serialize as `list[str]` without orphan AST children.
|
|
52
|
+
- **Case-insensitive property keys** — all property keys normalized to lowercase at parse time; `TITLE::` frontmatter overrides graph page titles like `title::`.
|
|
53
|
+
- **Extended task markers** — `DELEGATED`, `POSTPONED`, `IN-PROGRESS` (longest-prefix matching) alongside existing Org-mode statuses.
|
|
54
|
+
- **Aliased block references** — `[Visible](((uuid)))` clean text retains visible alias only (no surrounding `[` `]`).
|
|
55
|
+
|
|
56
|
+
## [1.0.0] - 2026-05-28
|
|
57
|
+
|
|
58
|
+
### Added
|
|
59
|
+
|
|
60
|
+
- **LOGOS engine** — deterministic Stack-Machine parser (`StackMachineParser`) producing strict `LogseqPage` / `LogseqNode` ASTs from Spatial Markdown.
|
|
61
|
+
- **SYNAPSE adapters** — LangChain and LlamaIndex exporters with parent-child lineage metadata.
|
|
62
|
+
- **FORGE exporters** — JSON, Markdown, Obsidian, and enriched chunk payloads.
|
|
63
|
+
- **LENS visualizer** — interactive topology HTML via NetworkX / PyVis.
|
|
64
|
+
- **KINETIC CLI** — `matryca-parse` Typer entry point for export, visualization, and agent read/write workflows.
|
|
65
|
+
- **Headless CRUD** — append-only agent writer and X-Ray press utilities for sovereign graph mutation.
|
|
66
|
+
- **Logseq-native serialization** — round-trip page and block property layout via `logseq_markdown.py`.
|
|
67
|
+
- **Graph query layer** — `LogseqGraph` with backlinks, effective property inheritance, and optional filesystem watcher.
|
|
@@ -8,6 +8,20 @@ To maintain the architectural integrity of the project, please follow the guidel
|
|
|
8
8
|
|
|
9
9
|
---
|
|
10
10
|
|
|
11
|
+
## 📚 Documentation
|
|
12
|
+
|
|
13
|
+
User-facing behavior is documented in:
|
|
14
|
+
|
|
15
|
+
- [`README.md`](README.md) — overview, quickstart, and feature matrix
|
|
16
|
+
- [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — LOGOS, SYNAPSE, `LogseqGraph`, agents, and data flow
|
|
17
|
+
- [`docs/logseq_ast_primer.md`](docs/logseq_ast_primer.md) — Logseq Spatial Markdown domain rules
|
|
18
|
+
- [`CHANGELOG.md`](CHANGELOG.md) — shipped releases (current: **1.2.0**) and **Unreleased** changes (Keep a Changelog)
|
|
19
|
+
- [`docs/RELEASE_PROCESS.md`](docs/RELEASE_PROCESS.md) — version bump, tag, and PyPI publish checklist
|
|
20
|
+
|
|
21
|
+
When you add or change observable parser or graph behavior, update the relevant doc sections and add a bullet under **`## [Unreleased]`** in `CHANGELOG.md` (see [`.cursor/rules/05-auto-changelog.mdc`](.cursor/rules/05-auto-changelog.mdc)).
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
11
25
|
## 🏛️ Architectural Philosophy
|
|
12
26
|
|
|
13
27
|
Before writing any code, please understand our core principles:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: logseq-matryca-parser
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: The Logos Protocol: Deterministic Logseq AST parsing for Matryca.ai.
|
|
5
5
|
Project-URL: Homepage, https://github.com/MarcoPorcellato/logseq-matryca-parser
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/MarcoPorcellato/logseq-matryca-parser/issues
|
|
@@ -43,11 +43,12 @@ Description-Content-Type: text/markdown
|
|
|
43
43
|
[](https://github.com/MarcoPorcellato/logseq-matryca-parser/actions)
|
|
44
44
|
[](https://www.python.org/downloads/)
|
|
45
45
|
[](https://github.com/MarcoPorcellato/logseq-matryca-parser/blob/main/LICENSE)
|
|
46
|
-
[](https://pypi.org/project/logseq-matryca-parser/)
|
|
47
|
+
[](https://pypi.org/project/logseq-matryca-parser/)
|
|
47
48
|
[](#)
|
|
48
49
|

|
|
49
50
|
|
|
50
|
-
**v1.
|
|
51
|
+
**v1.2.0** — Graph parity, multimodal assets & format-preserving round-trip (see [CHANGELOG](CHANGELOG.md)) — **233 tests**, YAML frontmatter ingest/serialize, asset path resolution, case-insensitive page routing, and extended LOGOS shielding; ready for production Enterprise integration.
|
|
51
52
|
|
|
52
53
|
> *Turning a forest of local plain-text files into a unified semantic powerhouse.*
|
|
53
54
|
|
|
@@ -57,7 +58,7 @@ Description-Content-Type: text/markdown
|
|
|
57
58
|
|
|
58
59
|
[👉 **TRY THE LIVE INTERACTIVE DEMO**](https://MarcoPorcellato.github.io/logseq-matryca-parser/)
|
|
59
60
|
|
|
60
|
-
[📘 **
|
|
61
|
+
[📘 **ARCHITECTURE**](docs/ARCHITECTURE.md) · [AST Primer](docs/logseq_ast_primer.md) · [Changelog](CHANGELOG.md) · [Release process](docs/RELEASE_PROCESS.md)
|
|
61
62
|
|
|
62
63
|
</div>
|
|
63
64
|
|
|
@@ -100,6 +101,9 @@ It acts as the strict **File System Driver** for your LLM OS. By using a determi
|
|
|
100
101
|
| **Block references `((uuid))`** | Treated as opaque text or dropped | **Resolved** against `LogseqGraph`; optional **embed expansion** and **Obsidian `[[Page#^anchor]]`** export |
|
|
101
102
|
| **Property inheritance** | Page-level frontmatter at best | **`get_effective_properties`**: page + ancestor outline keys merged top-down (Org-mode style), then exposed on enriched chunks |
|
|
102
103
|
| **Live sync** | Re-read whole tree or poll | **`LogseqGraph.start_watching()`** (optional `watchdog`): **per-file invalidation** — re-parse one page, purge stale UUIDs from registries, refresh backlinks |
|
|
104
|
+
| **Page aliases & titles** | Filename-only or manual link maps | **`title::`**, **`alias::`** / **`aliases::`** re-key `graph.pages` and wire **backlinks** for alias wikilinks |
|
|
105
|
+
| **Case-insensitive pages** | Exact string match on filenames | **`get_page`** / **`resolve_relative_page_link`** use a lowercase index (Datomic / Logseq parity) |
|
|
106
|
+
| **Attachments & assets** | Opaque `![alt](path)` text in chunks | **`LogseqNode.assets`** + **`LogseqPage.resolve_asset_path`** for graph-root PDFs and images |
|
|
103
107
|
|
|
104
108
|
---
|
|
105
109
|
|
|
@@ -138,7 +142,57 @@ Logseq Matryca Parser is a deterministic **Stack-Machine engine** that acts as t
|
|
|
138
142
|
|
|
139
143
|
---
|
|
140
144
|
|
|
141
|
-
## ⚡ Recent superpowers (
|
|
145
|
+
## ⚡ Recent superpowers (v1.2.0)
|
|
146
|
+
|
|
147
|
+
### Graph parity, assets, and parser hardening
|
|
148
|
+
|
|
149
|
+
| Area | Capability |
|
|
150
|
+
| :--- | :--- |
|
|
151
|
+
| **Asset extraction** | `LogseqNode.assets` collects markdown images, `{{pdf}}` macros, and local `[label](path)` attachments; `LogseqPage.resolve_asset_path` maps to absolute paths (`%20` decode, graph-root relative). |
|
|
152
|
+
| **YAML frontmatter** | `---` blocks at file start populate `LogseqPage.properties` like native `key::` lines; **`title:`** in YAML sets `page.title` at parse; **`serialize_logseq_page`** preserves `---` fences on round-trip when the source file used YAML. |
|
|
153
|
+
| **`page-tags::`** | Block and page `page-tags::` inject implicit graph tokens like `tags::`; list-shaped values feed `refs`. |
|
|
154
|
+
| **Case-insensitive routing** | `LogseqGraph.get_page` and `resolve_relative_page_link` resolve titles via a lowercase index (Datomic parity). |
|
|
155
|
+
| **Extended shielding** | HTML comments, `{{query}}` / `{{advancedquery}}`, and escaped `\#` / `\[\[` do not emit false graph tokens (embed macros still harvest nested wikilinks). |
|
|
156
|
+
| **Property & temporal fixes** | Comma-split ignores commas inside `[[wikilinks]]`; properties after code fences; quoted value stripping; `SCHEDULED`/`DEADLINE` ranges, repeaters, and Org warning periods; legacy `___` / `%2F` / Dendron filenames; UTF-8 BOM via `utf-8-sig`. |
|
|
157
|
+
|
|
158
|
+
### Round-trip serialization (v1.2.0)
|
|
159
|
+
|
|
160
|
+
| Area | Capability |
|
|
161
|
+
| :--- | :--- |
|
|
162
|
+
| **Soft-break bodies** | Multiline block continuations serialize without double-indenting alignment spaces. |
|
|
163
|
+
| **List-shaped block props** | `tags::` / `page-tags::` with indented `-` bullets round-trip as Logseq lists (not Python repr). |
|
|
164
|
+
| **`:LOGBOOK:` drawers** | Org drawers re-emit as `:LOGBOOK:` / `:END:` blocks, not bogus `logbook::` property lines. |
|
|
165
|
+
| **Derived temporal keys** | Parsed `scheduled::`, `repeater::`, and related derived fields are omitted from serialized `key::` output. |
|
|
166
|
+
| **Stable block UUIDs** | Parse → `serialize_logseq_page` → parse preserves block `id::` / UUIDs on the same outline. |
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
from logseq_matryca_parser.graph import LogseqGraph
|
|
170
|
+
from logseq_matryca_parser.logos_parser import LogosParser
|
|
171
|
+
|
|
172
|
+
graph = LogseqGraph.load_directory("/path/to/logseq/graph")
|
|
173
|
+
|
|
174
|
+
# Case-insensitive page lookup
|
|
175
|
+
page = graph.get_page("my page") # same object as graph.pages["My Page"]
|
|
176
|
+
|
|
177
|
+
# Assets on a parsed block (Vision / document pipelines)
|
|
178
|
+
single = LogosParser().parse_page_file("pages/Notes.md")
|
|
179
|
+
block = single.root_nodes[0]
|
|
180
|
+
if block.assets:
|
|
181
|
+
abs_path = single.resolve_asset_path(block.assets[0])
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Deep dive: [Architecture §3.1 — LOGOS](docs/ARCHITECTURE.md#31-logos--deterministic-stack-machine-parsing) · [§3.6 — LogseqGraph](docs/ARCHITECTURE.md#36-logseqgraph--namespace-scoping-o1-invalidation-live-watch) · [AST primer](docs/logseq_ast_primer.md).
|
|
185
|
+
|
|
186
|
+
### Still included from v1.1.1
|
|
187
|
+
|
|
188
|
+
| Area | Capability |
|
|
189
|
+
| :--- | :--- |
|
|
190
|
+
| **Graph index** | `title::` / `TITLE::` overrides filename titles; `alias::` / `aliases::` inject extra `graph.pages` keys. |
|
|
191
|
+
| **Backlinks** | `[[Dev]]` resolves against alias keys (`get_backlinks("Dev")`). |
|
|
192
|
+
| **Incremental reload** | `invalidate_and_reload_page` re-applies title/alias enrichment after watcher edits. |
|
|
193
|
+
| **Parser shields** | LaTeX, `#+BEGIN_QUERY`, fenced code, drawers; `{{embed [[Page]]}}` harvests nested wikilinks. |
|
|
194
|
+
| **Property contiguity** | `key::` contiguous under bullets; soft-break closes the window (fence exception in v1.2.0). |
|
|
195
|
+
| **Tasks & bullets** | GFM checkboxes, extended Org markers, ordered-list bullets, aliased `((uuid))` clean text. |
|
|
142
196
|
|
|
143
197
|
### Obsidian-native export
|
|
144
198
|
Compile an entire Logseq graph into an **Obsidian vault layout**: YAML frontmatter from page properties, list body preserved, Logseq `((uuid))` links rewritten to **`[[Page#^anchor]]`**, and trailing **`^block-id`** on referenced blocks. Namespace titles become nested folders (e.g. `Projects/AI/Demo.md`).
|
|
@@ -194,13 +248,15 @@ For graph hygiene, **`LogseqGraph.get_broken_references()`** flags nodes whose `
|
|
|
194
248
|
|
|
195
249
|
| Feature | Description |
|
|
196
250
|
| :--- | :--- |
|
|
197
|
-
| **LOGOS Engine** | Deterministic AST parsing.
|
|
198
|
-
| **
|
|
251
|
+
| **LOGOS Engine** | Deterministic AST parsing. YAML + native frontmatter ingest, **format-preserving** `serialize_logseq_page` (YAML vs `key::` by source), list-shaped block property layout, **assets**, property contiguity (incl. post-fence), comma-safe wikilink splits, temporal ranges/repeaters, legacy filename decode, BOM-safe reads, and **shielded** code/math/query/HTML/escape regions. |
|
|
252
|
+
| **Multimodal assets** | **`LogseqNode.assets`** + **`LogseqPage.resolve_asset_path`** for PDFs and images relative to the graph root (Vision / document RAG). |
|
|
253
|
+
| **LogseqGraph** | In-memory vault: `pages` index (with **title/alias enrichment** and **case-insensitive lookup**), backlinks, effective properties, namespace resolution, fluent `GraphQuery`, optional **watchdog** invalidation. |
|
|
254
|
+
| **Advanced Task Extraction** | Task **state** (TODO / DOING / DELEGATED / IN-PROGRESS / …), **priority** markers `[#A]`–`[#C]` promoted to `task_priority`, and **SCHEDULED** / **DEADLINE** Logseq timestamps normalized to **UTC Unix epoch seconds** on `scheduled_at` / `deadline_at` for temporal graph and retrieval pipelines. |
|
|
199
255
|
| **SYNAPSE Adapter** | Native exports for **LangChain** and **LlamaIndex** with automated lineage metadata; **context-enriched** chunks with breadcrumbs, embed expansion, and inherited properties. |
|
|
200
256
|
| **FORGE** | JSON, clean Markdown, and **Obsidian** vault serialization (`ObsidianForgeVisitor`, `ForgeExporter.to_obsidian_markdown`). |
|
|
201
257
|
| **LENS Visualizer** | 60FPS interactive graph rendering (10k+ nodes) with Glassmorphism HUD. |
|
|
202
258
|
| **Agent-Native Printing Press** | [`agent_press.py`](src/logseq_matryca_parser/agent_press.py): **`SessionAliasRegistry`** maps session aliases ↔ block UUIDs; **`to_xray_markdown`** emits token-minimal outline text for autonomous agents (`matryca-parse agent-read`). |
|
|
203
|
-
| **Native Markdown Serialization** | [`logseq_markdown.py`](src/logseq_matryca_parser/logseq_markdown.py) + [`logseq_paths.py`](src/logseq_matryca_parser/logseq_paths.py): rebuild and write Logseq-compliant markdown
|
|
259
|
+
| **Native Markdown Serialization** | [`logseq_markdown.py`](src/logseq_matryca_parser/logseq_markdown.py) + [`logseq_paths.py`](src/logseq_matryca_parser/logseq_paths.py): rebuild and write Logseq-compliant markdown from an AST—page header preserves **YAML `---` or native `key::`** by source format, block properties at **parent whitespace + 2 spaces** (including bullet-list `tags::`), `:LOGBOOK:` drawers, and namespace titles via **`___`** pathing rules. |
|
|
204
260
|
| **Headless Write Engine** | [`agent_writer.py`](src/logseq_matryca_parser/agent_writer.py): **`append_child_to_node`** splices child bullets into on-disk Markdown from AST topology; **`serialize_logseq_page`** / **`write_logseq_page`** emit full pages; **`matryca-parse agent-write`** resolves aliases via **`.matryca_xray_state.json`**. |
|
|
205
261
|
| **AST Linters** | **`LogseqGraph.get_broken_references()`** returns originating nodes when `block_refs` target UUIDs absent from the global registry. |
|
|
206
262
|
| **Sovereign AI** | 100% Local. Zero telemetry. Private by design. |
|
|
@@ -227,8 +283,8 @@ Marker syntax (`[#A]`, `SCHEDULED: <...>`, `DEADLINE: <...>`) is stripped from `
|
|
|
227
283
|
## 🛠️ Quickstart
|
|
228
284
|
|
|
229
285
|
```bash
|
|
230
|
-
# Install from
|
|
231
|
-
pip install
|
|
286
|
+
# Install from PyPI (latest: v1.2.0)
|
|
287
|
+
pip install logseq-matryca-parser
|
|
232
288
|
|
|
233
289
|
# Optional: filesystem watcher for live incremental graph updates
|
|
234
290
|
pip install 'logseq-matryca-parser[watch]'
|
|
@@ -248,11 +304,19 @@ matryca-parse export /path/to/logseq/graph output --format obsidian
|
|
|
248
304
|
|
|
249
305
|
### Python API
|
|
250
306
|
```python
|
|
307
|
+
from logseq_matryca_parser.graph import LogseqGraph
|
|
251
308
|
from logseq_matryca_parser.logos_parser import LogosParser
|
|
252
309
|
from logseq_matryca_parser.synapse import SynapseAdapter
|
|
253
310
|
|
|
254
|
-
# Parse to AST
|
|
311
|
+
# Parse a single page to AST (YAML or native frontmatter; utf-8-sig BOM-safe)
|
|
255
312
|
page = LogosParser().parse_page_file("page.md")
|
|
313
|
+
if page.root_nodes[0].assets:
|
|
314
|
+
    absolute = page.resolve_asset_path(page.root_nodes[0].assets[0])
|
|
315
|
+
|
|
316
|
+
# Load the whole vault (pages, backlinks, node registry)
|
|
317
|
+
graph = LogseqGraph.load_directory("/path/to/logseq/graph")
|
|
318
|
+
page_obj = graph.get_page("My Page") # case-insensitive
|
|
319
|
+
effective = graph.get_effective_properties(page_obj.root_nodes[0].uuid)
|
|
256
320
|
|
|
257
321
|
# Export to LangChain with lineage metadata
|
|
258
322
|
docs = SynapseAdapter.to_langchain_documents(page.root_nodes, source_name=page.title)
|
|
@@ -7,11 +7,12 @@
|
|
|
7
7
|
[](https://github.com/MarcoPorcellato/logseq-matryca-parser/actions)
|
|
8
8
|
[](https://www.python.org/downloads/)
|
|
9
9
|
[](https://github.com/MarcoPorcellato/logseq-matryca-parser/blob/main/LICENSE)
|
|
10
|
-
[](https://pypi.org/project/logseq-matryca-parser/)
|
|
11
|
+
[](https://pypi.org/project/logseq-matryca-parser/)
|
|
11
12
|
[](#)
|
|
12
13
|

|
|
13
14
|
|
|
14
|
-
**v1.
|
|
15
|
+
**v1.2.0** — Graph parity, multimodal assets & format-preserving round-trip (see [CHANGELOG](CHANGELOG.md)) — **233 tests**, YAML frontmatter ingest/serialize, asset path resolution, case-insensitive page routing, and extended LOGOS shielding; ready for production Enterprise integration.
|
|
15
16
|
|
|
16
17
|
> *Turning a forest of local plain-text files into a unified semantic powerhouse.*
|
|
17
18
|
|
|
@@ -21,7 +22,7 @@
|
|
|
21
22
|
|
|
22
23
|
[👉 **TRY THE LIVE INTERACTIVE DEMO**](https://MarcoPorcellato.github.io/logseq-matryca-parser/)
|
|
23
24
|
|
|
24
|
-
[📘 **
|
|
25
|
+
[📘 **ARCHITECTURE**](docs/ARCHITECTURE.md) · [AST Primer](docs/logseq_ast_primer.md) · [Changelog](CHANGELOG.md) · [Release process](docs/RELEASE_PROCESS.md)
|
|
25
26
|
|
|
26
27
|
</div>
|
|
27
28
|
|
|
@@ -64,6 +65,9 @@ It acts as the strict **File System Driver** for your LLM OS. By using a determi
|
|
|
64
65
|
| **Block references `((uuid))`** | Treated as opaque text or dropped | **Resolved** against `LogseqGraph`; optional **embed expansion** and **Obsidian `[[Page#^anchor]]`** export |
|
|
65
66
|
| **Property inheritance** | Page-level frontmatter at best | **`get_effective_properties`**: page + ancestor outline keys merged top-down (Org-mode style), then exposed on enriched chunks |
|
|
66
67
|
| **Live sync** | Re-read whole tree or poll | **`LogseqGraph.start_watching()`** (optional `watchdog`): **per-file invalidation** — re-parse one page, purge stale UUIDs from registries, refresh backlinks |
|
|
68
|
+
| **Page aliases & titles** | Filename-only or manual link maps | **`title::`**, **`alias::`** / **`aliases::`** re-key `graph.pages` and wire **backlinks** for alias wikilinks |
|
|
69
|
+
| **Case-insensitive pages** | Exact string match on filenames | **`get_page`** / **`resolve_relative_page_link`** use a lowercase index (Datomic / Logseq parity) |
|
|
70
|
+
| **Attachments & assets** | Opaque `![alt](path)` text in chunks | **`LogseqNode.assets`** + **`LogseqPage.resolve_asset_path`** for graph-root PDFs and images |
|
|
67
71
|
|
|
68
72
|
---
|
|
69
73
|
|
|
@@ -102,7 +106,57 @@ Logseq Matryca Parser is a deterministic **Stack-Machine engine** that acts as t
|
|
|
102
106
|
|
|
103
107
|
---
|
|
104
108
|
|
|
105
|
-
## ⚡ Recent superpowers (
|
|
109
|
+
## ⚡ Recent superpowers (v1.2.0)
|
|
110
|
+
|
|
111
|
+
### Graph parity, assets, and parser hardening
|
|
112
|
+
|
|
113
|
+
| Area | Capability |
|
|
114
|
+
| :--- | :--- |
|
|
115
|
+
| **Asset extraction** | `LogseqNode.assets` collects markdown images, `{{pdf}}` macros, and local `[label](path)` attachments; `LogseqPage.resolve_asset_path` maps to absolute paths (`%20` decode, graph-root relative). |
|
|
116
|
+
| **YAML frontmatter** | `---` blocks at file start populate `LogseqPage.properties` like native `key::` lines; **`title:`** in YAML sets `page.title` at parse; **`serialize_logseq_page`** preserves `---` fences on round-trip when the source file used YAML. |
|
|
117
|
+
| **`page-tags::`** | Block and page `page-tags::` inject implicit graph tokens like `tags::`; list-shaped values feed `refs`. |
|
|
118
|
+
| **Case-insensitive routing** | `LogseqGraph.get_page` and `resolve_relative_page_link` resolve titles via a lowercase index (Datomic parity). |
|
|
119
|
+
| **Extended shielding** | HTML comments, `{{query}}` / `{{advancedquery}}`, and escaped `\#` / `\[\[` do not emit false graph tokens (embed macros still harvest nested wikilinks). |
|
|
120
|
+
| **Property & temporal fixes** | Comma-split ignores commas inside `[[wikilinks]]`; properties after code fences; quoted value stripping; `SCHEDULED`/`DEADLINE` ranges, repeaters, and Org warning periods; legacy `___` / `%2F` / Dendron filenames; UTF-8 BOM via `utf-8-sig`. |
|
|
121
|
+
|
|
122
|
+
### Round-trip serialization (v1.2.0)
|
|
123
|
+
|
|
124
|
+
| Area | Capability |
|
|
125
|
+
| :--- | :--- |
|
|
126
|
+
| **Soft-break bodies** | Multiline block continuations serialize without double-indenting alignment spaces. |
|
|
127
|
+
| **List-shaped block props** | `tags::` / `page-tags::` with indented `-` bullets round-trip as Logseq lists (not Python repr). |
|
|
128
|
+
| **`:LOGBOOK:` drawers** | Org drawers re-emit as `:LOGBOOK:` / `:END:` blocks, not bogus `logbook::` property lines. |
|
|
129
|
+
| **Derived temporal keys** | Parsed `scheduled::`, `repeater::`, and related derived fields are omitted from serialized `key::` output. |
|
|
130
|
+
| **Stable block UUIDs** | Parse → `serialize_logseq_page` → parse preserves block `id::` / UUIDs on the same outline. |
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from logseq_matryca_parser.graph import LogseqGraph
|
|
134
|
+
from logseq_matryca_parser.logos_parser import LogosParser
|
|
135
|
+
|
|
136
|
+
graph = LogseqGraph.load_directory("/path/to/logseq/graph")
|
|
137
|
+
|
|
138
|
+
# Case-insensitive page lookup
|
|
139
|
+
page = graph.get_page("my page") # same object as graph.pages["My Page"]
|
|
140
|
+
|
|
141
|
+
# Assets on a parsed block (Vision / document pipelines)
|
|
142
|
+
single = LogosParser().parse_page_file("pages/Notes.md")
|
|
143
|
+
block = single.root_nodes[0]
|
|
144
|
+
if block.assets:
|
|
145
|
+
    abs_path = single.resolve_asset_path(block.assets[0])
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Deep dive: [Architecture §3.1 — LOGOS](docs/ARCHITECTURE.md#31-logos--deterministic-stack-machine-parsing) · [§3.6 — LogseqGraph](docs/ARCHITECTURE.md#36-logseqgraph--namespace-scoping-o1-invalidation-live-watch) · [AST primer](docs/logseq_ast_primer.md).
|
|
149
|
+
|
|
150
|
+
### Still included from v1.1.1
|
|
151
|
+
|
|
152
|
+
| Area | Capability |
|
|
153
|
+
| :--- | :--- |
|
|
154
|
+
| **Graph index** | `title::` / `TITLE::` overrides filename titles; `alias::` / `aliases::` inject extra `graph.pages` keys. |
|
|
155
|
+
| **Backlinks** | `[[Dev]]` resolves against alias keys (`get_backlinks("Dev")`). |
|
|
156
|
+
| **Incremental reload** | `invalidate_and_reload_page` re-applies title/alias enrichment after watcher edits. |
|
|
157
|
+
| **Parser shields** | LaTeX, `#+BEGIN_QUERY`, fenced code, drawers; `{{embed [[Page]]}}` harvests nested wikilinks. |
|
|
158
|
+
| **Property contiguity** | `key::` contiguous under bullets; soft-break closes the window (fence exception in v1.2.0). |
|
|
159
|
+
| **Tasks & bullets** | GFM checkboxes, extended Org markers, ordered-list bullets, aliased `((uuid))` clean text. |
|
|
106
160
|
|
|
107
161
|
### Obsidian-native export
|
|
108
162
|
Compile an entire Logseq graph into an **Obsidian vault layout**: YAML frontmatter from page properties, list body preserved, Logseq `((uuid))` links rewritten to **`[[Page#^anchor]]`**, and trailing **`^block-id`** on referenced blocks. Namespace titles become nested folders (e.g. `Projects/AI/Demo.md`).
|
|
@@ -158,13 +212,15 @@ For graph hygiene, **`LogseqGraph.get_broken_references()`** flags nodes whose `
|
|
|
158
212
|
|
|
159
213
|
| Feature | Description |
|
|
160
214
|
| :--- | :--- |
|
|
161
|
-
| **LOGOS Engine** | Deterministic AST parsing.
|
|
162
|
-
| **
|
|
215
|
+
| **LOGOS Engine** | Deterministic AST parsing. YAML + native frontmatter ingest, **format-preserving** `serialize_logseq_page` (YAML vs `key::` by source), list-shaped block property layout, **assets**, property contiguity (incl. post-fence), comma-safe wikilink splits, temporal ranges/repeaters, legacy filename decode, BOM-safe reads, and **shielded** code/math/query/HTML/escape regions. |
|
|
216
|
+
| **Multimodal assets** | **`LogseqNode.assets`** + **`LogseqPage.resolve_asset_path`** for PDFs and images relative to the graph root (Vision / document RAG). |
|
|
217
|
+
| **LogseqGraph** | In-memory vault: `pages` index (with **title/alias enrichment** and **case-insensitive lookup**), backlinks, effective properties, namespace resolution, fluent `GraphQuery`, optional **watchdog** invalidation. |
|
|
218
|
+
| **Advanced Task Extraction** | Task **state** (TODO / DOING / DELEGATED / IN-PROGRESS / …), **priority** markers `[#A]`–`[#C]` promoted to `task_priority`, and **SCHEDULED** / **DEADLINE** Logseq timestamps normalized to **UTC Unix epoch seconds** on `scheduled_at` / `deadline_at` for temporal graph and retrieval pipelines. |
|
|
163
219
|
| **SYNAPSE Adapter** | Native exports for **LangChain** and **LlamaIndex** with automated lineage metadata; **context-enriched** chunks with breadcrumbs, embed expansion, and inherited properties. |
|
|
164
220
|
| **FORGE** | JSON, clean Markdown, and **Obsidian** vault serialization (`ObsidianForgeVisitor`, `ForgeExporter.to_obsidian_markdown`). |
|
|
165
221
|
| **LENS Visualizer** | 60FPS interactive graph rendering (10k+ nodes) with Glassmorphism HUD. |
|
|
166
222
|
| **Agent-Native Printing Press** | [`agent_press.py`](src/logseq_matryca_parser/agent_press.py): **`SessionAliasRegistry`** maps session aliases ↔ block UUIDs; **`to_xray_markdown`** emits token-minimal outline text for autonomous agents (`matryca-parse agent-read`). |
|
|
167
|
-
| **Native Markdown Serialization** | [`logseq_markdown.py`](src/logseq_matryca_parser/logseq_markdown.py) + [`logseq_paths.py`](src/logseq_matryca_parser/logseq_paths.py): rebuild and write Logseq-compliant markdown
|
|
223
|
+
| **Native Markdown Serialization** | [`logseq_markdown.py`](src/logseq_matryca_parser/logseq_markdown.py) + [`logseq_paths.py`](src/logseq_matryca_parser/logseq_paths.py): rebuild and write Logseq-compliant markdown from an AST—page header preserves **YAML `---` or native `key::`** by source format, block properties at **parent whitespace + 2 spaces** (including bullet-list `tags::`), `:LOGBOOK:` drawers, and namespace titles via **`___`** pathing rules. |
|
|
168
224
|
| **Headless Write Engine** | [`agent_writer.py`](src/logseq_matryca_parser/agent_writer.py): **`append_child_to_node`** splices child bullets into on-disk Markdown from AST topology; **`serialize_logseq_page`** / **`write_logseq_page`** emit full pages; **`matryca-parse agent-write`** resolves aliases via **`.matryca_xray_state.json`**. |
|
|
169
225
|
| **AST Linters** | **`LogseqGraph.get_broken_references()`** returns originating nodes when `block_refs` target UUIDs absent from the global registry. |
|
|
170
226
|
| **Sovereign AI** | 100% Local. Zero telemetry. Private by design. |
|
|
@@ -191,8 +247,8 @@ Marker syntax (`[#A]`, `SCHEDULED: <...>`, `DEADLINE: <...>`) is stripped from `
|
|
|
191
247
|
## 🛠️ Quickstart
|
|
192
248
|
|
|
193
249
|
```bash
|
|
194
|
-
# Install from
|
|
195
|
-
pip install
|
|
250
|
+
# Install from PyPI (latest: v1.2.0)
|
|
251
|
+
pip install logseq-matryca-parser
|
|
196
252
|
|
|
197
253
|
# Optional: filesystem watcher for live incremental graph updates
|
|
198
254
|
pip install 'logseq-matryca-parser[watch]'
|
|
@@ -212,11 +268,19 @@ matryca-parse export /path/to/logseq/graph output --format obsidian
|
|
|
212
268
|
|
|
213
269
|
### Python API
|
|
214
270
|
```python
|
|
271
|
+
from logseq_matryca_parser.graph import LogseqGraph
|
|
215
272
|
from logseq_matryca_parser.logos_parser import LogosParser
|
|
216
273
|
from logseq_matryca_parser.synapse import SynapseAdapter
|
|
217
274
|
|
|
218
|
-
# Parse to AST
|
|
275
|
+
# Parse a single page to AST (YAML or native frontmatter; utf-8-sig BOM-safe)
|
|
219
276
|
page = LogosParser().parse_page_file("page.md")
|
|
277
|
+
if page.root_nodes[0].assets:
|
|
278
|
+
    absolute = page.resolve_asset_path(page.root_nodes[0].assets[0])
|
|
279
|
+
|
|
280
|
+
# Load the whole vault (pages, backlinks, node registry)
|
|
281
|
+
graph = LogseqGraph.load_directory("/path/to/logseq/graph")
|
|
282
|
+
page_obj = graph.get_page("My Page") # case-insensitive
|
|
283
|
+
effective = graph.get_effective_properties(page_obj.root_nodes[0].uuid)
|
|
220
284
|
|
|
221
285
|
# Export to LangChain with lineage metadata
|
|
222
286
|
docs = SynapseAdapter.to_langchain_documents(page.root_nodes, source_name=page.title)
|