logseq-matryca-parser 1.2.1__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/CHANGELOG.md +25 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/CONTRIBUTING.md +2 -1
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/PKG-INFO +60 -12
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/README.md +59 -11
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/SECURITY.md +1 -1
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/ARCHITECTURE.md +16 -8
- logseq_matryca_parser-1.3.0/docs/CODEQL.md +31 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/logseq_ast_primer.md +1 -1
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_INCREMENTAL_WATCHER.md +2 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_UUID_AND_GRAPH_SUPERPOWERS.md +2 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/examples/run_demo.py +3 -2
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/legacy/local_digestor.py +6 -9
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/pyproject.toml +22 -1
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/__init__.py +26 -5
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/forge.py +1 -4
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/graph.py +100 -18
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/kinetic.py +104 -57
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/lens.py +9 -6
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/logos_core.py +5 -5
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/logos_parser.py +37 -8
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/logseq_paths.py +17 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/synapse.py +134 -45
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_agent_writer.py +5 -1
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_graph.py +65 -1
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_kinetic.py +27 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_lens.py +9 -1
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_logos_parser.py +34 -9
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_logseq_paths.py +2 -2
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_package_version.py +17 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_synapse.py +70 -6
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/uv.lock +97 -81
- logseq_matryca_parser-1.2.1/.github/workflows/codeql.yml +0 -37
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.cursorignore +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/CODEOWNERS +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/FUNDING.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/dependabot.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/workflows/ci.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/workflows/daily-metrics.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/workflows/github_release.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.github/workflows/pypi_publish.yml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.gitignore +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.pre-commit-config.yaml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/.repomixignore +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/CODE_OF_CONDUCT.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/LICENSE +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/Makefile +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/NOTICE +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/claude-skill-logseq-read/SKILL.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/claude-skill-logseq-read/scripts/parse_logseq.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/RELEASE_PROCESS.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/design-docs/ARCHITECTURE_BLUEPRINT.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/design-docs/CODE_SCAFFOLD.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/design-docs/LOGSEQ_ASSET_RESOLUTION_SPEC.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/design-docs/LOGSEQ_DATASCRIPT_MAPPING.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/design-docs/LOGSEQ_TEMPORAL_ONTOLOGY.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/design-docs/OFFICIAL_MLDOC_SPECS.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/design-docs/REFERENCE_SPEC.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/error_log.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_AGENT_NATIVE_XRAY.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_CLI_HYDRATION_AND_ENRICHMENT.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_CONTEXT_SYNTHESIS_AND_SCOPING.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_EMBED_EXPANSION_AND_FLUENT_QUERIES.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_GRAPH_RAG_SEMANTICS.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_HEADLESS_WRITER.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_INLINE_SHIELD_AND_NAMESPACES.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_OBSIDIAN_ADAPTER.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_ROBUSTNESS_AND_SOFT_BREAKS.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/docs/roadmaps/ROADMAP_TOML_FIX_AND_PYPI_DISTRIBUTION.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/examples/demo_logseq_journal.md +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/lib/bindings/utils.js +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/lib/tom-select/tom-select.complete.min.js +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/lib/tom-select/tom-select.css +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/lib/vis-9.1.2/vis-network.css +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/lib/vis-9.1.2/vis-network.min.js +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/metrics/history.json +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/repomix-output-parser.xml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/scripts/debug_pre_release.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/scripts/extract_changelog.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/.gitignore +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/NOTICE +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/__main__.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/agent_press.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/agent_writer.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/exceptions.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/logseq_markdown.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/src/logseq_matryca_parser/pyproject.toml +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_agent_press.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_forge.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_logseq_markdown.py +0 -0
- {logseq_matryca_parser-1.2.1 → logseq_matryca_parser-1.3.0}/tests/test_pre_release_roundtrip.py +0 -0
|
@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.3.0] - 2026-06-19
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- **Sprint 1 architectural quick wins** — **`discover_graph_files`** moved from `kinetic.py` to `logseq_paths.py` (decouples CLI from `LogseqGraph`); KINETIC optional-dependency errors now recommend **`uv sync --extra ai|viz`**; **`lens.py`** lazy-imports NetworkX/PyVis; **SYNAPSE** exports vector-store-safe metadata via **`SynapseMetadata`** / **`build_synapse_metadata`** (`task_priority`, temporal epochs, `source_uuid`, joined `path`/`refs`); explicit **`[tool.ruff]`** config in `pyproject.toml`.
|
|
15
|
+
- **Sprint 2 runtime robustness** — **`LogseqGraphWatcher`** debounces filesystem events (~500ms) and ignores editor temp/swap files; **`StackMachineParser(strict_refs=True)`** raises **`BlockReferenceError`** for unresolved same-page `((uuid))` refs (default off); **KINETIC** adds **`@app.callback()`** with **`--verbose`** / **`--graph`**, **`rich_markup_mode="rich"`**, and shared graph-path resolution.
|
|
16
|
+
- **Sprint 3 architecture** — **`LogseqGraph`** uses **`validate_assignment=True`** (no frozen/`object.__setattr__` hack); **SYNAPSE** **`LlamaIndexVisitor`** adds **`SOURCE`**, **`NEXT`**, and **`PREVIOUS`** relationships; package root **`__init__.py`** exports **`SynapseAdapter`**, **`SessionAliasRegistry`**, **`GraphVisualizer`**, and core LOGOS symbols via explicit **`__all__`**.
|
|
17
|
+
- **Optional AI stack** — `llama-index-core` bumped to `0.14.22` via lock refresh.
|
|
18
|
+
- **Documentation** — README, ARCHITECTURE, CONTRIBUTING, SECURITY, CODEQL, AST primer, and roadmaps updated for **1.3.0** (public API, watcher debounce, `strict_refs`, LlamaIndex spatial edges, `uv` install).
|
|
19
|
+
|
|
20
|
+
### Security
|
|
21
|
+
|
|
22
|
+
- **Transitive dependency hardening** — `uv` constraints pin `aiohttp>=3.14.1` (11 Dependabot alerts); `nltk` overridden to `v3.10.0-rc1` from upstream Git until NLTK 3.10.0 ships on PyPI (GHSA-p4gq-832x-fm9v). Affects optional `[ai]` / `[all]` extras only; core install unchanged.
|
|
23
|
+
|
|
24
|
+
## [1.2.2] - 2026-06-18
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- **CodeQL CI conflict** — removed `.github/workflows/codeql.yml`; SAST runs via GitHub **CodeQL default setup** only (advanced workflow + default setup cannot coexist). See [`docs/CODEQL.md`](docs/CODEQL.md).
|
|
29
|
+
|
|
30
|
+
### Changed
|
|
31
|
+
|
|
32
|
+
- **README** — CodeQL references updated to default setup; link to `docs/CODEQL.md`.
|
|
33
|
+
- **Documentation** — `docs/CODEQL.md` added; CONTRIBUTING and SECURITY updated for **1.2.2**.
|
|
34
|
+
|
|
10
35
|
## [1.2.1] - 2026-06-18
|
|
11
36
|
|
|
12
37
|
### Added
|
|
@@ -15,8 +15,9 @@ User-facing behavior is documented in:
|
|
|
15
15
|
- [`README.md`](README.md) — overview, quickstart, and feature matrix
|
|
16
16
|
- [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — LOGOS, SYNAPSE, `LogseqGraph`, agents, and data flow
|
|
17
17
|
- [`docs/logseq_ast_primer.md`](docs/logseq_ast_primer.md) — Logseq Spatial Markdown domain rules
|
|
18
|
-
- [`CHANGELOG.md`](CHANGELOG.md) — shipped releases (current: **1.2.1**) and **Unreleased** changes (Keep a Changelog)
|
|
18
|
+
- [`CHANGELOG.md`](CHANGELOG.md) — shipped releases (current: **1.3.0**) and **Unreleased** changes (Keep a Changelog)
|
|
19
19
|
- [`docs/RELEASE_PROCESS.md`](docs/RELEASE_PROCESS.md) — version bump, tag, and PyPI publish checklist
|
|
20
|
+
- [`docs/CODEQL.md`](docs/CODEQL.md) — CodeQL default setup (no custom `codeql.yml`)
|
|
20
21
|
|
|
21
22
|
When you add or change observable parser or graph behavior, update the relevant doc sections and add a bullet under **`## [Unreleased]`** in `CHANGELOG.md` (see [`.cursor/rules/05-auto-changelog.mdc`](.cursor/rules/05-auto-changelog.mdc)).
|
|
22
23
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: logseq-matryca-parser
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: The Logos Protocol: Deterministic Logseq AST parsing for Matryca.ai.
|
|
5
5
|
Project-URL: Homepage, https://github.com/MarcoPorcellato/logseq-matryca-parser
|
|
6
6
|
Project-URL: Repository, https://github.com/MarcoPorcellato/logseq-matryca-parser
|
|
@@ -53,7 +53,7 @@ Description-Content-Type: text/markdown
|
|
|
53
53
|
[](#)
|
|
54
54
|

|
|
55
55
|
|
|
56
|
-
**v1.
|
|
56
|
+
**v1.3.0** — Architecture & runtime hardening (see [CHANGELOG](CHANGELOG.md)) — **244 tests**, public API exports, watcher debounce, `strict_refs`, LlamaIndex spatial edges; SAST via GitHub CodeQL **default setup** ([`docs/CODEQL.md`](docs/CODEQL.md)).
|
|
57
57
|
|
|
58
58
|
> *Turning a forest of local plain-text files into a unified semantic powerhouse.*
|
|
59
59
|
|
|
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
|
|
|
63
63
|
|
|
64
64
|
[👉 **TRY THE LIVE INTERACTIVE DEMO**](https://MarcoPorcellato.github.io/logseq-matryca-parser/)
|
|
65
65
|
|
|
66
|
-
[📘 **ARCHITECTURE**](docs/ARCHITECTURE.md) · [AST Primer](docs/logseq_ast_primer.md) · [Changelog](CHANGELOG.md) · [Release process](docs/RELEASE_PROCESS.md)
|
|
66
|
+
[📘 **ARCHITECTURE**](docs/ARCHITECTURE.md) · [AST Primer](docs/logseq_ast_primer.md) · [CodeQL](docs/CODEQL.md) · [Changelog](CHANGELOG.md) · [Release process](docs/RELEASE_PROCESS.md)
|
|
67
67
|
|
|
68
68
|
</div>
|
|
69
69
|
|
|
@@ -147,6 +147,34 @@ Logseq Matryca Parser is a deterministic **Stack-Machine engine** that acts as t
|
|
|
147
147
|
|
|
148
148
|
---
|
|
149
149
|
|
|
150
|
+
## ⚡ Release highlights (v1.3.0)
|
|
151
|
+
|
|
152
|
+
Minor release — architectural quick wins, runtime robustness, and expanded public API. No breaking changes to default parser behavior.
|
|
153
|
+
|
|
154
|
+
| Area | Change |
|
|
155
|
+
| :--- | :--- |
|
|
156
|
+
| **Public API** | Root **`logseq_matryca_parser`** exports **`SynapseAdapter`**, **`SessionAliasRegistry`**, **`GraphVisualizer`**, **`discover_graph_files`**, and core LOGOS symbols via explicit **`__all__`**. |
|
|
157
|
+
| **Graph model** | **`LogseqGraph`** uses **`validate_assignment=True`** instead of frozen/`object.__setattr__` for incremental reloads. |
|
|
158
|
+
| **Live watcher** | **`start_watching()`** debounces filesystem events (~500ms) and ignores editor temp/swap files (`.swp`, `~`, `.tmp`, `.DS_Store`). |
|
|
159
|
+
| **Strict refs** | **`StackMachineParser(strict_refs=True)`** raises **`BlockReferenceError`** for unresolved same-page `((uuid))` refs (default off). |
|
|
160
|
+
| **SYNAPSE** | **`SynapseMetadata`** / **`build_synapse_metadata`** for vector-store-safe fields; **LlamaIndex** adds **`SOURCE`**, **`NEXT`**, **`PREVIOUS`** relationships. |
|
|
161
|
+
| **KINETIC CLI** | Global **`--verbose`** / **`--graph`** via **`@app.callback()`**; optional-dependency hints recommend **`uv sync --extra ai\|viz`**. |
|
|
162
|
+
| **LENS** | Lazy-imports NetworkX/PyVis so core installs stay lightweight. |
|
|
163
|
+
| **Security** | Transitive **`aiohttp`** / **`nltk`** constraints for optional **`[ai]`** extras. |
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## ⚡ Release highlights (v1.2.2)
|
|
168
|
+
|
|
169
|
+
Patch release — fixes a failing CodeQL GitHub Actions workflow; **no parser or public API changes**.
|
|
170
|
+
|
|
171
|
+
| Area | Change |
|
|
172
|
+
| :--- | :--- |
|
|
173
|
+
| **CodeQL** | Removed duplicate `.github/workflows/codeql.yml`; scanning continues via GitHub **default setup** (Node 24 runners). |
|
|
174
|
+
| **Docs** | New [`docs/CODEQL.md`](docs/CODEQL.md) explains default vs advanced setup and troubleshooting. |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
150
178
|
## ⚡ Release highlights (v1.2.1)
|
|
151
179
|
|
|
152
180
|
Infrastructure and contributor experience — no parser API breaks.
|
|
@@ -155,11 +183,11 @@ Infrastructure and contributor experience — no parser API breaks.
|
|
|
155
183
|
| :--- | :--- |
|
|
156
184
|
| **Python matrix** | CI and PyPI pre-flight test **3.12** and **3.13**; PyPI classifier for 3.13. |
|
|
157
185
|
| **Quality gates** | `make all` parity in GitHub Actions (`uv sync --all-extras` → lint, mypy, pytest with **≥80%** coverage). |
|
|
158
|
-
| **Security** | CodeQL SAST, `pip-audit` on production deps, expanded `SECURITY.md`, PyPI publish blocked until pre-flight passes. |
|
|
186
|
+
| **Security** | GitHub CodeQL default setup (SAST), `pip-audit` on production deps, expanded `SECURITY.md`, PyPI publish blocked until pre-flight passes. |
|
|
159
187
|
| **Community** | `CODE_OF_CONDUCT.md`, `CODEOWNERS`, issue-template config, CONTRIBUTING with `uv` workflow. |
|
|
160
188
|
| **Docs** | Root `ROADMAP_*.md` consolidated under [`docs/roadmaps/`](docs/roadmaps/). |
|
|
161
189
|
|
|
162
|
-
Contributor setup: [`CONTRIBUTING.md`](CONTRIBUTING.md) · Security: [`SECURITY.md`](SECURITY.md)
|
|
190
|
+
Contributor setup: [`CONTRIBUTING.md`](CONTRIBUTING.md) · Security: [`SECURITY.md`](SECURITY.md) · CodeQL: [`docs/CODEQL.md`](docs/CODEQL.md)
|
|
163
191
|
|
|
164
192
|
---
|
|
165
193
|
|
|
@@ -225,7 +253,7 @@ matryca-parse export /path/to/logseq/graph /path/to/obsidian/vault --format obsi
|
|
|
225
253
|
> **Note:** Wikilinks currently use the **Logseq page title** (e.g. `[[Target#^…]]`). Vault files may live under namespace folders (`Projects/AI/Demo.md`). Obsidian usually resolves unique titles; aligning link text to folder paths is a possible future refinement.
|
|
226
254
|
|
|
227
255
|
### Live incremental watcher
|
|
228
|
-
`LogseqGraph` supports **surgical file invalidation** (optional dependency: `
|
|
256
|
+
`LogseqGraph` supports **surgical file invalidation** (optional dependency: `uv sync --extra watch`). `start_watching()` runs a recursive **watchdog** observer with **~500ms debounce** and ignores editor temp/swap files: on `created` / `modified` under `pages/` or `journals/`, only that file is re-parsed; stale synthetic UUIDs are purged from `_node_registry` and scrubbed from `_backlink_registry`—no full-graph cold reload.
|
|
229
257
|
|
|
230
258
|
### Fluent topological queries
|
|
231
259
|
Filter the global node registry with a **chainable** API (tags, task state, ancestry under a parent UUID):
|
|
@@ -304,12 +332,17 @@ Marker syntax (`[#A]`, `SCHEDULED: <...>`, `DEADLINE: <...>`) is stripped from `
|
|
|
304
332
|
## 🛠️ Quickstart
|
|
305
333
|
|
|
306
334
|
```bash
|
|
307
|
-
# Install from PyPI (latest: v1.2.1)
|
|
308
|
-
pip install logseq-matryca-parser
|
|
335
|
+
# Install from PyPI (latest: v1.3.0)
|
|
336
|
+
uv pip install logseq-matryca-parser
|
|
309
337
|
|
|
310
338
|
# Optional: filesystem watcher for live incremental graph updates
|
|
311
|
-
pip install 'logseq-matryca-parser[watch]'
|
|
339
|
+
uv pip install 'logseq-matryca-parser[watch]'
|
|
312
340
|
|
|
341
|
+
# Or clone and sync all extras locally
|
|
342
|
+
uv sync --all-extras
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
```bash
|
|
313
346
|
# 1. Visualize your local graph (LENS)
|
|
314
347
|
matryca-parse visualize /path/to/logseq/graph my-map.html
|
|
315
348
|
|
|
@@ -321,13 +354,23 @@ matryca-parse export /path/to/logseq/graph output --format langchain-enriched
|
|
|
321
354
|
|
|
322
355
|
# 4. Obsidian vault (YAML frontmatter + ^ block ids)
|
|
323
356
|
matryca-parse export /path/to/logseq/graph output --format obsidian
|
|
357
|
+
|
|
358
|
+
# Global options (all subcommands): --verbose, --graph /path/to/vault
|
|
359
|
+
matryca-parse --graph /path/to/logseq/graph --verbose export output --format json
|
|
324
360
|
```
|
|
325
361
|
|
|
326
362
|
### Python API
|
|
363
|
+
|
|
364
|
+
Prefer the package root for stable imports (see **`__all__`** in **`logseq_matryca_parser`**):
|
|
365
|
+
|
|
327
366
|
```python
|
|
328
|
-
from logseq_matryca_parser
|
|
329
|
-
|
|
330
|
-
|
|
367
|
+
from logseq_matryca_parser import (
|
|
368
|
+
LogseqGraph,
|
|
369
|
+
LogosParser,
|
|
370
|
+
SynapseAdapter,
|
|
371
|
+
SessionAliasRegistry,
|
|
372
|
+
discover_graph_files,
|
|
373
|
+
)
|
|
331
374
|
|
|
332
375
|
# Parse a single page to AST (YAML or native frontmatter; utf-8-sig BOM-safe)
|
|
333
376
|
page = LogosParser().parse_page_file("page.md")
|
|
@@ -341,6 +384,11 @@ effective = graph.get_effective_properties(page_obj.root_nodes[0].uuid)
|
|
|
341
384
|
|
|
342
385
|
# Export to LangChain with lineage metadata
|
|
343
386
|
docs = SynapseAdapter.to_langchain_documents(page.root_nodes, source_name=page.title)
|
|
387
|
+
|
|
388
|
+
# Optional strict same-page block-ref validation at parse time
|
|
389
|
+
from logseq_matryca_parser import StackMachineParser
|
|
390
|
+
|
|
391
|
+
strict_page = StackMachineParser(strict_refs=True).parse_page_file("page.md")
|
|
344
392
|
```
|
|
345
393
|
|
|
346
394
|
### 🤖 Agentic Write Access (Append-Only)
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
[](#)
|
|
13
13
|

|
|
14
14
|
|
|
15
|
-
**v1.
|
|
15
|
+
**v1.3.0** — Architecture & runtime hardening (see [CHANGELOG](CHANGELOG.md)) — **244 tests**, public API exports, watcher debounce, `strict_refs`, LlamaIndex spatial edges; SAST via GitHub CodeQL **default setup** ([`docs/CODEQL.md`](docs/CODEQL.md)).
|
|
16
16
|
|
|
17
17
|
> *Turning a forest of local plain-text files into a unified semantic powerhouse.*
|
|
18
18
|
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
|
|
23
23
|
[👉 **TRY THE LIVE INTERACTIVE DEMO**](https://MarcoPorcellato.github.io/logseq-matryca-parser/)
|
|
24
24
|
|
|
25
|
-
[📘 **ARCHITECTURE**](docs/ARCHITECTURE.md) · [AST Primer](docs/logseq_ast_primer.md) · [Changelog](CHANGELOG.md) · [Release process](docs/RELEASE_PROCESS.md)
|
|
25
|
+
[📘 **ARCHITECTURE**](docs/ARCHITECTURE.md) · [AST Primer](docs/logseq_ast_primer.md) · [CodeQL](docs/CODEQL.md) · [Changelog](CHANGELOG.md) · [Release process](docs/RELEASE_PROCESS.md)
|
|
26
26
|
|
|
27
27
|
</div>
|
|
28
28
|
|
|
@@ -106,6 +106,34 @@ Logseq Matryca Parser is a deterministic **Stack-Machine engine** that acts as t
|
|
|
106
106
|
|
|
107
107
|
---
|
|
108
108
|
|
|
109
|
+
## ⚡ Release highlights (v1.3.0)
|
|
110
|
+
|
|
111
|
+
Minor release — architectural quick wins, runtime robustness, and expanded public API. No breaking changes to default parser behavior.
|
|
112
|
+
|
|
113
|
+
| Area | Change |
|
|
114
|
+
| :--- | :--- |
|
|
115
|
+
| **Public API** | Root **`logseq_matryca_parser`** exports **`SynapseAdapter`**, **`SessionAliasRegistry`**, **`GraphVisualizer`**, **`discover_graph_files`**, and core LOGOS symbols via explicit **`__all__`**. |
|
|
116
|
+
| **Graph model** | **`LogseqGraph`** uses **`validate_assignment=True`** instead of frozen/`object.__setattr__` for incremental reloads. |
|
|
117
|
+
| **Live watcher** | **`start_watching()`** debounces filesystem events (~500ms) and ignores editor temp/swap files (`.swp`, `~`, `.tmp`, `.DS_Store`). |
|
|
118
|
+
| **Strict refs** | **`StackMachineParser(strict_refs=True)`** raises **`BlockReferenceError`** for unresolved same-page `((uuid))` refs (default off). |
|
|
119
|
+
| **SYNAPSE** | **`SynapseMetadata`** / **`build_synapse_metadata`** for vector-store-safe fields; **LlamaIndex** adds **`SOURCE`**, **`NEXT`**, **`PREVIOUS`** relationships. |
|
|
120
|
+
| **KINETIC CLI** | Global **`--verbose`** / **`--graph`** via **`@app.callback()`**; optional-dependency hints recommend **`uv sync --extra ai\|viz`**. |
|
|
121
|
+
| **LENS** | Lazy-imports NetworkX/PyVis so core installs stay lightweight. |
|
|
122
|
+
| **Security** | Transitive **`aiohttp`** / **`nltk`** constraints for optional **`[ai]`** extras. |
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## ⚡ Release highlights (v1.2.2)
|
|
127
|
+
|
|
128
|
+
Patch release — fixes a failing CodeQL GitHub Actions workflow; **no parser or public API changes**.
|
|
129
|
+
|
|
130
|
+
| Area | Change |
|
|
131
|
+
| :--- | :--- |
|
|
132
|
+
| **CodeQL** | Removed duplicate `.github/workflows/codeql.yml`; scanning continues via GitHub **default setup** (Node 24 runners). |
|
|
133
|
+
| **Docs** | New [`docs/CODEQL.md`](docs/CODEQL.md) explains default vs advanced setup and troubleshooting. |
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
109
137
|
## ⚡ Release highlights (v1.2.1)
|
|
110
138
|
|
|
111
139
|
Infrastructure and contributor experience — no parser API breaks.
|
|
@@ -114,11 +142,11 @@ Infrastructure and contributor experience — no parser API breaks.
|
|
|
114
142
|
| :--- | :--- |
|
|
115
143
|
| **Python matrix** | CI and PyPI pre-flight test **3.12** and **3.13**; PyPI classifier for 3.13. |
|
|
116
144
|
| **Quality gates** | `make all` parity in GitHub Actions (`uv sync --all-extras` → lint, mypy, pytest with **≥80%** coverage). |
|
|
117
|
-
| **Security** | CodeQL SAST, `pip-audit` on production deps, expanded `SECURITY.md`, PyPI publish blocked until pre-flight passes. |
|
|
145
|
+
| **Security** | GitHub CodeQL default setup (SAST), `pip-audit` on production deps, expanded `SECURITY.md`, PyPI publish blocked until pre-flight passes. |
|
|
118
146
|
| **Community** | `CODE_OF_CONDUCT.md`, `CODEOWNERS`, issue-template config, CONTRIBUTING with `uv` workflow. |
|
|
119
147
|
| **Docs** | Root `ROADMAP_*.md` consolidated under [`docs/roadmaps/`](docs/roadmaps/). |
|
|
120
148
|
|
|
121
|
-
Contributor setup: [`CONTRIBUTING.md`](CONTRIBUTING.md) · Security: [`SECURITY.md`](SECURITY.md)
|
|
149
|
+
Contributor setup: [`CONTRIBUTING.md`](CONTRIBUTING.md) · Security: [`SECURITY.md`](SECURITY.md) · CodeQL: [`docs/CODEQL.md`](docs/CODEQL.md)
|
|
122
150
|
|
|
123
151
|
---
|
|
124
152
|
|
|
@@ -184,7 +212,7 @@ matryca-parse export /path/to/logseq/graph /path/to/obsidian/vault --format obsi
|
|
|
184
212
|
> **Note:** Wikilinks currently use the **Logseq page title** (e.g. `[[Target#^…]]`). Vault files may live under namespace folders (`Projects/AI/Demo.md`). Obsidian usually resolves unique titles; aligning link text to folder paths is a possible future refinement.
|
|
185
213
|
|
|
186
214
|
### Live incremental watcher
|
|
187
|
-
`LogseqGraph` supports **surgical file invalidation** (optional dependency: `
|
|
215
|
+
`LogseqGraph` supports **surgical file invalidation** (optional dependency: `uv sync --extra watch`). `start_watching()` runs a recursive **watchdog** observer with **~500ms debounce** and ignores editor temp/swap files: on `created` / `modified` under `pages/` or `journals/`, only that file is re-parsed; stale synthetic UUIDs are purged from `_node_registry` and scrubbed from `_backlink_registry`—no full-graph cold reload.
|
|
188
216
|
|
|
189
217
|
### Fluent topological queries
|
|
190
218
|
Filter the global node registry with a **chainable** API (tags, task state, ancestry under a parent UUID):
|
|
@@ -263,12 +291,17 @@ Marker syntax (`[#A]`, `SCHEDULED: <...>`, `DEADLINE: <...>`) is stripped from `
|
|
|
263
291
|
## 🛠️ Quickstart
|
|
264
292
|
|
|
265
293
|
```bash
|
|
266
|
-
# Install from PyPI (latest: v1.2.1)
|
|
267
|
-
pip install logseq-matryca-parser
|
|
294
|
+
# Install from PyPI (latest: v1.3.0)
|
|
295
|
+
uv pip install logseq-matryca-parser
|
|
268
296
|
|
|
269
297
|
# Optional: filesystem watcher for live incremental graph updates
|
|
270
|
-
pip install 'logseq-matryca-parser[watch]'
|
|
298
|
+
uv pip install 'logseq-matryca-parser[watch]'
|
|
271
299
|
|
|
300
|
+
# Or clone and sync all extras locally
|
|
301
|
+
uv sync --all-extras
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
```bash
|
|
272
305
|
# 1. Visualize your local graph (LENS)
|
|
273
306
|
matryca-parse visualize /path/to/logseq/graph my-map.html
|
|
274
307
|
|
|
@@ -280,13 +313,23 @@ matryca-parse export /path/to/logseq/graph output --format langchain-enriched
|
|
|
280
313
|
|
|
281
314
|
# 4. Obsidian vault (YAML frontmatter + ^ block ids)
|
|
282
315
|
matryca-parse export /path/to/logseq/graph output --format obsidian
|
|
316
|
+
|
|
317
|
+
# Global options (all subcommands): --verbose, --graph /path/to/vault
|
|
318
|
+
matryca-parse --graph /path/to/logseq/graph --verbose export output --format json
|
|
283
319
|
```
|
|
284
320
|
|
|
285
321
|
### Python API
|
|
322
|
+
|
|
323
|
+
Prefer the package root for stable imports (see **`__all__`** in **`logseq_matryca_parser`**):
|
|
324
|
+
|
|
286
325
|
```python
|
|
287
|
-
from logseq_matryca_parser
|
|
288
|
-
|
|
289
|
-
|
|
326
|
+
from logseq_matryca_parser import (
|
|
327
|
+
LogseqGraph,
|
|
328
|
+
LogosParser,
|
|
329
|
+
SynapseAdapter,
|
|
330
|
+
SessionAliasRegistry,
|
|
331
|
+
discover_graph_files,
|
|
332
|
+
)
|
|
290
333
|
|
|
291
334
|
# Parse a single page to AST (YAML or native frontmatter; utf-8-sig BOM-safe)
|
|
292
335
|
page = LogosParser().parse_page_file("page.md")
|
|
@@ -300,6 +343,11 @@ effective = graph.get_effective_properties(page_obj.root_nodes[0].uuid)
|
|
|
300
343
|
|
|
301
344
|
# Export to LangChain with lineage metadata
|
|
302
345
|
docs = SynapseAdapter.to_langchain_documents(page.root_nodes, source_name=page.title)
|
|
346
|
+
|
|
347
|
+
# Optional strict same-page block-ref validation at parse time
|
|
348
|
+
from logseq_matryca_parser import StackMachineParser
|
|
349
|
+
|
|
350
|
+
strict_page = StackMachineParser(strict_refs=True).parse_page_file("page.md")
|
|
303
351
|
```
|
|
304
352
|
|
|
305
353
|
### 🤖 Agentic Write Access (Append-Only)
|
|
@@ -6,7 +6,7 @@ Security fixes are provided **only for the latest released version** on [PyPI](h
|
|
|
6
6
|
|
|
7
7
|
| Version | Supported |
|
|
8
8
|
| ------- | --------- |
|
|
9
|
-
| **1.2.1** (latest) | Yes |
|
|
9
|
+
| **1.3.0** (latest) | Yes |
|
|
10
10
|
| Older releases | No |
|
|
11
11
|
|
|
12
12
|
We recommend always running the current release and upgrading promptly when a security advisory is published.
|
|
@@ -82,7 +82,7 @@ title Logseq Matryca Parser — C4 Level 2 (Containers)
|
|
|
82
82
|
Person(knowledgeWorker, "Knowledge Worker", "Local operator of a sovereign Logseq graph.")
|
|
83
83
|
|
|
84
84
|
System_Boundary(matrycaEcosystem, "Matryca.ai Ecosystem") {
|
|
85
|
-
Container(kinetic, "KINETIC", "Typer / Rich CLI", "CLI — export (json, markdown, langchain, langchain-enriched, obsidian), visualize, demo, graph scans, `agent-read` / `agent-write` (X-Ray + headless splice), weekly append (`append`).")
|
|
85
|
+
Container(kinetic, "KINETIC", "Typer / Rich CLI", "CLI — global `--verbose` / `--graph` callback; export (json, markdown, langchain, langchain-enriched, obsidian), visualize, demo, graph scans, `agent-read` / `agent-write` (X-Ray + headless splice), weekly append (`append`).")
|
|
86
86
|
Container(logos, "LOGOS", "Python / Pydantic", "Stack-Machine AST engine — LogseqPage and LogseqNode models.")
|
|
87
87
|
Container(synapse, "SYNAPSE", "LangChain / LlamaIndex", "Framework-native exporters with parent-child metadata.")
|
|
88
88
|
Container(lens, "LENS", "NetworkX / PyVis", "Reference-topology visualization to interactive HTML.")
|
|
@@ -258,7 +258,9 @@ classDiagram
|
|
|
258
258
|
|
|
259
259
|
- **LangChain.** [`LangChainVisitor`](../src/logseq_matryca_parser/synapse.py) emits one **`Document`** per node with `page_content=node.clean_text` and metadata unioning **`node.properties`** with lineage fields (`uuid`, `parent_id`, `indent_level`, `source`, **`path`** — the UUID ancestry chain — `left_id`, `refs`, `task_status`, repeater, `created_at`). The underlying **`LogseqNode`** additionally carries **`task_priority`**, **`scheduled_at`**, and **`deadline_at`** (§3.1); adapters or custom visitors can project those into metadata when feeding **downstream graph databases** or **GraphRAG** filters. This preserves **parent context** explicitly in retrieval filters and re-ranking.
|
|
260
260
|
|
|
261
|
-
- **LlamaIndex.** [`LlamaIndexVisitor`](../src/logseq_matryca_parser/synapse.py) constructs **`TextNode`** instances keyed by **`id_=node.uuid`**. It wires **`NodeRelationship.PARENT`** and **`CHILD`** via **`RelatedNodeInfo`**, back-linking when the parent appears earlier in preorder traversal — encoding **topology as first-class edges** beyond flat metadata dictionaries.
|
|
261
|
+
- **LlamaIndex.** [`LlamaIndexVisitor`](../src/logseq_matryca_parser/synapse.py) constructs **`TextNode`** instances keyed by **`id_=node.uuid`**. It wires **`NodeRelationship.PARENT`** and **`CHILD`** via **`RelatedNodeInfo`**, back-linking when the parent appears earlier in preorder traversal — encoding **topology as first-class edges** beyond flat metadata dictionaries. From **v1.3.0**, it also emits **`SOURCE`** (page-level anchor via **`page_source_node_id`**), **`NEXT`**, and **`PREVIOUS`** sibling edges for spatial traversal in vector stores.
|
|
262
|
+
|
|
263
|
+
- **Vector-store metadata.** **`SynapseMetadata`** and **`build_synapse_metadata`** project **`task_priority`**, temporal epoch fields, **`source_uuid`**, and joined **`path`** / **`refs`** strings into LangChain/LlamaIndex metadata without leaking raw Python list reprs.
|
|
262
264
|
|
|
263
265
|
Together, adapters guarantee that **embedding units align with intentional block boundaries**, not splitter accidents.
|
|
264
266
|
|
|
@@ -276,7 +278,7 @@ The KINETIC **`export --format langchain-enriched`** path serializes these docum
|
|
|
276
278
|
|
|
277
279
|
### 3.3 LENS — NetworkX topology + PyVis interactive visualization
|
|
278
280
|
|
|
279
|
-
**LENS** (`logseq_matryca_parser.lens.GraphVisualizer`) builds a **`networkx.Graph`** over **page ⇄ wiki/tag reference** projections using `NetworkXVisitor` during AST preorder walks. Nodes receive **degree-based sizing** (“sun” hotspots) and subgroup classification (`page`, `tag`, `journal`, etc.).
|
|
281
|
+
**LENS** (`logseq_matryca_parser.lens.GraphVisualizer`) builds a **`networkx.Graph`** over **page ⇄ wiki/tag reference** projections using `NetworkXVisitor` during AST preorder walks. Nodes receive **degree-based sizing** (“sun” hotspots) and subgroup classification (`page`, `tag`, `journal`, etc.). **NetworkX** and **PyVis** are **lazy-imported** (optional **`[viz]`** extra) so core installs avoid pulling visualization dependencies until `GraphVisualizer` is used.
|
|
280
282
|
|
|
281
283
|
Visualization export uses **`pyvis`** with **`force_atlas_2based`** physics, fullscreen canvas, HUD filters, glassmorphism control chrome, and stabilized layout configuration suitable for **large graphs at interactive frame rates** in the browser (product positioning targets fluid exploration of graphs on the order of **10⁴ nodes**).
|
|
282
284
|
|
|
@@ -296,14 +298,14 @@ Both paths keep **existing topology intact** relative to their contract: append-
|
|
|
296
298
|
|
|
297
299
|
### 3.6 `LogseqGraph` — namespace scoping, O(1) invalidation, live watch
|
|
298
300
|
|
|
299
|
-
The **in-memory graph** ([`graph.py`](../src/logseq_matryca_parser/graph.py)) is the runtime **RAM image** of the sovereign vault: `pages: dict[str, LogseqPage]`, a private **`_node_registry`** keyed by synthetic block UUID, and a **`_backlink_registry`** mapping normalized link targets to source node UUIDs.
|
|
301
|
+
The **in-memory graph** ([`graph.py`](../src/logseq_matryca_parser/graph.py)) is the runtime **RAM image** of the sovereign vault: `pages: dict[str, LogseqPage]`, a private **`_node_registry`** keyed by synthetic block UUID, and a **`_backlink_registry`** mapping normalized link targets to source node UUIDs. **`LogseqGraph`** uses **`ConfigDict(strict=True, validate_assignment=True)`** (not frozen) so **`invalidate_and_reload_page`** can mutate registries and page maps without `object.__setattr__` workarounds.
|
|
300
302
|
|
|
301
303
|
#### Page title overrides and alias indexing (`_enrich_pages_index`)
|
|
302
304
|
|
|
303
305
|
After every bulk or incremental parse, the graph applies a **post-parse enrichment pass** before backlink construction:
|
|
304
306
|
|
|
305
307
|
1. **Filename → canonical title.** Each markdown file is first keyed by **`derive_page_title_from_source_path`** (see §3.9).
|
|
306
|
-
2. **`title::` override.** If page frontmatter contains a non-empty string **`title`**, the
|
|
308
|
+
2. **`title::` override.** If page frontmatter contains a non-empty string **`title`**, the `LogseqPage` is updated via **`model_copy(update={"title": custom})`**, the old filename key is removed from **`pages`**, and the page is re-inserted under the custom title (collision with another file’s title is skipped with a debug log).
|
|
307
309
|
3. **Alias injection.** For each canonical dict entry where **`dict_key == page.title`**, values from **`alias::`** and **`aliases::`** are normalized (comma-separated strings or Python lists; `[[Page]]` / `#tag` adornments stripped using the same rules as [`logseq_markdown.py`](../src/logseq_matryca_parser/logseq_markdown.py)) and registered as **additional keys** pointing at the **same `LogseqPage` instance** — e.g. `pages["Dev"]` and `pages["Development"]` share identity.
|
|
308
310
|
4. **Backlinks.** **`_build_backlink_registry`** walks **unique pages** (`id(page)` deduplication) so alias keys do not double-count outgoing links. Incoming wikilinks such as **`[[Dev]]`** normalize to lowercase registry keys and resolve through **`get_backlinks("Dev")`** like any other page title.
|
|
309
311
|
|
|
@@ -341,7 +343,11 @@ This keeps **global indexes consistent** without rebuilding the entire graph —
|
|
|
341
343
|
|
|
342
344
|
#### Live filesystem watcher (`start_watching`)
|
|
343
345
|
|
|
344
|
-
**`LogseqGraph.start_watching(callback=None)`** (optional **`watchdog`** install) returns a **`LogseqGraphWatcher`** that schedules a recursive **`Observer`** on the graph root. **`on_modified` / `on_created`** events for tracked Markdown call **`invalidate_and_reload_page`**, then optionally invoke **`callback(path)`** — the intended hook for **vector store patch**, **re-embedding**, or UI refresh. Event routing ignores directories and non-tracked extensions so the hot path stays tight.
|
|
346
|
+
**`LogseqGraph.start_watching(callback=None, debounce_seconds=0.5)`** (optional **`watchdog`** install) returns a **`LogseqGraphWatcher`** that schedules a recursive **`Observer`** on the graph root. **`on_modified` / `on_created`** events for tracked Markdown call **`invalidate_and_reload_page`**, then optionally invoke **`callback(path)`** — the intended hook for **vector store patch**, **re-embedding**, or UI refresh. **`_DebouncedGraphEventRouter`** coalesces rapid save bursts (~500ms default) and ignores editor temp/swap artifacts (`.swp`, `~`, `.tmp`, `.DS_Store`). Event routing ignores directories and non-tracked extensions so the hot path stays tight.
|
|
347
|
+
|
|
348
|
+
#### Parse-time reference validation (`strict_refs`)
|
|
349
|
+
|
|
350
|
+
**`StackMachineParser(..., strict_refs=False)`** (default) resolves same-page `((uuid))` block refs leniently. When **`strict_refs=True`**, unresolved refs raise **`BlockReferenceError`** at parse time — complementary to **`LogseqGraph.get_broken_references()`**, which scans the loaded graph post-hoc.
|
|
345
351
|
|
|
346
352
|
#### Fluent topological queries (`GraphQuery`)
|
|
347
353
|
|
|
@@ -404,6 +410,8 @@ The compound CLI commands **`agent_read`** and **`agent_write`** in [`kinetic.py
|
|
|
404
410
|
|
|
405
411
|
Rich styling injects **ANSI escape sequences** that waste tokens and can cause models to **hallucinate markup** as content. `agent-read` is **stdout-pure** so shell pipelines, MCP tools, and headless agents receive **unescaped plain text** only. Human-oriented commands (`scan`, `export`, `visualize`) keep Rich; the **machine-native read/write paths** opt out where token fidelity matters.
|
|
406
412
|
|
|
413
|
+
**Global CLI options (v1.3.0).** [`kinetic.py`](../src/logseq_matryca_parser/kinetic.py) registers **`@app.callback()`** with **`--verbose`** and **`--graph`** so every subcommand shares graph-path resolution and debug logging. Optional extras (`[ai]`, `[viz]`) print **`uv sync --extra …`** hints on import failure.
|
|
414
|
+
|
|
407
415
|
This complements §3.4 **AGENT WRITER** (weekly append + headless splice) and §3.2 **SYNAPSE** (human/RAG chunking): one stack, multiple projections — **enriched chunks for vectors**, **X-Ray + alias state for agent context**, **append / splice for durable writes**.
|
|
408
416
|
|
|
409
417
|
### 3.8 Bidirectional I/O and Logseq Layouts
|
|
@@ -459,7 +467,7 @@ Logseq namespaces use **`/`** in titles (e.g. `Projects/AI`). On disk, each segm
|
|
|
459
467
|
|
|
460
468
|
#### Graph discovery filters
|
|
461
469
|
|
|
462
|
-
When scanning a vault root, **`is_excluded_graph_path`** drops noise directories — notably **`.recycle`**, **`.git`**, and the internal **`logseq`** config tree — so incremental watchers and bulk loaders never ingest backup blobs or VCS metadata as pages. This keeps **`LogseqGraph.load_directory`** and **`invalidate_and_reload_page`** focused on sovereign content under `pages/` and `journals/`.
|
|
470
|
+
When scanning a vault root, **`discover_graph_files`** (in [`logseq_paths.py`](../src/logseq_matryca_parser/logseq_paths.py), shared by **`LogseqGraph.load_directory`** and **KINETIC**) enumerates `pages/` and `journals/` markdown. **`is_excluded_graph_path`** drops noise directories — notably **`.recycle`**, **`.git`**, and the internal **`logseq`** config tree — so incremental watchers and bulk loaders never ingest backup blobs or VCS metadata as pages. This keeps **`LogseqGraph.load_directory`** and **`invalidate_and_reload_page`** focused on sovereign content under `pages/` and `journals/`.
|
|
463
471
|
|
|
464
472
|
---
|
|
465
473
|
|
|
@@ -528,4 +536,4 @@ Recursive and character-budget chunkers assume **approximately flat prose**. Log
|
|
|
528
536
|
|
|
529
537
|
---
|
|
530
538
|
|
|
531
|
-
*This document reflects the implementations in `src/logseq_matryca_parser/logos_parser.py`, `synapse.py`, `graph.py`, `forge.py`, `lens.py`, `logos_core.py`, `agent_writer.py`, `agent_press.py`, `logseq_markdown.py`, and `
|
|
539
|
+
*This document reflects the implementations in `src/logseq_matryca_parser/logos_parser.py`, `synapse.py`, `graph.py`, `forge.py`, `lens.py`, `logos_core.py`, `agent_writer.py`, `agent_press.py`, `logseq_markdown.py`, `logseq_paths.py`, `kinetic.py`, and the public exports in `__init__.py`, and complements narrative primers such as [`logseq_ast_primer.md`](logseq_ast_primer.md).*
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# CodeQL code scanning
|
|
2
|
+
|
|
3
|
+
**Logseq Matryca Parser** (v1.3.0+) uses **GitHub CodeQL default setup** for static analysis (SAST) on Python.
|
|
4
|
+
|
|
5
|
+
## Why there is no `codeql.yml` workflow
|
|
6
|
+
|
|
7
|
+
GitHub does not allow **default setup** and a custom **advanced** CodeQL workflow at the same time. Uploading SARIF from `.github/workflows/codeql.yml` fails with:
|
|
8
|
+
|
|
9
|
+
> CodeQL analyses from advanced configurations cannot be processed when the default setup is enabled
|
|
10
|
+
|
|
11
|
+
Default setup is the recommended path for this repository: GitHub maintains the analysis configuration, runs on current runner images (Node 24+), and scans Python without duplicating CI.
|
|
12
|
+
|
|
13
|
+
## Where to see results
|
|
14
|
+
|
|
15
|
+
- **Security → Code scanning** on the repository
|
|
16
|
+
- [CodeQL status page](https://github.com/MarcoPorcellato/logseq-matryca-parser/security/code-scanning/tools/CodeQL/status/) for coverage and run history
|
|
17
|
+
|
|
18
|
+
## Switching to an advanced workflow (optional)
|
|
19
|
+
|
|
20
|
+
Only if you need a custom `codeql.yml` (extra queries, manual build steps, etc.):
|
|
21
|
+
|
|
22
|
+
1. **Settings → Advanced Security → Code scanning**
|
|
23
|
+
2. Next to **CodeQL analysis**, open the menu and choose **Disable CodeQL** (disables default setup)
|
|
24
|
+
3. Add or restore `.github/workflows/codeql.yml` using [github/codeql-action](https://github.com/github/codeql-action) **v4** or newer
|
|
25
|
+
|
|
26
|
+
Do not re-enable default setup while an advanced workflow is active.
|
|
27
|
+
|
|
28
|
+
## Related
|
|
29
|
+
|
|
30
|
+
- [`SECURITY.md`](../SECURITY.md) — vulnerability reporting
|
|
31
|
+
- [Troubleshooting: default setup enabled](https://docs.github.com/en/code-security/reference/code-scanning/sarif-files/troubleshoot-sarif-uploads/default-setup-enabled)
|
|
@@ -214,4 +214,4 @@ If you feed Logseq Markdown into `RecursiveCharacterTextSplitter` (LangChain) or
|
|
|
214
214
|
|
|
215
215
|
The **Logos Protocol** solves this by walking the AST deterministically, isolating properties, shielding dead-zone literals, and using the `SYNAPSE` adapter to export native LangChain `Document` or LlamaIndex `TextNode` objects. Every generated object retains its exact hierarchical lineage in the metadata, feeding your local LLM perfectly structured data.
|
|
216
216
|
|
|
217
|
-
For vault-wide navigation (aliases, backlinks, namespace shadowing, assets), load the graph with **`LogseqGraph`** — see the [README](../README.md) and [CHANGELOG](../CHANGELOG.md) (graph parity from **v1.2.0**;
|
|
217
|
+
For vault-wide navigation (aliases, backlinks, namespace shadowing, assets), load the graph with **`LogseqGraph`** — see the [README](../README.md) and [CHANGELOG](../CHANGELOG.md) (graph parity from **v1.2.0**; **v1.3.0** adds watcher debounce, `strict_refs`, public API exports, and LlamaIndex spatial relationships).
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# 📜 Architectural Contract: Live Incremental Invalidation & File Watcher
|
|
2
2
|
**Contract Status:** Wave 7 — Implemented (incremental invalidation + lazy watchdog watcher)
|
|
3
|
+
|
|
4
|
+
> **Implementation note (v1.3.0):** `LogseqGraph` now uses `validate_assignment=True` (not `frozen=True`); the watcher debounces events (~500ms) and ignores editor temp files. Historical spec text below describes the original contract.
|
|
3
5
|
**Target Stack:** Python 3.12+ | Pydantic V2 | Watchdog (Optional/Lazy Dependency)
|
|
4
6
|
**Inspiration Architectures:**
|
|
5
7
|
- `Microsoft/language-server-protocol` (Incremental text document synchronization and dependency invalidation)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# 📜 Architectural Contract: Synthetic UUID Hardening & Graph Orchestration Module
|
|
2
2
|
**Contract Status:** Completed (Autonomous Execution)
|
|
3
|
+
|
|
4
|
+
> **Implementation note (v1.3.0):** `discover_graph_files` lives in `logseq_paths.py` (not `kinetic.py`); `LogseqGraph` uses `validate_assignment=True` instead of `frozen=True`. Historical spec text below describes the original contract.
|
|
3
5
|
**Target Stack:** Python 3.12+ | Pydantic V2 (Strict/Frozen Models) | Local-First No-DB
|
|
4
6
|
|
|
5
7
|
**Execution checklist**
|
|
@@ -11,10 +11,11 @@ root_dir = Path(__file__).parent.parent
|
|
|
11
11
|
sys.path.append(str(root_dir / "src"))
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
|
-
from logseq_matryca_parser.logos_parser import LogosParser
|
|
15
|
-
from logseq_matryca_parser.forge import ForgeExporter
|
|
16
14
|
from rich.console import Console
|
|
17
15
|
from rich.panel import Panel
|
|
16
|
+
|
|
17
|
+
from logseq_matryca_parser.forge import ForgeExporter
|
|
18
|
+
from logseq_matryca_parser.logos_parser import LogosParser
|
|
18
19
|
except ImportError as e:
|
|
19
20
|
print(f"Errore: Assicurati di aver installato le dipendenze (pip install rich typer pydantic). {e}")
|
|
20
21
|
sys.exit(1)
|
|
@@ -5,10 +5,9 @@ import re
|
|
|
5
5
|
import shutil
|
|
6
6
|
import urllib.request
|
|
7
7
|
from datetime import datetime
|
|
8
|
-
from typing import Any,
|
|
8
|
+
from typing import Any, Literal, TypeVar
|
|
9
9
|
|
|
10
10
|
from pydantic import BaseModel, ValidationError
|
|
11
|
-
|
|
12
11
|
from smart_router.core.ingestion_engine import IngestionEngine
|
|
13
12
|
from smart_router.core.librarian_models import (
|
|
14
13
|
EntityNodeSchema,
|
|
@@ -31,7 +30,7 @@ async def emit_note_package(
|
|
|
31
30
|
slug: str,
|
|
32
31
|
content: str,
|
|
33
32
|
ontology_class: NodeType,
|
|
34
|
-
metadata:
|
|
33
|
+
metadata: dict[str, str],
|
|
35
34
|
indentation_level: int,
|
|
36
35
|
) -> None:
|
|
37
36
|
package = SovereignNotePackage(
|
|
@@ -90,9 +89,7 @@ class LogseqASTParser:
|
|
|
90
89
|
trees.append("\n".join(current_tree))
|
|
91
90
|
current_tree = [line]
|
|
92
91
|
else:
|
|
93
|
-
if current_tree:
|
|
94
|
-
current_tree.append(line)
|
|
95
|
-
elif line.strip():
|
|
92
|
+
if current_tree or line.strip():
|
|
96
93
|
current_tree.append(line)
|
|
97
94
|
if current_tree:
|
|
98
95
|
trees.append("\n".join(current_tree))
|
|
@@ -122,7 +119,7 @@ T = TypeVar("T", bound=BaseModel)
|
|
|
122
119
|
def call_local_llm(
|
|
123
120
|
system_prompt: str, user_content: str, response_model: type[T]
|
|
124
121
|
) -> T | None:
|
|
125
|
-
payload:
|
|
122
|
+
payload: dict[str, Any] = {
|
|
126
123
|
"model": "local-model",
|
|
127
124
|
"messages": [
|
|
128
125
|
{"role": "system", "content": system_prompt},
|
|
@@ -204,7 +201,7 @@ async def main() -> None:
|
|
|
204
201
|
if filename.startswith("."):
|
|
205
202
|
continue
|
|
206
203
|
filepath = os.path.join(INBOX_DIR, filename)
|
|
207
|
-
with open(filepath,
|
|
204
|
+
with open(filepath, encoding="utf-8") as f:
|
|
208
205
|
global_skeleton = f.read()
|
|
209
206
|
macro_buckets = LogseqASTParser.chunk_into_buckets(
|
|
210
207
|
global_skeleton, max_chars=15000
|
|
@@ -247,7 +244,7 @@ async def main() -> None:
|
|
|
247
244
|
if not note_filename.endswith(".md"):
|
|
248
245
|
note_filename += ".md"
|
|
249
246
|
try:
|
|
250
|
-
metadata:
|
|
247
|
+
metadata: dict[str, str] = {
|
|
251
248
|
"source_filename": filename,
|
|
252
249
|
"confidence": str(extracted_base.confidence),
|
|
253
250
|
"extracted_at": datetime.now().isoformat(),
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "logseq-matryca-parser"
|
|
7
|
-
version = "1.2.1"
|
|
7
|
+
version = "1.3.0"
|
|
8
8
|
description = "The Logos Protocol: Deterministic Logseq AST parsing for Matryca.ai."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.12"
|
|
@@ -76,6 +76,14 @@ dev = [
|
|
|
76
76
|
"ruff>=0.15.12",
|
|
77
77
|
]
|
|
78
78
|
|
|
79
|
+
[tool.uv]
|
|
80
|
+
constraint-dependencies = [
|
|
81
|
+
"aiohttp>=3.14.1",
|
|
82
|
+
]
|
|
83
|
+
override-dependencies = [
|
|
84
|
+
"nltk @ git+https://github.com/nltk/nltk@v3.10.0-rc1",
|
|
85
|
+
]
|
|
86
|
+
|
|
79
87
|
[tool.pytest.ini_options]
|
|
80
88
|
testpaths = ["tests"]
|
|
81
89
|
|
|
@@ -95,3 +103,16 @@ module = [
|
|
|
95
103
|
"watchdog.*",
|
|
96
104
|
]
|
|
97
105
|
ignore_missing_imports = true
|
|
106
|
+
|
|
107
|
+
[tool.ruff]
|
|
108
|
+
target-version = "py312"
|
|
109
|
+
line-length = 100
|
|
110
|
+
exclude = ["legacy", "examples", "scripts"]
|
|
111
|
+
|
|
112
|
+
[tool.ruff.lint]
|
|
113
|
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
114
|
+
ignore = ["E501"]
|
|
115
|
+
|
|
116
|
+
[tool.ruff.lint.per-file-ignores]
|
|
117
|
+
"src/logseq_matryca_parser/kinetic.py" = ["B008"]
|
|
118
|
+
"tests/**" = ["SIM117"]
|