paperfigg 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paperfigg-0.2.6/LICENSE +21 -0
- paperfigg-0.2.6/PKG-INFO +185 -0
- paperfigg-0.2.6/README.md +155 -0
- paperfigg-0.2.6/paperfig/__init__.py +3 -0
- paperfigg-0.2.6/paperfig/agents/__init__.py +1 -0
- paperfigg-0.2.6/paperfig/agents/architecture_critic.py +133 -0
- paperfigg-0.2.6/paperfig/agents/critic.py +124 -0
- paperfigg-0.2.6/paperfig/agents/generator.py +75 -0
- paperfigg-0.2.6/paperfig/agents/planner.py +134 -0
- paperfigg-0.2.6/paperfig/audits/__init__.py +3 -0
- paperfigg-0.2.6/paperfig/audits/reproducibility.py +131 -0
- paperfigg-0.2.6/paperfig/cli.py +630 -0
- paperfigg-0.2.6/paperfig/command_catalog.py +24 -0
- paperfigg-0.2.6/paperfig/docsgen/__init__.py +9 -0
- paperfigg-0.2.6/paperfig/docsgen/drift.py +86 -0
- paperfigg-0.2.6/paperfig/docsgen/manifest.py +57 -0
- paperfigg-0.2.6/paperfig/docsgen/renderer.py +60 -0
- paperfigg-0.2.6/paperfig/exporters/__init__.py +1 -0
- paperfigg-0.2.6/paperfig/exporters/latex.py +16 -0
- paperfigg-0.2.6/paperfig/exporters/png.py +13 -0
- paperfigg-0.2.6/paperfig/exporters/svg.py +9 -0
- paperfigg-0.2.6/paperfig/lab/__init__.py +3 -0
- paperfigg-0.2.6/paperfig/lab/agents/__init__.py +1 -0
- paperfigg-0.2.6/paperfig/lab/agents/designer.py +8 -0
- paperfigg-0.2.6/paperfig/lab/agents/executor.py +62 -0
- paperfigg-0.2.6/paperfig/lab/agents/hypothesis.py +10 -0
- paperfigg-0.2.6/paperfig/lab/agents/reviewer.py +23 -0
- paperfigg-0.2.6/paperfig/lab/orchestrator.py +171 -0
- paperfigg-0.2.6/paperfig/lab/policy.py +38 -0
- paperfigg-0.2.6/paperfig/lab/registry.py +36 -0
- paperfigg-0.2.6/paperfig/lab/types.py +35 -0
- paperfigg-0.2.6/paperfig/pipeline/__init__.py +1 -0
- paperfigg-0.2.6/paperfig/pipeline/orchestrator.py +539 -0
- paperfigg-0.2.6/paperfig/prompts/__init__.py +1 -0
- paperfigg-0.2.6/paperfig/prompts/critique_architecture.txt +11 -0
- paperfigg-0.2.6/paperfig/prompts/critique_figure.txt +14 -0
- paperfigg-0.2.6/paperfig/prompts/plan_figure.txt +33 -0
- paperfigg-0.2.6/paperfig/prompts/repro_audit.txt +6 -0
- paperfigg-0.2.6/paperfig/styles/__init__.py +1 -0
- paperfigg-0.2.6/paperfig/styles/conference_default.json +24 -0
- paperfigg-0.2.6/paperfig/templates/__init__.py +4 -0
- paperfigg-0.2.6/paperfig/templates/compiler.py +43 -0
- paperfigg-0.2.6/paperfig/templates/flows/ablation_matrix.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/flows/dataset_characteristics.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/flows/error_analysis_breakdown.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/flows/limitations_threats_to_validity.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/flows/methodology_pipeline.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/flows/results_summary_plot.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/flows/system_overview.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/flows/training_compute_profile.yaml +15 -0
- paperfigg-0.2.6/paperfig/templates/loader.py +78 -0
- paperfigg-0.2.6/paperfig/utils/__init__.py +10 -0
- paperfigg-0.2.6/paperfig/utils/config.py +59 -0
- paperfigg-0.2.6/paperfig/utils/paperbanana.py +165 -0
- paperfigg-0.2.6/paperfig/utils/pdf_parser.py +104 -0
- paperfigg-0.2.6/paperfig/utils/prompts.py +8 -0
- paperfigg-0.2.6/paperfig/utils/structured_data.py +35 -0
- paperfigg-0.2.6/paperfig/utils/style_refs.py +23 -0
- paperfigg-0.2.6/paperfig/utils/traceability.py +57 -0
- paperfigg-0.2.6/paperfig/utils/types.py +115 -0
- paperfigg-0.2.6/paperfigg.egg-info/PKG-INFO +185 -0
- paperfigg-0.2.6/paperfigg.egg-info/SOURCES.txt +75 -0
- paperfigg-0.2.6/paperfigg.egg-info/dependency_links.txt +1 -0
- paperfigg-0.2.6/paperfigg.egg-info/entry_points.txt +2 -0
- paperfigg-0.2.6/paperfigg.egg-info/requires.txt +26 -0
- paperfigg-0.2.6/paperfigg.egg-info/top_level.txt +1 -0
- paperfigg-0.2.6/pyproject.toml +40 -0
- paperfigg-0.2.6/setup.cfg +4 -0
- paperfigg-0.2.6/tests/test_architecture_critic.py +46 -0
- paperfigg-0.2.6/tests/test_critic.py +77 -0
- paperfigg-0.2.6/tests/test_docsgen.py +67 -0
- paperfigg-0.2.6/tests/test_lab.py +81 -0
- paperfigg-0.2.6/tests/test_orchestrator.py +384 -0
- paperfigg-0.2.6/tests/test_paperbanana.py +136 -0
- paperfigg-0.2.6/tests/test_parser.py +39 -0
- paperfigg-0.2.6/tests/test_repro_audit.py +64 -0
- paperfigg-0.2.6/tests/test_templates.py +58 -0
paperfigg-0.2.6/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 paperfig contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
paperfigg-0.2.6/PKG-INFO
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: paperfigg
|
|
3
|
+
Version: 0.2.6
|
|
4
|
+
Summary: Agentic CLI for generating academic figures from research papers
|
|
5
|
+
Author: paperfig contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Provides-Extra: cli
|
|
11
|
+
Requires-Dist: typer>=0.9.0; extra == "cli"
|
|
12
|
+
Requires-Dist: rich>=13.7.0; extra == "cli"
|
|
13
|
+
Provides-Extra: png
|
|
14
|
+
Requires-Dist: cairosvg>=2.7.0; extra == "png"
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
17
|
+
Requires-Dist: coverage[toml]>=7.0.0; extra == "dev"
|
|
18
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
19
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
20
|
+
Provides-Extra: pdf
|
|
21
|
+
Requires-Dist: pdfminer.six>=20221105; extra == "pdf"
|
|
22
|
+
Requires-Dist: pypdf>=3.0.0; extra == "pdf"
|
|
23
|
+
Provides-Extra: svg
|
|
24
|
+
Requires-Dist: cairosvg>=2.7.0; extra == "svg"
|
|
25
|
+
Provides-Extra: mcp
|
|
26
|
+
Requires-Dist: mcp>=1.0.0; extra == "mcp"
|
|
27
|
+
Provides-Extra: yaml
|
|
28
|
+
Requires-Dist: PyYAML>=6.0.0; extra == "yaml"
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# paperfig
|
|
32
|
+
[](https://github.com/oluwafemidiakhoa/paperfig/actions/workflows/ci.yml)
|
|
33
|
+
[](https://github.com/oluwafemidiakhoa/paperfig/actions/workflows/docs-drift.yml)
|
|
34
|
+
[](https://github.com/oluwafemidiakhoa/paperfig/actions/workflows/publish.yml)
|
|
35
|
+
|
|
36
|
+
`paperfig` is a production-grade CLI that converts research papers (PDF or Markdown) into publication-ready academic figures using an agentic planning -> generation -> critique pipeline.
|
|
37
|
+
|
|
38
|
+
The core differentiator is that agent reasoning and architecture decisions are stored as versioned repo artifacts (architecture docs, flows, Mermaid diagrams, templates, audits) so humans and agents can evolve the system together.
|
|
39
|
+
|
|
40
|
+
## Install
|
|
41
|
+
- Standard CLI + PNG export:
|
|
42
|
+
- `pip install "paperfigg[cli,png]"`
|
|
43
|
+
- Developer tooling:
|
|
44
|
+
- `pip install "paperfigg[cli,png,dev,yaml,pdf,mcp]"`
|
|
45
|
+
- CLI-first local install:
|
|
46
|
+
- `pipx install .`
|
|
47
|
+
- `uv tool install .`
|
|
48
|
+
- Published package name is `paperfigg`; CLI command remains `paperfig`.
|
|
49
|
+
|
|
50
|
+
## Quickstart (Mock Mode, No Keys)
|
|
51
|
+
Mock mode is designed for instant local runs and realistic output artifacts.
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install "paperfigg[cli,png]"
|
|
55
|
+
paperfig doctor
|
|
56
|
+
paperfig generate examples/sample_paper.md --mode mock
|
|
57
|
+
paperfig docs check
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## 1-Minute Demo
|
|
61
|
+
```bash
|
|
62
|
+
pip install "paperfigg[cli,png]"
|
|
63
|
+
paperfig doctor
|
|
64
|
+
paperfig generate examples/sample_paper.md --mode mock
|
|
65
|
+
ls runs/*/figures/*/final/figure.svg
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Full Mode (PaperBanana MCP)
|
|
69
|
+
Use full mode when you want real PaperBanana generation via MCP.
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install "paperfigg[cli,png,mcp]"
|
|
73
|
+
export PAPERFIG_MCP_SERVER=paperbanana
|
|
74
|
+
export PAPERFIG_MCP_COMMAND="python -m your_mcp_server"
|
|
75
|
+
paperfig doctor --probe-mcp
|
|
76
|
+
paperfig generate examples/sample_paper.md --mode real
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## What You Get
|
|
80
|
+
- Generated figures (SVG and optional transparent PNG)
|
|
81
|
+
- LaTeX include snippets
|
|
82
|
+
- Captions and figure plans
|
|
83
|
+
- Traceability mapping from figure elements to source text spans
|
|
84
|
+
- Governance artifacts (`docs_drift_report.json`, `architecture_critique.json`, `repro_audit.json`)
|
|
85
|
+
|
|
86
|
+
Sample proof assets are committed in `docs/gallery/sample_paper`:
|
|
87
|
+
- `docs/gallery/sample_paper/fig-21a078a0.svg`
|
|
88
|
+
- `docs/gallery/sample_paper/plan.json`
|
|
89
|
+
- `docs/gallery/sample_paper/repro_audit.json`
|
|
90
|
+
- `docs/gallery/sample_paper/architecture_critique.json`
|
|
91
|
+
|
|
92
|
+

|
|
93
|
+
|
|
94
|
+
## What It Does
|
|
95
|
+
- Parses papers and extracts methodology, system description, and results sections.
|
|
96
|
+
- Plans figures through reusable flow templates and fallback heuristics.
|
|
97
|
+
- Generates figures via PaperBanana MCP and iterates with critique feedback loops.
|
|
98
|
+
- Regenerates docs and gates on drift for architecture governance.
|
|
99
|
+
- Runs reproducibility audits and architecture critiques as first-class run artifacts.
|
|
100
|
+
- Provides a constrained autonomous lab scaffold for iterative research experiments.
|
|
101
|
+
|
|
102
|
+
## How The Agentic System Works
|
|
103
|
+
The system uses specialized agents:
|
|
104
|
+
- `PlannerAgent` chooses figures and template-aligned abstractions.
|
|
105
|
+
- `GeneratorAgent` calls PaperBanana via MCP and emits traceable figure elements.
|
|
106
|
+
- `CriticAgent` scores faithfulness, readability, conciseness, and aesthetics.
|
|
107
|
+
- `ArchitectureCriticAgent` audits run-level architecture quality and governance completeness.
|
|
108
|
+
|
|
109
|
+
Full architecture documentation and flow diagrams live in `docs/architecture`.
|
|
110
|
+
|
|
111
|
+
## CLI Usage
|
|
112
|
+
<!-- AUTO-GEN:START cli-command-catalog -->
|
|
113
|
+
- `paperfig generate`
|
|
114
|
+
- `paperfig critique`
|
|
115
|
+
- `paperfig export`
|
|
116
|
+
- `paperfig doctor`
|
|
117
|
+
- `paperfig inspect`
|
|
118
|
+
- `paperfig docs regenerate`
|
|
119
|
+
- `paperfig docs check`
|
|
120
|
+
- `paperfig templates list`
|
|
121
|
+
- `paperfig templates validate`
|
|
122
|
+
- `paperfig critique-architecture`
|
|
123
|
+
- `paperfig audit`
|
|
124
|
+
- `paperfig lab init`
|
|
125
|
+
- `paperfig lab propose`
|
|
126
|
+
- `paperfig lab run`
|
|
127
|
+
- `paperfig lab review`
|
|
128
|
+
- `paperfig lab status`
|
|
129
|
+
<!-- AUTO-GEN:END cli-command-catalog -->
|
|
130
|
+
|
|
131
|
+
## Flow Template Pack
|
|
132
|
+
<!-- AUTO-GEN:START flow-template-catalog -->
|
|
133
|
+
- `ablation_matrix` (ablation)
|
|
134
|
+
- `dataset_characteristics` (dataset_overview)
|
|
135
|
+
- `error_analysis_breakdown` (error_analysis)
|
|
136
|
+
- `limitations_threats_to_validity` (limitations)
|
|
137
|
+
- `methodology_pipeline` (methodology)
|
|
138
|
+
- `results_summary_plot` (results_plot)
|
|
139
|
+
- `system_overview` (system_overview)
|
|
140
|
+
- `training_compute_profile` (compute_profile)
|
|
141
|
+
<!-- AUTO-GEN:END flow-template-catalog -->
|
|
142
|
+
|
|
143
|
+
## Outputs
|
|
144
|
+
Each run creates a `runs/<run_id>/` workspace containing:
|
|
145
|
+
- `figures/<figure_id>/figure.svg`
|
|
146
|
+
- `figures/<figure_id>/traceability.json`
|
|
147
|
+
- `captions.txt`
|
|
148
|
+
- `inspect.json`
|
|
149
|
+
- `docs_drift_report.json`
|
|
150
|
+
- `architecture_critique.json`
|
|
151
|
+
- `repro_audit.json`
|
|
152
|
+
- `exports/` with PNG, SVG, LaTeX snippets, and `export_report.json`
|
|
153
|
+
|
|
154
|
+
## Configuration
|
|
155
|
+
Default config lives in `paperfig.yaml`:
|
|
156
|
+
- docs scope and manifest path (`docs/docs_manifest.yaml`)
|
|
157
|
+
- architecture critique mode and severity gate
|
|
158
|
+
- reproducibility audit mode (`soft` by default)
|
|
159
|
+
- template pack (`expanded_v1`)
|
|
160
|
+
- lab registry path and sandbox policy (`config/lab_policy.yaml`)
|
|
161
|
+
|
|
162
|
+
## Verification
|
|
163
|
+
- Run unit/integration tests: `python3 -m unittest discover -s tests -v`
|
|
164
|
+
- Run docs drift check: `./scripts/check_docs_drift.sh`
|
|
165
|
+
- Run full quality checks: `./scripts/check_quality.sh`
|
|
166
|
+
|
|
167
|
+
## CI
|
|
168
|
+
- GitHub Actions pipeline: `.github/workflows/ci.yml`
|
|
169
|
+
- GitHub Actions docs drift gate: `.github/workflows/docs-drift.yml`
|
|
170
|
+
- GitHub Actions PyPI publish: `.github/workflows/publish.yml`
|
|
171
|
+
- Publish workflow expects `PYPI_API_TOKEN` secret in GitHub environment `pypi`.
|
|
172
|
+
- Manual `publish.yml` runs are dry-run by default; set workflow input `publish=true` to actually upload.
|
|
173
|
+
- GitLab pipeline: `.gitlab-ci.yml`
|
|
174
|
+
- All wrappers call shared scripts in `scripts/` (no duplicated CI logic in YAML)
|
|
175
|
+
|
|
176
|
+
## Community
|
|
177
|
+
- Changelog: `CHANGELOG.md`
|
|
178
|
+
- Contributing: `CONTRIBUTING.md`
|
|
179
|
+
- Code of conduct: `CODE_OF_CONDUCT.md`
|
|
180
|
+
- Citation metadata: `CITATION.cff`
|
|
181
|
+
|
|
182
|
+
## Architecture Docs
|
|
183
|
+
See:
|
|
184
|
+
- `docs/architecture/AI-Architecture-Analysis.md`
|
|
185
|
+
- `docs/architecture/flows/index.md`
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# paperfig
|
|
2
|
+
[](https://github.com/oluwafemidiakhoa/paperfig/actions/workflows/ci.yml)
|
|
3
|
+
[](https://github.com/oluwafemidiakhoa/paperfig/actions/workflows/docs-drift.yml)
|
|
4
|
+
[](https://github.com/oluwafemidiakhoa/paperfig/actions/workflows/publish.yml)
|
|
5
|
+
|
|
6
|
+
`paperfig` is a production-grade CLI that converts research papers (PDF or Markdown) into publication-ready academic figures using an agentic planning -> generation -> critique pipeline.
|
|
7
|
+
|
|
8
|
+
The core differentiator is that agent reasoning and architecture decisions are stored as versioned repo artifacts (architecture docs, flows, Mermaid diagrams, templates, audits) so humans and agents can evolve the system together.
|
|
9
|
+
|
|
10
|
+
## Install
|
|
11
|
+
- Standard CLI + PNG export:
|
|
12
|
+
- `pip install "paperfigg[cli,png]"`
|
|
13
|
+
- Developer tooling:
|
|
14
|
+
- `pip install "paperfigg[cli,png,dev,yaml,pdf,mcp]"`
|
|
15
|
+
- CLI-first local install:
|
|
16
|
+
- `pipx install .`
|
|
17
|
+
- `uv tool install .`
|
|
18
|
+
- Published package name is `paperfigg`; CLI command remains `paperfig`.
|
|
19
|
+
|
|
20
|
+
## Quickstart (Mock Mode, No Keys)
|
|
21
|
+
Mock mode is designed for instant local runs and realistic output artifacts.
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install "paperfigg[cli,png]"
|
|
25
|
+
paperfig doctor
|
|
26
|
+
paperfig generate examples/sample_paper.md --mode mock
|
|
27
|
+
paperfig docs check
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## 1-Minute Demo
|
|
31
|
+
```bash
|
|
32
|
+
pip install "paperfigg[cli,png]"
|
|
33
|
+
paperfig doctor
|
|
34
|
+
paperfig generate examples/sample_paper.md --mode mock
|
|
35
|
+
ls runs/*/figures/*/final/figure.svg
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Full Mode (PaperBanana MCP)
|
|
39
|
+
Use full mode when you want real PaperBanana generation via MCP.
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install "paperfigg[cli,png,mcp]"
|
|
43
|
+
export PAPERFIG_MCP_SERVER=paperbanana
|
|
44
|
+
export PAPERFIG_MCP_COMMAND="python -m your_mcp_server"
|
|
45
|
+
paperfig doctor --probe-mcp
|
|
46
|
+
paperfig generate examples/sample_paper.md --mode real
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## What You Get
|
|
50
|
+
- Generated figures (SVG and optional transparent PNG)
|
|
51
|
+
- LaTeX include snippets
|
|
52
|
+
- Captions and figure plans
|
|
53
|
+
- Traceability mapping from figure elements to source text spans
|
|
54
|
+
- Governance artifacts (`docs_drift_report.json`, `architecture_critique.json`, `repro_audit.json`)
|
|
55
|
+
|
|
56
|
+
Sample proof assets are committed in `docs/gallery/sample_paper`:
|
|
57
|
+
- `docs/gallery/sample_paper/fig-21a078a0.svg`
|
|
58
|
+
- `docs/gallery/sample_paper/plan.json`
|
|
59
|
+
- `docs/gallery/sample_paper/repro_audit.json`
|
|
60
|
+
- `docs/gallery/sample_paper/architecture_critique.json`
|
|
61
|
+
|
|
62
|
+

|
|
63
|
+
|
|
64
|
+
## What It Does
|
|
65
|
+
- Parses papers and extracts methodology, system description, and results sections.
|
|
66
|
+
- Plans figures through reusable flow templates and fallback heuristics.
|
|
67
|
+
- Generates figures via PaperBanana MCP and iterates with critique feedback loops.
|
|
68
|
+
- Regenerates docs and gates on drift for architecture governance.
|
|
69
|
+
- Runs reproducibility audits and architecture critiques as first-class run artifacts.
|
|
70
|
+
- Provides a constrained autonomous lab scaffold for iterative research experiments.
|
|
71
|
+
|
|
72
|
+
## How The Agentic System Works
|
|
73
|
+
The system uses specialized agents:
|
|
74
|
+
- `PlannerAgent` chooses figures and template-aligned abstractions.
|
|
75
|
+
- `GeneratorAgent` calls PaperBanana via MCP and emits traceable figure elements.
|
|
76
|
+
- `CriticAgent` scores faithfulness, readability, conciseness, and aesthetics.
|
|
77
|
+
- `ArchitectureCriticAgent` audits run-level architecture quality and governance completeness.
|
|
78
|
+
|
|
79
|
+
Full architecture documentation and flow diagrams live in `docs/architecture`.
|
|
80
|
+
|
|
81
|
+
## CLI Usage
|
|
82
|
+
<!-- AUTO-GEN:START cli-command-catalog -->
|
|
83
|
+
- `paperfig generate`
|
|
84
|
+
- `paperfig critique`
|
|
85
|
+
- `paperfig export`
|
|
86
|
+
- `paperfig doctor`
|
|
87
|
+
- `paperfig inspect`
|
|
88
|
+
- `paperfig docs regenerate`
|
|
89
|
+
- `paperfig docs check`
|
|
90
|
+
- `paperfig templates list`
|
|
91
|
+
- `paperfig templates validate`
|
|
92
|
+
- `paperfig critique-architecture`
|
|
93
|
+
- `paperfig audit`
|
|
94
|
+
- `paperfig lab init`
|
|
95
|
+
- `paperfig lab propose`
|
|
96
|
+
- `paperfig lab run`
|
|
97
|
+
- `paperfig lab review`
|
|
98
|
+
- `paperfig lab status`
|
|
99
|
+
<!-- AUTO-GEN:END cli-command-catalog -->
|
|
100
|
+
|
|
101
|
+
## Flow Template Pack
|
|
102
|
+
<!-- AUTO-GEN:START flow-template-catalog -->
|
|
103
|
+
- `ablation_matrix` (ablation)
|
|
104
|
+
- `dataset_characteristics` (dataset_overview)
|
|
105
|
+
- `error_analysis_breakdown` (error_analysis)
|
|
106
|
+
- `limitations_threats_to_validity` (limitations)
|
|
107
|
+
- `methodology_pipeline` (methodology)
|
|
108
|
+
- `results_summary_plot` (results_plot)
|
|
109
|
+
- `system_overview` (system_overview)
|
|
110
|
+
- `training_compute_profile` (compute_profile)
|
|
111
|
+
<!-- AUTO-GEN:END flow-template-catalog -->
|
|
112
|
+
|
|
113
|
+
## Outputs
|
|
114
|
+
Each run creates a `runs/<run_id>/` workspace containing:
|
|
115
|
+
- `figures/<figure_id>/figure.svg`
|
|
116
|
+
- `figures/<figure_id>/traceability.json`
|
|
117
|
+
- `captions.txt`
|
|
118
|
+
- `inspect.json`
|
|
119
|
+
- `docs_drift_report.json`
|
|
120
|
+
- `architecture_critique.json`
|
|
121
|
+
- `repro_audit.json`
|
|
122
|
+
- `exports/` with PNG, SVG, LaTeX snippets, and `export_report.json`
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
Default config lives in `paperfig.yaml`:
|
|
126
|
+
- docs scope and manifest path (`docs/docs_manifest.yaml`)
|
|
127
|
+
- architecture critique mode and severity gate
|
|
128
|
+
- reproducibility audit mode (`soft` by default)
|
|
129
|
+
- template pack (`expanded_v1`)
|
|
130
|
+
- lab registry path and sandbox policy (`config/lab_policy.yaml`)
|
|
131
|
+
|
|
132
|
+
## Verification
|
|
133
|
+
- Run unit/integration tests: `python3 -m unittest discover -s tests -v`
|
|
134
|
+
- Run docs drift check: `./scripts/check_docs_drift.sh`
|
|
135
|
+
- Run full quality checks: `./scripts/check_quality.sh`
|
|
136
|
+
|
|
137
|
+
## CI
|
|
138
|
+
- GitHub Actions pipeline: `.github/workflows/ci.yml`
|
|
139
|
+
- GitHub Actions docs drift gate: `.github/workflows/docs-drift.yml`
|
|
140
|
+
- GitHub Actions PyPI publish: `.github/workflows/publish.yml`
|
|
141
|
+
- Publish workflow expects `PYPI_API_TOKEN` secret in GitHub environment `pypi`.
|
|
142
|
+
- Manual `publish.yml` runs are dry-run by default; set workflow input `publish=true` to actually upload.
|
|
143
|
+
- GitLab pipeline: `.gitlab-ci.yml`
|
|
144
|
+
- All wrappers call shared scripts in `scripts/` (no duplicated CI logic in YAML)
|
|
145
|
+
|
|
146
|
+
## Community
|
|
147
|
+
- Changelog: `CHANGELOG.md`
|
|
148
|
+
- Contributing: `CONTRIBUTING.md`
|
|
149
|
+
- Code of conduct: `CODE_OF_CONDUCT.md`
|
|
150
|
+
- Citation metadata: `CITATION.cff`
|
|
151
|
+
|
|
152
|
+
## Architecture Docs
|
|
153
|
+
See:
|
|
154
|
+
- `docs/architecture/AI-Architecture-Analysis.md`
|
|
155
|
+
- `docs/architecture/flows/index.md`
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Submodules that make up the agents package's public surface; re-exported
# so `from paperfig.agents import *` exposes each agent module by name.
__all__ = ["planner", "generator", "critic", "architecture_critic"]
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import asdict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List
|
|
8
|
+
|
|
9
|
+
from paperfig.utils.prompts import load_prompt
|
|
10
|
+
from paperfig.utils.types import ArchitectureCritiqueFinding, ArchitectureCritiqueReport
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Ordinal ranking of critique-finding severities (higher = more severe).
# Used to pick the worst finding in a run and to compare against the
# configured block threshold when deciding whether a run is gated.
SEVERITY_ORDER = {
    "info": 0,
    "minor": 1,
    "major": 2,
    "critical": 3,
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ArchitectureCriticAgent:
    """Rule-based auditor of a completed run's architecture/governance artifacts.

    Checks a run directory for expected artifacts (``inspect.json``,
    ``plan.json``, ``docs_drift_report.json``) and for quality signals inside
    them, emitting an :class:`ArchitectureCritiqueReport` whose ``blocked``
    flag gates the run when the worst finding reaches ``block_severity``.
    """

    def __init__(self) -> None:
        # Loaded for parity with prompt-driven agents; the checks in
        # `critique` below are purely heuristic and do not consume it.
        self.prompt = load_prompt("critique_architecture.txt")

    def critique(self, run_dir: Path, block_severity: str = "critical") -> ArchitectureCritiqueReport:
        """Audit *run_dir* and return a critique report.

        Args:
            run_dir: Run workspace directory; its basename is used as run_id.
            block_severity: Minimum severity name that marks the run blocked.
                Unknown names fall back to "critical".

        Returns:
            ArchitectureCritiqueReport with ordered findings, a blocked flag,
            a human-readable summary, and a UTC generation timestamp.

        Raises:
            json.JSONDecodeError: if inspect.json or plan.json exist but
                contain invalid JSON (files are parsed without guarding).
        """
        run_id = run_dir.name
        findings: List[ArchitectureCritiqueFinding] = []

        # --- inspect.json: presence plus aggregate quality signals ---
        inspect_path = run_dir / "inspect.json"
        if not inspect_path.exists():
            findings.append(
                ArchitectureCritiqueFinding(
                    finding_id="missing_inspect",
                    severity="major",
                    title="Missing inspect summary",
                    description="Run is missing inspect.json, limiting architecture observability.",
                    evidence=str(inspect_path),
                    suggestion="Regenerate inspect summary via pipeline finalization or `paperfig inspect`.",
                )
            )
        else:
            inspect_data = json.loads(inspect_path.read_text(encoding="utf-8"))
            # Non-int or missing failed_count is silently treated as "no failures".
            failed = inspect_data.get("aggregate", {}).get("failed_count", 0)
            if isinstance(failed, int) and failed > 0:
                findings.append(
                    ArchitectureCritiqueFinding(
                        finding_id="failed_figures",
                        severity="major",
                        title="Failed figures present",
                        description="At least one figure did not pass final critique.",
                        evidence=f"failed_count={failed}",
                        suggestion="Review failed figures and rerun with improved prompts/templates.",
                    )
                )

            # 0.8 is the recommended minimum average traceability coverage.
            avg_cov = inspect_data.get("aggregate", {}).get("avg_traceability_coverage")
            if isinstance(avg_cov, (int, float)) and avg_cov < 0.8:
                findings.append(
                    ArchitectureCritiqueFinding(
                        finding_id="traceability_coverage_low",
                        severity="major",
                        title="Low traceability coverage",
                        description="Average traceability coverage is below recommended threshold.",
                        evidence=f"avg_traceability_coverage={avg_cov}",
                        suggestion="Ensure all figure elements include source span mappings.",
                    )
                )

        # --- plan.json: missing or empty plans are critical failures ---
        plan_path = run_dir / "plan.json"
        if not plan_path.exists():
            findings.append(
                ArchitectureCritiqueFinding(
                    finding_id="missing_plan",
                    severity="critical",
                    title="Missing plan artifact",
                    description="Run is missing plan.json.",
                    evidence=str(plan_path),
                    suggestion="Investigate planner stage and regenerate run artifacts.",
                )
            )
        else:
            plan_data = json.loads(plan_path.read_text(encoding="utf-8"))
            if isinstance(plan_data, list) and not plan_data:
                findings.append(
                    ArchitectureCritiqueFinding(
                        finding_id="empty_plan",
                        severity="critical",
                        title="Empty figure plan",
                        description="Planner generated no figures for this run.",
                        evidence="plan.json contains 0 entries",
                        suggestion="Review section extraction and template trigger rules.",
                    )
                )

        # --- docs drift report: absence is only a minor governance gap ---
        docs_report_path = run_dir / "docs_drift_report.json"
        if not docs_report_path.exists():
            findings.append(
                ArchitectureCritiqueFinding(
                    finding_id="missing_docs_drift_report",
                    severity="minor",
                    title="Missing docs drift report",
                    description="No docs_drift_report.json was found for this run.",
                    evidence=str(docs_report_path),
                    suggestion="Enable docs check in the generation finalization stage.",
                )
            )

        # Worst severity seen; unknown severity names rank as 0 ("info").
        max_seen = max((SEVERITY_ORDER.get(item.severity, 0) for item in findings), default=0)
        # NOTE(review): with block_severity="info" and zero findings this
        # evaluates 0 >= 0 and marks the run blocked — presumably "info"
        # is never used as a gate in practice; confirm with callers.
        blocked = max_seen >= SEVERITY_ORDER.get(block_severity, SEVERITY_ORDER["critical"])

        summary = "No architecture findings."
        if findings:
            summary = f"{len(findings)} finding(s); highest severity={self._severity_name(max_seen)}"

        return ArchitectureCritiqueReport(
            run_id=run_id,
            block_severity=block_severity,
            findings=findings,
            blocked=blocked,
            summary=summary,
            # ISO-8601 UTC timestamp (Z suffix) for reproducible artifacts.
            generated_at=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        )

    @staticmethod
    def _severity_name(value: int) -> str:
        """Reverse-map a severity rank to its name; unknown ranks -> "info"."""
        for name, rank in SEVERITY_ORDER.items():
            if rank == value:
                return name
        return "info"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def report_to_dict(report: ArchitectureCritiqueReport) -> Dict[str, object]:
    """Serialize a critique-report dataclass (and nested findings) to a plain dict."""
    serialized: Dict[str, object] = asdict(report)
    return serialized
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Dict, List
|
|
5
|
+
|
|
6
|
+
from paperfig.utils.prompts import load_prompt
|
|
7
|
+
from paperfig.utils.types import CritiqueReport, FigurePlan, PaperContent
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CriticAgent:
    """Heuristic figure critic scoring four quality dimensions of an SVG.

    Scores faithfulness, readability, conciseness, and aesthetics from the
    raw SVG text plus the figure plan/paper context, averages them into an
    overall score, and passes the figure only when the average meets
    ``threshold`` AND every dimension meets ``dimension_threshold``.
    """

    def __init__(self, threshold: float = 0.75, dimension_threshold: float = 0.55) -> None:
        # threshold: minimum average score; dimension_threshold: per-dimension floor.
        self.threshold = threshold
        self.dimension_threshold = dimension_threshold
        # Loaded for parity with prompt-driven critics; scoring below is heuristic.
        self.prompt = load_prompt("critique_figure.txt")

    def critique(self, svg_path: Path, plan: FigurePlan, paper: PaperContent) -> CritiqueReport:
        """Score the SVG at *svg_path* against *plan*/*paper* and report pass/fail.

        Raises:
            OSError / UnicodeDecodeError: if the SVG file cannot be read as UTF-8.
        """
        svg_text = svg_path.read_text(encoding="utf-8")
        issues: List[str] = []
        recommendations: List[str] = []
        dimensions = self._score_dimensions(svg_text, plan, paper)
        # Unweighted mean across the four dimensions.
        score = sum(dimensions.values()) / len(dimensions)
        failed_dimensions = [name for name, value in dimensions.items() if value < self.dimension_threshold]

        # One issue + one recommendation per failing dimension.
        if "readability" in failed_dimensions:
            issues.append("Readability below threshold: labels or visual structure are insufficient.")
            recommendations.append("Add clear labels, improve hierarchy, and avoid dense overlaps.")
        if "faithfulness" in failed_dimensions:
            issues.append("Faithfulness below threshold: figure support from source spans is weak.")
            recommendations.append("Tie every key label and relation to explicit source text spans.")
        if "conciseness" in failed_dimensions:
            issues.append("Conciseness below threshold: figure is either too sparse or overloaded.")
            recommendations.append("Keep only essential elements and remove decorative clutter.")
        if "aesthetics" in failed_dimensions:
            issues.append("Aesthetics below threshold: layout balance and presentation need refinement.")
            recommendations.append("Improve alignment, spacing, and consistent visual encoding.")

        # NOTE(review): the "invalid" substring check can only match issues
        # appended above (none contain "invalid") — presumably a hook for
        # future validity issues; confirm intent.
        passed = (
            score >= self.threshold
            and not failed_dimensions
            and not any("invalid" in issue.lower() for issue in issues)
        )

        if not passed:
            recommendations.append("Revise layout to improve clarity and alignment with the paper.")

        return CritiqueReport(
            figure_id=plan.figure_id,
            # Scores are clamped to 1.0 and rounded for stable JSON artifacts.
            score=round(min(score, 1.0), 3),
            threshold=self.threshold,
            quality_dimensions={k: round(v, 3) for k, v in dimensions.items()},
            dimension_threshold=self.dimension_threshold,
            failed_dimensions=failed_dimensions,
            issues=issues,
            recommendations=recommendations,
            passed=passed,
        )

    def _score_dimensions(
        self,
        svg_text: str,
        plan: FigurePlan,
        paper: PaperContent,
    ) -> Dict[str, float]:
        """Compute all four dimension scores, each in [0, 1]."""
        return {
            "faithfulness": self._score_faithfulness(svg_text, plan, paper),
            "readability": self._score_readability(svg_text),
            "conciseness": self._score_conciseness(svg_text),
            "aesthetics": self._score_aesthetics(svg_text),
        }

    def _score_faithfulness(self, svg_text: str, plan: FigurePlan, paper: PaperContent) -> float:
        """Score how well the figure is grounded in plan/source material.

        Additive heuristic starting at 0.35: +0.3 for any source spans,
        +0.1 for a non-trivial description, +0.15 when a results plot has
        results text, +0.05 for the mock-generator marker; capped at 1.0.
        """
        score = 0.35
        if plan.source_spans:
            score += 0.3
        if len(plan.description.strip()) > 20:
            score += 0.1
        if plan.kind == "results_plot" and paper.sections.get("results") and paper.sections["results"].text:
            score += 0.15
        if "mock paperbanana output" in svg_text.lower():
            score += 0.05
        return min(score, 1.0)

    def _score_readability(self, svg_text: str) -> float:
        """Score label presence and visual structure via raw SVG tag counts.

        Base 0.3; +0.25 for >=2 <text> elements (+0.15 for exactly one),
        +0.2 for any drawing primitive, +0.1 each for font-size and viewBox.
        """
        score = 0.3
        text_count = svg_text.count("<text")
        if text_count >= 2:
            score += 0.25
        elif text_count == 1:
            score += 0.15
        if any(tag in svg_text for tag in ("<rect", "<path", "<line", "<circle")):
            score += 0.2
        if "font-size" in svg_text:
            score += 0.1
        if "viewBox" in svg_text:
            score += 0.1
        return min(score, 1.0)

    def _score_conciseness(self, svg_text: str) -> float:
        """Penalize figures that are too sparse or overloaded.

        Base 0.5; document length in [250, 9000] chars earns +0.25,
        >12000 loses 0.2, anything else loses 0.1. Primitive count in
        [1, 40] earns +0.2; >120 loses 0.2. Result clamped to [0, 1].
        """
        score = 0.5
        length = len(svg_text)
        if 250 <= length <= 9000:
            score += 0.25
        elif length > 12000:
            score -= 0.2
        else:
            score -= 0.1
        primitive_count = sum(svg_text.count(tag) for tag in ("<rect", "<path", "<line", "<circle", "<polygon"))
        if 1 <= primitive_count <= 40:
            score += 0.2
        elif primitive_count > 120:
            score -= 0.2
        return max(min(score, 1.0), 0.0)

    def _score_aesthetics(self, svg_text: str) -> float:
        """Score presentation hygiene from presence of styling attributes.

        Base 0.35; +0.2 when viewBox/width/height all appear, +0.15 each
        for stroke and fill, +0.1 for font-family; capped at 1.0.
        """
        score = 0.35
        if "viewBox" in svg_text and "width" in svg_text and "height" in svg_text:
            score += 0.2
        if "stroke" in svg_text:
            score += 0.15
        if "fill" in svg_text:
            score += 0.15
        if "font-family" in svg_text:
            score += 0.1
        return min(score, 1.0)
|