pull-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pull_cli-0.1.0/.github/workflows/ci.yml +45 -0
- pull_cli-0.1.0/.github/workflows/publish.yml +54 -0
- pull_cli-0.1.0/.gitignore +17 -0
- pull_cli-0.1.0/AGENTS.md +9 -0
- pull_cli-0.1.0/LICENSE +21 -0
- pull_cli-0.1.0/PKG-INFO +218 -0
- pull_cli-0.1.0/README.md +180 -0
- pull_cli-0.1.0/RELEASING.md +71 -0
- pull_cli-0.1.0/pull_ai_confluence_design.md +1793 -0
- pull_cli-0.1.0/pyproject.toml +72 -0
- pull_cli-0.1.0/src/pull_cli/__init__.py +5 -0
- pull_cli-0.1.0/src/pull_cli/__main__.py +6 -0
- pull_cli-0.1.0/src/pull_cli/assets.py +235 -0
- pull_cli-0.1.0/src/pull_cli/attachment_extractors.py +85 -0
- pull_cli-0.1.0/src/pull_cli/cli.py +329 -0
- pull_cli-0.1.0/src/pull_cli/clients/__init__.py +8 -0
- pull_cli-0.1.0/src/pull_cli/clients/base.py +29 -0
- pull_cli-0.1.0/src/pull_cli/clients/cloud_v2.py +132 -0
- pull_cli-0.1.0/src/pull_cli/clients/data_center.py +360 -0
- pull_cli-0.1.0/src/pull_cli/clients/hybrid.py +15 -0
- pull_cli-0.1.0/src/pull_cli/config.py +82 -0
- pull_cli-0.1.0/src/pull_cli/crawler.py +51 -0
- pull_cli-0.1.0/src/pull_cli/envelope.py +59 -0
- pull_cli-0.1.0/src/pull_cli/errors.py +50 -0
- pull_cli-0.1.0/src/pull_cli/extractor.py +344 -0
- pull_cli-0.1.0/src/pull_cli/guide.py +115 -0
- pull_cli-0.1.0/src/pull_cli/html_normalizer.py +111 -0
- pull_cli-0.1.0/src/pull_cli/links.py +186 -0
- pull_cli-0.1.0/src/pull_cli/macros.py +527 -0
- pull_cli-0.1.0/src/pull_cli/markdown_writer.py +24 -0
- pull_cli-0.1.0/src/pull_cli/models.py +232 -0
- pull_cli-0.1.0/src/pull_cli/paths.py +45 -0
- pull_cli-0.1.0/src/pull_cli/resolver.py +72 -0
- pull_cli-0.1.0/src/pull_cli/security.py +103 -0
- pull_cli-0.1.0/src/pull_cli/validator.py +398 -0
- pull_cli-0.1.0/src/pull_cli/writer.py +792 -0
- pull_cli-0.1.0/tests/__init__.py +1 -0
- pull_cli-0.1.0/tests/conftest.py +114 -0
- pull_cli-0.1.0/tests/generate_fixture_output.py +48 -0
- pull_cli-0.1.0/tests/test_cli.py +205 -0
- pull_cli-0.1.0/tests/test_clients.py +137 -0
- pull_cli-0.1.0/tests/test_extraction.py +794 -0
- pull_cli-0.1.0/tests/test_resolver.py +47 -0
- pull_cli-0.1.0/uv.lock +1407 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, master]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main, master]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
name: Python ${{ matrix.python-version }}
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: false
|
|
18
|
+
matrix:
|
|
19
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- name: Check out repository
|
|
23
|
+
uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
uses: astral-sh/setup-uv@v5
|
|
27
|
+
with:
|
|
28
|
+
enable-cache: true
|
|
29
|
+
|
|
30
|
+
- name: Set up Python
|
|
31
|
+
uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: ${{ matrix.python-version }}
|
|
34
|
+
|
|
35
|
+
- name: Install dependencies
|
|
36
|
+
run: uv sync --all-extras
|
|
37
|
+
|
|
38
|
+
- name: Lint
|
|
39
|
+
run: uv run ruff check .
|
|
40
|
+
|
|
41
|
+
- name: Test
|
|
42
|
+
run: uv run pytest
|
|
43
|
+
|
|
44
|
+
- name: Build
|
|
45
|
+
run: uv build
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
name: Build and publish distribution
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
environment:
|
|
15
|
+
name: pypi
|
|
16
|
+
url: https://pypi.org/project/pull-cli/
|
|
17
|
+
permissions:
|
|
18
|
+
contents: read
|
|
19
|
+
id-token: write
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- name: Check out repository
|
|
23
|
+
uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
uses: astral-sh/setup-uv@v5
|
|
27
|
+
with:
|
|
28
|
+
enable-cache: true
|
|
29
|
+
|
|
30
|
+
- name: Set up Python
|
|
31
|
+
uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: "3.13"
|
|
34
|
+
|
|
35
|
+
- name: Install dependencies
|
|
36
|
+
run: uv sync --all-extras
|
|
37
|
+
|
|
38
|
+
- name: Check release tag matches package version
|
|
39
|
+
run: |
|
|
40
|
+
package_version="$(uv run python -c 'import pull_cli; print(pull_cli.__version__)')"
|
|
41
|
+
tag_version="${GITHUB_REF_NAME#v}"
|
|
42
|
+
if [ "$package_version" != "$tag_version" ]; then
|
|
43
|
+
echo "Release tag $GITHUB_REF_NAME does not match package version $package_version"
|
|
44
|
+
exit 1
|
|
45
|
+
fi
|
|
46
|
+
|
|
47
|
+
- name: Build distributions
|
|
48
|
+
run: uv build
|
|
49
|
+
|
|
50
|
+
- name: Check distribution metadata
|
|
51
|
+
run: uvx --from twine twine check dist/*
|
|
52
|
+
|
|
53
|
+
- name: Publish package distributions to PyPI
|
|
54
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
pull_cli-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Repository Guidance
|
|
2
|
+
|
|
3
|
+
- Keep `pull` focused on AI analysis of the current rendered Confluence page. Do not turn it into an alias of `confpub page pull`.
|
|
4
|
+
- Preserve the stdout/stderr contract: JSON mode writes exactly one JSON object to stdout; progress and diagnostics belong on stderr.
|
|
5
|
+
- Do not log or write credentials, tokens, cookies, Authorization headers, or signed download URLs. Route all persisted API data through the redaction helpers.
|
|
6
|
+
- Keep Confluence access read-only. Do not add publishing, draft mutation, or permission-bypass behavior.
|
|
7
|
+
- Prefer deterministic mocked fixtures for tests. Live Confluence checks are useful smoke tests, not required unit tests.
|
|
8
|
+
- When adding macro behavior, implement it through the registry and add a fixture test plus warning/manifest expectations.
|
|
9
|
+
- All manifest paths must remain relative to the output root, and `pull validate` should be extended when new output artifacts are added.
|
pull_cli-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Thomas Rohde
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pull_cli-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pull-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-optimized Confluence evidence package extractor
|
|
5
|
+
Project-URL: Homepage, https://github.com/ThomasRohde/pull-cli
|
|
6
|
+
Project-URL: Repository, https://github.com/ThomasRohde/pull-cli
|
|
7
|
+
Project-URL: Issues, https://github.com/ThomasRohde/pull-cli/issues
|
|
8
|
+
Author-email: Thomas Rohde <rohde.thomas@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,atlassian,cli,confluence,markdown
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: atlassian-python-api>=4.0.7
|
|
23
|
+
Requires-Dist: beautifulsoup4>=4.12.3
|
|
24
|
+
Requires-Dist: lxml>=5.2.0
|
|
25
|
+
Requires-Dist: markdownify>=0.13.1
|
|
26
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: hatch>=1.14.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=8.3.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: ruff>=0.6.9; extra == 'dev'
|
|
31
|
+
Provides-Extra: extract
|
|
32
|
+
Requires-Dist: defusedxml>=0.7.1; extra == 'extract'
|
|
33
|
+
Requires-Dist: openpyxl>=3.1.5; extra == 'extract'
|
|
34
|
+
Requires-Dist: pypdf>=5.0.0; extra == 'extract'
|
|
35
|
+
Requires-Dist: python-docx>=1.1.2; extra == 'extract'
|
|
36
|
+
Requires-Dist: python-pptx>=1.0.2; extra == 'extract'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# pull-cli
|
|
40
|
+
|
|
41
|
+
`pull-cli` installs the `pull` command, a read-only Confluence extractor for AI-consumable evidence packages. It is rendered-page-first: page Markdown, and the optional Markdown bundle in full mode, are based on the current published page as visible to the authenticated user, while storage XML is kept for macro recovery, provenance, and fallback.
|
|
42
|
+
|
|
43
|
+
The default output mode is `simple`: a quiet agent-facing package with the root AI Markdown file, per-page Markdown files, assets/sidecars, and validation control files. Use `--output-mode full` when you also want `bundle.md`, page HTML snapshots, and storage-source sidecars.
|
|
44
|
+
|
|
45
|
+
Confluence access is implemented through `atlassian-python-api` behind a small `pull_cli.clients` protocol. The extraction, redaction, manifest, asset, link, and validation contracts remain owned by `pull-cli`.
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uvx pull-cli --help
|
|
51
|
+
uv tool install pull-cli
|
|
52
|
+
pip install pull-cli
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The package name is `pull-cli`. The import package is `pull_cli`. Console scripts are `pull` and `pull-cli`.
|
|
56
|
+
|
|
57
|
+
## Quickstart
|
|
58
|
+
|
|
59
|
+
Cloud:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
export PULL_URL=https://example.atlassian.net/wiki
|
|
63
|
+
export PULL_USER=you@example.com
|
|
64
|
+
export PULL_TOKEN=your-api-token
|
|
65
|
+
pull 123456 -o pulled-confluence
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Data Center or Server:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
export PULL_URL=https://confluence.example.com/confluence
|
|
72
|
+
export PULL_TOKEN=your-personal-access-token
|
|
73
|
+
pull --page-id 123456 -o pulled-confluence
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
`CONFPUB_URL`, `CONFPUB_USER`, `CONFPUB_TOKEN`, and `CONFPUB_SSL_VERIFY` are accepted as compatibility fallbacks; they are consulted only after CLI flags, `PULL_*` variables, and any `--config` file.
|
|
77
|
+
|
|
78
|
+
## CLI Examples
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pull 123456 -o pulled
|
|
82
|
+
pull "https://example.atlassian.net/wiki/spaces/EA/pages/123456/Architecture" -o pulled
|
|
83
|
+
pull --space EA --title "Architecture Overview" -o pulled
|
|
84
|
+
pull --page-id 123456 --tree --depth 3 --max-pages 100 -o tree
|
|
85
|
+
pull --page-id 123456 --tree --assets all --extract-attachments -o offline
|
|
86
|
+
pull --page-id 123456 --tree --comments -o with-comments
|
|
87
|
+
pull --page-id 123456 --output-mode full -o full-evidence
|
|
88
|
+
pull --page-id 123456 --output-mode simple --bundle -o simple-with-bundle
|
|
89
|
+
pull --page-id 123456 --json -o pulled
|
|
90
|
+
pull validate pulled
|
|
91
|
+
pull guide --json
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Selector resolution order is: explicit `--page-id`, explicit `--url`, positional URL, positional numeric page ID, then `--space` plus `--title`.
|
|
95
|
+
|
|
96
|
+
## Output Package
|
|
97
|
+
|
|
98
|
+
Default `simple` mode:
|
|
99
|
+
|
|
100
|
+
```text
|
|
101
|
+
pulled-confluence/
|
|
102
|
+
├── page-title.md
|
|
103
|
+
├── page-title.yaml
|
|
104
|
+
├── manifest.yaml
|
|
105
|
+
├── pages/
|
|
106
|
+
│ └── 0001-page-slug/
|
|
107
|
+
│ ├── index.md
|
|
108
|
+
│ ├── page.json
|
|
109
|
+
│ ├── comments.md # with --comments, only when comments exist
|
|
110
|
+
│ └── assets/
|
|
111
|
+
└── diagnostics/
|
|
112
|
+
├── warnings.jsonl
|
|
113
|
+
└── unresolved-links.md
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
`page-title.md` is named from the sanitized root page title and is the recommended first file to give another AI agent. In simple mode it links only the reading/navigation surface: page Markdown paths, assets, sidecars, and explicitly requested agent-facing extras such as `bundle.md` or `chunks.jsonl`. Warning counts are shown, but control files are not linked from the root AI Markdown.
|
|
117
|
+
|
|
118
|
+
`page-title.yaml` is the machine-readable version of that AI navigation manifest, also named from the sanitized root page title. It intentionally omits noisy provenance and raw API details; use `manifest.yaml` when you need full validation/provenance data. The exact generated filenames are recorded in `manifest.yaml` under `paths.ai_entry` and `paths.ai_manifest`. AI navigation paths are package-root-relative: resolve them against the directory containing the root AI Markdown/YAML file, not the caller's shell working directory.
|
|
119
|
+
|
|
120
|
+
`manifest.yaml`, `page.json`, and diagnostics files are still written in simple mode so `pull validate <output-dir>` and provenance checks work. `--force` never deletes stale files from earlier runs; use `--clean` when switching modes if you need the physical tree to contain only files from the new mode.
|
|
121
|
+
|
|
122
|
+
`--output-mode full` adds the full evidence artifacts:
|
|
123
|
+
|
|
124
|
+
```text
|
|
125
|
+
pulled-confluence/
|
|
126
|
+
├── bundle.md
|
|
127
|
+
└── pages/
|
|
128
|
+
└── 0001-page-slug/
|
|
129
|
+
├── index.html
|
|
130
|
+
└── source.storage.xml
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
`bundle.md` concatenates pages in page/tree order with stable delimiters for AI use; local links embedded in the bundle are rebased to the package root. `index.html` and `source.storage.xml` are raw/reference artifacts, not the primary navigation surface.
|
|
134
|
+
|
|
135
|
+
For tree pulls, nested page paths are the default. The manifest always carries stable numeric ordering.
|
|
136
|
+
|
|
137
|
+
## Auth and Config
|
|
138
|
+
|
|
139
|
+
Resolution order:
|
|
140
|
+
|
|
141
|
+
1. CLI flags such as `--base-url`, `--user`, `--token`, `--ssl-verify`.
|
|
142
|
+
2. `PULL_*` environment variables.
|
|
143
|
+
3. Optional YAML config from `--config`.
|
|
144
|
+
4. `CONFPUB_*` compatibility environment variables.
|
|
145
|
+
|
|
146
|
+
`--ssl-verify` accepts `true`, `false`, or a CA bundle path.
|
|
147
|
+
|
|
148
|
+
## Macro, Asset, and Link Behavior
|
|
149
|
+
|
|
150
|
+
The extractor uses a macro adapter registry. Current adapters cover panels/admonitions, code/noformat, status, expand, tabs, layout flattening, TOC placeholders, children/page tree links when in scope, include/excerpt placeholders or inline source when available, attachments, displayed files, Jira placeholders, diagram snapshots, dynamic snapshots, HTML macro sanitization, and unknown macro placeholders.
|
|
151
|
+
|
|
152
|
+
Asset policy defaults to `visible`: rendered images, visible attachment links, file macros, and rendered diagram images where discoverable. `--assets page` downloads all page attachments. `--assets all` includes visible/referenced assets plus all page attachments and macro-listed files where discoverable. `--no-assets` skips downloads and preserves source links with warnings.
|
|
153
|
+
|
|
154
|
+
Local links to pages in the pulled tree are rewritten to relative `index.md` paths. Downloaded asset links are rewritten to local files. External, mailto, Jira, and out-of-scope Confluence links are preserved. Same-page anchors are normalized where possible; unresolved anchors become diagnostics.
|
|
155
|
+
|
|
156
|
+
## Comments
|
|
157
|
+
|
|
158
|
+
Comments are skipped by default. Use `--comments` to fetch page-level and inline comments for each pulled page. When comments exist, `pull` writes a page-local `comments.md` sidecar with agent-readable metadata and Markdown-converted comment bodies.
|
|
159
|
+
|
|
160
|
+
Comment sidecars are agent-facing reading surfaces: the root AI Markdown page hierarchy links them in simple mode, the page `index.md` header links the local sidecar, and the AI YAML includes the optional comments path and count. If one page's comments cannot be fetched, the pull continues with `W_COMMENTS_FETCH_FAILED` and validation can still pass for the partial package.
|
|
161
|
+
|
|
162
|
+
## JSON Mode
|
|
163
|
+
|
|
164
|
+
With `--json` or `LLM=true`, stdout is exactly one JSON object with:
|
|
165
|
+
|
|
166
|
+
```json
|
|
167
|
+
{
|
|
168
|
+
"schema_version": "1.0",
|
|
169
|
+
"request_id": "req_...",
|
|
170
|
+
"ok": true,
|
|
171
|
+
"command": "pull",
|
|
172
|
+
"target": {},
|
|
173
|
+
"result": {},
|
|
174
|
+
"warnings": [],
|
|
175
|
+
"errors": [],
|
|
176
|
+
"metrics": {}
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Progress, retries, warnings, and debug output belong on stderr.
|
|
181
|
+
|
|
182
|
+
## Security
|
|
183
|
+
|
|
184
|
+
`pull` is read-only. It does not mutate Confluence, fetch drafts by default, bypass permissions, or call LLM services. Tokens, Authorization headers, cookies, signed download query parameters, and token-like strings are redacted before JSON envelopes, manifests, page metadata, and diagnostics are written.
|
|
185
|
+
|
|
186
|
+
Rendered HTML snapshots are sanitized by removing executable tags and event attributes. HTML macro content is made inert before conversion.
|
|
187
|
+
|
|
188
|
+
## Validation
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
pull validate pulled-confluence
|
|
192
|
+
pull validate pulled-confluence/manifest.yaml --json
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Validation checks manifest shape, AI navigation manifest paths, relative paths, page files, optional comment sidecars, asset checksums, diagnostics JSONL, Markdown local links, and token-like markers in text outputs.
|
|
196
|
+
|
|
197
|
+
## Development
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
uv sync --all-extras
|
|
201
|
+
uv run ruff check .
|
|
202
|
+
uv run pytest
|
|
203
|
+
uv build
|
|
204
|
+
uv run pull --help
|
|
205
|
+
uv run pull guide --json
|
|
206
|
+
uv run python tests/generate_fixture_output.py .tmp/generated-fixture
|
|
207
|
+
uv run pull validate .tmp/generated-fixture
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Live smoke testing requires a readable Confluence page and credentials through `PULL_*` or `CONFPUB_*`.
|
|
211
|
+
|
|
212
|
+
## Releasing
|
|
213
|
+
|
|
214
|
+
Versions are managed from `src/pull_cli/__init__.py` through Hatch. Use `uv run hatch version patch`, `uv run hatch version minor`, or `uv run hatch version major`; `pull --version`, built package metadata, and GitHub release tags are expected to match. See [RELEASING.md](RELEASING.md) for the PyPI trusted publisher setup and release flow.
|
|
215
|
+
|
|
216
|
+
## License
|
|
217
|
+
|
|
218
|
+
MIT. See [LICENSE](LICENSE).
|
pull_cli-0.1.0/README.md
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# pull-cli
|
|
2
|
+
|
|
3
|
+
`pull-cli` installs the `pull` command, a read-only Confluence extractor for AI-consumable evidence packages. It is rendered-page-first: page Markdown, and the optional Markdown bundle in full mode, are based on the current published page as visible to the authenticated user, while storage XML is kept for macro recovery, provenance, and fallback.
|
|
4
|
+
|
|
5
|
+
The default output mode is `simple`: a quiet agent-facing package with the root AI Markdown file, per-page Markdown files, assets/sidecars, and validation control files. Use `--output-mode full` when you also want `bundle.md`, page HTML snapshots, and storage-source sidecars.
|
|
6
|
+
|
|
7
|
+
Confluence access is implemented through `atlassian-python-api` behind a small `pull_cli.clients` protocol. The extraction, redaction, manifest, asset, link, and validation contracts remain owned by `pull-cli`.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
uvx pull-cli --help
|
|
13
|
+
uv tool install pull-cli
|
|
14
|
+
pip install pull-cli
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
The package name is `pull-cli`. The import package is `pull_cli`. Console scripts are `pull` and `pull-cli`.
|
|
18
|
+
|
|
19
|
+
## Quickstart
|
|
20
|
+
|
|
21
|
+
Cloud:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
export PULL_URL=https://example.atlassian.net/wiki
|
|
25
|
+
export PULL_USER=you@example.com
|
|
26
|
+
export PULL_TOKEN=your-api-token
|
|
27
|
+
pull 123456 -o pulled-confluence
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Data Center or Server:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
export PULL_URL=https://confluence.example.com/confluence
|
|
34
|
+
export PULL_TOKEN=your-personal-access-token
|
|
35
|
+
pull --page-id 123456 -o pulled-confluence
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
`CONFPUB_URL`, `CONFPUB_USER`, `CONFPUB_TOKEN`, and `CONFPUB_SSL_VERIFY` are accepted as compatibility fallbacks; they are consulted only after CLI flags, `PULL_*` variables, and any `--config` file.
|
|
39
|
+
|
|
40
|
+
## CLI Examples
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pull 123456 -o pulled
|
|
44
|
+
pull "https://example.atlassian.net/wiki/spaces/EA/pages/123456/Architecture" -o pulled
|
|
45
|
+
pull --space EA --title "Architecture Overview" -o pulled
|
|
46
|
+
pull --page-id 123456 --tree --depth 3 --max-pages 100 -o tree
|
|
47
|
+
pull --page-id 123456 --tree --assets all --extract-attachments -o offline
|
|
48
|
+
pull --page-id 123456 --tree --comments -o with-comments
|
|
49
|
+
pull --page-id 123456 --output-mode full -o full-evidence
|
|
50
|
+
pull --page-id 123456 --output-mode simple --bundle -o simple-with-bundle
|
|
51
|
+
pull --page-id 123456 --json -o pulled
|
|
52
|
+
pull validate pulled
|
|
53
|
+
pull guide --json
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Selector resolution order is: explicit `--page-id`, explicit `--url`, positional URL, positional numeric page ID, then `--space` plus `--title`.
|
|
57
|
+
|
|
58
|
+
## Output Package
|
|
59
|
+
|
|
60
|
+
Default `simple` mode:
|
|
61
|
+
|
|
62
|
+
```text
|
|
63
|
+
pulled-confluence/
|
|
64
|
+
├── page-title.md
|
|
65
|
+
├── page-title.yaml
|
|
66
|
+
├── manifest.yaml
|
|
67
|
+
├── pages/
|
|
68
|
+
│ └── 0001-page-slug/
|
|
69
|
+
│ ├── index.md
|
|
70
|
+
│ ├── page.json
|
|
71
|
+
│ ├── comments.md # with --comments, only when comments exist
|
|
72
|
+
│ └── assets/
|
|
73
|
+
└── diagnostics/
|
|
74
|
+
├── warnings.jsonl
|
|
75
|
+
└── unresolved-links.md
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
`page-title.md` is named from the sanitized root page title and is the recommended first file to give another AI agent. In simple mode it links only the reading/navigation surface: page Markdown paths, assets, sidecars, and explicitly requested agent-facing extras such as `bundle.md` or `chunks.jsonl`. Warning counts are shown, but control files are not linked from the root AI Markdown.
|
|
79
|
+
|
|
80
|
+
`page-title.yaml` is the machine-readable version of that AI navigation manifest, also named from the sanitized root page title. It intentionally omits noisy provenance and raw API details; use `manifest.yaml` when you need full validation/provenance data. The exact generated filenames are recorded in `manifest.yaml` under `paths.ai_entry` and `paths.ai_manifest`. AI navigation paths are package-root-relative: resolve them against the directory containing the root AI Markdown/YAML file, not the caller's shell working directory.
|
|
81
|
+
|
|
82
|
+
`manifest.yaml`, `page.json`, and diagnostics files are still written in simple mode so `pull validate <output-dir>` and provenance checks work. `--force` never deletes stale files from earlier runs; use `--clean` when switching modes if you need the physical tree to contain only files from the new mode.
|
|
83
|
+
|
|
84
|
+
`--output-mode full` adds the full evidence artifacts:
|
|
85
|
+
|
|
86
|
+
```text
|
|
87
|
+
pulled-confluence/
|
|
88
|
+
├── bundle.md
|
|
89
|
+
└── pages/
|
|
90
|
+
└── 0001-page-slug/
|
|
91
|
+
├── index.html
|
|
92
|
+
└── source.storage.xml
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
`bundle.md` concatenates pages in page/tree order with stable delimiters for AI use; local links embedded in the bundle are rebased to the package root. `index.html` and `source.storage.xml` are raw/reference artifacts, not the primary navigation surface.
|
|
96
|
+
|
|
97
|
+
For tree pulls, nested page paths are the default. The manifest always carries stable numeric ordering.
|
|
98
|
+
|
|
99
|
+
## Auth and Config
|
|
100
|
+
|
|
101
|
+
Resolution order:
|
|
102
|
+
|
|
103
|
+
1. CLI flags such as `--base-url`, `--user`, `--token`, `--ssl-verify`.
|
|
104
|
+
2. `PULL_*` environment variables.
|
|
105
|
+
3. Optional YAML config from `--config`.
|
|
106
|
+
4. `CONFPUB_*` compatibility environment variables.
|
|
107
|
+
|
|
108
|
+
`--ssl-verify` accepts `true`, `false`, or a CA bundle path.
|
|
109
|
+
|
|
110
|
+
## Macro, Asset, and Link Behavior
|
|
111
|
+
|
|
112
|
+
The extractor uses a macro adapter registry. Current adapters cover panels/admonitions, code/noformat, status, expand, tabs, layout flattening, TOC placeholders, children/page tree links when in scope, include/excerpt placeholders or inline source when available, attachments, displayed files, Jira placeholders, diagram snapshots, dynamic snapshots, HTML macro sanitization, and unknown macro placeholders.
|
|
113
|
+
|
|
114
|
+
Asset policy defaults to `visible`: rendered images, visible attachment links, file macros, and rendered diagram images where discoverable. `--assets page` downloads all page attachments. `--assets all` includes visible/referenced assets plus all page attachments and macro-listed files where discoverable. `--no-assets` skips downloads and preserves source links with warnings.
|
|
115
|
+
|
|
116
|
+
Local links to pages in the pulled tree are rewritten to relative `index.md` paths. Downloaded asset links are rewritten to local files. External, mailto, Jira, and out-of-scope Confluence links are preserved. Same-page anchors are normalized where possible; unresolved anchors become diagnostics.
|
|
117
|
+
|
|
118
|
+
## Comments
|
|
119
|
+
|
|
120
|
+
Comments are skipped by default. Use `--comments` to fetch page-level and inline comments for each pulled page. When comments exist, `pull` writes a page-local `comments.md` sidecar with agent-readable metadata and Markdown-converted comment bodies.
|
|
121
|
+
|
|
122
|
+
Comment sidecars are agent-facing reading surfaces: the root AI Markdown page hierarchy links them in simple mode, the page `index.md` header links the local sidecar, and the AI YAML includes the optional comments path and count. If one page's comments cannot be fetched, the pull continues with `W_COMMENTS_FETCH_FAILED` and validation can still pass for the partial package.
|
|
123
|
+
|
|
124
|
+
## JSON Mode
|
|
125
|
+
|
|
126
|
+
With `--json` or `LLM=true`, stdout is exactly one JSON object with:
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"schema_version": "1.0",
|
|
131
|
+
"request_id": "req_...",
|
|
132
|
+
"ok": true,
|
|
133
|
+
"command": "pull",
|
|
134
|
+
"target": {},
|
|
135
|
+
"result": {},
|
|
136
|
+
"warnings": [],
|
|
137
|
+
"errors": [],
|
|
138
|
+
"metrics": {}
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Progress, retries, warnings, and debug output belong on stderr.
|
|
143
|
+
|
|
144
|
+
## Security
|
|
145
|
+
|
|
146
|
+
`pull` is read-only. It does not mutate Confluence, fetch drafts by default, bypass permissions, or call LLM services. Tokens, Authorization headers, cookies, signed download query parameters, and token-like strings are redacted before JSON envelopes, manifests, page metadata, and diagnostics are written.
|
|
147
|
+
|
|
148
|
+
Rendered HTML snapshots are sanitized by removing executable tags and event attributes. HTML macro content is made inert before conversion.
|
|
149
|
+
|
|
150
|
+
## Validation
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
pull validate pulled-confluence
|
|
154
|
+
pull validate pulled-confluence/manifest.yaml --json
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Validation checks manifest shape, AI navigation manifest paths, relative paths, page files, optional comment sidecars, asset checksums, diagnostics JSONL, Markdown local links, and token-like markers in text outputs.
|
|
158
|
+
|
|
159
|
+
## Development
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
uv sync --all-extras
|
|
163
|
+
uv run ruff check .
|
|
164
|
+
uv run pytest
|
|
165
|
+
uv build
|
|
166
|
+
uv run pull --help
|
|
167
|
+
uv run pull guide --json
|
|
168
|
+
uv run python tests/generate_fixture_output.py .tmp/generated-fixture
|
|
169
|
+
uv run pull validate .tmp/generated-fixture
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Live smoke testing requires a readable Confluence page and credentials through `PULL_*` or `CONFPUB_*`.
|
|
173
|
+
|
|
174
|
+
## Releasing
|
|
175
|
+
|
|
176
|
+
Versions are managed from `src/pull_cli/__init__.py` through Hatch. Use `uv run hatch version patch`, `uv run hatch version minor`, or `uv run hatch version major`; `pull --version`, built package metadata, and GitHub release tags are expected to match. See [RELEASING.md](RELEASING.md) for the PyPI trusted publisher setup and release flow.
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Releasing
|
|
2
|
+
|
|
3
|
+
## Versioning
|
|
4
|
+
|
|
5
|
+
`pull-cli` uses SemVer-style public versions: `MAJOR.MINOR.PATCH`.
|
|
6
|
+
|
|
7
|
+
The single source of truth is `src/pull_cli/__init__.py`:
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
__version__ = "0.1.0"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
`pyproject.toml` reads that value dynamically through Hatch, and the CLI reports the same value through:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv run pull --version
|
|
17
|
+
uv run pull version
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Bump versions with Hatch:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uv run hatch version patch
|
|
24
|
+
uv run hatch version minor
|
|
25
|
+
uv run hatch version major
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Use patch for backwards-compatible fixes, minor for backwards-compatible features, and major for intentional breaking changes.
|
|
29
|
+
|
|
30
|
+
## PyPI Trusted Publisher
|
|
31
|
+
|
|
32
|
+
Configure the PyPI trusted publisher with these values:
|
|
33
|
+
|
|
34
|
+
```text
|
|
35
|
+
PyPI Project Name: pull-cli
|
|
36
|
+
Owner: ThomasRohde
|
|
37
|
+
Repository name: pull-cli
|
|
38
|
+
Workflow name: publish.yml
|
|
39
|
+
Environment name: pypi
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
The publishing workflow is `.github/workflows/publish.yml`. It uses the `pypi` GitHub environment and PyPI trusted publishing, so no PyPI API token is stored in GitHub.
|
|
43
|
+
|
|
44
|
+
## Release Flow
|
|
45
|
+
|
|
46
|
+
1. Bump the version:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
uv run hatch version patch
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
2. Verify locally:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv run pull --version
|
|
56
|
+
uv run ruff check .
|
|
57
|
+
uv run pytest
|
|
58
|
+
uv build
|
|
59
|
+
uvx --from twine twine check dist/*
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
3. Commit the version bump:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
git add src/pull_cli/__init__.py
|
|
66
|
+
git commit -m "Release v$(uv run hatch version)"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
4. Push the commit, create a GitHub release tagged `vX.Y.Z`, and publish that release.
|
|
70
|
+
|
|
71
|
+
The `publish.yml` workflow only runs when a GitHub release is published. It checks that the release tag, after removing a leading `v`, matches `pull_cli.__version__` before building and uploading distributions to PyPI.
|