pull-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. pull_cli-0.1.0/.github/workflows/ci.yml +45 -0
  2. pull_cli-0.1.0/.github/workflows/publish.yml +54 -0
  3. pull_cli-0.1.0/.gitignore +17 -0
  4. pull_cli-0.1.0/AGENTS.md +9 -0
  5. pull_cli-0.1.0/LICENSE +21 -0
  6. pull_cli-0.1.0/PKG-INFO +218 -0
  7. pull_cli-0.1.0/README.md +180 -0
  8. pull_cli-0.1.0/RELEASING.md +71 -0
  9. pull_cli-0.1.0/pull_ai_confluence_design.md +1793 -0
  10. pull_cli-0.1.0/pyproject.toml +72 -0
  11. pull_cli-0.1.0/src/pull_cli/__init__.py +5 -0
  12. pull_cli-0.1.0/src/pull_cli/__main__.py +6 -0
  13. pull_cli-0.1.0/src/pull_cli/assets.py +235 -0
  14. pull_cli-0.1.0/src/pull_cli/attachment_extractors.py +85 -0
  15. pull_cli-0.1.0/src/pull_cli/cli.py +329 -0
  16. pull_cli-0.1.0/src/pull_cli/clients/__init__.py +8 -0
  17. pull_cli-0.1.0/src/pull_cli/clients/base.py +29 -0
  18. pull_cli-0.1.0/src/pull_cli/clients/cloud_v2.py +132 -0
  19. pull_cli-0.1.0/src/pull_cli/clients/data_center.py +360 -0
  20. pull_cli-0.1.0/src/pull_cli/clients/hybrid.py +15 -0
  21. pull_cli-0.1.0/src/pull_cli/config.py +82 -0
  22. pull_cli-0.1.0/src/pull_cli/crawler.py +51 -0
  23. pull_cli-0.1.0/src/pull_cli/envelope.py +59 -0
  24. pull_cli-0.1.0/src/pull_cli/errors.py +50 -0
  25. pull_cli-0.1.0/src/pull_cli/extractor.py +344 -0
  26. pull_cli-0.1.0/src/pull_cli/guide.py +115 -0
  27. pull_cli-0.1.0/src/pull_cli/html_normalizer.py +111 -0
  28. pull_cli-0.1.0/src/pull_cli/links.py +186 -0
  29. pull_cli-0.1.0/src/pull_cli/macros.py +527 -0
  30. pull_cli-0.1.0/src/pull_cli/markdown_writer.py +24 -0
  31. pull_cli-0.1.0/src/pull_cli/models.py +232 -0
  32. pull_cli-0.1.0/src/pull_cli/paths.py +45 -0
  33. pull_cli-0.1.0/src/pull_cli/resolver.py +72 -0
  34. pull_cli-0.1.0/src/pull_cli/security.py +103 -0
  35. pull_cli-0.1.0/src/pull_cli/validator.py +398 -0
  36. pull_cli-0.1.0/src/pull_cli/writer.py +792 -0
  37. pull_cli-0.1.0/tests/__init__.py +1 -0
  38. pull_cli-0.1.0/tests/conftest.py +114 -0
  39. pull_cli-0.1.0/tests/generate_fixture_output.py +48 -0
  40. pull_cli-0.1.0/tests/test_cli.py +205 -0
  41. pull_cli-0.1.0/tests/test_clients.py +137 -0
  42. pull_cli-0.1.0/tests/test_extraction.py +794 -0
  43. pull_cli-0.1.0/tests/test_resolver.py +47 -0
  44. pull_cli-0.1.0/uv.lock +1407 -0
@@ -0,0 +1,45 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, master]
6
+ pull_request:
7
+ branches: [main, master]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ test:
14
+ name: Python ${{ matrix.python-version }}
15
+ runs-on: ubuntu-latest
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ python-version: ["3.11", "3.12", "3.13"]
20
+
21
+ steps:
22
+ - name: Check out repository
23
+ uses: actions/checkout@v4
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v5
27
+ with:
28
+ enable-cache: true
29
+
30
+ - name: Set up Python
31
+ uses: actions/setup-python@v5
32
+ with:
33
+ python-version: ${{ matrix.python-version }}
34
+
35
+ - name: Install dependencies
36
+ run: uv sync --all-extras
37
+
38
+ - name: Lint
39
+ run: uv run ruff check .
40
+
41
+ - name: Test
42
+ run: uv run pytest
43
+
44
+ - name: Build
45
+ run: uv build
@@ -0,0 +1,54 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ publish:
12
+ name: Build and publish distribution
13
+ runs-on: ubuntu-latest
14
+ environment:
15
+ name: pypi
16
+ url: https://pypi.org/project/pull-cli/
17
+ permissions:
18
+ contents: read
19
+ id-token: write
20
+
21
+ steps:
22
+ - name: Check out repository
23
+ uses: actions/checkout@v4
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v5
27
+ with:
28
+ enable-cache: true
29
+
30
+ - name: Set up Python
31
+ uses: actions/setup-python@v5
32
+ with:
33
+ python-version: "3.13"
34
+
35
+ - name: Install dependencies
36
+ run: uv sync --all-extras
37
+
38
+ - name: Check release tag matches package version
39
+ run: |
40
+ package_version="$(uv run python -c 'import pull_cli; print(pull_cli.__version__)')"
41
+ tag_version="${GITHUB_REF_NAME#v}"
42
+ if [ "$package_version" != "$tag_version" ]; then
43
+ echo "Release tag $GITHUB_REF_NAME does not match package version $package_version"
44
+ exit 1
45
+ fi
46
+
47
+ - name: Build distributions
48
+ run: uv build
49
+
50
+ - name: Check distribution metadata
51
+ run: uvx --from twine twine check dist/*
52
+
53
+ - name: Publish package distributions to PyPI
54
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,17 @@
1
+ .venv/
2
+ .tmp/
3
+ dist/
4
+ build/
5
+ __pycache__/
6
+ .pytest_cache/
7
+ .ruff_cache/
8
+ .coverage
9
+ htmlcov/
10
+ *.py[cod]
11
+ *.egg-info/
12
+ .env
13
+ .env.*
14
+ !.env.example
15
+ *.log
16
+ .DS_Store
17
+ Thumbs.db
@@ -0,0 +1,9 @@
1
+ # Repository Guidance
2
+
3
+ - Keep `pull` focused on AI analysis of the current rendered Confluence page. Do not turn it into an alias of `confpub page pull`.
4
+ - Preserve the stdout/stderr contract: JSON mode writes exactly one JSON object to stdout; progress and diagnostics belong on stderr.
5
+ - Do not log or write credentials, tokens, cookies, Authorization headers, or signed download URLs. Route all persisted API data through the redaction helpers.
6
+ - Keep Confluence access read-only. Do not add publishing, draft mutation, or permission-bypass behavior.
7
+ - Prefer deterministic mocked fixtures for tests. Live Confluence checks are useful smoke tests, not required unit tests.
8
+ - When adding macro behavior, implement it through the registry and add a fixture test plus warning/manifest expectations.
9
+ - All manifest paths must remain relative to the output root, and `pull validate` should be extended when new output artifacts are added.
pull_cli-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Thomas Rohde
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,218 @@
1
+ Metadata-Version: 2.4
2
+ Name: pull-cli
3
+ Version: 0.1.0
4
+ Summary: AI-optimized Confluence evidence package extractor
5
+ Project-URL: Homepage, https://github.com/ThomasRohde/pull-cli
6
+ Project-URL: Repository, https://github.com/ThomasRohde/pull-cli
7
+ Project-URL: Issues, https://github.com/ThomasRohde/pull-cli/issues
8
+ Author-email: Thomas Rohde <rohde.thomas@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: ai,atlassian,cli,confluence,markdown
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: atlassian-python-api>=4.0.7
23
+ Requires-Dist: beautifulsoup4>=4.12.3
24
+ Requires-Dist: lxml>=5.2.0
25
+ Requires-Dist: markdownify>=0.13.1
26
+ Requires-Dist: pyyaml>=6.0.2
27
+ Provides-Extra: dev
28
+ Requires-Dist: hatch>=1.14.0; extra == 'dev'
29
+ Requires-Dist: pytest>=8.3.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.6.9; extra == 'dev'
31
+ Provides-Extra: extract
32
+ Requires-Dist: defusedxml>=0.7.1; extra == 'extract'
33
+ Requires-Dist: openpyxl>=3.1.5; extra == 'extract'
34
+ Requires-Dist: pypdf>=5.0.0; extra == 'extract'
35
+ Requires-Dist: python-docx>=1.1.2; extra == 'extract'
36
+ Requires-Dist: python-pptx>=1.0.2; extra == 'extract'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # pull-cli
40
+
41
+ `pull-cli` installs the `pull` command, a read-only Confluence extractor for AI-consumable evidence packages. It is rendered-page-first: page Markdown, and the optional Markdown bundle in full mode, are based on the current published page as visible to the authenticated user, while storage XML is kept for macro recovery, provenance, and fallback.
42
+
43
+ The default output mode is `simple`: a quiet agent-facing package with the root AI Markdown file, per-page Markdown files, assets/sidecars, and validation control files. Use `--output-mode full` when you also want `bundle.md`, page HTML snapshots, and storage-source sidecars.
44
+
45
+ Confluence access is implemented through `atlassian-python-api` behind a small `pull_cli.clients` protocol. The extraction, redaction, manifest, asset, link, and validation contracts remain owned by `pull-cli`.
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ uvx pull-cli --help
51
+ uv tool install pull-cli
52
+ pip install pull-cli
53
+ ```
54
+
55
+ The package name is `pull-cli`. The import package is `pull_cli`. Console scripts are `pull` and `pull-cli`.
56
+
57
+ ## Quickstart
58
+
59
+ Cloud:
60
+
61
+ ```bash
62
+ export PULL_URL=https://example.atlassian.net/wiki
63
+ export PULL_USER=you@example.com
64
+ export PULL_TOKEN=your-api-token
65
+ pull 123456 -o pulled-confluence
66
+ ```
67
+
68
+ Data Center or Server:
69
+
70
+ ```bash
71
+ export PULL_URL=https://confluence.example.com/confluence
72
+ export PULL_TOKEN=your-personal-access-token
73
+ pull --page-id 123456 -o pulled-confluence
74
+ ```
75
+
76
+ `CONFPUB_URL`, `CONFPUB_USER`, `CONFPUB_TOKEN`, and `CONFPUB_SSL_VERIFY` are accepted as compatibility fallbacks after `PULL_*` variables.
77
+
78
+ ## CLI Examples
79
+
80
+ ```bash
81
+ pull 123456 -o pulled
82
+ pull "https://example.atlassian.net/wiki/spaces/EA/pages/123456/Architecture" -o pulled
83
+ pull --space EA --title "Architecture Overview" -o pulled
84
+ pull --page-id 123456 --tree --depth 3 --max-pages 100 -o tree
85
+ pull --page-id 123456 --tree --assets all --extract-attachments -o offline
86
+ pull --page-id 123456 --tree --comments -o with-comments
87
+ pull --page-id 123456 --output-mode full -o full-evidence
88
+ pull --page-id 123456 --output-mode simple --bundle -o simple-with-bundle
89
+ pull --page-id 123456 --json -o pulled
90
+ pull validate pulled
91
+ pull guide --json
92
+ ```
93
+
94
+ Selector resolution order is: explicit `--page-id`, explicit `--url`, positional URL, positional numeric page ID, then `--space` plus `--title`.
95
+
96
+ ## Output Package
97
+
98
+ Default `simple` mode:
99
+
100
+ ```text
101
+ pulled-confluence/
102
+ ├── page-title.md
103
+ ├── page-title.yaml
104
+ ├── manifest.yaml
105
+ ├── pages/
106
+ │ └── 0001-page-slug/
107
+ │ ├── index.md
108
+ │ ├── page.json
109
+ │ ├── comments.md # with --comments, only when comments exist
110
+ │ └── assets/
111
+ └── diagnostics/
112
+ ├── warnings.jsonl
113
+ └── unresolved-links.md
114
+ ```
115
+
116
+ `page-title.md` is named from the sanitized root page title and is the recommended first file to give another AI agent. In simple mode it links only the reading/navigation surface: page Markdown paths, assets, sidecars, and explicitly requested agent-facing extras such as `bundle.md` or `chunks.jsonl`. Warning counts are shown, but control files are not linked from the root AI Markdown.
117
+
118
+ `page-title.yaml` is the machine-readable version of that AI navigation manifest, also named from the sanitized root page title. It intentionally omits noisy provenance and raw API details; use `manifest.yaml` when you need full validation/provenance data. The exact generated filenames are recorded in `manifest.yaml` under `paths.ai_entry` and `paths.ai_manifest`. AI navigation paths are package-root-relative: resolve them against the directory containing the root AI Markdown/YAML file, not the caller's shell working directory.
119
+
120
+ `manifest.yaml`, `page.json`, and diagnostics files are still written in simple mode so `pull validate <output-dir>` and provenance checks work. `--force` never deletes stale files from earlier runs; use `--clean` when switching modes if you need the physical tree to contain only files from the new mode.
121
+
122
+ `--output-mode full` adds the full evidence artifacts:
123
+
124
+ ```text
125
+ pulled-confluence/
126
+ ├── bundle.md
127
+ └── pages/
128
+ └── 0001-page-slug/
129
+ ├── index.html
130
+ └── source.storage.xml
131
+ ```
132
+
133
+ `bundle.md` concatenates pages in page/tree order with stable delimiters for AI use; local links embedded in the bundle are rebased to the package root. `index.html` and `source.storage.xml` are raw/reference artifacts, not the primary navigation surface.
134
+
135
+ For tree pulls, nested page paths are the default. The manifest always carries stable numeric ordering.
136
+
137
+ ## Auth and Config
138
+
139
+ Resolution order:
140
+
141
+ 1. CLI flags such as `--base-url`, `--user`, `--token`, `--ssl-verify`.
142
+ 2. `PULL_*` environment variables.
143
+ 3. Optional YAML config from `--config`.
144
+ 4. `CONFPUB_*` compatibility environment variables.
145
+
146
+ `--ssl-verify` accepts `true`, `false`, or a CA bundle path.
147
+
148
+ ## Macro, Asset, and Link Behavior
149
+
150
+ The extractor uses a macro adapter registry. Current adapters cover panels/admonitions, code/noformat, status, expand, tabs, layout flattening, TOC placeholders, children/page tree links when in scope, include/excerpt placeholders or inline source when available, attachments, displayed files, Jira placeholders, diagram snapshots, dynamic snapshots, HTML macro sanitization, and unknown macro placeholders.
151
+
152
+ Asset policy defaults to `visible`: rendered images, visible attachment links, file macros, and rendered diagram images where discoverable. `--assets page` downloads all page attachments. `--assets all` includes visible/referenced assets plus all page attachments and macro-listed files where discoverable. `--no-assets` skips downloads and preserves source links with warnings.
153
+
154
+ Local links to pages in the pulled tree are rewritten to relative `index.md` paths. Downloaded asset links are rewritten to local files. External, mailto, Jira, and out-of-scope Confluence links are preserved. Same-page anchors are normalized where possible; unresolved anchors become diagnostics.
155
+
156
+ ## Comments
157
+
158
+ Comments are skipped by default. Use `--comments` to fetch page-level and inline comments for each pulled page. When comments exist, `pull` writes a page-local `comments.md` sidecar with agent-readable metadata and Markdown-converted comment bodies.
159
+
160
+ Comment sidecars are agent-facing reading surfaces: the root AI Markdown page hierarchy links them in simple mode, the page `index.md` header links the local sidecar, and the AI YAML includes the optional comments path and count. If one page's comments cannot be fetched, the pull continues with `W_COMMENTS_FETCH_FAILED` and validation can still pass for the partial package.
161
+
162
+ ## JSON Mode
163
+
164
+ With `--json` or `LLM=true`, stdout is exactly one JSON object with:
165
+
166
+ ```json
167
+ {
168
+ "schema_version": "1.0",
169
+ "request_id": "req_...",
170
+ "ok": true,
171
+ "command": "pull",
172
+ "target": {},
173
+ "result": {},
174
+ "warnings": [],
175
+ "errors": [],
176
+ "metrics": {}
177
+ }
178
+ ```
179
+
180
+ Progress, retries, warnings, and debug output belong on stderr.
181
+
182
+ ## Security
183
+
184
+ `pull` is read-only. It does not mutate Confluence, fetch drafts by default, bypass permissions, or call LLM services. Tokens, Authorization headers, cookies, signed download query parameters, and token-like strings are redacted before JSON envelopes, manifests, page metadata, and diagnostics are written.
185
+
186
+ Rendered HTML snapshots are sanitized by removing executable tags and event attributes. HTML macro content is made inert before conversion.
187
+
188
+ ## Validation
189
+
190
+ ```bash
191
+ pull validate pulled-confluence
192
+ pull validate pulled-confluence/manifest.yaml --json
193
+ ```
194
+
195
+ Validation checks manifest shape, AI navigation manifest paths, relative paths, page files, optional comment sidecars, asset checksums, diagnostics JSONL, Markdown local links, and token-like markers in text outputs.
196
+
197
+ ## Development
198
+
199
+ ```bash
200
+ uv sync --all-extras
201
+ uv run ruff check .
202
+ uv run pytest
203
+ uv build
204
+ uv run pull --help
205
+ uv run pull guide --json
206
+ uv run python tests/generate_fixture_output.py .tmp/generated-fixture
207
+ uv run pull validate .tmp/generated-fixture
208
+ ```
209
+
210
+ Live smoke testing requires a readable Confluence page and credentials through `PULL_*` or `CONFPUB_*`.
211
+
212
+ ## Releasing
213
+
214
+ Versions are managed from `src/pull_cli/__init__.py` through Hatch. Use `uv run hatch version patch`, `uv run hatch version minor`, or `uv run hatch version major`; `pull --version`, built package metadata, and GitHub release tags are expected to match. See [RELEASING.md](RELEASING.md) for the PyPI trusted publisher setup and release flow.
215
+
216
+ ## License
217
+
218
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,180 @@
1
+ # pull-cli
2
+
3
+ `pull-cli` installs the `pull` command, a read-only Confluence extractor for AI-consumable evidence packages. It is rendered-page-first: page Markdown, and the optional Markdown bundle in full mode, are based on the current published page as visible to the authenticated user, while storage XML is kept for macro recovery, provenance, and fallback.
4
+
5
+ The default output mode is `simple`: a quiet agent-facing package with the root AI Markdown file, per-page Markdown files, assets/sidecars, and validation control files. Use `--output-mode full` when you also want `bundle.md`, page HTML snapshots, and storage-source sidecars.
6
+
7
+ Confluence access is implemented through `atlassian-python-api` behind a small `pull_cli.clients` protocol. The extraction, redaction, manifest, asset, link, and validation contracts remain owned by `pull-cli`.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ uvx pull-cli --help
13
+ uv tool install pull-cli
14
+ pip install pull-cli
15
+ ```
16
+
17
+ The package name is `pull-cli`. The import package is `pull_cli`. Console scripts are `pull` and `pull-cli`.
18
+
19
+ ## Quickstart
20
+
21
+ Cloud:
22
+
23
+ ```bash
24
+ export PULL_URL=https://example.atlassian.net/wiki
25
+ export PULL_USER=you@example.com
26
+ export PULL_TOKEN=your-api-token
27
+ pull 123456 -o pulled-confluence
28
+ ```
29
+
30
+ Data Center or Server:
31
+
32
+ ```bash
33
+ export PULL_URL=https://confluence.example.com/confluence
34
+ export PULL_TOKEN=your-personal-access-token
35
+ pull --page-id 123456 -o pulled-confluence
36
+ ```
37
+
38
+ `CONFPUB_URL`, `CONFPUB_USER`, `CONFPUB_TOKEN`, and `CONFPUB_SSL_VERIFY` are accepted as compatibility fallbacks after `PULL_*` variables.
39
+
40
+ ## CLI Examples
41
+
42
+ ```bash
43
+ pull 123456 -o pulled
44
+ pull "https://example.atlassian.net/wiki/spaces/EA/pages/123456/Architecture" -o pulled
45
+ pull --space EA --title "Architecture Overview" -o pulled
46
+ pull --page-id 123456 --tree --depth 3 --max-pages 100 -o tree
47
+ pull --page-id 123456 --tree --assets all --extract-attachments -o offline
48
+ pull --page-id 123456 --tree --comments -o with-comments
49
+ pull --page-id 123456 --output-mode full -o full-evidence
50
+ pull --page-id 123456 --output-mode simple --bundle -o simple-with-bundle
51
+ pull --page-id 123456 --json -o pulled
52
+ pull validate pulled
53
+ pull guide --json
54
+ ```
55
+
56
+ Selector resolution order is: explicit `--page-id`, explicit `--url`, positional URL, positional numeric page ID, then `--space` plus `--title`.
57
+
58
+ ## Output Package
59
+
60
+ Default `simple` mode:
61
+
62
+ ```text
63
+ pulled-confluence/
64
+ ├── page-title.md
65
+ ├── page-title.yaml
66
+ ├── manifest.yaml
67
+ ├── pages/
68
+ │ └── 0001-page-slug/
69
+ │ ├── index.md
70
+ │ ├── page.json
71
+ │ ├── comments.md # with --comments, only when comments exist
72
+ │ └── assets/
73
+ └── diagnostics/
74
+ ├── warnings.jsonl
75
+ └── unresolved-links.md
76
+ ```
77
+
78
+ `page-title.md` is named from the sanitized root page title and is the recommended first file to give another AI agent. In simple mode it links only the reading/navigation surface: page Markdown paths, assets, sidecars, and explicitly requested agent-facing extras such as `bundle.md` or `chunks.jsonl`. Warning counts are shown, but control files are not linked from the root AI Markdown.
79
+
80
+ `page-title.yaml` is the machine-readable version of that AI navigation manifest, also named from the sanitized root page title. It intentionally omits noisy provenance and raw API details; use `manifest.yaml` when you need full validation/provenance data. The exact generated filenames are recorded in `manifest.yaml` under `paths.ai_entry` and `paths.ai_manifest`. AI navigation paths are package-root-relative: resolve them against the directory containing the root AI Markdown/YAML file, not the caller's shell working directory.
81
+
82
+ `manifest.yaml`, `page.json`, and diagnostics files are still written in simple mode so `pull validate <output-dir>` and provenance checks work. `--force` never deletes stale files from earlier runs; use `--clean` when switching modes if you need the physical tree to contain only files from the new mode.
83
+
84
+ `--output-mode full` adds the full evidence artifacts:
85
+
86
+ ```text
87
+ pulled-confluence/
88
+ ├── bundle.md
89
+ └── pages/
90
+ └── 0001-page-slug/
91
+ ├── index.html
92
+ └── source.storage.xml
93
+ ```
94
+
95
+ `bundle.md` concatenates pages in page/tree order with stable delimiters for AI use; local links embedded in the bundle are rebased to the package root. `index.html` and `source.storage.xml` are raw/reference artifacts, not the primary navigation surface.
96
+
97
+ For tree pulls, nested page paths are the default. The manifest always carries stable numeric ordering.
98
+
99
+ ## Auth and Config
100
+
101
+ Resolution order:
102
+
103
+ 1. CLI flags such as `--base-url`, `--user`, `--token`, `--ssl-verify`.
104
+ 2. `PULL_*` environment variables.
105
+ 3. Optional YAML config from `--config`.
106
+ 4. `CONFPUB_*` compatibility environment variables.
107
+
108
+ `--ssl-verify` accepts `true`, `false`, or a CA bundle path.
109
+
110
+ ## Macro, Asset, and Link Behavior
111
+
112
+ The extractor uses a macro adapter registry. Current adapters cover panels/admonitions, code/noformat, status, expand, tabs, layout flattening, TOC placeholders, children/page tree links when in scope, include/excerpt placeholders or inline source when available, attachments, displayed files, Jira placeholders, diagram snapshots, dynamic snapshots, HTML macro sanitization, and unknown macro placeholders.
113
+
114
+ Asset policy defaults to `visible`: rendered images, visible attachment links, file macros, and rendered diagram images where discoverable. `--assets page` downloads all page attachments. `--assets all` includes visible/referenced assets plus all page attachments and macro-listed files where discoverable. `--no-assets` skips downloads and preserves source links with warnings.
115
+
116
+ Local links to pages in the pulled tree are rewritten to relative `index.md` paths. Downloaded asset links are rewritten to local files. External, mailto, Jira, and out-of-scope Confluence links are preserved. Same-page anchors are normalized where possible; unresolved anchors become diagnostics.
117
+
118
+ ## Comments
119
+
120
+ Comments are skipped by default. Use `--comments` to fetch page-level and inline comments for each pulled page. When comments exist, `pull` writes a page-local `comments.md` sidecar with agent-readable metadata and Markdown-converted comment bodies.
121
+
122
+ Comment sidecars are agent-facing reading surfaces: the root AI Markdown page hierarchy links them in simple mode, the page `index.md` header links the local sidecar, and the AI YAML includes the optional comments path and count. If one page's comments cannot be fetched, the pull continues with `W_COMMENTS_FETCH_FAILED` and validation can still pass for the partial package.
123
+
124
+ ## JSON Mode
125
+
126
+ With `--json` or `LLM=true`, stdout is exactly one JSON object with:
127
+
128
+ ```json
129
+ {
130
+ "schema_version": "1.0",
131
+ "request_id": "req_...",
132
+ "ok": true,
133
+ "command": "pull",
134
+ "target": {},
135
+ "result": {},
136
+ "warnings": [],
137
+ "errors": [],
138
+ "metrics": {}
139
+ }
140
+ ```
141
+
142
+ Progress, retries, warnings, and debug output belong on stderr.
143
+
144
+ ## Security
145
+
146
+ `pull` is read-only. It does not mutate Confluence, fetch drafts by default, bypass permissions, or call LLM services. Tokens, Authorization headers, cookies, signed download query parameters, and token-like strings are redacted before JSON envelopes, manifests, page metadata, and diagnostics are written.
147
+
148
+ Rendered HTML snapshots are sanitized by removing executable tags and event attributes. HTML macro content is made inert before conversion.
149
+
150
+ ## Validation
151
+
152
+ ```bash
153
+ pull validate pulled-confluence
154
+ pull validate pulled-confluence/manifest.yaml --json
155
+ ```
156
+
157
+ Validation checks manifest shape, AI navigation manifest paths, relative paths, page files, optional comment sidecars, asset checksums, diagnostics JSONL, Markdown local links, and token-like markers in text outputs.
158
+
159
+ ## Development
160
+
161
+ ```bash
162
+ uv sync --all-extras
163
+ uv run ruff check .
164
+ uv run pytest
165
+ uv build
166
+ uv run pull --help
167
+ uv run pull guide --json
168
+ uv run python tests/generate_fixture_output.py .tmp/generated-fixture
169
+ uv run pull validate .tmp/generated-fixture
170
+ ```
171
+
172
+ Live smoke testing requires a readable Confluence page and credentials through `PULL_*` or `CONFPUB_*`.
173
+
174
+ ## Releasing
175
+
176
+ Versions are managed from `src/pull_cli/__init__.py` through Hatch. Use `uv run hatch version patch`, `uv run hatch version minor`, or `uv run hatch version major`; `pull --version`, built package metadata, and GitHub release tags are expected to match. See [RELEASING.md](RELEASING.md) for the PyPI trusted publisher setup and release flow.
177
+
178
+ ## License
179
+
180
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,71 @@
1
+ # Releasing
2
+
3
+ ## Versioning
4
+
5
+ `pull-cli` uses SemVer-style public versions: `MAJOR.MINOR.PATCH`.
6
+
7
+ The single source of truth is `src/pull_cli/__init__.py`:
8
+
9
+ ```python
10
+ __version__ = "0.1.0"
11
+ ```
12
+
13
+ `pyproject.toml` reads that value dynamically through Hatch, and the CLI reports the same value through:
14
+
15
+ ```bash
16
+ uv run pull --version
17
+ uv run pull version
18
+ ```
19
+
20
+ Bump versions with Hatch:
21
+
22
+ ```bash
23
+ uv run hatch version patch
24
+ uv run hatch version minor
25
+ uv run hatch version major
26
+ ```
27
+
28
+ Use patch for backwards-compatible fixes, minor for backwards-compatible features, and major for intentional breaking changes.
29
+
30
+ ## PyPI Trusted Publisher
31
+
32
+ Configure the PyPI trusted publisher with these values:
33
+
34
+ ```text
35
+ PyPI Project Name: pull-cli
36
+ Owner: ThomasRohde
37
+ Repository name: pull-cli
38
+ Workflow name: publish.yml
39
+ Environment name: pypi
40
+ ```
41
+
42
+ The publishing workflow is `.github/workflows/publish.yml`. It uses the `pypi` GitHub environment and PyPI trusted publishing, so no PyPI API token is stored in GitHub.
43
+
44
+ ## Release Flow
45
+
46
+ 1. Bump the version:
47
+
48
+ ```bash
49
+ uv run hatch version patch
50
+ ```
51
+
52
+ 2. Verify locally:
53
+
54
+ ```bash
55
+ uv run pull --version
56
+ uv run ruff check .
57
+ uv run pytest
58
+ uv build
59
+ uvx --from twine twine check dist/*
60
+ ```
61
+
62
+ 3. Commit the version bump:
63
+
64
+ ```bash
65
+ git add src/pull_cli/__init__.py
66
+ git commit -m "Release v$(uv run hatch version)"
67
+ ```
68
+
69
+ 4. Push the commit, create a GitHub release tagged `vX.Y.Z`, and publish that release.
70
+
71
+ The `publish.yml` workflow only runs when a GitHub release is published. It checks that the release tag, after removing a leading `v`, matches `pull_cli.__version__` before building and uploading distributions to PyPI.