pdfdancer-client-python 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdfdancer-client-python might be problematic. Click here for more details.
- pdfdancer_client_python-0.2.7/PKG-INFO +189 -0
- pdfdancer_client_python-0.2.7/README.md +163 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/pyproject.toml +1 -1
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/pdfdancer_v1.py +20 -8
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/types.py +0 -7
- pdfdancer_client_python-0.2.7/src/pdfdancer_client_python.egg-info/PKG-INFO +189 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer_client_python.egg-info/SOURCES.txt +1 -0
- pdfdancer_client_python-0.2.7/tests/conftest.py +9 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/test_page.py +2 -2
- pdfdancer_client_python-0.2.5/PKG-INFO +0 -305
- pdfdancer_client_python-0.2.5/README.md +0 -279
- pdfdancer_client_python-0.2.5/src/pdfdancer_client_python.egg-info/PKG-INFO +0 -305
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/.github/workflows/ci.yml +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/.gitignore +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/CLAUDE.md +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/docs/openapi.yml +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/release.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/requirements-dev.txt +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/requirements.txt +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/setup.cfg +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/__init__.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/exceptions.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/image_builder.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/models.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/paragraph_builder.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer_client_python.egg-info/dependency_links.txt +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer_client_python.egg-info/requires.txt +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer_client_python.egg-info/top_level.txt +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/__init__.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/__init__.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/test_acroform.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/test_form_x_objects.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/test_image.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/test_line.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/test_paragraph.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/e2e/test_path.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/fixtures/DancingScript-Regular.ttf +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/fixtures/JetBrainsMono-Regular.ttf +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/fixtures/ObviouslyAwesome.pdf +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/fixtures/basic-paths.pdf +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/fixtures/form-xobject-example.pdf +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/fixtures/logo-80.png +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/fixtures/mixed-form-types.pdf +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/test_authentication.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/test_models.py +0 -0
- {pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/tests/test_openapi_compliance.py +0 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pdfdancer-client-python
|
|
3
|
+
Version: 0.2.7
|
|
4
|
+
Summary: Python client for PDFDancer API
|
|
5
|
+
Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://www.pdfdancer.com/
|
|
8
|
+
Project-URL: Repository, https://github.com/MenschMachine/pdfdancer-client-python
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: requests>=2.25.0
|
|
18
|
+
Requires-Dist: pydantic>=1.8.0
|
|
19
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
23
|
+
Requires-Dist: black>=22.0; extra == "dev"
|
|
24
|
+
Requires-Dist: flake8>=5.0; extra == "dev"
|
|
25
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
26
|
+
|
|
27
|
+
# PDFDancer Python Client
|
|
28
|
+
|
|
29
|
+
Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically in just a few lines.
|
|
30
|
+
|
|
31
|
+
## Highlights
|
|
32
|
+
|
|
33
|
+
- Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page, coordinates, or text prefixes
|
|
34
|
+
- Edit or delete existing content with fluent paragraph/text editors and safe apply-on-exit context managers
|
|
35
|
+
- Fill or update form fields and propagate the changes back to the document instantly
|
|
36
|
+
- Add brand-new content with paragraph/image builders, custom fonts, and precise page positioning
|
|
37
|
+
- Download results as bytes for downstream processing or save directly to disk with one method call
|
|
38
|
+
|
|
39
|
+
## Core Capabilities
|
|
40
|
+
|
|
41
|
+
- Clean up layout by moving or deleting paragraphs, text lines, or shapes on specific pages
|
|
42
|
+
- Search and filter content (e.g., paragraphs starting with "Invoice") to drive custom workflows
|
|
43
|
+
- Redact or replace text in bulk with chained editor operations
|
|
44
|
+
- Populate AcroForms for contract generation or onboarding flows
|
|
45
|
+
- Insert logos, signatures, and generated paragraphs at deterministic coordinates
|
|
46
|
+
- Export modified PDFs as bytes for APIs, S3 uploads, or direct file saves
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- Python 3.9 or newer
|
|
51
|
+
- A PDFDancer API token (set `PDFDANCER_TOKEN` or pass `token=...`)
|
|
52
|
+
- Network access to a PDFDancer service (defaults to `https://api.pdfdancer.com`; override with `PDFDANCER_BASE_URL`)
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install pdfdancer-client-python
|
|
58
|
+
|
|
59
|
+
# Editable install for local development
|
|
60
|
+
pip install -e .
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Getting Started
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from pathlib import Path
|
|
67
|
+
from pdfdancer import Color, PDFDancer
|
|
68
|
+
|
|
69
|
+
with PDFDancer.open(
|
|
70
|
+
pdf_data=Path("input.pdf"),
|
|
71
|
+
token="your-api-token", # optional when PDFDANCER_TOKEN is set
|
|
72
|
+
base_url="https://api.pdfdancer.com",
|
|
73
|
+
) as pdf:
|
|
74
|
+
# Locate existing content
|
|
75
|
+
heading = pdf.page(0).select_paragraphs_starting_with("Executive Summary")[0]
|
|
76
|
+
heading.edit().replace("Overview").apply()
|
|
77
|
+
|
|
78
|
+
# Add a new paragraph using the fluent builder
|
|
79
|
+
pdf.new_paragraph() \
|
|
80
|
+
.text("Generated with PDFDancer") \
|
|
81
|
+
.font("Helvetica", 12) \
|
|
82
|
+
.color(Color(70, 70, 70)) \
|
|
83
|
+
.line_spacing(1.4) \
|
|
84
|
+
.at(page_index=0, x=72, y=520) \
|
|
85
|
+
.add()
|
|
86
|
+
|
|
87
|
+
# Persist the modified document
|
|
88
|
+
pdf.save("output.pdf")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Authentication Tips
|
|
92
|
+
|
|
93
|
+
- Prefer setting `PDFDANCER_TOKEN` in your environment for local development.
|
|
94
|
+
- Override the API host by setting `PDFDANCER_BASE_URL` or passing `base_url="https://sandbox.pdfdancer.com"`.
|
|
95
|
+
- Use the `timeout` parameter on `PDFDancer.open()` to adjust HTTP read timeouts.
|
|
96
|
+
|
|
97
|
+
## Selecting PDF Content
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
with PDFDancer.open("report.pdf") as pdf: # environment variables provide token/URL
|
|
101
|
+
all_paragraphs = pdf.select_paragraphs()
|
|
102
|
+
page_zero_images = pdf.page(0).select_images()
|
|
103
|
+
form_fields = pdf.page(2).select_form_fields()
|
|
104
|
+
paths_at_cursor = pdf.page(3).select_paths_at(x=150, y=320)
|
|
105
|
+
|
|
106
|
+
page = pdf.page(0).get()
|
|
107
|
+
print(page.internal_id, page.position.bounding_rect)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
|
|
111
|
+
|
|
112
|
+
## Editing Text and Forms
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
with PDFDancer.open("report.pdf") as pdf:
|
|
116
|
+
paragraph = pdf.page(0).select_paragraphs_starting_with("Disclaimer")[0]
|
|
117
|
+
|
|
118
|
+
# Chain updates explicitly…
|
|
119
|
+
paragraph.edit() \
|
|
120
|
+
.replace("Updated disclaimer text") \
|
|
121
|
+
.font("Roboto-Regular", 11) \
|
|
122
|
+
.line_spacing(1.1) \
|
|
123
|
+
.move_to(72, 140) \
|
|
124
|
+
.apply()
|
|
125
|
+
|
|
126
|
+
# …or use the context manager to auto-apply on success
|
|
127
|
+
with paragraph.edit() as edit:
|
|
128
|
+
edit.replace("Context-managed update").color(Color(120, 0, 0))
|
|
129
|
+
|
|
130
|
+
# Update an AcroForm field
|
|
131
|
+
field = pdf.page(1).select_form_fields_by_name("signature")[0]
|
|
132
|
+
field.edit().value("Signed by Jane Doe").apply()
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Adding New Content
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
with PDFDancer.open("report.pdf") as pdf:
|
|
139
|
+
# Register fonts from the service
|
|
140
|
+
fonts = pdf.find_fonts("Roboto", 12)
|
|
141
|
+
pdf.register_font("/path/to/custom.ttf")
|
|
142
|
+
|
|
143
|
+
# Paragraphs
|
|
144
|
+
pdf.new_paragraph() \
|
|
145
|
+
.text("Greetings from PDFDancer!") \
|
|
146
|
+
.font(fonts[0].name, fonts[0].size) \
|
|
147
|
+
.at(page_index=0, x=220, y=480) \
|
|
148
|
+
.add()
|
|
149
|
+
|
|
150
|
+
# Raster images
|
|
151
|
+
pdf.new_image() \
|
|
152
|
+
.from_file(Path("logo.png")) \
|
|
153
|
+
.at(page=0, x=48, y=700) \
|
|
154
|
+
.add()
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Downloading Results
|
|
158
|
+
|
|
159
|
+
- `pdf.get_pdf_file()` returns the modified PDF as `bytes` (ideal for storage services or HTTP responses).
|
|
160
|
+
- `pdf.save("output.pdf")` writes directly to disk, creating directories when needed.
|
|
161
|
+
|
|
162
|
+
## Error Handling
|
|
163
|
+
|
|
164
|
+
Most operations raise subclasses of `PdfDancerException`:
|
|
165
|
+
|
|
166
|
+
- `ValidationException` for client-side validation issues (missing token, invalid coordinates, etc.).
|
|
167
|
+
- `FontNotFoundException` when the service cannot locate a requested font.
|
|
168
|
+
- `HttpClientException` for transport or server errors with detailed messages.
|
|
169
|
+
- `SessionException` when session creation fails.
|
|
170
|
+
|
|
171
|
+
Wrap complex workflows in `try/except` blocks to surface actionable errors to your users.
|
|
172
|
+
|
|
173
|
+
## Local Development
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
python -m venv venv
|
|
177
|
+
source venv/bin/activate # Windows: venv\Scripts\activate
|
|
178
|
+
pip install -e .
|
|
179
|
+
pip install -r requirements-dev.txt
|
|
180
|
+
|
|
181
|
+
pytest -q # run the fast unit suite
|
|
182
|
+
pytest tests/e2e # integration tests (requires live API + fixtures)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Package builds are handled by `python -m build`, and release artifacts are published via `python release.py`.
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
MIT © The Famous Cat Ltd.
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# PDFDancer Python Client
|
|
2
|
+
|
|
3
|
+
Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically in just a few lines.
|
|
4
|
+
|
|
5
|
+
## Highlights
|
|
6
|
+
|
|
7
|
+
- Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page, coordinates, or text prefixes
|
|
8
|
+
- Edit or delete existing content with fluent paragraph/text editors and safe apply-on-exit context managers
|
|
9
|
+
- Fill or update form fields and propagate the changes back to the document instantly
|
|
10
|
+
- Add brand-new content with paragraph/image builders, custom fonts, and precise page positioning
|
|
11
|
+
- Download results as bytes for downstream processing or save directly to disk with one method call
|
|
12
|
+
|
|
13
|
+
## Core Capabilities
|
|
14
|
+
|
|
15
|
+
- Clean up layout by moving or deleting paragraphs, text lines, or shapes on specific pages
|
|
16
|
+
- Search and filter content (e.g., paragraphs starting with "Invoice") to drive custom workflows
|
|
17
|
+
- Redact or replace text in bulk with chained editor operations
|
|
18
|
+
- Populate AcroForms for contract generation or onboarding flows
|
|
19
|
+
- Insert logos, signatures, and generated paragraphs at deterministic coordinates
|
|
20
|
+
- Export modified PDFs as bytes for APIs, S3 uploads, or direct file saves
|
|
21
|
+
|
|
22
|
+
## Requirements
|
|
23
|
+
|
|
24
|
+
- Python 3.9 or newer
|
|
25
|
+
- A PDFDancer API token (set `PDFDANCER_TOKEN` or pass `token=...`)
|
|
26
|
+
- Network access to a PDFDancer service (defaults to `https://api.pdfdancer.com`; override with `PDFDANCER_BASE_URL`)
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install pdfdancer-client-python
|
|
32
|
+
|
|
33
|
+
# Editable install for local development
|
|
34
|
+
pip install -e .
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Getting Started
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
from pdfdancer import Color, PDFDancer
|
|
42
|
+
|
|
43
|
+
with PDFDancer.open(
|
|
44
|
+
pdf_data=Path("input.pdf"),
|
|
45
|
+
token="your-api-token", # optional when PDFDANCER_TOKEN is set
|
|
46
|
+
base_url="https://api.pdfdancer.com",
|
|
47
|
+
) as pdf:
|
|
48
|
+
# Locate existing content
|
|
49
|
+
heading = pdf.page(0).select_paragraphs_starting_with("Executive Summary")[0]
|
|
50
|
+
heading.edit().replace("Overview").apply()
|
|
51
|
+
|
|
52
|
+
# Add a new paragraph using the fluent builder
|
|
53
|
+
pdf.new_paragraph() \
|
|
54
|
+
.text("Generated with PDFDancer") \
|
|
55
|
+
.font("Helvetica", 12) \
|
|
56
|
+
.color(Color(70, 70, 70)) \
|
|
57
|
+
.line_spacing(1.4) \
|
|
58
|
+
.at(page_index=0, x=72, y=520) \
|
|
59
|
+
.add()
|
|
60
|
+
|
|
61
|
+
# Persist the modified document
|
|
62
|
+
pdf.save("output.pdf")
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Authentication Tips
|
|
66
|
+
|
|
67
|
+
- Prefer setting `PDFDANCER_TOKEN` in your environment for local development.
|
|
68
|
+
- Override the API host by setting `PDFDANCER_BASE_URL` or passing `base_url="https://sandbox.pdfdancer.com"`.
|
|
69
|
+
- Use the `timeout` parameter on `PDFDancer.open()` to adjust HTTP read timeouts.
|
|
70
|
+
|
|
71
|
+
## Selecting PDF Content
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
with PDFDancer.open("report.pdf") as pdf: # environment variables provide token/URL
|
|
75
|
+
all_paragraphs = pdf.select_paragraphs()
|
|
76
|
+
page_zero_images = pdf.page(0).select_images()
|
|
77
|
+
form_fields = pdf.page(2).select_form_fields()
|
|
78
|
+
paths_at_cursor = pdf.page(3).select_paths_at(x=150, y=320)
|
|
79
|
+
|
|
80
|
+
page = pdf.page(0).get()
|
|
81
|
+
print(page.internal_id, page.position.bounding_rect)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
|
|
85
|
+
|
|
86
|
+
## Editing Text and Forms
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
with PDFDancer.open("report.pdf") as pdf:
|
|
90
|
+
paragraph = pdf.page(0).select_paragraphs_starting_with("Disclaimer")[0]
|
|
91
|
+
|
|
92
|
+
# Chain updates explicitly…
|
|
93
|
+
paragraph.edit() \
|
|
94
|
+
.replace("Updated disclaimer text") \
|
|
95
|
+
.font("Roboto-Regular", 11) \
|
|
96
|
+
.line_spacing(1.1) \
|
|
97
|
+
.move_to(72, 140) \
|
|
98
|
+
.apply()
|
|
99
|
+
|
|
100
|
+
# …or use the context manager to auto-apply on success
|
|
101
|
+
with paragraph.edit() as edit:
|
|
102
|
+
edit.replace("Context-managed update").color(Color(120, 0, 0))
|
|
103
|
+
|
|
104
|
+
# Update an AcroForm field
|
|
105
|
+
field = pdf.page(1).select_form_fields_by_name("signature")[0]
|
|
106
|
+
field.edit().value("Signed by Jane Doe").apply()
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Adding New Content
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
with PDFDancer.open("report.pdf") as pdf:
|
|
113
|
+
# Register fonts from the service
|
|
114
|
+
fonts = pdf.find_fonts("Roboto", 12)
|
|
115
|
+
pdf.register_font("/path/to/custom.ttf")
|
|
116
|
+
|
|
117
|
+
# Paragraphs
|
|
118
|
+
pdf.new_paragraph() \
|
|
119
|
+
.text("Greetings from PDFDancer!") \
|
|
120
|
+
.font(fonts[0].name, fonts[0].size) \
|
|
121
|
+
.at(page_index=0, x=220, y=480) \
|
|
122
|
+
.add()
|
|
123
|
+
|
|
124
|
+
# Raster images
|
|
125
|
+
pdf.new_image() \
|
|
126
|
+
.from_file(Path("logo.png")) \
|
|
127
|
+
.at(page=0, x=48, y=700) \
|
|
128
|
+
.add()
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Downloading Results
|
|
132
|
+
|
|
133
|
+
- `pdf.get_pdf_file()` returns the modified PDF as `bytes` (ideal for storage services or HTTP responses).
|
|
134
|
+
- `pdf.save("output.pdf")` writes directly to disk, creating directories when needed.
|
|
135
|
+
|
|
136
|
+
## Error Handling
|
|
137
|
+
|
|
138
|
+
Most operations raise subclasses of `PdfDancerException`:
|
|
139
|
+
|
|
140
|
+
- `ValidationException` for client-side validation issues (missing token, invalid coordinates, etc.).
|
|
141
|
+
- `FontNotFoundException` when the service cannot locate a requested font.
|
|
142
|
+
- `HttpClientException` for transport or server errors with detailed messages.
|
|
143
|
+
- `SessionException` when session creation fails.
|
|
144
|
+
|
|
145
|
+
Wrap complex workflows in `try/except` blocks to surface actionable errors to your users.
|
|
146
|
+
|
|
147
|
+
## Local Development
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
python -m venv venv
|
|
151
|
+
source venv/bin/activate # Windows: venv\Scripts\activate
|
|
152
|
+
pip install -e .
|
|
153
|
+
pip install -r requirements-dev.txt
|
|
154
|
+
|
|
155
|
+
pytest -q # run the fast unit suite
|
|
156
|
+
pytest tests/e2e # integration tests (requires live API + fixtures)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Package builds are handled by `python -m build`, and release artifacts are published via `python release.py`.
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
MIT © The Famous Cat Ltd.
|
{pdfdancer_client_python-0.2.5 → pdfdancer_client_python-0.2.7}/src/pdfdancer/pdfdancer_v1.py
RENAMED
|
@@ -26,13 +26,16 @@ from .models import (
|
|
|
26
26
|
FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest, ChangeFormFieldRequest,
|
|
27
27
|
ShapeType, PositionMode
|
|
28
28
|
)
|
|
29
|
-
from .types import PathObject, ParagraphObject, TextLineObject, ImageObject, FormObject, FormFieldObject, PageObject
|
|
29
|
+
from .types import PathObject, ParagraphObject, TextLineObject, ImageObject, FormObject, FormFieldObject
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class PageClient:
|
|
33
33
|
def __init__(self, page_index: int, root: "PDFDancer"):
|
|
34
34
|
self.page_index = page_index
|
|
35
35
|
self.root = root
|
|
36
|
+
self.object_type = ObjectType.PAGE
|
|
37
|
+
self.position = Position.at_page(page_index)
|
|
38
|
+
self.internal_id = f"PAGE-{page_index}"
|
|
36
39
|
|
|
37
40
|
def select_paths_at(self, x: float, y: float) -> List[PathObject]:
|
|
38
41
|
# noinspection PyProtectedMember
|
|
@@ -99,8 +102,17 @@ class PageClient:
|
|
|
99
102
|
# noinspection PyProtectedMember
|
|
100
103
|
return self.root._to_form_field_objects(self.root._find_form_fields(position))
|
|
101
104
|
|
|
102
|
-
|
|
103
|
-
|
|
105
|
+
@classmethod
|
|
106
|
+
def from_ref(cls, root: 'PDFDancer', object_ref: ObjectRef) -> 'PageClient':
|
|
107
|
+
page_client = PageClient(page_index=object_ref.position.page_index, root=root)
|
|
108
|
+
return page_client
|
|
109
|
+
|
|
110
|
+
def delete(self) -> bool:
|
|
111
|
+
# noinspection PyProtectedMember
|
|
112
|
+
return self.root._delete_page(self._ref())
|
|
113
|
+
|
|
114
|
+
def _ref(self):
|
|
115
|
+
return ObjectRef(internal_id=self.internal_id, position=self.position, type=self.object_type)
|
|
104
116
|
|
|
105
117
|
|
|
106
118
|
class PDFDancer:
|
|
@@ -472,7 +484,7 @@ class PDFDancer:
|
|
|
472
484
|
|
|
473
485
|
# Page Operations
|
|
474
486
|
|
|
475
|
-
def pages(self) -> List[PageObject]:
|
|
487
|
+
def pages(self) -> List[PageClient]:
|
|
476
488
|
return self._to_page_objects(self._get_pages())
|
|
477
489
|
|
|
478
490
|
def _get_pages(self) -> List[ObjectRef]:
|
|
@@ -898,8 +910,8 @@ class PDFDancer:
|
|
|
898
910
|
return [FormFieldObject(self, ref.internal_id, ref.type, ref.position, ref.name, ref.value) for ref in
|
|
899
911
|
path_refs]
|
|
900
912
|
|
|
901
|
-
def _to_page_objects(self, path_refs: List[ObjectRef]) -> List[PageObject]:
|
|
902
|
-
return [
|
|
913
|
+
def _to_page_objects(self, path_refs: List[ObjectRef]) -> List[PageClient]:
|
|
914
|
+
return [PageClient.from_ref(self, ref) for ref in path_refs]
|
|
903
915
|
|
|
904
|
-
def _to_page_object(self, ref: ObjectRef) -> PageObject:
|
|
905
|
-
return
|
|
916
|
+
def _to_page_object(self, ref: ObjectRef) -> PageClient:
|
|
917
|
+
return PageClient.from_ref(self, ref)
|
|
@@ -239,10 +239,3 @@ class FormFieldObject(PDFObjectBase):
|
|
|
239
239
|
ref.name = self.name
|
|
240
240
|
ref.value = self.value
|
|
241
241
|
return ref
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
class PageObject(PDFObjectBase):
|
|
245
|
-
|
|
246
|
-
def delete(self) -> bool:
|
|
247
|
-
# noinspection PyProtectedMember
|
|
248
|
-
return self._client._delete_page(self.object_ref())
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pdfdancer-client-python
|
|
3
|
+
Version: 0.2.7
|
|
4
|
+
Summary: Python client for PDFDancer API
|
|
5
|
+
Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://www.pdfdancer.com/
|
|
8
|
+
Project-URL: Repository, https://github.com/MenschMachine/pdfdancer-client-python
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: requests>=2.25.0
|
|
18
|
+
Requires-Dist: pydantic>=1.8.0
|
|
19
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
23
|
+
Requires-Dist: black>=22.0; extra == "dev"
|
|
24
|
+
Requires-Dist: flake8>=5.0; extra == "dev"
|
|
25
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
26
|
+
|
|
27
|
+
# PDFDancer Python Client
|
|
28
|
+
|
|
29
|
+
Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically in just a few lines.
|
|
30
|
+
|
|
31
|
+
## Highlights
|
|
32
|
+
|
|
33
|
+
- Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page, coordinates, or text prefixes
|
|
34
|
+
- Edit or delete existing content with fluent paragraph/text editors and safe apply-on-exit context managers
|
|
35
|
+
- Fill or update form fields and propagate the changes back to the document instantly
|
|
36
|
+
- Add brand-new content with paragraph/image builders, custom fonts, and precise page positioning
|
|
37
|
+
- Download results as bytes for downstream processing or save directly to disk with one method call
|
|
38
|
+
|
|
39
|
+
## Core Capabilities
|
|
40
|
+
|
|
41
|
+
- Clean up layout by moving or deleting paragraphs, text lines, or shapes on specific pages
|
|
42
|
+
- Search and filter content (e.g., paragraphs starting with "Invoice") to drive custom workflows
|
|
43
|
+
- Redact or replace text in bulk with chained editor operations
|
|
44
|
+
- Populate AcroForms for contract generation or onboarding flows
|
|
45
|
+
- Insert logos, signatures, and generated paragraphs at deterministic coordinates
|
|
46
|
+
- Export modified PDFs as bytes for APIs, S3 uploads, or direct file saves
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- Python 3.9 or newer
|
|
51
|
+
- A PDFDancer API token (set `PDFDANCER_TOKEN` or pass `token=...`)
|
|
52
|
+
- Network access to a PDFDancer service (defaults to `https://api.pdfdancer.com`; override with `PDFDANCER_BASE_URL`)
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install pdfdancer-client-python
|
|
58
|
+
|
|
59
|
+
# Editable install for local development
|
|
60
|
+
pip install -e .
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Getting Started
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from pathlib import Path
|
|
67
|
+
from pdfdancer import Color, PDFDancer
|
|
68
|
+
|
|
69
|
+
with PDFDancer.open(
|
|
70
|
+
pdf_data=Path("input.pdf"),
|
|
71
|
+
token="your-api-token", # optional when PDFDANCER_TOKEN is set
|
|
72
|
+
base_url="https://api.pdfdancer.com",
|
|
73
|
+
) as pdf:
|
|
74
|
+
# Locate existing content
|
|
75
|
+
heading = pdf.page(0).select_paragraphs_starting_with("Executive Summary")[0]
|
|
76
|
+
heading.edit().replace("Overview").apply()
|
|
77
|
+
|
|
78
|
+
# Add a new paragraph using the fluent builder
|
|
79
|
+
pdf.new_paragraph() \
|
|
80
|
+
.text("Generated with PDFDancer") \
|
|
81
|
+
.font("Helvetica", 12) \
|
|
82
|
+
.color(Color(70, 70, 70)) \
|
|
83
|
+
.line_spacing(1.4) \
|
|
84
|
+
.at(page_index=0, x=72, y=520) \
|
|
85
|
+
.add()
|
|
86
|
+
|
|
87
|
+
# Persist the modified document
|
|
88
|
+
pdf.save("output.pdf")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Authentication Tips
|
|
92
|
+
|
|
93
|
+
- Prefer setting `PDFDANCER_TOKEN` in your environment for local development.
|
|
94
|
+
- Override the API host by setting `PDFDANCER_BASE_URL` or passing `base_url="https://sandbox.pdfdancer.com"`.
|
|
95
|
+
- Use the `timeout` parameter on `PDFDancer.open()` to adjust HTTP read timeouts.
|
|
96
|
+
|
|
97
|
+
## Selecting PDF Content
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
with PDFDancer.open("report.pdf") as pdf: # environment variables provide token/URL
|
|
101
|
+
all_paragraphs = pdf.select_paragraphs()
|
|
102
|
+
page_zero_images = pdf.page(0).select_images()
|
|
103
|
+
form_fields = pdf.page(2).select_form_fields()
|
|
104
|
+
paths_at_cursor = pdf.page(3).select_paths_at(x=150, y=320)
|
|
105
|
+
|
|
106
|
+
page = pdf.page(0).get()
|
|
107
|
+
print(page.internal_id, page.position.bounding_rect)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
|
|
111
|
+
|
|
112
|
+
## Editing Text and Forms
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
with PDFDancer.open("report.pdf") as pdf:
|
|
116
|
+
paragraph = pdf.page(0).select_paragraphs_starting_with("Disclaimer")[0]
|
|
117
|
+
|
|
118
|
+
# Chain updates explicitly…
|
|
119
|
+
paragraph.edit() \
|
|
120
|
+
.replace("Updated disclaimer text") \
|
|
121
|
+
.font("Roboto-Regular", 11) \
|
|
122
|
+
.line_spacing(1.1) \
|
|
123
|
+
.move_to(72, 140) \
|
|
124
|
+
.apply()
|
|
125
|
+
|
|
126
|
+
# …or use the context manager to auto-apply on success
|
|
127
|
+
with paragraph.edit() as edit:
|
|
128
|
+
edit.replace("Context-managed update").color(Color(120, 0, 0))
|
|
129
|
+
|
|
130
|
+
# Update an AcroForm field
|
|
131
|
+
field = pdf.page(1).select_form_fields_by_name("signature")[0]
|
|
132
|
+
field.edit().value("Signed by Jane Doe").apply()
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Adding New Content
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
with PDFDancer.open("report.pdf") as pdf:
|
|
139
|
+
# Register fonts from the service
|
|
140
|
+
fonts = pdf.find_fonts("Roboto", 12)
|
|
141
|
+
pdf.register_font("/path/to/custom.ttf")
|
|
142
|
+
|
|
143
|
+
# Paragraphs
|
|
144
|
+
pdf.new_paragraph() \
|
|
145
|
+
.text("Greetings from PDFDancer!") \
|
|
146
|
+
.font(fonts[0].name, fonts[0].size) \
|
|
147
|
+
.at(page_index=0, x=220, y=480) \
|
|
148
|
+
.add()
|
|
149
|
+
|
|
150
|
+
# Raster images
|
|
151
|
+
pdf.new_image() \
|
|
152
|
+
.from_file(Path("logo.png")) \
|
|
153
|
+
.at(page=0, x=48, y=700) \
|
|
154
|
+
.add()
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Downloading Results
|
|
158
|
+
|
|
159
|
+
- `pdf.get_pdf_file()` returns the modified PDF as `bytes` (ideal for storage services or HTTP responses).
|
|
160
|
+
- `pdf.save("output.pdf")` writes directly to disk, creating directories when needed.
|
|
161
|
+
|
|
162
|
+
## Error Handling
|
|
163
|
+
|
|
164
|
+
Most operations raise subclasses of `PdfDancerException`:
|
|
165
|
+
|
|
166
|
+
- `ValidationException` for client-side validation issues (missing token, invalid coordinates, etc.).
|
|
167
|
+
- `FontNotFoundException` when the service cannot locate a requested font.
|
|
168
|
+
- `HttpClientException` for transport or server errors with detailed messages.
|
|
169
|
+
- `SessionException` when session creation fails.
|
|
170
|
+
|
|
171
|
+
Wrap complex workflows in `try/except` blocks to surface actionable errors to your users.
|
|
172
|
+
|
|
173
|
+
## Local Development
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
python -m venv venv
|
|
177
|
+
source venv/bin/activate # Windows: venv\Scripts\activate
|
|
178
|
+
pip install -e .
|
|
179
|
+
pip install -r requirements-dev.txt
|
|
180
|
+
|
|
181
|
+
pytest -q # run the fast unit suite
|
|
182
|
+
pytest tests/e2e # integration tests (requires live API + fixtures)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Package builds are handled by `python -m build`, and release artifacts are published via `python release.py`.
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
MIT © The Famous Cat Ltd.
|
|
@@ -20,6 +20,7 @@ src/pdfdancer_client_python.egg-info/dependency_links.txt
|
|
|
20
20
|
src/pdfdancer_client_python.egg-info/requires.txt
|
|
21
21
|
src/pdfdancer_client_python.egg-info/top_level.txt
|
|
22
22
|
tests/__init__.py
|
|
23
|
+
tests/conftest.py
|
|
23
24
|
tests/test_authentication.py
|
|
24
25
|
tests/test_models.py
|
|
25
26
|
tests/test_openapi_compliance.py
|
|
@@ -16,7 +16,7 @@ def test_get_page():
|
|
|
16
16
|
base_url, token, pdf_path = _require_env_and_fixture("ObviouslyAwesome.pdf")
|
|
17
17
|
|
|
18
18
|
with PDFDancer.open(pdf_path, token=token, base_url=base_url, timeout=30.0) as pdf:
|
|
19
|
-
page = pdf.page(2)
|
|
19
|
+
page = pdf.page(2)
|
|
20
20
|
assert page is not None
|
|
21
21
|
assert page.position.page_index == 2
|
|
22
22
|
assert page.internal_id is not None
|
|
@@ -26,7 +26,7 @@ def test_delete_page():
|
|
|
26
26
|
base_url, token, pdf_path = _require_env_and_fixture("ObviouslyAwesome.pdf")
|
|
27
27
|
|
|
28
28
|
with PDFDancer.open(pdf_path, token=token, base_url=base_url, timeout=30.0) as pdf:
|
|
29
|
-
page3 = pdf.page(3)
|
|
29
|
+
page3 = pdf.page(3)
|
|
30
30
|
page3.delete()
|
|
31
31
|
|
|
32
32
|
pages_after = pdf.pages()
|