pdfdancer-client-python 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. pdfdancer_client_python-0.2.6/PKG-INFO +189 -0
  2. pdfdancer_client_python-0.2.6/README.md +163 -0
  3. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/pyproject.toml +1 -1
  4. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer/types.py +0 -15
  5. pdfdancer_client_python-0.2.6/src/pdfdancer_client_python.egg-info/PKG-INFO +189 -0
  6. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer_client_python.egg-info/SOURCES.txt +1 -0
  7. pdfdancer_client_python-0.2.6/tests/conftest.py +9 -0
  8. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/__init__.py +1 -1
  9. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/test_acroform.py +5 -5
  10. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/test_form_x_objects.py +1 -1
  11. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/test_image.py +1 -1
  12. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/test_page.py +1 -1
  13. pdfdancer_client_python-0.2.4/PKG-INFO +0 -305
  14. pdfdancer_client_python-0.2.4/README.md +0 -279
  15. pdfdancer_client_python-0.2.4/src/pdfdancer_client_python.egg-info/PKG-INFO +0 -305
  16. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/.github/workflows/ci.yml +0 -0
  17. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/.gitignore +0 -0
  18. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/CLAUDE.md +0 -0
  19. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/docs/openapi.yml +0 -0
  20. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/release.py +0 -0
  21. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/requirements-dev.txt +0 -0
  22. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/requirements.txt +0 -0
  23. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/setup.cfg +0 -0
  24. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer/__init__.py +0 -0
  25. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer/exceptions.py +0 -0
  26. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer/image_builder.py +0 -0
  27. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer/models.py +0 -0
  28. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer/paragraph_builder.py +0 -0
  29. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer/pdfdancer_v1.py +0 -0
  30. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer_client_python.egg-info/dependency_links.txt +0 -0
  31. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer_client_python.egg-info/requires.txt +0 -0
  32. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/src/pdfdancer_client_python.egg-info/top_level.txt +0 -0
  33. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/__init__.py +0 -0
  34. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/test_line.py +0 -0
  35. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/test_paragraph.py +0 -0
  36. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/e2e/test_path.py +0 -0
  37. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/fixtures/DancingScript-Regular.ttf +0 -0
  38. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/fixtures/JetBrainsMono-Regular.ttf +0 -0
  39. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/fixtures/ObviouslyAwesome.pdf +0 -0
  40. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/fixtures/basic-paths.pdf +0 -0
  41. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/fixtures/form-xobject-example.pdf +0 -0
  42. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/fixtures/logo-80.png +0 -0
  43. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/fixtures/mixed-form-types.pdf +0 -0
  44. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/test_authentication.py +0 -0
  45. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/test_models.py +0 -0
  46. {pdfdancer_client_python-0.2.4 → pdfdancer_client_python-0.2.6}/tests/test_openapi_compliance.py +0 -0
@@ -0,0 +1,189 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdfdancer-client-python
3
+ Version: 0.2.6
4
+ Summary: Python client for PDFDancer API
5
+ Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://www.pdfdancer.com/
8
+ Project-URL: Repository, https://github.com/MenschMachine/pdfdancer-client-python
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: requests>=2.25.0
18
+ Requires-Dist: pydantic>=1.8.0
19
+ Requires-Dist: typing-extensions>=4.0.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.0; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
23
+ Requires-Dist: black>=22.0; extra == "dev"
24
+ Requires-Dist: flake8>=5.0; extra == "dev"
25
+ Requires-Dist: mypy>=1.0; extra == "dev"
26
+
27
+ # PDFDancer Python Client
28
+
29
+ Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically in just a few lines.
30
+
31
+ ## Highlights
32
+
33
+ - Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page, coordinates, or text prefixes
34
+ - Edit or delete existing content with fluent paragraph/text editors and safe apply-on-exit context managers
35
+ - Fill or update form fields and propagate the changes back to the document instantly
36
+ - Add brand-new content with paragraph/image builders, custom fonts, and precise page positioning
37
+ - Download results as bytes for downstream processing or save directly to disk with one method call
38
+
39
+ ## Core Capabilities
40
+
41
+ - Clean up layout by moving or deleting paragraphs, text lines, or shapes on specific pages
42
+ - Search and filter content (e.g., paragraphs starting with "Invoice") to drive custom workflows
43
+ - Redact or replace text in bulk with chained editor operations
44
+ - Populate AcroForms for contract generation or onboarding flows
45
+ - Insert logos, signatures, and generated paragraphs at deterministic coordinates
46
+ - Export modified PDFs as bytes for APIs, S3 uploads, or direct file saves
47
+
48
+ ## Requirements
49
+
50
+ - Python 3.9 or newer
51
+ - A PDFDancer API token (set `PDFDANCER_TOKEN` or pass `token=...`)
52
+ - Network access to a PDFDancer service (defaults to `https://api.pdfdancer.com`; override with `PDFDANCER_BASE_URL`)
53
+
54
+ ## Installation
55
+
56
+ ```bash
57
+ pip install pdfdancer-client-python
58
+
59
+ # Editable install for local development
60
+ pip install -e .
61
+ ```
62
+
63
+ ## Getting Started
64
+
65
+ ```python
66
+ from pathlib import Path
67
+ from pdfdancer import Color, PDFDancer
68
+
69
+ with PDFDancer.open(
70
+ pdf_data=Path("input.pdf"),
71
+ token="your-api-token", # optional when PDFDANCER_TOKEN is set
72
+ base_url="https://api.pdfdancer.com",
73
+ ) as pdf:
74
+ # Locate existing content
75
+ heading = pdf.page(0).select_paragraphs_starting_with("Executive Summary")[0]
76
+ heading.edit().replace("Overview").apply()
77
+
78
+ # Add a new paragraph using the fluent builder
79
+ pdf.new_paragraph() \
80
+ .text("Generated with PDFDancer") \
81
+ .font("Helvetica", 12) \
82
+ .color(Color(70, 70, 70)) \
83
+ .line_spacing(1.4) \
84
+ .at(page_index=0, x=72, y=520) \
85
+ .add()
86
+
87
+ # Persist the modified document
88
+ pdf.save("output.pdf")
89
+ ```
90
+
91
+ ### Authentication Tips
92
+
93
+ - Prefer setting `PDFDANCER_TOKEN` in your environment for local development.
94
+ - Override the API host by setting `PDFDANCER_BASE_URL` or passing `base_url="https://sandbox.pdfdancer.com"`.
95
+ - Use the `timeout` parameter on `PDFDancer.open()` to adjust HTTP read timeouts.
96
+
97
+ ## Selecting PDF Content
98
+
99
+ ```python
100
+ with PDFDancer.open("report.pdf") as pdf: # environment variables provide token/URL
101
+ all_paragraphs = pdf.select_paragraphs()
102
+ page_zero_images = pdf.page(0).select_images()
103
+ form_fields = pdf.page(2).select_form_fields()
104
+ paths_at_cursor = pdf.page(3).select_paths_at(x=150, y=320)
105
+
106
+ page = pdf.page(0).get()
107
+ print(page.internal_id, page.position.bounding_rect)
108
+ ```
109
+
110
+ Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
111
+
112
+ ## Editing Text and Forms
113
+
114
+ ```python
115
+ with PDFDancer.open("report.pdf") as pdf:
116
+ paragraph = pdf.page(0).select_paragraphs_starting_with("Disclaimer")[0]
117
+
118
+ # Chain updates explicitly…
119
+ paragraph.edit() \
120
+ .replace("Updated disclaimer text") \
121
+ .font("Roboto-Regular", 11) \
122
+ .line_spacing(1.1) \
123
+ .move_to(72, 140) \
124
+ .apply()
125
+
126
+ # …or use the context manager to auto-apply on success
127
+ with paragraph.edit() as edit:
128
+ edit.replace("Context-managed update").color(Color(120, 0, 0))
129
+
130
+ # Update an AcroForm field
131
+ field = pdf.page(1).select_form_fields_by_name("signature")[0]
132
+ field.edit().value("Signed by Jane Doe").apply()
133
+ ```
134
+
135
+ ## Adding New Content
136
+
137
+ ```python
138
+ with PDFDancer.open("report.pdf") as pdf:
139
+ # Register fonts from the service
140
+ fonts = pdf.find_fonts("Roboto", 12)
141
+ pdf.register_font("/path/to/custom.ttf")
142
+
143
+ # Paragraphs
144
+ pdf.new_paragraph() \
145
+ .text("Greetings from PDFDancer!") \
146
+ .font(fonts[0].name, fonts[0].size) \
147
+ .at(page_index=0, x=220, y=480) \
148
+ .add()
149
+
150
+ # Raster images
151
+ pdf.new_image() \
152
+ .from_file(Path("logo.png")) \
153
+ .at(page=0, x=48, y=700) \
154
+ .add()
155
+ ```
156
+
157
+ ## Downloading Results
158
+
159
+ - `pdf.get_pdf_file()` returns the modified PDF as `bytes` (ideal for storage services or HTTP responses).
160
+ - `pdf.save("output.pdf")` writes directly to disk, creating directories when needed.
161
+
162
+ ## Error Handling
163
+
164
+ Most operations raise subclasses of `PdfDancerException`:
165
+
166
+ - `ValidationException` for client-side validation issues (missing token, invalid coordinates, etc.).
167
+ - `FontNotFoundException` when the service cannot locate a requested font.
168
+ - `HttpClientException` for transport or server errors with detailed messages.
169
+ - `SessionException` when session creation fails.
170
+
171
+ Wrap complex workflows in `try/except` blocks to surface actionable errors to your users.
172
+
173
+ ## Local Development
174
+
175
+ ```bash
176
+ python -m venv venv
177
+ source venv/bin/activate # Windows: venv\Scripts\activate
178
+ pip install -e .
179
+ pip install -r requirements-dev.txt
180
+
181
+ pytest -q # run the fast unit suite
182
+ pytest tests/e2e # integration tests (requires live API + fixtures)
183
+ ```
184
+
185
+ Package builds are handled by `python -m build`, and release artifacts are published via `python release.py`.
186
+
187
+ ## License
188
+
189
+ MIT © The Famous Cat Ltd.
@@ -0,0 +1,163 @@
1
+ # PDFDancer Python Client
2
+
3
+ Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically in just a few lines.
4
+
5
+ ## Highlights
6
+
7
+ - Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page, coordinates, or text prefixes
8
+ - Edit or delete existing content with fluent paragraph/text editors and safe apply-on-exit context managers
9
+ - Fill or update form fields and propagate the changes back to the document instantly
10
+ - Add brand-new content with paragraph/image builders, custom fonts, and precise page positioning
11
+ - Download results as bytes for downstream processing or save directly to disk with one method call
12
+
13
+ ## Core Capabilities
14
+
15
+ - Clean up layout by moving or deleting paragraphs, text lines, or shapes on specific pages
16
+ - Search and filter content (e.g., paragraphs starting with "Invoice") to drive custom workflows
17
+ - Redact or replace text in bulk with chained editor operations
18
+ - Populate AcroForms for contract generation or onboarding flows
19
+ - Insert logos, signatures, and generated paragraphs at deterministic coordinates
20
+ - Export modified PDFs as bytes for APIs, S3 uploads, or direct file saves
21
+
22
+ ## Requirements
23
+
24
+ - Python 3.9 or newer
25
+ - A PDFDancer API token (set `PDFDANCER_TOKEN` or pass `token=...`)
26
+ - Network access to a PDFDancer service (defaults to `https://api.pdfdancer.com`; override with `PDFDANCER_BASE_URL`)
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install pdfdancer-client-python
32
+
33
+ # Editable install for local development
34
+ pip install -e .
35
+ ```
36
+
37
+ ## Getting Started
38
+
39
+ ```python
40
+ from pathlib import Path
41
+ from pdfdancer import Color, PDFDancer
42
+
43
+ with PDFDancer.open(
44
+ pdf_data=Path("input.pdf"),
45
+ token="your-api-token", # optional when PDFDANCER_TOKEN is set
46
+ base_url="https://api.pdfdancer.com",
47
+ ) as pdf:
48
+ # Locate existing content
49
+ heading = pdf.page(0).select_paragraphs_starting_with("Executive Summary")[0]
50
+ heading.edit().replace("Overview").apply()
51
+
52
+ # Add a new paragraph using the fluent builder
53
+ pdf.new_paragraph() \
54
+ .text("Generated with PDFDancer") \
55
+ .font("Helvetica", 12) \
56
+ .color(Color(70, 70, 70)) \
57
+ .line_spacing(1.4) \
58
+ .at(page_index=0, x=72, y=520) \
59
+ .add()
60
+
61
+ # Persist the modified document
62
+ pdf.save("output.pdf")
63
+ ```
64
+
65
+ ### Authentication Tips
66
+
67
+ - Prefer setting `PDFDANCER_TOKEN` in your environment for local development.
68
+ - Override the API host by setting `PDFDANCER_BASE_URL` or passing `base_url="https://sandbox.pdfdancer.com"`.
69
+ - Use the `timeout` parameter on `PDFDancer.open()` to adjust HTTP read timeouts.
70
+
71
+ ## Selecting PDF Content
72
+
73
+ ```python
74
+ with PDFDancer.open("report.pdf") as pdf: # environment variables provide token/URL
75
+ all_paragraphs = pdf.select_paragraphs()
76
+ page_zero_images = pdf.page(0).select_images()
77
+ form_fields = pdf.page(2).select_form_fields()
78
+ paths_at_cursor = pdf.page(3).select_paths_at(x=150, y=320)
79
+
80
+ page = pdf.page(0).get()
81
+ print(page.internal_id, page.position.bounding_rect)
82
+ ```
83
+
84
+ Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
85
+
86
+ ## Editing Text and Forms
87
+
88
+ ```python
89
+ with PDFDancer.open("report.pdf") as pdf:
90
+ paragraph = pdf.page(0).select_paragraphs_starting_with("Disclaimer")[0]
91
+
92
+ # Chain updates explicitly…
93
+ paragraph.edit() \
94
+ .replace("Updated disclaimer text") \
95
+ .font("Roboto-Regular", 11) \
96
+ .line_spacing(1.1) \
97
+ .move_to(72, 140) \
98
+ .apply()
99
+
100
+ # …or use the context manager to auto-apply on success
101
+ with paragraph.edit() as edit:
102
+ edit.replace("Context-managed update").color(Color(120, 0, 0))
103
+
104
+ # Update an AcroForm field
105
+ field = pdf.page(1).select_form_fields_by_name("signature")[0]
106
+ field.edit().value("Signed by Jane Doe").apply()
107
+ ```
108
+
109
+ ## Adding New Content
110
+
111
+ ```python
112
+ with PDFDancer.open("report.pdf") as pdf:
113
+ # Register fonts from the service
114
+ fonts = pdf.find_fonts("Roboto", 12)
115
+ pdf.register_font("/path/to/custom.ttf")
116
+
117
+ # Paragraphs
118
+ pdf.new_paragraph() \
119
+ .text("Greetings from PDFDancer!") \
120
+ .font(fonts[0].name, fonts[0].size) \
121
+ .at(page_index=0, x=220, y=480) \
122
+ .add()
123
+
124
+ # Raster images
125
+ pdf.new_image() \
126
+ .from_file(Path("logo.png")) \
127
+ .at(page=0, x=48, y=700) \
128
+ .add()
129
+ ```
130
+
131
+ ## Downloading Results
132
+
133
+ - `pdf.get_pdf_file()` returns the modified PDF as `bytes` (ideal for storage services or HTTP responses).
134
+ - `pdf.save("output.pdf")` writes directly to disk, creating directories when needed.
135
+
136
+ ## Error Handling
137
+
138
+ Most operations raise subclasses of `PdfDancerException`:
139
+
140
+ - `ValidationException` for client-side validation issues (missing token, invalid coordinates, etc.).
141
+ - `FontNotFoundException` when the service cannot locate a requested font.
142
+ - `HttpClientException` for transport or server errors with detailed messages.
143
+ - `SessionException` when session creation fails.
144
+
145
+ Wrap complex workflows in `try/except` blocks to surface actionable errors to your users.
146
+
147
+ ## Local Development
148
+
149
+ ```bash
150
+ python -m venv venv
151
+ source venv/bin/activate # Windows: venv\Scripts\activate
152
+ pip install -e .
153
+ pip install -r requirements-dev.txt
154
+
155
+ pytest -q # run the fast unit suite
156
+ pytest tests/e2e # integration tests (requires live API + fixtures)
157
+ ```
158
+
159
+ Package builds are handled by `python -m build`, and release artifacts are published via `python release.py`.
160
+
161
+ ## License
162
+
163
+ MIT © The Famous Cat Ltd.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pdfdancer-client-python"
7
- version = "0.2.4"
7
+ version = "0.2.6"
8
8
  description = "Python client for PDFDancer API"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -31,21 +31,6 @@ class PDFObjectBase:
31
31
  self.internal_id = internal_id
32
32
  self.object_type = object_type
33
33
 
34
- # --------------------------------------------------------------
35
- # Core properties
36
- # --------------------------------------------------------------
37
- def internal_id(self) -> str:
38
- """Internal PDFDancer object identifier, e.g. 'PATH_000023'."""
39
- return self.internal_id
40
-
41
- def type(self) -> ObjectType:
42
- """Enum value representing the PDF object type."""
43
- return self.object_type
44
-
45
- def position(self) -> Position:
46
- """The geometric position of the object on its page."""
47
- return self.position
48
-
49
34
  @property
50
35
  def page_index(self) -> int:
51
36
  """Page index where this object resides."""
@@ -0,0 +1,189 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdfdancer-client-python
3
+ Version: 0.2.6
4
+ Summary: Python client for PDFDancer API
5
+ Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://www.pdfdancer.com/
8
+ Project-URL: Repository, https://github.com/MenschMachine/pdfdancer-client-python
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: requests>=2.25.0
18
+ Requires-Dist: pydantic>=1.8.0
19
+ Requires-Dist: typing-extensions>=4.0.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.0; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
23
+ Requires-Dist: black>=22.0; extra == "dev"
24
+ Requires-Dist: flake8>=5.0; extra == "dev"
25
+ Requires-Dist: mypy>=1.0; extra == "dev"
26
+
27
+ # PDFDancer Python Client
28
+
29
+ Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically in just a few lines.
30
+
31
+ ## Highlights
32
+
33
+ - Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page, coordinates, or text prefixes
34
+ - Edit or delete existing content with fluent paragraph/text editors and safe apply-on-exit context managers
35
+ - Fill or update form fields and propagate the changes back to the document instantly
36
+ - Add brand-new content with paragraph/image builders, custom fonts, and precise page positioning
37
+ - Download results as bytes for downstream processing or save directly to disk with one method call
38
+
39
+ ## Core Capabilities
40
+
41
+ - Clean up layout by moving or deleting paragraphs, text lines, or shapes on specific pages
42
+ - Search and filter content (e.g., paragraphs starting with "Invoice") to drive custom workflows
43
+ - Redact or replace text in bulk with chained editor operations
44
+ - Populate AcroForms for contract generation or onboarding flows
45
+ - Insert logos, signatures, and generated paragraphs at deterministic coordinates
46
+ - Export modified PDFs as bytes for APIs, S3 uploads, or direct file saves
47
+
48
+ ## Requirements
49
+
50
+ - Python 3.9 or newer
51
+ - A PDFDancer API token (set `PDFDANCER_TOKEN` or pass `token=...`)
52
+ - Network access to a PDFDancer service (defaults to `https://api.pdfdancer.com`; override with `PDFDANCER_BASE_URL`)
53
+
54
+ ## Installation
55
+
56
+ ```bash
57
+ pip install pdfdancer-client-python
58
+
59
+ # Editable install for local development
60
+ pip install -e .
61
+ ```
62
+
63
+ ## Getting Started
64
+
65
+ ```python
66
+ from pathlib import Path
67
+ from pdfdancer import Color, PDFDancer
68
+
69
+ with PDFDancer.open(
70
+ pdf_data=Path("input.pdf"),
71
+ token="your-api-token", # optional when PDFDANCER_TOKEN is set
72
+ base_url="https://api.pdfdancer.com",
73
+ ) as pdf:
74
+ # Locate existing content
75
+ heading = pdf.page(0).select_paragraphs_starting_with("Executive Summary")[0]
76
+ heading.edit().replace("Overview").apply()
77
+
78
+ # Add a new paragraph using the fluent builder
79
+ pdf.new_paragraph() \
80
+ .text("Generated with PDFDancer") \
81
+ .font("Helvetica", 12) \
82
+ .color(Color(70, 70, 70)) \
83
+ .line_spacing(1.4) \
84
+ .at(page_index=0, x=72, y=520) \
85
+ .add()
86
+
87
+ # Persist the modified document
88
+ pdf.save("output.pdf")
89
+ ```
90
+
91
+ ### Authentication Tips
92
+
93
+ - Prefer setting `PDFDANCER_TOKEN` in your environment for local development.
94
+ - Override the API host by setting `PDFDANCER_BASE_URL` or passing `base_url="https://sandbox.pdfdancer.com"`.
95
+ - Use the `timeout` parameter on `PDFDancer.open()` to adjust HTTP read timeouts.
96
+
97
+ ## Selecting PDF Content
98
+
99
+ ```python
100
+ with PDFDancer.open("report.pdf") as pdf: # environment variables provide token/URL
101
+ all_paragraphs = pdf.select_paragraphs()
102
+ page_zero_images = pdf.page(0).select_images()
103
+ form_fields = pdf.page(2).select_form_fields()
104
+ paths_at_cursor = pdf.page(3).select_paths_at(x=150, y=320)
105
+
106
+ page = pdf.page(0).get()
107
+ print(page.internal_id, page.position.bounding_rect)
108
+ ```
109
+
110
+ Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
111
+
112
+ ## Editing Text and Forms
113
+
114
+ ```python
115
+ with PDFDancer.open("report.pdf") as pdf:
116
+ paragraph = pdf.page(0).select_paragraphs_starting_with("Disclaimer")[0]
117
+
118
+ # Chain updates explicitly…
119
+ paragraph.edit() \
120
+ .replace("Updated disclaimer text") \
121
+ .font("Roboto-Regular", 11) \
122
+ .line_spacing(1.1) \
123
+ .move_to(72, 140) \
124
+ .apply()
125
+
126
+ # …or use the context manager to auto-apply on success
127
+ with paragraph.edit() as edit:
128
+ edit.replace("Context-managed update").color(Color(120, 0, 0))
129
+
130
+ # Update an AcroForm field
131
+ field = pdf.page(1).select_form_fields_by_name("signature")[0]
132
+ field.edit().value("Signed by Jane Doe").apply()
133
+ ```
134
+
135
+ ## Adding New Content
136
+
137
+ ```python
138
+ with PDFDancer.open("report.pdf") as pdf:
139
+ # Register fonts from the service
140
+ fonts = pdf.find_fonts("Roboto", 12)
141
+ pdf.register_font("/path/to/custom.ttf")
142
+
143
+ # Paragraphs
144
+ pdf.new_paragraph() \
145
+ .text("Greetings from PDFDancer!") \
146
+ .font(fonts[0].name, fonts[0].size) \
147
+ .at(page_index=0, x=220, y=480) \
148
+ .add()
149
+
150
+ # Raster images
151
+ pdf.new_image() \
152
+ .from_file(Path("logo.png")) \
153
+ .at(page=0, x=48, y=700) \
154
+ .add()
155
+ ```
156
+
157
+ ## Downloading Results
158
+
159
+ - `pdf.get_pdf_file()` returns the modified PDF as `bytes` (ideal for storage services or HTTP responses).
160
+ - `pdf.save("output.pdf")` writes directly to disk, creating directories when needed.
161
+
162
+ ## Error Handling
163
+
164
+ Most operations raise subclasses of `PdfDancerException`:
165
+
166
+ - `ValidationException` for client-side validation issues (missing token, invalid coordinates, etc.).
167
+ - `FontNotFoundException` when the service cannot locate a requested font.
168
+ - `HttpClientException` for transport or server errors with detailed messages.
169
+ - `SessionException` when session creation fails.
170
+
171
+ Wrap complex workflows in `try/except` blocks to surface actionable errors to your users.
172
+
173
+ ## Local Development
174
+
175
+ ```bash
176
+ python -m venv venv
177
+ source venv/bin/activate # Windows: venv\Scripts\activate
178
+ pip install -e .
179
+ pip install -r requirements-dev.txt
180
+
181
+ pytest -q # run the fast unit suite
182
+ pytest tests/e2e # integration tests (requires live API + fixtures)
183
+ ```
184
+
185
+ Package builds are handled by `python -m build`, and release artifacts are published via `python release.py`.
186
+
187
+ ## License
188
+
189
+ MIT © The Famous Cat Ltd.
@@ -20,6 +20,7 @@ src/pdfdancer_client_python.egg-info/dependency_links.txt
20
20
  src/pdfdancer_client_python.egg-info/requires.txt
21
21
  src/pdfdancer_client_python.egg-info/top_level.txt
22
22
  tests/__init__.py
23
+ tests/conftest.py
23
24
  tests/test_authentication.py
24
25
  tests/test_models.py
25
26
  tests/test_openapi_compliance.py
@@ -0,0 +1,9 @@
1
+ import os
2
+ import pytest
3
+
4
+
5
+ @pytest.fixture(scope="session", autouse=True)
6
+ def set_test_environment():
7
+ """Automatically set PDFDANCER_BASE_URL to localhost for all tests"""
8
+ os.environ["PDFDANCER_BASE_URL"] = "http://localhost:8080"
9
+ yield
@@ -6,7 +6,7 @@ import requests
6
6
 
7
7
 
8
8
  def _get_base_url():
9
- return os.getenv('PDFDANCER_BASE_URL', 'http://localhost:8080')
9
+ return os.getenv('PDFDANCER_BASE_URL', 'https://api.pdfdancer.com')
10
10
 
11
11
 
12
12
  def _read_token() -> str | None:
@@ -9,9 +9,9 @@ def test_find_form_fields():
9
9
  with PDFDancer.open(pdf_path, token=token, base_url=base_url) as pdf:
10
10
  form_fields = pdf.select_form_fields()
11
11
  assert len(form_fields) == 10
12
- assert form_fields[0].type() == ObjectType.TEXT_FIELD
13
- assert form_fields[4].type() == ObjectType.CHECK_BOX
14
- assert form_fields[6].type() == ObjectType.RADIO_BUTTON
12
+ assert form_fields[0].object_type == ObjectType.TEXT_FIELD
13
+ assert form_fields[4].object_type == ObjectType.CHECK_BOX
14
+ assert form_fields[6].object_type == ObjectType.RADIO_BUTTON
15
15
 
16
16
  # Verify not all fields at origin
17
17
  all_at_origin = all(
@@ -26,7 +26,7 @@ def test_find_form_fields():
26
26
  first_form = pdf.page(0).select_form_fields_at(290, 460)
27
27
  assert len(first_form) == 1
28
28
  f = first_form[0]
29
- assert f.type() == ObjectType.RADIO_BUTTON
29
+ assert f.object_type == ObjectType.RADIO_BUTTON
30
30
  assert f.internal_id == "FORM_FIELD_000008"
31
31
 
32
32
 
@@ -73,7 +73,7 @@ def test_edit_form_fields():
73
73
  f = fields[0]
74
74
  assert f.name == "firstName"
75
75
  assert f.value is None
76
- assert f.type() == ObjectType.TEXT_FIELD
76
+ assert f.object_type == ObjectType.TEXT_FIELD
77
77
  assert f.internal_id == "FORM_FIELD_000001"
78
78
 
79
79
  f.edit().value("Donald Duck").apply()
@@ -11,7 +11,7 @@ def test_delete_form(tmp_path: Path):
11
11
  with PDFDancer.open(pdf_path, token=token, base_url=base_url) as pdf:
12
12
  forms = pdf.select_forms()
13
13
  assert len(forms) == 17
14
- assert forms[0].type() == ObjectType.FORM_X_OBJECT
14
+ assert forms[0].object_type == ObjectType.FORM_X_OBJECT
15
15
 
16
16
  # Delete all form XObjects
17
17
  for form in forms:
@@ -13,7 +13,7 @@ def test_find_images():
13
13
  with PDFDancer.open(pdf_path, token=token, base_url=base_url, timeout=30.0) as pdf:
14
14
  images = pdf.select_images()
15
15
  assert len(images) == 3
16
- assert images[0].type() == ObjectType.IMAGE
16
+ assert images[0].object_type == ObjectType.IMAGE
17
17
 
18
18
  images_page0 = pdf.page(0).select_images()
19
19
  assert len(images_page0) == 2
@@ -9,7 +9,7 @@ def test_get_pages():
9
9
  pages = pdf.pages()
10
10
  assert pages is not None
11
11
  assert len(pages) == 12
12
- assert pages[0].type() == ObjectType.PAGE
12
+ assert pages[0].object_type == ObjectType.PAGE
13
13
 
14
14
 
15
15
  def test_get_page():