nutrient-dws 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nutrient-dws-1.0.1/PKG-INFO +339 -0
- nutrient-dws-1.0.1/README.md +302 -0
- nutrient-dws-1.0.1/pyproject.toml +127 -0
- nutrient-dws-1.0.1/setup.cfg +4 -0
- nutrient-dws-1.0.1/setup.py +8 -0
- nutrient-dws-1.0.1/src/nutrient_dws/__init__.py +25 -0
- nutrient-dws-1.0.1/src/nutrient_dws/api/__init__.py +1 -0
- nutrient-dws-1.0.1/src/nutrient_dws/api/direct.py +295 -0
- nutrient-dws-1.0.1/src/nutrient_dws/builder.py +216 -0
- nutrient-dws-1.0.1/src/nutrient_dws/client.py +110 -0
- nutrient-dws-1.0.1/src/nutrient_dws/exceptions.py +83 -0
- nutrient-dws-1.0.1/src/nutrient_dws/file_handler.py +197 -0
- nutrient-dws-1.0.1/src/nutrient_dws/http_client.py +181 -0
- nutrient-dws-1.0.1/src/nutrient_dws/py.typed +0 -0
- nutrient-dws-1.0.1/src/nutrient_dws.egg-info/PKG-INFO +339 -0
- nutrient-dws-1.0.1/src/nutrient_dws.egg-info/SOURCES.txt +17 -0
- nutrient-dws-1.0.1/src/nutrient_dws.egg-info/dependency_links.txt +1 -0
- nutrient-dws-1.0.1/src/nutrient_dws.egg-info/requires.txt +13 -0
- nutrient-dws-1.0.1/src/nutrient_dws.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: nutrient-dws
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Python client library for Nutrient Document Web Services API
|
|
5
|
+
Author-email: Nutrient <support@nutrient.io>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jdrhyne/nutrient-dws-client-python
|
|
8
|
+
Project-URL: Documentation, https://nutrient-dws-client-python.readthedocs.io
|
|
9
|
+
Project-URL: Repository, https://github.com/jdrhyne/nutrient-dws-client-python
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/jdrhyne/nutrient-dws-client-python/issues
|
|
11
|
+
Keywords: nutrient,pdf,document,processing,api,client
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Office/Business
|
|
23
|
+
Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
Requires-Dist: requests<3.0.0,>=2.25.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
30
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
31
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
32
|
+
Requires-Dist: types-requests>=2.25.0; extra == "dev"
|
|
33
|
+
Provides-Extra: docs
|
|
34
|
+
Requires-Dist: sphinx>=5.0.0; extra == "docs"
|
|
35
|
+
Requires-Dist: sphinx-rtd-theme>=1.2.0; extra == "docs"
|
|
36
|
+
Requires-Dist: sphinx-autodoc-typehints>=1.22.0; extra == "docs"
|
|
37
|
+
|
|
38
|
+
# Nutrient DWS Python Client
|
|
39
|
+
|
|
40
|
+
[Python 3.8+](https://www.python.org/downloads/)
|
|
41
|
+
[CI](https://github.com/jdrhyne/nutrient-dws-client-python/actions)
|
|
42
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
43
|
+
[Code style: black](https://github.com/psf/black)
|
|
44
|
+
[PyPI](https://pypi.org/project/nutrient-dws/)
|
|
45
|
+
|
|
46
|
+
A Python client library for the [Nutrient Document Web Services (DWS) API](https://www.nutrient.io/). This library provides a Pythonic interface to interact with Nutrient's document processing services, supporting both Direct API calls and Builder API workflows.
|
|
47
|
+
|
|
48
|
+
## Features
|
|
49
|
+
|
|
50
|
+
- 🚀 **Two API styles**: Direct API for single operations, Builder API for complex workflows
|
|
51
|
+
- 📄 **Comprehensive document tools**: Convert, merge, rotate, OCR, watermark, and more
|
|
52
|
+
- 🔄 **Automatic retries**: Built-in retry logic for transient failures
|
|
53
|
+
- 📁 **Flexible file handling**: Support for file paths, bytes, and file-like objects
|
|
54
|
+
- 🔒 **Type-safe**: Full type hints for better IDE support
|
|
55
|
+
- ⚡ **Streaming support**: Memory-efficient processing of large files
|
|
56
|
+
- 🧪 **Well-tested**: Comprehensive test suite with high coverage
|
|
57
|
+
|
|
58
|
+
## Installation
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install nutrient-dws
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick Start
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from nutrient_dws import NutrientClient
|
|
68
|
+
|
|
69
|
+
# Initialize the client
|
|
70
|
+
client = NutrientClient(api_key="your-api-key")
|
|
71
|
+
|
|
72
|
+
# Direct API - Flatten PDF annotations
|
|
73
|
+
client.flatten_annotations(
|
|
74
|
+
input_file="document.pdf",
|
|
75
|
+
output_path="flattened.pdf"
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Builder API - Chain multiple operations
|
|
79
|
+
client.build(input_file="document.pdf") \
|
|
80
|
+
.add_step("rotate-pages", {"degrees": 90}) \
|
|
81
|
+
.add_step("ocr-pdf", {"language": "english"}) \
|
|
82
|
+
.add_step("watermark-pdf", {"text": "CONFIDENTIAL", "width": 200, "height": 100}) \
|
|
83
|
+
.execute(output_path="processed.pdf")
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Authentication
|
|
87
|
+
|
|
88
|
+
The client supports API key authentication through multiple methods:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
# 1. Pass directly to client
|
|
92
|
+
client = NutrientClient(api_key="your-api-key")
|
|
93
|
+
|
|
94
|
+
# 2. Set environment variable
|
|
95
|
+
# export NUTRIENT_API_KEY=your-api-key
|
|
96
|
+
client = NutrientClient() # Will use env variable
|
|
97
|
+
|
|
98
|
+
# 3. Use context manager for automatic cleanup
|
|
99
|
+
with NutrientClient(api_key="your-api-key") as client:
|
|
100
|
+
client.convert_to_pdf("document.docx")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Direct API Examples
|
|
104
|
+
|
|
105
|
+
### Flatten Annotations
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
# Flatten all annotations and form fields
|
|
109
|
+
client.flatten_annotations(
|
|
110
|
+
input_file="form.pdf",
|
|
111
|
+
output_path="flattened.pdf"
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Merge PDFs
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# Merge multiple PDFs
|
|
119
|
+
client.merge_pdfs(
|
|
120
|
+
input_files=["doc1.pdf", "doc2.pdf", "doc3.pdf"],
|
|
121
|
+
output_path="merged.pdf"
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### OCR PDF
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# Add OCR layer to scanned PDF
|
|
129
|
+
client.ocr_pdf(
|
|
130
|
+
input_file="scanned.pdf",
|
|
131
|
+
output_path="searchable.pdf",
|
|
132
|
+
language="english"
|
|
133
|
+
)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Rotate Pages
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
# Rotate all pages
|
|
140
|
+
client.rotate_pages(
|
|
141
|
+
input_file="document.pdf",
|
|
142
|
+
output_path="rotated.pdf",
|
|
143
|
+
degrees=180
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
# Rotate specific pages
|
|
147
|
+
client.rotate_pages(
|
|
148
|
+
input_file="document.pdf",
|
|
149
|
+
output_path="rotated.pdf",
|
|
150
|
+
degrees=90,
|
|
151
|
+
page_indexes=[0, 2, 4] # Pages 1, 3, and 5
|
|
152
|
+
)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Watermark PDF
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
# Add text watermark (width/height required)
|
|
159
|
+
client.watermark_pdf(
|
|
160
|
+
input_file="document.pdf",
|
|
161
|
+
output_path="watermarked.pdf",
|
|
162
|
+
text="DRAFT",
|
|
163
|
+
width=200,
|
|
164
|
+
height=100,
|
|
165
|
+
opacity=0.5,
|
|
166
|
+
position="center"
|
|
167
|
+
)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Builder API Examples
|
|
171
|
+
|
|
172
|
+
The Builder API allows you to chain multiple operations in a single workflow:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# Complex document processing pipeline
|
|
176
|
+
result = client.build(input_file="raw-scan.pdf") \
|
|
177
|
+
.add_step("ocr-pdf", {"language": "english"}) \
|
|
178
|
+
.add_step("rotate-pages", {"degrees": -90, "page_indexes": [0]}) \
|
|
179
|
+
.add_step("watermark-pdf", {
|
|
180
|
+
"text": "PROCESSED",
|
|
181
|
+
"opacity": 0.3,
|
|
182
|
+
"position": "top-right"
|
|
183
|
+
}) \
|
|
184
|
+
.add_step("flatten-annotations") \
|
|
185
|
+
.set_output_options(
|
|
186
|
+
metadata={"title": "Processed Document", "author": "DWS Client"},
|
|
187
|
+
optimize=True
|
|
188
|
+
) \
|
|
189
|
+
.execute(output_path="final.pdf")
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## File Input Options
|
|
193
|
+
|
|
194
|
+
The library supports multiple ways to provide input files:
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
# File path (string or Path object)
|
|
198
|
+
client.convert_to_pdf("document.docx")
|
|
199
|
+
client.convert_to_pdf(Path("document.docx"))  # requires: from pathlib import Path
|
|
200
|
+
|
|
201
|
+
# Bytes
|
|
202
|
+
with open("document.docx", "rb") as f:
|
|
203
|
+
file_bytes = f.read()
|
|
204
|
+
client.convert_to_pdf(file_bytes)
|
|
205
|
+
|
|
206
|
+
# File-like object
|
|
207
|
+
with open("document.docx", "rb") as f:
|
|
208
|
+
client.convert_to_pdf(f)
|
|
209
|
+
|
|
210
|
+
# URL (for supported operations)
|
|
211
|
+
client.import_from_url("https://example.com/document.pdf")
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Error Handling
|
|
215
|
+
|
|
216
|
+
The library provides specific exceptions for different error scenarios:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
from nutrient_dws import (
|
|
220
|
+
NutrientError,
|
|
221
|
+
AuthenticationError,
|
|
222
|
+
APIError,
|
|
223
|
+
ValidationError,
|
|
224
|
+
NutrientTimeoutError,
|
|
225
|
+
FileProcessingError
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
try:
|
|
229
|
+
client.convert_to_pdf("document.docx")
|
|
230
|
+
except AuthenticationError:
|
|
231
|
+
print("Invalid API key")
|
|
232
|
+
except ValidationError as e:
|
|
233
|
+
print(f"Invalid parameters: {e.errors}")
|
|
234
|
+
except APIError as e:
|
|
235
|
+
print(f"API error: {e.status_code} - {e.message}")
|
|
236
|
+
except NutrientTimeoutError:
|
|
237
|
+
print("Request timed out")
|
|
238
|
+
except FileProcessingError as e:
|
|
239
|
+
print(f"File processing failed: {e}")
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Advanced Configuration
|
|
243
|
+
|
|
244
|
+
### Custom Timeout
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
# Set timeout to 10 minutes for large files
|
|
248
|
+
client = NutrientClient(api_key="your-api-key", timeout=600)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### Streaming Large Files
|
|
252
|
+
|
|
253
|
+
Files larger than 10MB are automatically streamed to avoid memory issues:
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
# This will stream the file instead of loading it into memory
|
|
257
|
+
client.flatten_annotations("large-document.pdf")
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Available Operations
|
|
261
|
+
|
|
262
|
+
### PDF Manipulation
|
|
263
|
+
- `merge_pdfs` - Merge multiple PDFs into one
|
|
264
|
+
- `rotate_pages` - Rotate PDF pages (all or specific pages)
|
|
265
|
+
- `flatten_annotations` - Flatten form fields and annotations
|
|
266
|
+
|
|
267
|
+
### PDF Enhancement
|
|
268
|
+
- `ocr_pdf` - Add searchable text layer (English and German)
|
|
269
|
+
- `watermark_pdf` - Add text or image watermarks
|
|
270
|
+
|
|
271
|
+
### PDF Security
|
|
272
|
+
- `apply_redactions` - Apply existing redaction annotations
|
|
273
|
+
|
|
274
|
+
### Builder API
|
|
275
|
+
The Builder API allows chaining multiple operations:
|
|
276
|
+
```python
|
|
277
|
+
client.build(input_file="document.pdf") \
|
|
278
|
+
.add_step("rotate-pages", {"degrees": 90}) \
|
|
279
|
+
.add_step("ocr-pdf", {"language": "english"}) \
|
|
280
|
+
.add_step("watermark-pdf", {"text": "DRAFT", "width": 200, "height": 100}) \
|
|
281
|
+
.execute(output_path="processed.pdf")
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
Note: See [SUPPORTED_OPERATIONS.md](SUPPORTED_OPERATIONS.md) for detailed documentation of all supported operations and their parameters.
|
|
285
|
+
|
|
286
|
+
## Development
|
|
287
|
+
|
|
288
|
+
### Setup
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
# Clone the repository
|
|
292
|
+
git clone https://github.com/jdrhyne/nutrient-dws-client-python.git
|
|
293
|
+
cd nutrient-dws-client-python
|
|
294
|
+
|
|
295
|
+
# Install in development mode
|
|
296
|
+
pip install -e ".[dev]"
|
|
297
|
+
|
|
298
|
+
# Run tests
|
|
299
|
+
pytest
|
|
300
|
+
|
|
301
|
+
# Run linting
|
|
302
|
+
ruff check .
|
|
303
|
+
|
|
304
|
+
# Run type checking
|
|
305
|
+
mypy src tests
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### Running Tests
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
# Run all tests
|
|
312
|
+
pytest
|
|
313
|
+
|
|
314
|
+
# Run with coverage
|
|
315
|
+
pytest --cov=nutrient_dws --cov-report=html
|
|
316
|
+
|
|
317
|
+
# Run specific test file
|
|
318
|
+
pytest tests/unit/test_client.py
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
## Contributing
|
|
322
|
+
|
|
323
|
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
|
324
|
+
|
|
325
|
+
1. Fork the repository
|
|
326
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
327
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
328
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
329
|
+
5. Open a Pull Request
|
|
330
|
+
|
|
331
|
+
## License
|
|
332
|
+
|
|
333
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
334
|
+
|
|
335
|
+
## Support
|
|
336
|
+
|
|
337
|
+
- 📧 Email: support@nutrient.io
|
|
338
|
+
- 📚 Documentation: https://www.nutrient.io/docs/
|
|
339
|
+
- 🐛 Issues: https://github.com/jdrhyne/nutrient-dws-client-python/issues
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
# Nutrient DWS Python Client
|
|
2
|
+
|
|
3
|
+
[Python 3.8+](https://www.python.org/downloads/)
|
|
4
|
+
[CI](https://github.com/jdrhyne/nutrient-dws-client-python/actions)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Code style: black](https://github.com/psf/black)
|
|
7
|
+
[PyPI](https://pypi.org/project/nutrient-dws/)
|
|
8
|
+
|
|
9
|
+
A Python client library for the [Nutrient Document Web Services (DWS) API](https://www.nutrient.io/). This library provides a Pythonic interface to interact with Nutrient's document processing services, supporting both Direct API calls and Builder API workflows.
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- 🚀 **Two API styles**: Direct API for single operations, Builder API for complex workflows
|
|
14
|
+
- 📄 **Comprehensive document tools**: Convert, merge, rotate, OCR, watermark, and more
|
|
15
|
+
- 🔄 **Automatic retries**: Built-in retry logic for transient failures
|
|
16
|
+
- 📁 **Flexible file handling**: Support for file paths, bytes, and file-like objects
|
|
17
|
+
- 🔒 **Type-safe**: Full type hints for better IDE support
|
|
18
|
+
- ⚡ **Streaming support**: Memory-efficient processing of large files
|
|
19
|
+
- 🧪 **Well-tested**: Comprehensive test suite with high coverage
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install nutrient-dws
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from nutrient_dws import NutrientClient
|
|
31
|
+
|
|
32
|
+
# Initialize the client
|
|
33
|
+
client = NutrientClient(api_key="your-api-key")
|
|
34
|
+
|
|
35
|
+
# Direct API - Flatten PDF annotations
|
|
36
|
+
client.flatten_annotations(
|
|
37
|
+
input_file="document.pdf",
|
|
38
|
+
output_path="flattened.pdf"
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Builder API - Chain multiple operations
|
|
42
|
+
client.build(input_file="document.pdf") \
|
|
43
|
+
.add_step("rotate-pages", {"degrees": 90}) \
|
|
44
|
+
.add_step("ocr-pdf", {"language": "english"}) \
|
|
45
|
+
.add_step("watermark-pdf", {"text": "CONFIDENTIAL", "width": 200, "height": 100}) \
|
|
46
|
+
.execute(output_path="processed.pdf")
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Authentication
|
|
50
|
+
|
|
51
|
+
The client supports API key authentication through multiple methods:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
# 1. Pass directly to client
|
|
55
|
+
client = NutrientClient(api_key="your-api-key")
|
|
56
|
+
|
|
57
|
+
# 2. Set environment variable
|
|
58
|
+
# export NUTRIENT_API_KEY=your-api-key
|
|
59
|
+
client = NutrientClient() # Will use env variable
|
|
60
|
+
|
|
61
|
+
# 3. Use context manager for automatic cleanup
|
|
62
|
+
with NutrientClient(api_key="your-api-key") as client:
|
|
63
|
+
client.convert_to_pdf("document.docx")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Direct API Examples
|
|
67
|
+
|
|
68
|
+
### Flatten Annotations
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
# Flatten all annotations and form fields
|
|
72
|
+
client.flatten_annotations(
|
|
73
|
+
input_file="form.pdf",
|
|
74
|
+
output_path="flattened.pdf"
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Merge PDFs
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# Merge multiple PDFs
|
|
82
|
+
client.merge_pdfs(
|
|
83
|
+
input_files=["doc1.pdf", "doc2.pdf", "doc3.pdf"],
|
|
84
|
+
output_path="merged.pdf"
|
|
85
|
+
)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### OCR PDF
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
# Add OCR layer to scanned PDF
|
|
92
|
+
client.ocr_pdf(
|
|
93
|
+
input_file="scanned.pdf",
|
|
94
|
+
output_path="searchable.pdf",
|
|
95
|
+
language="english"
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Rotate Pages
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
# Rotate all pages
|
|
103
|
+
client.rotate_pages(
|
|
104
|
+
input_file="document.pdf",
|
|
105
|
+
output_path="rotated.pdf",
|
|
106
|
+
degrees=180
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Rotate specific pages
|
|
110
|
+
client.rotate_pages(
|
|
111
|
+
input_file="document.pdf",
|
|
112
|
+
output_path="rotated.pdf",
|
|
113
|
+
degrees=90,
|
|
114
|
+
page_indexes=[0, 2, 4] # Pages 1, 3, and 5
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Watermark PDF
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
# Add text watermark (width/height required)
|
|
122
|
+
client.watermark_pdf(
|
|
123
|
+
input_file="document.pdf",
|
|
124
|
+
output_path="watermarked.pdf",
|
|
125
|
+
text="DRAFT",
|
|
126
|
+
width=200,
|
|
127
|
+
height=100,
|
|
128
|
+
opacity=0.5,
|
|
129
|
+
position="center"
|
|
130
|
+
)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Builder API Examples
|
|
134
|
+
|
|
135
|
+
The Builder API allows you to chain multiple operations in a single workflow:
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
# Complex document processing pipeline
|
|
139
|
+
result = client.build(input_file="raw-scan.pdf") \
|
|
140
|
+
.add_step("ocr-pdf", {"language": "english"}) \
|
|
141
|
+
.add_step("rotate-pages", {"degrees": -90, "page_indexes": [0]}) \
|
|
142
|
+
.add_step("watermark-pdf", {
|
|
143
|
+
"text": "PROCESSED",
|
|
144
|
+
"opacity": 0.3,
|
|
145
|
+
"position": "top-right"
|
|
146
|
+
}) \
|
|
147
|
+
.add_step("flatten-annotations") \
|
|
148
|
+
.set_output_options(
|
|
149
|
+
metadata={"title": "Processed Document", "author": "DWS Client"},
|
|
150
|
+
optimize=True
|
|
151
|
+
) \
|
|
152
|
+
.execute(output_path="final.pdf")
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## File Input Options
|
|
156
|
+
|
|
157
|
+
The library supports multiple ways to provide input files:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
# File path (string or Path object)
|
|
161
|
+
client.convert_to_pdf("document.docx")
|
|
162
|
+
client.convert_to_pdf(Path("document.docx"))  # requires: from pathlib import Path
|
|
163
|
+
|
|
164
|
+
# Bytes
|
|
165
|
+
with open("document.docx", "rb") as f:
|
|
166
|
+
file_bytes = f.read()
|
|
167
|
+
client.convert_to_pdf(file_bytes)
|
|
168
|
+
|
|
169
|
+
# File-like object
|
|
170
|
+
with open("document.docx", "rb") as f:
|
|
171
|
+
client.convert_to_pdf(f)
|
|
172
|
+
|
|
173
|
+
# URL (for supported operations)
|
|
174
|
+
client.import_from_url("https://example.com/document.pdf")
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Error Handling
|
|
178
|
+
|
|
179
|
+
The library provides specific exceptions for different error scenarios:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from nutrient_dws import (
|
|
183
|
+
NutrientError,
|
|
184
|
+
AuthenticationError,
|
|
185
|
+
APIError,
|
|
186
|
+
ValidationError,
|
|
187
|
+
NutrientTimeoutError,
|
|
188
|
+
FileProcessingError
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
try:
|
|
192
|
+
client.convert_to_pdf("document.docx")
|
|
193
|
+
except AuthenticationError:
|
|
194
|
+
print("Invalid API key")
|
|
195
|
+
except ValidationError as e:
|
|
196
|
+
print(f"Invalid parameters: {e.errors}")
|
|
197
|
+
except APIError as e:
|
|
198
|
+
print(f"API error: {e.status_code} - {e.message}")
|
|
199
|
+
except NutrientTimeoutError:
|
|
200
|
+
print("Request timed out")
|
|
201
|
+
except FileProcessingError as e:
|
|
202
|
+
print(f"File processing failed: {e}")
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Advanced Configuration
|
|
206
|
+
|
|
207
|
+
### Custom Timeout
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
# Set timeout to 10 minutes for large files
|
|
211
|
+
client = NutrientClient(api_key="your-api-key", timeout=600)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Streaming Large Files
|
|
215
|
+
|
|
216
|
+
Files larger than 10MB are automatically streamed to avoid memory issues:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
# This will stream the file instead of loading it into memory
|
|
220
|
+
client.flatten_annotations("large-document.pdf")
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## Available Operations
|
|
224
|
+
|
|
225
|
+
### PDF Manipulation
|
|
226
|
+
- `merge_pdfs` - Merge multiple PDFs into one
|
|
227
|
+
- `rotate_pages` - Rotate PDF pages (all or specific pages)
|
|
228
|
+
- `flatten_annotations` - Flatten form fields and annotations
|
|
229
|
+
|
|
230
|
+
### PDF Enhancement
|
|
231
|
+
- `ocr_pdf` - Add searchable text layer (English and German)
|
|
232
|
+
- `watermark_pdf` - Add text or image watermarks
|
|
233
|
+
|
|
234
|
+
### PDF Security
|
|
235
|
+
- `apply_redactions` - Apply existing redaction annotations
|
|
236
|
+
|
|
237
|
+
### Builder API
|
|
238
|
+
The Builder API allows chaining multiple operations:
|
|
239
|
+
```python
|
|
240
|
+
client.build(input_file="document.pdf") \
|
|
241
|
+
.add_step("rotate-pages", {"degrees": 90}) \
|
|
242
|
+
.add_step("ocr-pdf", {"language": "english"}) \
|
|
243
|
+
.add_step("watermark-pdf", {"text": "DRAFT", "width": 200, "height": 100}) \
|
|
244
|
+
.execute(output_path="processed.pdf")
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Note: See [SUPPORTED_OPERATIONS.md](SUPPORTED_OPERATIONS.md) for detailed documentation of all supported operations and their parameters.
|
|
248
|
+
|
|
249
|
+
## Development
|
|
250
|
+
|
|
251
|
+
### Setup
|
|
252
|
+
|
|
253
|
+
```bash
|
|
254
|
+
# Clone the repository
|
|
255
|
+
git clone https://github.com/jdrhyne/nutrient-dws-client-python.git
|
|
256
|
+
cd nutrient-dws-client-python
|
|
257
|
+
|
|
258
|
+
# Install in development mode
|
|
259
|
+
pip install -e ".[dev]"
|
|
260
|
+
|
|
261
|
+
# Run tests
|
|
262
|
+
pytest
|
|
263
|
+
|
|
264
|
+
# Run linting
|
|
265
|
+
ruff check .
|
|
266
|
+
|
|
267
|
+
# Run type checking
|
|
268
|
+
mypy src tests
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Running Tests
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
# Run all tests
|
|
275
|
+
pytest
|
|
276
|
+
|
|
277
|
+
# Run with coverage
|
|
278
|
+
pytest --cov=nutrient_dws --cov-report=html
|
|
279
|
+
|
|
280
|
+
# Run specific test file
|
|
281
|
+
pytest tests/unit/test_client.py
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
## Contributing
|
|
285
|
+
|
|
286
|
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
|
287
|
+
|
|
288
|
+
1. Fork the repository
|
|
289
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
290
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
291
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
292
|
+
5. Open a Pull Request
|
|
293
|
+
|
|
294
|
+
## License
|
|
295
|
+
|
|
296
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
297
|
+
|
|
298
|
+
## Support
|
|
299
|
+
|
|
300
|
+
- 📧 Email: support@nutrient.io
|
|
301
|
+
- 📚 Documentation: https://www.nutrient.io/docs/
|
|
302
|
+
- 🐛 Issues: https://github.com/jdrhyne/nutrient-dws-client-python/issues
|