pdfdancer-client-python 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdfdancer/__init__.py +40 -0
- pdfdancer/client_v1.py +675 -0
- pdfdancer/exceptions.py +57 -0
- pdfdancer/models.py +417 -0
- pdfdancer/paragraph_builder.py +267 -0
- pdfdancer_client_python-0.1.1.dist-info/METADATA +308 -0
- pdfdancer_client_python-0.1.1.dist-info/RECORD +9 -0
- pdfdancer_client_python-0.1.1.dist-info/WHEEL +5 -0
- pdfdancer_client_python-0.1.1.dist-info/top_level.txt +1 -0
pdfdancer/exceptions.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Exception classes for the PDFDancer Python client.
|
|
3
|
+
Mirrors the Java client exception hierarchy.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PdfDancerException(Exception):
    """
    Base exception for all PDFDancer client errors.
    Equivalent to runtime exceptions in the Java client.

    Args:
        message: Human-readable description of the failure.
        cause: Optional underlying exception. It is kept on ``self.cause``
            and, when given, also linked as ``__cause__`` so the original
            error is shown in tracebacks.
    """

    def __init__(self, message: str, cause: Optional[Exception] = None):
        super().__init__(message)
        self.cause = cause
        if cause is not None:
            # Same effect as ``raise ... from cause``: the traceback reports
            # the underlying error as the direct cause of this one.
            self.__cause__ = cause
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FontNotFoundException(PdfDancerException):
    """
    Raised when a required font is not found or not available.
    Counterpart of FontNotFoundException in the Java client.
    """

    def __init__(self, message: str):
        # The caller's text is prefixed so every font error reads the same.
        prefixed = f"Font not found: {message}"
        super().__init__(prefixed)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class HttpClientException(PdfDancerException):
    """
    Exception raised for HTTP client errors during API communication.
    Wraps requests exceptions and HTTP errors from the API.

    Args:
        message: Human-readable description of the failure.
        response: The HTTP response associated with the error, if any.
        cause: Optional underlying exception, forwarded to the base class.

    Attributes set here:
        response: The response passed in (may be ``None``).
        status_code: ``response.status_code`` when a response was given,
            otherwise ``None``.
    """

    def __init__(self, message: str, response: Optional[requests.Response] = None, cause: Optional[Exception] = None):
        super().__init__(message, cause)
        self.response = response
        # Compare against None explicitly: a requests.Response is falsy when
        # its status is 4xx/5xx (its truth value is ``response.ok``), which is
        # exactly the case where the status code matters most.
        self.status_code = response.status_code if response is not None else None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class SessionException(PdfDancerException):
    """
    Raised for session-related errors.
    Signals that session creation failed or that the session is invalid.
    """
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ValidationException(PdfDancerException):
    """
    Raised for input validation errors.
    Counterpart of IllegalArgumentException in the Java client.
    """
|
pdfdancer/models.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model classes for the PDFDancer Python client.
|
|
3
|
+
Closely mirrors the Java model classes with Python conventions.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Optional, List, Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ObjectType(Enum):
    """
    Enumeration of object types, matching the Java ObjectType.
    Each member's value is its own name as a string.
    """

    IMAGE = "IMAGE"
    FORM = "FORM"
    PATH = "PATH"
    PARAGRAPH = "PARAGRAPH"
    TEXT_LINE = "TEXT_LINE"
    PAGE = "PAGE"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PositionMode(Enum):
    """How position matching is performed when searching for objects."""

    # Objects that intersect with the specified position area
    INTERSECT = "INTERSECT"
    # Objects completely contained within the specified position area
    CONTAINS = "CONTAINS"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ShapeType(Enum):
    """Geometric shape type used for position specification."""

    # Single point coordinate
    POINT = "POINT"
    # Linear shape between two points
    LINE = "LINE"
    # Circular area with radius
    CIRCLE = "CIRCLE"
    # Rectangular area with width and height
    RECT = "RECT"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class Point:
    """A 2D point described by its x and y coordinates."""

    x: float  # horizontal coordinate
    y: float  # vertical coordinate
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class BoundingRect:
    """
    A bounding rectangle given by a position (x, y) and its dimensions.
    Matches the Java BoundingRect class.
    """

    x: float
    y: float
    width: float
    height: float

    def get_x(self) -> float:
        """Returns the x value of this rectangle."""
        return self.x

    def get_y(self) -> float:
        """Returns the y value of this rectangle."""
        return self.y

    def get_width(self) -> float:
        """Returns the width of this rectangle."""
        return self.width

    def get_height(self) -> float:
        """Returns the height of this rectangle."""
        return self.height
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class Position:
    """
    Spatial positioning and location information for PDF objects.
    Closely mirrors the Java Position class with Python conventions.
    """

    page_index: Optional[int] = None
    shape: Optional[ShapeType] = None
    mode: Optional[PositionMode] = None
    bounding_rect: Optional[BoundingRect] = None
    text_starts_with: Optional[str] = None

    @staticmethod
    def from_page_index(page_index: int) -> 'Position':
        """
        Builds a position that refers to an entire page (mode CONTAINS).
        Equivalent to Position.fromPageIndex() in Java.
        """
        return Position(mode=PositionMode.CONTAINS, page_index=page_index)

    @staticmethod
    def on_page_coordinates(page_index: int, x: float, y: float) -> 'Position':
        """
        Builds a position for specific coordinates on a page.
        Equivalent to Position.onPageCoordinates() in Java.
        """
        located = Position.from_page_index(page_index)
        located.set_point(Point(x, y))
        return located

    def set_point(self, point: Point) -> None:
        """
        Turns this position into a single point location: mode becomes
        CONTAINS, shape becomes POINT and the bounding rectangle is a
        zero-sized rectangle at the point.
        Equivalent to Position.set() in Java.
        """
        self.mode = PositionMode.CONTAINS
        self.shape = ShapeType.POINT
        self.bounding_rect = BoundingRect(point.x, point.y, 0, 0)

    def move_x(self, x_offset: float) -> 'Position':
        """Shifts the position horizontally by the offset; returns self."""
        if not self.bounding_rect:
            # Nothing to move without a bounding rectangle.
            return self
        self.set_point(Point(self.get_x() + x_offset, self.get_y()))
        return self

    def move_y(self, y_offset: float) -> 'Position':
        """Shifts the position vertically by the offset; returns self."""
        if not self.bounding_rect:
            # Nothing to move without a bounding rectangle.
            return self
        self.set_point(Point(self.get_x(), self.get_y() + y_offset))
        return self

    def get_x(self) -> Optional[float]:
        """Returns the X coordinate, or None without a bounding rectangle."""
        if self.bounding_rect:
            return self.bounding_rect.get_x()
        return None

    def get_y(self) -> Optional[float]:
        """Returns the Y coordinate, or None without a bounding rectangle."""
        if self.bounding_rect:
            return self.bounding_rect.get_y()
        return None

    def copy(self) -> 'Position':
        """Creates a copy of this position with its own BoundingRect."""
        rect_clone = None
        if self.bounding_rect:
            source = self.bounding_rect
            rect_clone = BoundingRect(source.x, source.y, source.width, source.height)

        return Position(
            page_index=self.page_index,
            shape=self.shape,
            mode=self.mode,
            bounding_rect=rect_clone,
            text_starts_with=self.text_starts_with,
        )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass
class ObjectRef:
    """
    Lightweight reference to a PDF object carrying its identity, position
    and type. Mirrors the Java ObjectRef class.
    """

    internal_id: str
    position: Position
    type: ObjectType

    def get_internal_id(self) -> str:
        """Returns the internal identifier of the referenced object."""
        return self.internal_id

    def get_position(self) -> Position:
        """Returns the position currently stored for the referenced object."""
        return self.position

    def set_position(self, position: Position) -> None:
        """Replaces the position stored for the referenced object."""
        self.position = position

    def get_type(self) -> ObjectType:
        """Returns the type classification of the referenced object."""
        return self.type

    def to_dict(self) -> dict:
        """Builds the dictionary used for JSON serialization."""
        payload = {"internalId": self.internal_id}
        payload["position"] = FindRequest._position_to_dict(self.position)
        payload["type"] = self.type.value
        return payload
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@dataclass
class Color:
    """An RGB color with an optional alpha channel; each value is 0-255."""

    r: int
    g: int
    b: int
    a: int = 255  # alpha channel; the default means fully opaque

    def __post_init__(self):
        # Range validation similar to the Java client: reject the first
        # component that falls outside 0..255.
        for component in (self.r, self.g, self.b, self.a):
            if 0 <= component <= 255:
                continue
            raise ValueError(f"Color component must be between 0 and 255, got {component}")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@dataclass
class Font:
    """A font described by its name and size."""

    name: str
    size: float

    def __post_init__(self):
        # Zero and negative sizes are rejected.
        size_is_invalid = self.size <= 0
        if size_is_invalid:
            raise ValueError(f"Font size must be positive, got {self.size}")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@dataclass
class Image:
    """
    An image object in a PDF document.
    Matches the structure of the Java Image class.
    """

    position: Optional[Position] = None
    format: Optional[str] = None
    width: Optional[float] = None
    height: Optional[float] = None
    data: Optional[bytes] = None  # raw image bytes

    def get_position(self) -> Optional[Position]:
        """Returns the position stored on this image (may be None)."""
        return self.position

    def set_position(self, position: Position) -> None:
        """Stores the given position on this image."""
        self.position = position
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@dataclass
class Paragraph:
    """
    A paragraph of text in a PDF document.
    Its structure mirrors the Java Paragraph class.
    """

    position: Optional[Position] = None
    text_lines: Optional[List[str]] = None
    font: Optional[Font] = None
    color: Optional[Color] = None
    line_spacing: float = 1.2

    def get_position(self) -> Optional[Position]:
        """Returns the position stored on this paragraph (may be None)."""
        return self.position

    def set_position(self, position: Position) -> None:
        """Stores the given position on this paragraph."""
        self.position = position
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Request classes for API communication
|
|
245
|
+
@dataclass
class FindRequest:
    """Request object for find operations."""

    object_type: Optional[ObjectType]
    position: Optional[Position]
    hint: Optional[str] = None

    def to_dict(self) -> dict:
        """
        Builds the dictionary used for JSON serialization.
        A missing object type or position is emitted as None.
        """
        type_value = self.object_type.value if self.object_type else None
        position_value = self._position_to_dict(self.position) if self.position else None
        return {
            "objectType": type_value,
            "position": position_value,
            "hint": self.hint,
        }

    @staticmethod
    def _position_to_dict(position: Position) -> dict:
        """
        Converts a Position into a dictionary for JSON serialization.
        "pageIndex" and "textStartsWith" are always present; "shape", "mode"
        and "boundingRect" are added only when set on the position.
        """
        serialized = {
            "pageIndex": position.page_index,
            "textStartsWith": position.text_starts_with,
        }
        if position.shape:
            serialized["shape"] = position.shape.value
        if position.mode:
            serialized["mode"] = position.mode.value
        if position.bounding_rect:
            rect = position.bounding_rect
            serialized["boundingRect"] = {
                "x": rect.x,
                "y": rect.y,
                "width": rect.width,
                "height": rect.height,
            }
        return serialized
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
@dataclass
class DeleteRequest:
    """Request object for delete operations."""

    object_ref: ObjectRef  # reference to the object to delete

    def to_dict(self) -> dict:
        """
        Convert to dictionary for JSON serialization.

        Returns:
            ``{"objectRef": {...}}`` where the inner dictionary is produced
            by ``ObjectRef.to_dict()`` (keys ``internalId``, ``position``,
            ``type``).
        """
        # Reuse ObjectRef's own serializer instead of repeating its fields
        # here, so the wire format is defined in one place.
        return {"objectRef": self.object_ref.to_dict()}
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
@dataclass
class MoveRequest:
    """Request object for move operations."""

    object_ref: ObjectRef  # reference to the object to move
    position: Position  # target position for the object

    def to_dict(self) -> dict:
        """
        Convert to dictionary for JSON serialization.

        Returns:
            A dictionary with ``objectRef`` (from ``ObjectRef.to_dict()``)
            and ``newPosition`` (the serialized target position).
        """
        # Server API expects the new coordinates under 'newPosition' (see Java MoveRequest)
        return {
            # Reuse ObjectRef's own serializer instead of repeating its fields.
            "objectRef": self.object_ref.to_dict(),
            "newPosition": FindRequest._position_to_dict(self.position),
        }
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
@dataclass
class AddRequest:
    """Request object for add operations."""

    pdf_object: Any  # Can be Image, Paragraph, etc.

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization matching server API.

        Server expects an AddRequest with a nested 'object' containing the PDFObject
        (with a 'type' discriminator), mirroring Java AddRequest(PDFObject object).

        Raises:
            ValueError: If ``pdf_object`` is neither an Image nor a Paragraph.
        """
        return {"object": self._object_to_dict(self.pdf_object)}

    @staticmethod
    def _object_to_dict(obj: Any) -> dict:
        """Convert PDF object to dictionary for JSON serialization.

        Static because no instance state is read; calling it on an instance
        (``AddRequest(None)._object_to_dict(x)``) keeps working.

        Raises:
            ValueError: If ``obj`` is neither an Image nor a Paragraph.
        """
        if isinstance(obj, Image):
            return AddRequest._image_to_dict(obj)
        if isinstance(obj, Paragraph):
            return AddRequest._paragraph_to_dict(obj)
        raise ValueError(f"Unsupported object type: {type(obj)}")

    @staticmethod
    def _position_or_none(position: Any) -> Optional[dict]:
        """Serialize a position, or return None when no position is set."""
        return FindRequest._position_to_dict(position) if position else None

    @staticmethod
    def _image_to_dict(image: Any) -> dict:
        """Serialize an Image as a dictionary with type 'IMAGE'."""
        import base64

        # 'size' is only sent when both dimensions are known.
        size = None
        if image.width is not None and image.height is not None:
            size = {"width": image.width, "height": image.height}

        data_b64 = None
        if image.data is not None:
            # Java byte[] expects base64 string in JSON
            data_b64 = base64.b64encode(image.data).decode("ascii")

        return {
            "type": "IMAGE",
            "position": AddRequest._position_or_none(image.position),
            "format": image.format,
            "size": size,
            "data": data_b64,
        }

    @staticmethod
    def _paragraph_to_dict(paragraph: Any) -> dict:
        """Serialize a Paragraph as a dictionary with type 'PARAGRAPH'."""
        font = paragraph.font
        color = paragraph.color
        position = paragraph.position

        def font_dict() -> Optional[dict]:
            return {"name": font.name, "size": font.size} if font else None

        def color_dict() -> Optional[dict]:
            # NOTE(review): the alpha channel (Color.a) is not serialized;
            # confirm whether the server's color model expects it.
            return {"r": color.r, "g": color.g, "b": color.b} if color else None

        # Build lines -> List<TextLine> with minimal structure required by server.
        # Fresh dictionaries are created for every line so that no two entries
        # share the same object.
        lines = []
        for line in paragraph.text_lines or []:
            text_line = {
                "textElements": [
                    {
                        "text": line,
                        "font": font_dict(),
                        "color": color_dict(),
                        "position": AddRequest._position_or_none(position),
                    }
                ]
            }
            # TextLine has color and position
            if color:
                text_line["color"] = color_dict()
            if position:
                text_line["position"] = FindRequest._position_to_dict(position)
            lines.append(text_line)

        # Server expects a list
        line_spacings = None
        if paragraph.line_spacing is not None:
            line_spacings = [paragraph.line_spacing]

        return {
            "type": "PARAGRAPH",
            "position": AddRequest._position_or_none(position),
            "lines": lines,
            "lineSpacings": line_spacings,
            "font": font_dict(),
        }
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
@dataclass
class ModifyRequest:
    """Request object for modify operations."""

    object_ref: ObjectRef  # reference to the object being modified
    new_object: Any  # replacement object, serialized the same way as in AddRequest

    def to_dict(self) -> dict:
        """
        Convert to dictionary for JSON serialization.

        Returns:
            A dictionary with ``ref`` (from ``ObjectRef.to_dict()``) and
            ``newObject`` (the serialized replacement object).

        Raises:
            ValueError: If ``new_object`` is of a type that
                ``AddRequest._object_to_dict`` does not support.
        """
        return {
            # Reuse ObjectRef's own serializer instead of repeating its fields.
            "ref": self.object_ref.to_dict(),
            "newObject": AddRequest(None)._object_to_dict(self.new_object),
        }
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
@dataclass
class ModifyTextRequest:
    """Request object for text modification operations."""

    object_ref: ObjectRef  # reference to the text object being modified
    new_text: str  # replacement text, sent under the 'newTextLine' key

    def to_dict(self) -> dict:
        """
        Convert to dictionary for JSON serialization.

        Returns:
            A dictionary with ``ref`` (from ``ObjectRef.to_dict()``) and
            ``newTextLine`` (the replacement text).
        """
        return {
            # Reuse ObjectRef's own serializer instead of repeating its fields.
            "ref": self.object_ref.to_dict(),
            "newTextLine": self.new_text,
        }
|