pdfdancer-client-python 0.1.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdfdancer/__init__.py +5 -3
- pdfdancer/image_builder.py +30 -0
- pdfdancer/models.py +58 -6
- pdfdancer/paragraph_builder.py +15 -12
- pdfdancer/{client_v1.py → pdfdancer_v1.py} +260 -56
- pdfdancer/types.py +263 -0
- {pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.3.dist-info}/METADATA +39 -37
- pdfdancer_client_python-0.2.3.dist-info/RECORD +11 -0
- pdfdancer_client_python-0.1.2.dist-info/RECORD +0 -9
- {pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.3.dist-info}/WHEEL +0 -0
- {pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.3.dist-info}/top_level.txt +0 -0
pdfdancer/__init__.py
CHANGED
|
@@ -6,20 +6,19 @@ Provides a clean, Pythonic interface for PDF operations that closely
|
|
|
6
6
|
mirrors the Java client structure and functionality.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
from .client_v1 import ClientV1
|
|
10
9
|
from .exceptions import (
|
|
11
10
|
PdfDancerException, FontNotFoundException, ValidationException,
|
|
12
11
|
HttpClientException, SessionException
|
|
13
12
|
)
|
|
14
13
|
from .models import (
|
|
15
|
-
ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph,
|
|
14
|
+
ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph, FormFieldRef,
|
|
16
15
|
PositionMode, ShapeType, Point
|
|
17
16
|
)
|
|
18
17
|
from .paragraph_builder import ParagraphBuilder
|
|
19
18
|
|
|
20
19
|
__version__ = "1.0.0"
|
|
21
20
|
__all__ = [
|
|
22
|
-
"
|
|
21
|
+
"PDFDancer",
|
|
23
22
|
"ParagraphBuilder",
|
|
24
23
|
"ObjectRef",
|
|
25
24
|
"Position",
|
|
@@ -29,6 +28,7 @@ __all__ = [
|
|
|
29
28
|
"Image",
|
|
30
29
|
"BoundingRect",
|
|
31
30
|
"Paragraph",
|
|
31
|
+
"FormFieldRef",
|
|
32
32
|
"PositionMode",
|
|
33
33
|
"ShapeType",
|
|
34
34
|
"Point",
|
|
@@ -38,3 +38,5 @@ __all__ = [
|
|
|
38
38
|
"HttpClientException",
|
|
39
39
|
"SessionException"
|
|
40
40
|
]
|
|
41
|
+
|
|
42
|
+
from .pdfdancer_v1 import PDFDancer
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from pdfdancer import ValidationException, Image, Position
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ImageBuilder:
|
|
7
|
+
|
|
8
|
+
def __init__(self, client: 'PDFDancer'):
|
|
9
|
+
"""
|
|
10
|
+
Initialize the image builder with a client reference.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
client: The PDFDancer instance used to add the image to the document
|
|
14
|
+
"""
|
|
15
|
+
if client is None:
|
|
16
|
+
raise ValidationException("Client cannot be null")
|
|
17
|
+
|
|
18
|
+
self._client = client
|
|
19
|
+
self._image = Image()
|
|
20
|
+
|
|
21
|
+
def from_file(self, img_path: Path) -> 'ImageBuilder':
|
|
22
|
+
self._image.data = img_path.read_bytes()
|
|
23
|
+
return self
|
|
24
|
+
|
|
25
|
+
def at(self, page, x, y) -> 'ImageBuilder':
|
|
26
|
+
self._image.position = Position.at_page_coordinates(page, x, y)
|
|
27
|
+
return self
|
|
28
|
+
|
|
29
|
+
def add(self) -> bool:
|
|
30
|
+
return self._client._add_image(self._image, self._image.position)
|
pdfdancer/models.py
CHANGED
|
@@ -10,12 +10,16 @@ from typing import Optional, List, Any
|
|
|
10
10
|
|
|
11
11
|
class ObjectType(Enum):
|
|
12
12
|
"""Object type enumeration matching the Java ObjectType."""
|
|
13
|
+
FORM_FIELD = "FORM_FIELD"
|
|
13
14
|
IMAGE = "IMAGE"
|
|
14
15
|
FORM_X_OBJECT = "FORM_X_OBJECT"
|
|
15
16
|
PATH = "PATH"
|
|
16
17
|
PARAGRAPH = "PARAGRAPH"
|
|
17
18
|
TEXT_LINE = "TEXT_LINE"
|
|
18
19
|
PAGE = "PAGE"
|
|
20
|
+
TEXT_FIELD = "TEXT_FIELD"
|
|
21
|
+
CHECK_BOX = "CHECK_BOX"
|
|
22
|
+
RADIO_BUTTON = "RADIO_BUTTON"
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
class PositionMode(Enum):
|
|
@@ -74,6 +78,7 @@ class Position:
|
|
|
74
78
|
mode: Optional[PositionMode] = None
|
|
75
79
|
bounding_rect: Optional[BoundingRect] = None
|
|
76
80
|
text_starts_with: Optional[str] = None
|
|
81
|
+
name: Optional[str] = None
|
|
77
82
|
|
|
78
83
|
@staticmethod
|
|
79
84
|
def at_page(page_index: int) -> 'Position':
|
|
@@ -93,7 +98,17 @@ class Position:
|
|
|
93
98
|
position.at_coordinates(Point(x, y))
|
|
94
99
|
return position
|
|
95
100
|
|
|
96
|
-
|
|
101
|
+
@staticmethod
|
|
102
|
+
def by_name(name: str) -> 'Position':
|
|
103
|
+
"""
|
|
104
|
+
Creates a position specification for finding objects by name.
|
|
105
|
+
Equivalent to Position.byName() in Java.
|
|
106
|
+
"""
|
|
107
|
+
position = Position()
|
|
108
|
+
position.name = name
|
|
109
|
+
return position
|
|
110
|
+
|
|
111
|
+
def at_coordinates(self, point: Point) -> 'Position':
|
|
97
112
|
"""
|
|
98
113
|
Sets the position to a specific point location.
|
|
99
114
|
Equivalent to Position.set() in Java.
|
|
@@ -101,7 +116,7 @@ class Position:
|
|
|
101
116
|
self.mode = PositionMode.CONTAINS
|
|
102
117
|
self.shape = ShapeType.POINT
|
|
103
118
|
self.bounding_rect = BoundingRect(point.x, point.y, 0, 0)
|
|
104
|
-
return self
|
|
119
|
+
return self
|
|
105
120
|
|
|
106
121
|
def with_text_starts(self, text: str) -> 'Position':
|
|
107
122
|
self.text_starts_with = text
|
|
@@ -110,20 +125,20 @@ class Position:
|
|
|
110
125
|
def move_x(self, x_offset: float) -> 'Position':
|
|
111
126
|
"""Move the position horizontally by the specified offset."""
|
|
112
127
|
if self.bounding_rect:
|
|
113
|
-
self.at_coordinates(Point(self.
|
|
128
|
+
self.at_coordinates(Point(self.x() + x_offset, self.y()))
|
|
114
129
|
return self
|
|
115
130
|
|
|
116
131
|
def move_y(self, y_offset: float) -> 'Position':
|
|
117
132
|
"""Move the position vertically by the specified offset."""
|
|
118
133
|
if self.bounding_rect:
|
|
119
|
-
self.at_coordinates(Point(self.
|
|
134
|
+
self.at_coordinates(Point(self.x(), self.y() + y_offset))
|
|
120
135
|
return self
|
|
121
136
|
|
|
122
|
-
def
|
|
137
|
+
def x(self) -> Optional[float]:
|
|
123
138
|
"""Returns the X coordinate of this position."""
|
|
124
139
|
return self.bounding_rect.get_x() if self.bounding_rect else None
|
|
125
140
|
|
|
126
|
-
def
|
|
141
|
+
def y(self) -> Optional[float]:
|
|
127
142
|
"""Returns the Y coordinate of this position."""
|
|
128
143
|
return self.bounding_rect.get_y() if self.bounding_rect else None
|
|
129
144
|
|
|
@@ -269,6 +284,8 @@ class FindRequest:
|
|
|
269
284
|
"pageIndex": position.page_index,
|
|
270
285
|
"textStartsWith": position.text_starts_with
|
|
271
286
|
}
|
|
287
|
+
if position.name:
|
|
288
|
+
result["name"] = position.name
|
|
272
289
|
if position.shape:
|
|
273
290
|
result["shape"] = position.shape.value
|
|
274
291
|
if position.mode:
|
|
@@ -420,3 +437,38 @@ class ModifyTextRequest:
|
|
|
420
437
|
},
|
|
421
438
|
"newTextLine": self.new_text
|
|
422
439
|
}
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
@dataclass
|
|
443
|
+
class ChangeFormFieldRequest:
|
|
444
|
+
object_ref: ObjectRef
|
|
445
|
+
value: str
|
|
446
|
+
|
|
447
|
+
def to_dict(self) -> dict:
|
|
448
|
+
"""Convert to dictionary for JSON serialization."""
|
|
449
|
+
return {
|
|
450
|
+
"ref": {
|
|
451
|
+
"internalId": self.object_ref.internal_id,
|
|
452
|
+
"position": FindRequest._position_to_dict(self.object_ref.position),
|
|
453
|
+
"type": self.object_ref.type.value
|
|
454
|
+
},
|
|
455
|
+
"value": self.value
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
@dataclass
|
|
460
|
+
class FormFieldRef(ObjectRef):
|
|
461
|
+
"""
|
|
462
|
+
Represents a form field reference with additional form-specific properties.
|
|
463
|
+
Extends ObjectRef to include form field name and value.
|
|
464
|
+
"""
|
|
465
|
+
name: Optional[str] = None
|
|
466
|
+
value: Optional[str] = None
|
|
467
|
+
|
|
468
|
+
def get_name(self) -> Optional[str]:
|
|
469
|
+
"""Get the form field name."""
|
|
470
|
+
return self.name
|
|
471
|
+
|
|
472
|
+
def get_value(self) -> Optional[str]:
|
|
473
|
+
"""Get the form field value."""
|
|
474
|
+
return self.value
|
pdfdancer/paragraph_builder.py
CHANGED
|
@@ -4,14 +4,11 @@ Closely mirrors the Java ParagraphBuilder class with Python conventions.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Optional, Union
|
|
7
|
+
from typing import Optional, Union
|
|
8
8
|
|
|
9
9
|
from .exceptions import ValidationException
|
|
10
10
|
from .models import Paragraph, Font, Color, Position
|
|
11
11
|
|
|
12
|
-
if TYPE_CHECKING:
|
|
13
|
-
from .client_v1 import ClientV1
|
|
14
|
-
|
|
15
12
|
|
|
16
13
|
class ParagraphBuilder:
|
|
17
14
|
"""
|
|
@@ -19,7 +16,7 @@ class ParagraphBuilder:
|
|
|
19
16
|
Mirrors the Java ParagraphBuilder class exactly.
|
|
20
17
|
"""
|
|
21
18
|
|
|
22
|
-
def __init__(self, client: '
|
|
19
|
+
def __init__(self, client: 'PDFDancer'):
|
|
23
20
|
"""
|
|
24
21
|
Initialize the paragraph builder with a client reference.
|
|
25
22
|
|
|
@@ -37,7 +34,7 @@ class ParagraphBuilder:
|
|
|
37
34
|
self._ttf_file: Optional[Path] = None
|
|
38
35
|
self._font: Optional[Font] = None
|
|
39
36
|
|
|
40
|
-
def
|
|
37
|
+
def text(self, text: str, color: Optional[Color] = None) -> 'ParagraphBuilder':
|
|
41
38
|
"""
|
|
42
39
|
Set the text content for the paragraph.
|
|
43
40
|
Equivalent to fromString() methods in Java ParagraphBuilder.
|
|
@@ -63,13 +60,14 @@ class ParagraphBuilder:
|
|
|
63
60
|
|
|
64
61
|
return self
|
|
65
62
|
|
|
66
|
-
def
|
|
63
|
+
def font(self, font_name: str, font_size: float) -> 'ParagraphBuilder':
|
|
67
64
|
"""
|
|
68
65
|
Set the font for the paragraph from a font name and a font size.
|
|
69
66
|
Equivalent to withFont(Font) in Java ParagraphBuilder.
|
|
70
67
|
|
|
71
68
|
Args:
|
|
72
|
-
|
|
69
|
+
font_name: Name of the font to use
|
|
70
|
+
font_size: The font size
|
|
73
71
|
|
|
74
72
|
Returns:
|
|
75
73
|
Self for method chaining
|
|
@@ -77,6 +75,7 @@ class ParagraphBuilder:
|
|
|
77
75
|
Raises:
|
|
78
76
|
ValidationException: If font is None
|
|
79
77
|
"""
|
|
78
|
+
font = Font(font_name, font_size)
|
|
80
79
|
if font is None:
|
|
81
80
|
raise ValidationException("Font cannot be null")
|
|
82
81
|
|
|
@@ -84,7 +83,7 @@ class ParagraphBuilder:
|
|
|
84
83
|
self._ttf_file = None # Clear TTF file when using existing font
|
|
85
84
|
return self
|
|
86
85
|
|
|
87
|
-
def
|
|
86
|
+
def font_file(self, ttf_file: Union[Path, str], font_size: float) -> 'ParagraphBuilder':
|
|
88
87
|
"""
|
|
89
88
|
Set the font for the paragraph using a TTF file.
|
|
90
89
|
Equivalent to withFont(File, double) in Java ParagraphBuilder.
|
|
@@ -125,7 +124,7 @@ class ParagraphBuilder:
|
|
|
125
124
|
self._font = self._register_ttf(ttf_path, font_size)
|
|
126
125
|
return self
|
|
127
126
|
|
|
128
|
-
def
|
|
127
|
+
def line_spacing(self, spacing: float) -> 'ParagraphBuilder':
|
|
129
128
|
"""
|
|
130
129
|
Set the line spacing for the paragraph.
|
|
131
130
|
Equivalent to withLineSpacing() in Java ParagraphBuilder.
|
|
@@ -145,7 +144,7 @@ class ParagraphBuilder:
|
|
|
145
144
|
self._line_spacing = spacing
|
|
146
145
|
return self
|
|
147
146
|
|
|
148
|
-
def
|
|
147
|
+
def color(self, color: Color) -> 'ParagraphBuilder':
|
|
149
148
|
"""
|
|
150
149
|
Set the text color for the paragraph.
|
|
151
150
|
Equivalent to withColor() in Java ParagraphBuilder.
|
|
@@ -165,7 +164,7 @@ class ParagraphBuilder:
|
|
|
165
164
|
self._text_color = color
|
|
166
165
|
return self
|
|
167
166
|
|
|
168
|
-
def
|
|
167
|
+
def at(self, page_index: int, x: float, y: float) -> 'ParagraphBuilder':
|
|
169
168
|
"""
|
|
170
169
|
Set the position for the paragraph.
|
|
171
170
|
Equivalent to withPosition() in Java ParagraphBuilder.
|
|
@@ -179,6 +178,7 @@ class ParagraphBuilder:
|
|
|
179
178
|
Raises:
|
|
180
179
|
ValidationException: If position is None
|
|
181
180
|
"""
|
|
181
|
+
position = Position.at_page_coordinates(page_index, x, y)
|
|
182
182
|
if position is None:
|
|
183
183
|
raise ValidationException("Position cannot be null")
|
|
184
184
|
|
|
@@ -265,3 +265,6 @@ class ParagraphBuilder:
|
|
|
265
265
|
lines = ['']
|
|
266
266
|
|
|
267
267
|
return lines
|
|
268
|
+
|
|
269
|
+
def add(self):
|
|
270
|
+
self._client._add_paragraph(self.build())
|
|
@@ -6,11 +6,13 @@ Provides session-based PDF manipulation operations with strict validation.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import json
|
|
9
|
+
import os
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from typing import List, Optional, Union, BinaryIO
|
|
11
12
|
|
|
12
13
|
import requests
|
|
13
14
|
|
|
15
|
+
from . import ParagraphBuilder
|
|
14
16
|
from .exceptions import (
|
|
15
17
|
PdfDancerException,
|
|
16
18
|
FontNotFoundException,
|
|
@@ -18,24 +20,133 @@ from .exceptions import (
|
|
|
18
20
|
SessionException,
|
|
19
21
|
ValidationException
|
|
20
22
|
)
|
|
23
|
+
from .image_builder import ImageBuilder
|
|
21
24
|
from .models import (
|
|
22
|
-
ObjectRef, Position, ObjectType, Font, Image, Paragraph,
|
|
23
|
-
FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest,
|
|
25
|
+
ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef,
|
|
26
|
+
FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest, ChangeFormFieldRequest,
|
|
27
|
+
ShapeType, PositionMode
|
|
24
28
|
)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
29
|
+
from .types import PathObject, ParagraphObject, TextLineObject, ImageObject, FormObject, FormFieldObject, PageObject
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class PageClient:
|
|
33
|
+
def __init__(self, page_index: int, root: "PDFDancer"):
|
|
34
|
+
self.page_index = page_index
|
|
35
|
+
self.root = root
|
|
36
|
+
|
|
37
|
+
def select_paths_at(self, x: float, y: float) -> List[PathObject]:
|
|
38
|
+
# noinspection PyProtectedMember
|
|
39
|
+
return self.root._to_path_objects(self.root._find_paths(Position.at_page_coordinates(self.page_index, x, y)))
|
|
40
|
+
|
|
41
|
+
def select_paragraphs(self) -> List[ParagraphObject]:
|
|
42
|
+
# noinspection PyProtectedMember
|
|
43
|
+
return self.root._to_paragraph_objects(self.root._find_paragraphs(Position.at_page(self.page_index)))
|
|
44
|
+
|
|
45
|
+
def select_paragraphs_starting_with(self, text: str) -> List[ParagraphObject]:
|
|
46
|
+
position = Position.at_page(self.page_index)
|
|
47
|
+
position.with_text_starts(text)
|
|
48
|
+
# noinspection PyProtectedMember
|
|
49
|
+
return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
|
|
50
|
+
|
|
51
|
+
def select_paragraphs_at(self, x: float, y: float) -> List[ParagraphObject]:
|
|
52
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
53
|
+
# noinspection PyProtectedMember
|
|
54
|
+
return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
|
|
55
|
+
|
|
56
|
+
def select_text_lines(self) -> List[TextLineObject]:
|
|
57
|
+
position = Position.at_page(self.page_index)
|
|
58
|
+
# noinspection PyProtectedMember
|
|
59
|
+
return self.root._to_textline_objects(self.root._find_text_lines(position))
|
|
60
|
+
|
|
61
|
+
def select_text_lines_starting_with(self, text: str) -> List[TextLineObject]:
|
|
62
|
+
position = Position.at_page(self.page_index)
|
|
63
|
+
position.with_text_starts(text)
|
|
64
|
+
# noinspection PyProtectedMember
|
|
65
|
+
return self.root._to_textline_objects(self.root._find_text_lines(position))
|
|
66
|
+
|
|
67
|
+
def select_images(self) -> List[ImageObject]:
|
|
68
|
+
# noinspection PyProtectedMember
|
|
69
|
+
return self.root._to_image_objects(self.root._find_images(Position.at_page(self.page_index)))
|
|
70
|
+
|
|
71
|
+
def select_images_at(self, x: float, y: float) -> List[ImageObject]:
|
|
72
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
73
|
+
# noinspection PyProtectedMember
|
|
74
|
+
return self.root._to_image_objects(self.root._find_images(position))
|
|
75
|
+
|
|
76
|
+
def select_forms(self) -> List[FormObject]:
|
|
77
|
+
position = Position.at_page(self.page_index)
|
|
78
|
+
# noinspection PyProtectedMember
|
|
79
|
+
return self.root._to_form_objects(self.root._find_form_x_objects(position))
|
|
80
|
+
|
|
81
|
+
def select_forms_at(self, x: float, y: float) -> List[FormObject]:
|
|
82
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
83
|
+
# noinspection PyProtectedMember
|
|
84
|
+
return self.root._to_form_objects(self.root._find_form_x_objects(position))
|
|
85
|
+
|
|
86
|
+
def select_form_fields(self) -> List[FormFieldObject]:
|
|
87
|
+
position = Position.at_page(self.page_index)
|
|
88
|
+
# noinspection PyProtectedMember
|
|
89
|
+
return self.root._to_form_field_objects(self.root._find_form_fields(position))
|
|
90
|
+
|
|
91
|
+
def select_form_fields_by_name(self, field_name: str) -> List[FormFieldObject]:
|
|
92
|
+
pos = Position.by_name(field_name)
|
|
93
|
+
pos.page_index = self.page_index
|
|
94
|
+
# noinspection PyProtectedMember
|
|
95
|
+
return self.root._to_form_field_objects(self.root._find_form_fields(pos))
|
|
96
|
+
|
|
97
|
+
def select_form_fields_at(self, x: float, y: float) -> List[FormFieldObject]:
|
|
98
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
99
|
+
# noinspection PyProtectedMember
|
|
100
|
+
return self.root._to_form_field_objects(self.root._find_form_fields(position))
|
|
101
|
+
|
|
102
|
+
def get(self) -> PageObject:
|
|
103
|
+
return self.root._to_page_object(self.root._get_page(self.page_index))
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class PDFDancer:
|
|
28
107
|
"""
|
|
29
108
|
REST API client for interacting with the PDFDancer PDF manipulation service.
|
|
30
109
|
This client provides a convenient Python interface for performing PDF operations
|
|
31
110
|
including session management, object searching, manipulation, and retrieval.
|
|
32
111
|
Handles authentication, session lifecycle, and HTTP communication transparently.
|
|
33
|
-
|
|
34
|
-
Mirrors the Java Client class functionality exactly.
|
|
35
112
|
"""
|
|
36
113
|
|
|
114
|
+
# --------------------------------------------------------------
|
|
115
|
+
# CLASS METHOD ENTRY POINT
|
|
116
|
+
# --------------------------------------------------------------
|
|
117
|
+
@classmethod
|
|
118
|
+
def open(cls,
|
|
119
|
+
pdf_data: Union[bytes, Path, str, BinaryIO],
|
|
120
|
+
token: Optional[str] = None,
|
|
121
|
+
base_url: Optional[str] = None,
|
|
122
|
+
timeout: float = 30.0) -> "PDFDancer":
|
|
123
|
+
"""
|
|
124
|
+
Create a client session, falling back to environment variables when needed.
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
pdf_data: PDF payload supplied directly or via filesystem handles.
|
|
128
|
+
token: Override for the API token; falls back to the `PDFDANCER_TOKEN` environment variable.
|
|
129
|
+
base_url: Override for the API base URL; falls back to `PDFDANCER_BASE_URL`
|
|
130
|
+
or defaults to `https://api.pdfdancer.com`.
|
|
131
|
+
timeout: HTTP read timeout in seconds.
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
A ready-to-use `PDFDancer` client instance.
|
|
135
|
+
"""
|
|
136
|
+
resolved_token = token.strip() if token and token.strip() else None
|
|
137
|
+
if resolved_token is None:
|
|
138
|
+
env_token = os.getenv("PDFDANCER_TOKEN")
|
|
139
|
+
resolved_token = env_token.strip() if env_token and env_token.strip() else None
|
|
140
|
+
|
|
141
|
+
env_base_url = os.getenv("PDFDANCER_BASE_URL")
|
|
142
|
+
resolved_base_url = base_url or (env_base_url.strip() if env_base_url and env_base_url.strip() else None)
|
|
143
|
+
if resolved_base_url is None:
|
|
144
|
+
resolved_base_url = "https://api.pdfdancer.com"
|
|
145
|
+
|
|
146
|
+
return PDFDancer(resolved_token, pdf_data, resolved_base_url, timeout)
|
|
147
|
+
|
|
37
148
|
def __init__(self, token: str, pdf_data: Union[bytes, Path, str, BinaryIO],
|
|
38
|
-
base_url: str
|
|
149
|
+
base_url: str, read_timeout: float = 0):
|
|
39
150
|
"""
|
|
40
151
|
Creates a new client with PDF data.
|
|
41
152
|
This constructor initializes the client, uploads the PDF data to create
|
|
@@ -75,7 +186,6 @@ class ClientV1:
|
|
|
75
186
|
def _process_pdf_data(self, pdf_data: Union[bytes, Path, str, BinaryIO]) -> bytes:
|
|
76
187
|
"""
|
|
77
188
|
Process PDF data from various input types with strict validation.
|
|
78
|
-
Equivalent to readFile() method in Java client.
|
|
79
189
|
"""
|
|
80
190
|
if pdf_data is None:
|
|
81
191
|
raise ValidationException("PDF data cannot be null")
|
|
@@ -152,7 +262,6 @@ class ClientV1:
|
|
|
152
262
|
def _create_session(self) -> str:
|
|
153
263
|
"""
|
|
154
264
|
Creates a new PDF processing session by uploading the PDF data.
|
|
155
|
-
Equivalent to createSession() method in Java client.
|
|
156
265
|
"""
|
|
157
266
|
try:
|
|
158
267
|
files = {
|
|
@@ -182,7 +291,6 @@ class ClientV1:
|
|
|
182
291
|
params: Optional[dict] = None) -> requests.Response:
|
|
183
292
|
"""
|
|
184
293
|
Make HTTP request with session headers and error handling.
|
|
185
|
-
Equivalent to retrieve() method pattern in Java client.
|
|
186
294
|
"""
|
|
187
295
|
headers = {
|
|
188
296
|
'X-Session-Id': self._session_id,
|
|
@@ -218,7 +326,7 @@ class ClientV1:
|
|
|
218
326
|
|
|
219
327
|
# Search Operations - matching Java client exactly
|
|
220
328
|
|
|
221
|
-
def
|
|
329
|
+
def _find(self, object_type: Optional[ObjectType] = None, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
222
330
|
"""
|
|
223
331
|
Searches for PDF objects matching the specified criteria.
|
|
224
332
|
This method provides flexible search capabilities across all PDF content,
|
|
@@ -238,56 +346,120 @@ class ClientV1:
|
|
|
238
346
|
objects_data = response.json()
|
|
239
347
|
return [self._parse_object_ref(obj_data) for obj_data in objects_data]
|
|
240
348
|
|
|
241
|
-
def
|
|
349
|
+
def select_paragraphs(self) -> List[ParagraphObject]:
|
|
242
350
|
"""
|
|
243
351
|
Searches for paragraph objects in the whole document.
|
|
244
|
-
Equivalent to findParagraphs() in Java client.
|
|
245
352
|
"""
|
|
246
|
-
return self.
|
|
353
|
+
return self._to_paragraph_objects(self._find(ObjectType.PARAGRAPH, None))
|
|
247
354
|
|
|
248
|
-
def
|
|
355
|
+
def _find_paragraphs(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
356
|
+
"""
|
|
357
|
+
Searches for paragraph objects at the specified position.
|
|
358
|
+
"""
|
|
359
|
+
return self._find(ObjectType.PARAGRAPH, position)
|
|
360
|
+
|
|
361
|
+
def _find_images(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
249
362
|
"""
|
|
250
363
|
Searches for image objects at the specified position.
|
|
251
|
-
Equivalent to findImages() in Java client.
|
|
252
364
|
"""
|
|
253
|
-
return self.
|
|
365
|
+
return self._find(ObjectType.IMAGE, position)
|
|
366
|
+
|
|
367
|
+
def select_images(self) -> List[ImageObject]:
|
|
368
|
+
"""
|
|
369
|
+
Searches for image objects in the whole document
|
|
370
|
+
"""
|
|
371
|
+
return self._to_image_objects(self._find(ObjectType.IMAGE, None))
|
|
254
372
|
|
|
255
|
-
def
|
|
373
|
+
def select_forms(self) -> List[FormObject]:
|
|
374
|
+
"""
|
|
375
|
+
Searches for form XObjects in the whole document.
|
|
376
|
+
"""
|
|
377
|
+
return self._to_form_objects(self._find(ObjectType.FORM_X_OBJECT, None))
|
|
378
|
+
|
|
379
|
+
def _find_form_x_objects(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
256
380
|
"""
|
|
257
381
|
Searches for form field objects at the specified position.
|
|
258
|
-
Equivalent to findForms() in Java client.
|
|
259
382
|
"""
|
|
260
|
-
return self.
|
|
383
|
+
return self._find(ObjectType.FORM_X_OBJECT, position)
|
|
384
|
+
|
|
385
|
+
def select_form_fields(self) -> List[FormFieldObject]:
|
|
386
|
+
"""
|
|
387
|
+
Searches for form field objects in the whole document.
|
|
388
|
+
"""
|
|
389
|
+
return self._to_form_field_objects(self._find_form_fields(None))
|
|
390
|
+
|
|
391
|
+
def select_form_fields_by_name(self, field_name: str) -> List[FormFieldObject]:
|
|
392
|
+
"""
|
|
393
|
+
Searches for form field objects with the given field name in the whole document.
|
|
394
|
+
"""
|
|
395
|
+
return self._to_form_field_objects(self._find_form_fields(Position.by_name(field_name)))
|
|
261
396
|
|
|
262
|
-
def
|
|
397
|
+
def _find_form_fields(self, position: Optional[Position] = None) -> List[FormFieldRef]:
|
|
398
|
+
"""
|
|
399
|
+
Searches for form fields at the specified position.
|
|
400
|
+
Returns FormFieldRef objects with name and value properties.
|
|
401
|
+
"""
|
|
402
|
+
request_data = FindRequest(ObjectType.FORM_FIELD, position).to_dict()
|
|
403
|
+
response = self._make_request('POST', '/pdf/find', data=request_data)
|
|
404
|
+
|
|
405
|
+
# Parse response into FormFieldRef objects
|
|
406
|
+
objects_data = response.json()
|
|
407
|
+
return [self._parse_form_field_ref(obj_data) for obj_data in objects_data]
|
|
408
|
+
|
|
409
|
+
def _change_form_field(self, form_field_ref: FormFieldRef, new_value: str) -> bool:
|
|
410
|
+
"""
|
|
411
|
+
Changes the value of a form field.
|
|
412
|
+
"""
|
|
413
|
+
if form_field_ref is None:
|
|
414
|
+
raise ValidationException("Form field reference cannot be null")
|
|
415
|
+
|
|
416
|
+
request_data = ChangeFormFieldRequest(form_field_ref, new_value).to_dict()
|
|
417
|
+
response = self._make_request('PUT', '/pdf/modify/formField', data=request_data)
|
|
418
|
+
return response.json()
|
|
419
|
+
|
|
420
|
+
def select_paths(self) -> List[ObjectRef]:
|
|
263
421
|
"""
|
|
264
422
|
Searches for vector path objects in the whole document.
|
|
265
|
-
Equivalent to findPaths() in Java client.
|
|
266
423
|
"""
|
|
267
|
-
return self.
|
|
424
|
+
return self._find(ObjectType.PATH, None)
|
|
425
|
+
|
|
426
|
+
def _find_paths(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
427
|
+
"""
|
|
428
|
+
Searches for vector path objects at the specified position.
|
|
429
|
+
"""
|
|
430
|
+
return self._find(ObjectType.PATH, position)
|
|
431
|
+
|
|
432
|
+
def _find_text_lines(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
433
|
+
"""
|
|
434
|
+
Searches for text line objects at the specified position.
|
|
435
|
+
"""
|
|
436
|
+
return self._find(ObjectType.TEXT_LINE, position)
|
|
268
437
|
|
|
269
|
-
def
|
|
438
|
+
def select_text_lines(self) -> List[TextLineObject]:
|
|
270
439
|
"""
|
|
271
440
|
Searches for text line objects in the whole document.
|
|
272
|
-
Equivalent to findTextLines() in Java client.
|
|
273
441
|
"""
|
|
274
|
-
return self.
|
|
442
|
+
return self._to_textline_objects(self._find(ObjectType.TEXT_LINE, None))
|
|
443
|
+
|
|
444
|
+
def page(self, page_index: int) -> PageClient:
|
|
445
|
+
return PageClient(page_index, self)
|
|
275
446
|
|
|
276
447
|
# Page Operations
|
|
277
448
|
|
|
278
|
-
def
|
|
449
|
+
def pages(self) -> List[PageObject]:
|
|
450
|
+
return self._to_page_objects(self._get_pages())
|
|
451
|
+
|
|
452
|
+
def _get_pages(self) -> List[ObjectRef]:
|
|
279
453
|
"""
|
|
280
454
|
Retrieves references to all pages in the PDF document.
|
|
281
|
-
Equivalent to getPages() in Java client.
|
|
282
455
|
"""
|
|
283
456
|
response = self._make_request('POST', '/pdf/page/find')
|
|
284
457
|
pages_data = response.json()
|
|
285
458
|
return [self._parse_object_ref(page_data) for page_data in pages_data]
|
|
286
459
|
|
|
287
|
-
def
|
|
460
|
+
def _get_page(self, page_index: int) -> Optional[ObjectRef]:
|
|
288
461
|
"""
|
|
289
462
|
Retrieves a reference to a specific page by its page index.
|
|
290
|
-
Equivalent to getPage() in Java client.
|
|
291
463
|
|
|
292
464
|
Args:
|
|
293
465
|
page_index: The page index to retrieve (1-based indexing)
|
|
@@ -307,10 +479,9 @@ class ClientV1:
|
|
|
307
479
|
|
|
308
480
|
return self._parse_object_ref(pages_data[0])
|
|
309
481
|
|
|
310
|
-
def
|
|
482
|
+
def _delete_page(self, page_ref: ObjectRef) -> bool:
|
|
311
483
|
"""
|
|
312
484
|
Deletes a page from the PDF document.
|
|
313
|
-
Equivalent to deletePage() in Java client.
|
|
314
485
|
|
|
315
486
|
Args:
|
|
316
487
|
page_ref: Reference to the page to be deleted
|
|
@@ -328,10 +499,9 @@ class ClientV1:
|
|
|
328
499
|
|
|
329
500
|
# Manipulation Operations
|
|
330
501
|
|
|
331
|
-
def
|
|
502
|
+
def _delete(self, object_ref: ObjectRef) -> bool:
|
|
332
503
|
"""
|
|
333
504
|
Deletes the specified PDF object from the document.
|
|
334
|
-
Equivalent to delete() in Java client.
|
|
335
505
|
|
|
336
506
|
Args:
|
|
337
507
|
object_ref: Reference to the object to be deleted
|
|
@@ -346,10 +516,9 @@ class ClientV1:
|
|
|
346
516
|
response = self._make_request('DELETE', '/pdf/delete', data=request_data)
|
|
347
517
|
return response.json()
|
|
348
518
|
|
|
349
|
-
def
|
|
519
|
+
def _move(self, object_ref: ObjectRef, position: Position) -> bool:
|
|
350
520
|
"""
|
|
351
521
|
Moves a PDF object to a new position within the document.
|
|
352
|
-
Equivalent to move() in Java client.
|
|
353
522
|
|
|
354
523
|
Args:
|
|
355
524
|
object_ref: Reference to the object to be moved
|
|
@@ -369,10 +538,9 @@ class ClientV1:
|
|
|
369
538
|
|
|
370
539
|
# Add Operations
|
|
371
540
|
|
|
372
|
-
def
|
|
541
|
+
def _add_image(self, image: Image, position: Optional[Position] = None) -> bool:
|
|
373
542
|
"""
|
|
374
543
|
Adds an image to the PDF document.
|
|
375
|
-
Equivalent to addImage() methods in Java client.
|
|
376
544
|
|
|
377
545
|
Args:
|
|
378
546
|
image: The image object to add
|
|
@@ -392,10 +560,9 @@ class ClientV1:
|
|
|
392
560
|
|
|
393
561
|
return self._add_object(image)
|
|
394
562
|
|
|
395
|
-
def
|
|
563
|
+
def _add_paragraph(self, paragraph: Paragraph) -> bool:
|
|
396
564
|
"""
|
|
397
565
|
Adds a paragraph to the PDF document.
|
|
398
|
-
Equivalent to addParagraph() in Java client with validation.
|
|
399
566
|
|
|
400
567
|
Args:
|
|
401
568
|
paragraph: The paragraph object to add
|
|
@@ -417,18 +584,21 @@ class ClientV1:
|
|
|
417
584
|
def _add_object(self, pdf_object) -> bool:
|
|
418
585
|
"""
|
|
419
586
|
Internal method to add any PDF object.
|
|
420
|
-
Equivalent to addObject() in Java client.
|
|
421
587
|
"""
|
|
422
588
|
request_data = AddRequest(pdf_object).to_dict()
|
|
423
589
|
response = self._make_request('POST', '/pdf/add', data=request_data)
|
|
424
590
|
return response.json()
|
|
425
591
|
|
|
426
|
-
|
|
592
|
+
def new_paragraph(self) -> ParagraphBuilder:
|
|
593
|
+
return ParagraphBuilder(self)
|
|
594
|
+
|
|
595
|
+
def new_image(self) -> ImageBuilder:
|
|
596
|
+
return ImageBuilder(self)
|
|
427
597
|
|
|
428
|
-
|
|
598
|
+
# Modify Operations
|
|
599
|
+
def _modify_paragraph(self, object_ref: ObjectRef, new_paragraph: Union[Paragraph, str]) -> bool:
|
|
429
600
|
"""
|
|
430
601
|
Modifies a paragraph object or its text content.
|
|
431
|
-
Equivalent to modifyParagraph() methods in Java client.
|
|
432
602
|
|
|
433
603
|
Args:
|
|
434
604
|
object_ref: Reference to the paragraph to modify
|
|
@@ -453,10 +623,9 @@ class ClientV1:
|
|
|
453
623
|
|
|
454
624
|
return response.json()
|
|
455
625
|
|
|
456
|
-
def
|
|
626
|
+
def _modify_text_line(self, object_ref: ObjectRef, new_text: str) -> bool:
|
|
457
627
|
"""
|
|
458
628
|
Modifies a text line object.
|
|
459
|
-
Equivalent to modifyTextLine() in Java client.
|
|
460
629
|
|
|
461
630
|
Args:
|
|
462
631
|
object_ref: Reference to the text line to modify
|
|
@@ -479,7 +648,6 @@ class ClientV1:
|
|
|
479
648
|
def find_fonts(self, font_name: str, font_size: int) -> List[Font]:
|
|
480
649
|
"""
|
|
481
650
|
Finds available fonts matching the specified name and size.
|
|
482
|
-
Equivalent to findFonts() in Java client.
|
|
483
651
|
|
|
484
652
|
Args:
|
|
485
653
|
font_name: Name of the font to search for
|
|
@@ -502,7 +670,6 @@ class ClientV1:
|
|
|
502
670
|
def register_font(self, ttf_file: Union[Path, str, bytes, BinaryIO]) -> str:
|
|
503
671
|
"""
|
|
504
672
|
Registers a custom font for use in PDF operations.
|
|
505
|
-
Equivalent to registerFont() in Java client.
|
|
506
673
|
|
|
507
674
|
Args:
|
|
508
675
|
ttf_file: TTF font file as Path, filename, bytes, or file-like object
|
|
@@ -577,7 +744,6 @@ class ClientV1:
|
|
|
577
744
|
def get_pdf_file(self) -> bytes:
|
|
578
745
|
"""
|
|
579
746
|
Downloads the current state of the PDF document with all modifications applied.
|
|
580
|
-
Equivalent to getPDFFile() in Java client.
|
|
581
747
|
|
|
582
748
|
Returns:
|
|
583
749
|
PDF file data as bytes with all session modifications applied
|
|
@@ -585,10 +751,9 @@ class ClientV1:
|
|
|
585
751
|
response = self._make_request('GET', f'/session/{self._session_id}/pdf')
|
|
586
752
|
return response.content
|
|
587
753
|
|
|
588
|
-
def
|
|
754
|
+
def save(self, file_path: Union[str, Path]) -> None:
|
|
589
755
|
"""
|
|
590
756
|
Saves the current PDF to a file.
|
|
591
|
-
Equivalent to savePDF() in Java client.
|
|
592
757
|
|
|
593
758
|
Args:
|
|
594
759
|
file_path: Path where to save the PDF file
|
|
@@ -628,7 +793,23 @@ class ClientV1:
|
|
|
628
793
|
type=object_type
|
|
629
794
|
)
|
|
630
795
|
|
|
631
|
-
def
|
|
796
|
+
def _parse_form_field_ref(self, obj_data: dict) -> FormFieldRef:
    """
    Parse JSON object data into a FormFieldRef instance.

    Args:
        obj_data: Decoded JSON mapping. ``type`` is required; ``internalId``,
            ``position``, ``name`` and ``value`` are optional.

    Returns:
        A FormFieldRef whose optional attributes are ``None`` when the
        corresponding key is absent from ``obj_data``.

    Raises:
        KeyError: If ``obj_data`` has no ``type`` entry.
    """
    # A missing or empty position mapping yields no position at all.
    position_data = obj_data.get('position')
    position = self._parse_position(position_data) if position_data else None

    return FormFieldRef(
        internal_id=obj_data.get('internalId'),
        position=position,
        type=ObjectType(obj_data['type']),
        name=obj_data.get('name'),
        value=obj_data.get('value'),
    )
|
|
810
|
+
|
|
811
|
+
@staticmethod
|
|
812
|
+
def _parse_position(pos_data: dict) -> Position:
|
|
632
813
|
"""Parse JSON position data into Position instance."""
|
|
633
814
|
position = Position()
|
|
634
815
|
position.page_index = pos_data.get('pageIndex')
|
|
@@ -653,11 +834,9 @@ class ClientV1:
|
|
|
653
834
|
|
|
654
835
|
# Builder Pattern Support
|
|
655
836
|
|
|
656
|
-
def
|
|
837
|
+
def _paragraph_builder(self) -> 'ParagraphBuilder':
|
|
657
838
|
"""
|
|
658
839
|
Creates a new ParagraphBuilder for fluent paragraph construction.
|
|
659
|
-
Equivalent to paragraphBuilder() in Java client.
|
|
660
|
-
|
|
661
840
|
Returns:
|
|
662
841
|
A new ParagraphBuilder instance
|
|
663
842
|
"""
|
|
@@ -673,3 +852,28 @@ class ClientV1:
|
|
|
673
852
|
"""Context manager exit - cleanup if needed."""
|
|
674
853
|
# Could add session cleanup here if API supports it
|
|
675
854
|
pass
|
|
855
|
+
|
|
856
|
+
def _to_path_objects(self, path_refs: List[ObjectRef]) -> List[PathObject]:
    """Wrap every reference in ``path_refs`` as a PathObject bound to this client."""
    return [
        PathObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
858
|
+
|
|
859
|
+
def _to_paragraph_objects(self, path_refs: List[ObjectRef]) -> List[ParagraphObject]:
    """Wrap every reference in ``path_refs`` as a ParagraphObject bound to this client."""
    return [
        ParagraphObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
861
|
+
|
|
862
|
+
def _to_textline_objects(self, path_refs: List[ObjectRef]) -> List[TextLineObject]:
    """Wrap every reference in ``path_refs`` as a TextLineObject bound to this client."""
    return [
        TextLineObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
864
|
+
|
|
865
|
+
def _to_image_objects(self, path_refs: List[ObjectRef]) -> List[ImageObject]:
    """Wrap every reference in ``path_refs`` as an ImageObject bound to this client."""
    return [
        ImageObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
867
|
+
|
|
868
|
+
def _to_form_objects(self, path_refs: List[ObjectRef]) -> List[FormObject]:
    """Wrap every reference in ``path_refs`` as a FormObject bound to this client."""
    return [
        FormObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
870
|
+
|
|
871
|
+
def _to_form_field_objects(self, path_refs: List[FormFieldRef]) -> List[FormFieldObject]:
    """
    Wrap every form-field reference as a FormFieldObject bound to this client.

    The field's name and value are carried over from the reference.
    """
    return [
        FormFieldObject(self, item.internal_id, item.type, item.position, item.name, item.value)
        for item in path_refs
    ]
|
|
874
|
+
|
|
875
|
+
def _to_page_objects(self, path_refs: List[ObjectRef]) -> List[PageObject]:
    """Wrap every reference in ``path_refs`` as a PageObject bound to this client."""
    return [
        PageObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
877
|
+
|
|
878
|
+
def _to_page_object(self, ref: ObjectRef) -> PageObject:
    """Wrap a single page reference as a PageObject bound to this client."""
    page_id, page_type, page_position = ref.internal_id, ref.type, ref.position
    return PageObject(self, page_id, page_type, page_position)
|
pdfdancer/types.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from . import ObjectType, Position, ObjectRef, Point, Paragraph, Font, Color, FormFieldRef
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class BoundingRect:
    """Rectangle given by an anchor point and an optional size."""

    # Anchor coordinates of the rectangle.
    # NOTE(review): which corner (x, y) denotes and the unit used are not visible here - confirm.
    x: float
    y: float
    # Size is optional; None means the extent was not provided.
    width: Optional[float] = None
    height: Optional[float] = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class UnsupportedOperation(Exception):
    """Raised when a requested edit is not supported for the target object."""

    def __init__(self, msg: str):
        # Hand the message to Exception unchanged so str(exc) is exactly ``msg``.
        Exception.__init__(self, msg)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PDFObjectBase:
    """
    Base class for all PDF objects (paths, paragraphs, text lines, etc.)
    providing shared behavior such as position, deletion, and movement.

    Attributes:
        internal_id: Internal PDFDancer object identifier, e.g. 'PATH_000023'.
        object_type: Enum value representing the PDF object type.
        position: The geometric position of the object on its page.
    """

    def __init__(self, client: 'PDFDancer', internal_id: str, object_type: ObjectType, position: Position):
        # The client is kept private; delete() and move_to() delegate to it.
        self._client = client
        # Plain public attributes. Same-named accessor methods are deliberately
        # not defined on the class: an instance attribute always wins the
        # lookup, so such methods would be unreachable dead code.
        self.position = position
        self.internal_id = internal_id
        self.object_type = object_type

    # --------------------------------------------------------------
    # Core properties
    # --------------------------------------------------------------
    def type(self) -> ObjectType:
        """Enum value representing the PDF object type."""
        return self.object_type

    @property
    def page_index(self) -> int:
        """Page index where this object resides."""
        return self.position.page_index

    def object_ref(self) -> ObjectRef:
        """Build an ObjectRef from this object's id, position and type."""
        return ObjectRef(self.internal_id, self.position, self.object_type)

    # --------------------------------------------------------------
    # Common actions
    # --------------------------------------------------------------
    def delete(self) -> bool:
        """Delete this object from the PDF document."""
        # noinspection PyProtectedMember
        return self._client._delete(self.object_ref())

    def move_to(self, x: float, y: float) -> bool:
        """Move this object to a new position on the page it currently resides on."""
        # noinspection PyProtectedMember
        return self._client._move(
            self.object_ref(),
            Position.at_page_coordinates(self.position.page_index, x, y)
        )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# -------------------------------------------------------------------
|
|
73
|
+
# Subclasses
|
|
74
|
+
# -------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
class PathObject(PDFObjectBase):
    """A vector path object located on a PDF page."""

    @property
    def bounding_box(self) -> Optional[BoundingRect]:
        """Bounding rectangle stored on this object's position, if one is available."""
        location = self.position
        return location.bounding_rect
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ImageObject(PDFObjectBase):
    """An image object on a PDF page; adds nothing beyond the shared base behavior."""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class FormObject(PDFObjectBase):
    """A form object on a PDF page; adds nothing beyond the shared base behavior."""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _process_text_lines(text: str) -> list[str]:
|
|
94
|
+
"""
|
|
95
|
+
Process text into lines for the paragraph.
|
|
96
|
+
This is a simplified version - the full implementation would handle
|
|
97
|
+
word wrapping, line breaks, and other text formatting based on the font
|
|
98
|
+
and paragraph width. TODO
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
text: The input text to process
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
List of text lines for the paragraph
|
|
105
|
+
"""
|
|
106
|
+
# Handle escaped newlines (\\n) as actual newlines
|
|
107
|
+
processed_text = text.replace('\\n', '\n')
|
|
108
|
+
|
|
109
|
+
# Simple implementation - split on newlines
|
|
110
|
+
# In the full version, this would implement proper text layout
|
|
111
|
+
lines = processed_text.split('\n')
|
|
112
|
+
|
|
113
|
+
# Remove empty lines at the end but preserve intentional line breaks
|
|
114
|
+
while lines and not lines[-1].strip():
|
|
115
|
+
lines.pop()
|
|
116
|
+
|
|
117
|
+
# Ensure at least one line
|
|
118
|
+
if not lines:
|
|
119
|
+
lines = ['']
|
|
120
|
+
|
|
121
|
+
return lines
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Line spacing used by ParagraphEdit when the edit does not set one.
DEFAULT_LINE_SPACING = 1.2
# Color used by ParagraphEdit when the edit does not set one.
# NOTE(review): presumably black, assuming Color takes (r, g, b) components - confirm.
DEFAULT_COLOR = Color(0, 0, 0)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class BaseTextEdit:
    """
    Shared fluent editor for text-like objects (Paragraph, TextLine, etc.).

    Configuration methods only record the requested change and return the
    editor itself so calls can be chained; ``apply()`` performs the edit.
    Used as a context manager, ``apply()`` runs automatically on exit when
    the block raised no exception.
    """

    def __init__(self, target_obj, object_ref):
        # The object being edited and the reference that identifies it.
        self._target_obj = target_obj
        self._object_ref = object_ref
        # Pending changes; None means "not requested".
        self._new_text = None
        self._font_name = None
        self._font_size = None
        self._line_spacing = None
        self._position = None
        self._color = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Skip the edit when the block failed; the exception propagates.
        if exc_type:
            return
        self.apply()

    # --- Common fluent configuration methods ---

    def replace(self, text: str):
        """Record the replacement text."""
        self._new_text = text
        return self

    def font(self, font_name: str, font_size: float):
        """Record the font name and size to use."""
        self._font_name, self._font_size = font_name, font_size
        return self

    def color(self, color):
        """Record the color to use."""
        self._color = color
        return self

    def line_spacing(self, line_spacing: float):
        """Record the line spacing to use."""
        self._line_spacing = line_spacing
        return self

    def move_to(self, x: float, y: float):
        """Record a new position built from the coordinates (x, y)."""
        anchor = Position()
        self._position = anchor.at_coordinates(Point(x, y))
        return self

    # --- Abstract method: implemented by subclass ---
    def apply(self):
        """Perform the recorded edit; must be provided by subclasses."""
        raise NotImplementedError("Subclasses must implement apply()")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class ParagraphEdit(BaseTextEdit):
    """Fluent edit of a paragraph: text replacement, optionally with restyling."""

    def apply(self) -> bool:
        """
        Forward the recorded changes to the client's ``_modify_paragraph``.

        When no position, line spacing, font or color was set, only the
        replacement text is passed. Otherwise a full Paragraph is built from
        the recorded values, using DEFAULT_LINE_SPACING and DEFAULT_COLOR for
        a line spacing or color that was not set.

        Returns:
            The result of the client's ``_modify_paragraph`` call.

        Raises:
            ValueError: If a style or position change was requested without
                ``replace()`` having supplied the paragraph text.
        """
        # noinspection PyProtectedMember
        client = self._target_obj._client

        restyled = any(
            setting is not None
            for setting in (
                self._position,
                self._line_spacing,
                self._font_size,
                self._font_name,
                self._color,
            )
        )
        if not restyled:
            # Text-only edit: pass the plain string.
            # noinspection PyProtectedMember
            return client._modify_paragraph(self._object_ref, self._new_text)

        # A rebuilt paragraph needs text lines. Without this check the missing
        # text reaches _process_text_lines as None and fails with an opaque
        # AttributeError.
        if self._new_text is None:
            raise ValueError(
                "ParagraphEdit requires replace(text) when changing position, "
                "line spacing, font or color"
            )

        # NOTE(review): when font() was not called the Font is built with
        # name=None and size=None - confirm this is acceptable downstream.
        new_paragraph = Paragraph(
            position=self._position,
            line_spacing=self._line_spacing if self._line_spacing is not None else DEFAULT_LINE_SPACING,
            font=Font(name=self._font_name, size=self._font_size),
            text_lines=_process_text_lines(self._new_text),
            color=self._color if self._color is not None else DEFAULT_COLOR,
        )
        # noinspection PyProtectedMember
        return client._modify_paragraph(self._object_ref, new_paragraph)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class TextLineEdit(BaseTextEdit):
    """Fluent edit of a single text line; only text replacement is supported."""

    def apply(self) -> bool:
        """
        Forward the replacement text to the client's ``_modify_text_line``.

        Returns:
            The result of the client's ``_modify_text_line`` call.

        Raises:
            UnsupportedOperation: If a position, line spacing, font or color
                change was recorded on this edit.
        """
        restyled = any(
            setting is not None
            for setting in (
                self._position,
                self._line_spacing,
                self._font_size,
                self._font_name,
                self._color,
            )
        )
        if restyled:
            # State the actual restriction rather than claiming the edit
            # cannot be applied to text lines at all.
            raise UnsupportedOperation(
                "TextLineEdit only supports replace(); position, line spacing, "
                "font and color changes cannot be applied to text lines"
            )
        # noinspection PyProtectedMember
        return self._target_obj._client._modify_text_line(self._object_ref, self._new_text)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class ParagraphObject(PDFObjectBase):
    """A paragraph text block located on a PDF page."""

    def edit(self) -> ParagraphEdit:
        """Open a ParagraphEdit for this paragraph, keyed by its current object reference."""
        reference = self.object_ref()
        return ParagraphEdit(self, reference)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class TextLineObject(PDFObjectBase):
    """A single line of text located on a PDF page."""

    def edit(self) -> TextLineEdit:
        """Open a TextLineEdit for this line, keyed by its current object reference."""
        reference = self.object_ref()
        return TextLineEdit(self, reference)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class FormFieldEdit:
    """Fluent editor that changes the value of a form field."""

    def __init__(self, form_field: 'FormFieldObject', object_ref: FormFieldRef):
        self.object_ref = object_ref
        self.form_field = form_field

    def value(self, new_value: str) -> 'FormFieldEdit':
        """Store ``new_value`` on the form field object and return this editor for chaining."""
        field = self.form_field
        field.value = new_value
        return self

    def apply(self) -> bool:
        """Pass the reference and the field's current value to the client's ``_change_form_field``."""
        field = self.form_field
        # noinspection PyProtectedMember
        change = field._client._change_form_field
        return change(self.object_ref, field.value)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class FormFieldObject(PDFObjectBase):
    """A form field on a PDF page, carrying the field's name and value."""

    def __init__(self, client: 'PDFDancer', internal_id: str, object_type: ObjectType, position: Position,
                 field_name: str, field_value: str):
        super().__init__(client, internal_id, object_type, position)
        # Field-specific data on top of the shared base attributes.
        self.name, self.value = field_name, field_value

    def edit(self) -> FormFieldEdit:
        """Open a FormFieldEdit for this field, keyed by its current object reference."""
        reference = self.object_ref()
        return FormFieldEdit(self, reference)

    def object_ref(self) -> FormFieldRef:
        """Build a FormFieldRef that also carries this field's name and value."""
        field_ref = FormFieldRef(self.internal_id, self.position, self.object_type)
        field_ref.name = self.name
        field_ref.value = self.value
        return field_ref
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class PageObject(PDFObjectBase):
    """A page of the document; deletion goes through the page-specific client call."""

    def delete(self) -> bool:
        """Delete this page via the client's ``_delete_page`` and return its result."""
        # noinspection PyProtectedMember
        remove_page = self._client._delete_page
        return remove_page(self.object_ref())
|
{pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.3.dist-info}/METADATA
RENAMED
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdfdancer-client-python
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Python client for PDFDancer API
|
|
5
|
-
Author-email:
|
|
5
|
+
Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
|
|
6
6
|
License: MIT
|
|
7
|
-
Project-URL: Homepage, https://
|
|
8
|
-
Project-URL: Repository, https://github.com/
|
|
7
|
+
Project-URL: Homepage, https://www.pdfdancer.com/
|
|
8
|
+
Project-URL: Repository, https://github.com/MenschMachine/pdfdancer-client-python
|
|
9
9
|
Classifier: Development Status :: 4 - Beta
|
|
10
10
|
Classifier: Intended Audience :: Developers
|
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
-
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
14
12
|
Classifier: Programming Language :: Python :: 3.9
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
16
|
Description-Content-Type: text/markdown
|
|
18
17
|
Requires-Dist: requests>=2.25.0
|
|
19
18
|
Requires-Dist: pydantic>=1.8.0
|
|
@@ -60,18 +59,18 @@ paragraphs = client.find_paragraphs(None)
|
|
|
60
59
|
images = client.find_images(Position.at_page(0))
|
|
61
60
|
|
|
62
61
|
# Manipulation operations (mirrors Java client methods)
|
|
63
|
-
client.
|
|
64
|
-
client.
|
|
62
|
+
client._delete(paragraphs[0])
|
|
63
|
+
client._move(images[0], Position.at_page_coordinates(0, 100, 200))
|
|
65
64
|
|
|
66
65
|
# Builder pattern (mirrors Java ParagraphBuilder)
|
|
67
|
-
paragraph = (client.
|
|
66
|
+
paragraph = (client._paragraph_builder()
|
|
68
67
|
.from_string("Hello World")
|
|
69
68
|
.with_font(Font("Arial", 12))
|
|
70
69
|
.with_color(Color(255, 0, 0))
|
|
71
70
|
.with_position(Position.at_page(0))
|
|
72
71
|
.build())
|
|
73
72
|
|
|
74
|
-
client.
|
|
73
|
+
client._add_paragraph(paragraph)
|
|
75
74
|
|
|
76
75
|
# Save result (mirrors Java savePDF)
|
|
77
76
|
client.save_pdf("output.pdf")
|
|
@@ -85,7 +84,7 @@ from pdfdancer import ClientV1
|
|
|
85
84
|
# Automatic resource management
|
|
86
85
|
with ClientV1(token="jwt-token", pdf_data="input.pdf") as client:
|
|
87
86
|
paragraphs = client.find_paragraphs(None)
|
|
88
|
-
client.
|
|
87
|
+
client._delete(paragraphs[0])
|
|
89
88
|
client.save_pdf("output.pdf")
|
|
90
89
|
# Session automatically cleaned up
|
|
91
90
|
```
|
|
@@ -105,60 +104,63 @@ client = ClientV1(token="jwt-token", pdf_data=pdf_file, base_url="https://api.se
|
|
|
105
104
|
```
|
|
106
105
|
|
|
107
106
|
### Find Operations
|
|
107
|
+
|
|
108
108
|
```python
|
|
109
109
|
# Generic find (Java: client.find())
|
|
110
|
-
objects = client.
|
|
110
|
+
objects = client._find(ObjectType.PARAGRAPH, position)
|
|
111
111
|
|
|
112
112
|
# Specific finders (Java: client.findParagraphs(), etc.)
|
|
113
|
-
paragraphs = client.
|
|
114
|
-
images = client.
|
|
115
|
-
forms = client.
|
|
116
|
-
paths = client.
|
|
117
|
-
text_lines = client.
|
|
113
|
+
paragraphs = client._find_paragraphs(position)
|
|
114
|
+
images = client._find_images(position)
|
|
115
|
+
forms = client._find_form_x_objects(position)
|
|
116
|
+
paths = client._find_paths(position)
|
|
117
|
+
text_lines = client._find_text_lines(position)
|
|
118
118
|
|
|
119
119
|
# Page operations (Java: client.getPages(), client.getPage())
|
|
120
120
|
pages = client.get_pages()
|
|
121
|
-
page = client.
|
|
121
|
+
page = client._get_page(1) # 1-based indexing
|
|
122
122
|
```
|
|
123
123
|
|
|
124
124
|
### Manipulation Operations
|
|
125
|
+
|
|
125
126
|
```python
|
|
126
127
|
# Delete (Java: client.delete(), client.deletePage())
|
|
127
|
-
result = client.
|
|
128
|
-
result = client.
|
|
128
|
+
result = client._delete(object_ref)
|
|
129
|
+
result = client._delete_page(page_ref)
|
|
129
130
|
|
|
130
131
|
# Move (Java: client.move())
|
|
131
|
-
result = client.
|
|
132
|
+
result = client._move(object_ref, new_position)
|
|
132
133
|
|
|
133
134
|
# Add (Java: client.addImage(), client.addParagraph())
|
|
134
|
-
result = client.
|
|
135
|
-
result = client.
|
|
135
|
+
result = client._add_image(image, position)
|
|
136
|
+
result = client._add_paragraph(paragraph)
|
|
136
137
|
|
|
137
138
|
# Modify (Java: client.modifyParagraph(), client.modifyTextLine())
|
|
138
|
-
result = client.
|
|
139
|
+
result = client._modify_paragraph(ref, new_paragraph)
|
|
139
140
|
result = client.modify_text_line(ref, "new text")
|
|
140
141
|
```
|
|
141
142
|
|
|
142
143
|
### Builder Pattern
|
|
144
|
+
|
|
143
145
|
```python
|
|
144
146
|
# Java: client.paragraphBuilder()
|
|
145
|
-
builder = client.
|
|
147
|
+
builder = client._paragraph_builder()
|
|
146
148
|
|
|
147
149
|
# Fluent interface (mirrors Java ParagraphBuilder)
|
|
148
150
|
paragraph = (builder
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
151
|
+
.from_string("Text content") # Java: fromString()
|
|
152
|
+
.with_font(Font("Arial", 12)) # Java: withFont()
|
|
153
|
+
.with_color(Color(255, 0, 0)) # Java: withColor()
|
|
154
|
+
.with_line_spacing(1.5) # Java: withLineSpacing()
|
|
155
|
+
.with_position(position) # Java: withPosition()
|
|
156
|
+
.build()) # Java: build()
|
|
155
157
|
|
|
156
158
|
# Font file registration (Java: withFont(File, double))
|
|
157
159
|
paragraph = (builder
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
160
|
+
.with_font_file("custom.ttf", 14.0) # Java: withFont(File, double)
|
|
161
|
+
.from_string("Custom font text")
|
|
162
|
+
.with_position(position)
|
|
163
|
+
.build())
|
|
162
164
|
```
|
|
163
165
|
|
|
164
166
|
### Position API
|
|
@@ -171,8 +173,8 @@ position = Position.at_page(0)
|
|
|
171
173
|
position = Position.at_page_coordinates(0, 100, 200)
|
|
172
174
|
|
|
173
175
|
# Coordinate access (Java: position.getX(), position.getY())
|
|
174
|
-
x = position.
|
|
175
|
-
y = position.
|
|
176
|
+
x = position.x()
|
|
177
|
+
y = position.y()
|
|
176
178
|
|
|
177
179
|
# Movement (Java: position.moveX(), position.moveY())
|
|
178
180
|
position.move_x(50.0)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
pdfdancer/__init__.py,sha256=5HVIUIEESA_GQHKJhItdperrx079oFZM028ciSt2Z9o,1004
|
|
2
|
+
pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
|
|
3
|
+
pdfdancer/image_builder.py,sha256=Omxc2LcieJ1MbvWBXR5_sfia--eAucTUe0KWgr22HYo,842
|
|
4
|
+
pdfdancer/models.py,sha256=SmkKScr47uVs6FCWUAVIg6rucYrYHvbIxZngyA50XyI,15498
|
|
5
|
+
pdfdancer/paragraph_builder.py,sha256=bAfwX9U2YT1UGX9EKkPnGYvGK3SQP3X1ocxlgyLE_rU,8872
|
|
6
|
+
pdfdancer/pdfdancer_v1.py,sha256=pDRrUfbUpk7Ho2d57ouOPOd-nCwlcCUQWfsObfXgMGA,34334
|
|
7
|
+
pdfdancer/types.py,sha256=lcYnqCFgnrGpplSPCxKh3X9AZ3-9t-lJqMH5ZLew_I4,8188
|
|
8
|
+
pdfdancer_client_python-0.2.3.dist-info/METADATA,sha256=vULfZlPY62-n4THslmEZma4DSkZ1SszgFf6pWN8n8sc,9253
|
|
9
|
+
pdfdancer_client_python-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
+
pdfdancer_client_python-0.2.3.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
|
|
11
|
+
pdfdancer_client_python-0.2.3.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
pdfdancer/__init__.py,sha256=uSIbj9rI81_o_asKd6Er15rI_Fa-TcMl1N7BEq7P_Gc,964
|
|
2
|
-
pdfdancer/client_v1.py,sha256=M4A_q8iZ2d9uUrmDSRrVRw1LADpM-MInti_FKsBRAnM,25147
|
|
3
|
-
pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
|
|
4
|
-
pdfdancer/models.py,sha256=MHp5_iFpvHhJC7-kN71D5xc2NHhFj0PMSF9afmxdjL8,14064
|
|
5
|
-
pdfdancer/paragraph_builder.py,sha256=uBMSNhL3b5DgbCJWf5VFWxgm3RpsQyQukk67FDd86Bs,8727
|
|
6
|
-
pdfdancer_client_python-0.1.2.dist-info/METADATA,sha256=nv1xrRZG_kIIL1-2iFSX8N5l0UKiIITqUZsmBnlPOWo,9248
|
|
7
|
-
pdfdancer_client_python-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
pdfdancer_client_python-0.1.2.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
|
|
9
|
-
pdfdancer_client_python-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
{pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.3.dist-info}/top_level.txt
RENAMED
|
File without changes
|