pdfdancer-client-python 0.1.2__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdfdancer/__init__.py +5 -3
- pdfdancer/image_builder.py +30 -0
- pdfdancer/models.py +58 -6
- pdfdancer/paragraph_builder.py +15 -12
- pdfdancer/{client_v1.py → pdfdancer_v1.py} +236 -55
- pdfdancer/types.py +263 -0
- {pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.2.dist-info}/METADATA +26 -25
- pdfdancer_client_python-0.2.2.dist-info/RECORD +11 -0
- pdfdancer_client_python-0.1.2.dist-info/RECORD +0 -9
- {pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.2.dist-info}/WHEEL +0 -0
- {pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.2.dist-info}/top_level.txt +0 -0
pdfdancer/__init__.py
CHANGED
|
@@ -6,20 +6,19 @@ Provides a clean, Pythonic interface for PDF operations that closely
|
|
|
6
6
|
mirrors the Java client structure and functionality.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
from .client_v1 import ClientV1
|
|
10
9
|
from .exceptions import (
|
|
11
10
|
PdfDancerException, FontNotFoundException, ValidationException,
|
|
12
11
|
HttpClientException, SessionException
|
|
13
12
|
)
|
|
14
13
|
from .models import (
|
|
15
|
-
ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph,
|
|
14
|
+
ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph, FormFieldRef,
|
|
16
15
|
PositionMode, ShapeType, Point
|
|
17
16
|
)
|
|
18
17
|
from .paragraph_builder import ParagraphBuilder
|
|
19
18
|
|
|
20
19
|
__version__ = "1.0.0"
|
|
21
20
|
__all__ = [
|
|
22
|
-
"ClientV1",
|
|
21
|
+
"PDFDancer",
|
|
23
22
|
"ParagraphBuilder",
|
|
24
23
|
"ObjectRef",
|
|
25
24
|
"Position",
|
|
@@ -29,6 +28,7 @@ __all__ = [
|
|
|
29
28
|
"Image",
|
|
30
29
|
"BoundingRect",
|
|
31
30
|
"Paragraph",
|
|
31
|
+
"FormFieldRef",
|
|
32
32
|
"PositionMode",
|
|
33
33
|
"ShapeType",
|
|
34
34
|
"Point",
|
|
@@ -38,3 +38,5 @@ __all__ = [
|
|
|
38
38
|
"HttpClientException",
|
|
39
39
|
"SessionException"
|
|
40
40
|
]
|
|
41
|
+
|
|
42
|
+
from .pdfdancer_v1 import PDFDancer
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from pdfdancer import ValidationException, Image, Position
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ImageBuilder:
|
|
7
|
+
|
|
8
|
+
def __init__(self, client: 'PDFDancer'):
|
|
9
|
+
"""
|
|
10
|
+
Initialize the image builder with a client reference.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
client: The PDFDancer instance for font registration
|
|
14
|
+
"""
|
|
15
|
+
if client is None:
|
|
16
|
+
raise ValidationException("Client cannot be null")
|
|
17
|
+
|
|
18
|
+
self._client = client
|
|
19
|
+
self._image = Image()
|
|
20
|
+
|
|
21
|
+
def from_file(self, img_path: Path) -> 'ImageBuilder':
|
|
22
|
+
self._image.data = img_path.read_bytes()
|
|
23
|
+
return self
|
|
24
|
+
|
|
25
|
+
def at(self, page, x, y) -> 'ImageBuilder':
|
|
26
|
+
self._image.position = Position.at_page_coordinates(page, x, y)
|
|
27
|
+
return self
|
|
28
|
+
|
|
29
|
+
def add(self) -> bool:
|
|
30
|
+
return self._client._add_image(self._image, self._image.position)
|
pdfdancer/models.py
CHANGED
|
@@ -10,12 +10,16 @@ from typing import Optional, List, Any
|
|
|
10
10
|
|
|
11
11
|
class ObjectType(Enum):
|
|
12
12
|
"""Object type enumeration matching the Java ObjectType."""
|
|
13
|
+
FORM_FIELD = "FORM_FIELD"
|
|
13
14
|
IMAGE = "IMAGE"
|
|
14
15
|
FORM_X_OBJECT = "FORM_X_OBJECT"
|
|
15
16
|
PATH = "PATH"
|
|
16
17
|
PARAGRAPH = "PARAGRAPH"
|
|
17
18
|
TEXT_LINE = "TEXT_LINE"
|
|
18
19
|
PAGE = "PAGE"
|
|
20
|
+
TEXT_FIELD = "TEXT_FIELD"
|
|
21
|
+
CHECK_BOX = "CHECK_BOX"
|
|
22
|
+
RADIO_BUTTON = "RADIO_BUTTON"
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
class PositionMode(Enum):
|
|
@@ -74,6 +78,7 @@ class Position:
|
|
|
74
78
|
mode: Optional[PositionMode] = None
|
|
75
79
|
bounding_rect: Optional[BoundingRect] = None
|
|
76
80
|
text_starts_with: Optional[str] = None
|
|
81
|
+
name: Optional[str] = None
|
|
77
82
|
|
|
78
83
|
@staticmethod
|
|
79
84
|
def at_page(page_index: int) -> 'Position':
|
|
@@ -93,7 +98,17 @@ class Position:
|
|
|
93
98
|
position.at_coordinates(Point(x, y))
|
|
94
99
|
return position
|
|
95
100
|
|
|
96
|
-
|
|
101
|
+
@staticmethod
|
|
102
|
+
def by_name(name: str) -> 'Position':
|
|
103
|
+
"""
|
|
104
|
+
Creates a position specification for finding objects by name.
|
|
105
|
+
Equivalent to Position.byName() in Java.
|
|
106
|
+
"""
|
|
107
|
+
position = Position()
|
|
108
|
+
position.name = name
|
|
109
|
+
return position
|
|
110
|
+
|
|
111
|
+
def at_coordinates(self, point: Point) -> 'Position':
|
|
97
112
|
"""
|
|
98
113
|
Sets the position to a specific point location.
|
|
99
114
|
Equivalent to Position.set() in Java.
|
|
@@ -101,7 +116,7 @@ class Position:
|
|
|
101
116
|
self.mode = PositionMode.CONTAINS
|
|
102
117
|
self.shape = ShapeType.POINT
|
|
103
118
|
self.bounding_rect = BoundingRect(point.x, point.y, 0, 0)
|
|
104
|
-
return self
|
|
119
|
+
return self
|
|
105
120
|
|
|
106
121
|
def with_text_starts(self, text: str) -> 'Position':
|
|
107
122
|
self.text_starts_with = text
|
|
@@ -110,20 +125,20 @@ class Position:
|
|
|
110
125
|
def move_x(self, x_offset: float) -> 'Position':
|
|
111
126
|
"""Move the position horizontally by the specified offset."""
|
|
112
127
|
if self.bounding_rect:
|
|
113
|
-
self.at_coordinates(Point(self.
|
|
128
|
+
self.at_coordinates(Point(self.x() + x_offset, self.y()))
|
|
114
129
|
return self
|
|
115
130
|
|
|
116
131
|
def move_y(self, y_offset: float) -> 'Position':
|
|
117
132
|
"""Move the position vertically by the specified offset."""
|
|
118
133
|
if self.bounding_rect:
|
|
119
|
-
self.at_coordinates(Point(self.
|
|
134
|
+
self.at_coordinates(Point(self.x(), self.y() + y_offset))
|
|
120
135
|
return self
|
|
121
136
|
|
|
122
|
-
def
|
|
137
|
+
def x(self) -> Optional[float]:
|
|
123
138
|
"""Returns the X coordinate of this position."""
|
|
124
139
|
return self.bounding_rect.get_x() if self.bounding_rect else None
|
|
125
140
|
|
|
126
|
-
def
|
|
141
|
+
def y(self) -> Optional[float]:
|
|
127
142
|
"""Returns the Y coordinate of this position."""
|
|
128
143
|
return self.bounding_rect.get_y() if self.bounding_rect else None
|
|
129
144
|
|
|
@@ -269,6 +284,8 @@ class FindRequest:
|
|
|
269
284
|
"pageIndex": position.page_index,
|
|
270
285
|
"textStartsWith": position.text_starts_with
|
|
271
286
|
}
|
|
287
|
+
if position.name:
|
|
288
|
+
result["name"] = position.name
|
|
272
289
|
if position.shape:
|
|
273
290
|
result["shape"] = position.shape.value
|
|
274
291
|
if position.mode:
|
|
@@ -420,3 +437,38 @@ class ModifyTextRequest:
|
|
|
420
437
|
},
|
|
421
438
|
"newTextLine": self.new_text
|
|
422
439
|
}
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
@dataclass
|
|
443
|
+
class ChangeFormFieldRequest:
|
|
444
|
+
object_ref: ObjectRef
|
|
445
|
+
value: str
|
|
446
|
+
|
|
447
|
+
def to_dict(self) -> dict:
|
|
448
|
+
"""Convert to dictionary for JSON serialization."""
|
|
449
|
+
return {
|
|
450
|
+
"ref": {
|
|
451
|
+
"internalId": self.object_ref.internal_id,
|
|
452
|
+
"position": FindRequest._position_to_dict(self.object_ref.position),
|
|
453
|
+
"type": self.object_ref.type.value
|
|
454
|
+
},
|
|
455
|
+
"value": self.value
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
@dataclass
|
|
460
|
+
class FormFieldRef(ObjectRef):
|
|
461
|
+
"""
|
|
462
|
+
Represents a form field reference with additional form-specific properties.
|
|
463
|
+
Extends ObjectRef to include form field name and value.
|
|
464
|
+
"""
|
|
465
|
+
name: Optional[str] = None
|
|
466
|
+
value: Optional[str] = None
|
|
467
|
+
|
|
468
|
+
def get_name(self) -> Optional[str]:
|
|
469
|
+
"""Get the form field name."""
|
|
470
|
+
return self.name
|
|
471
|
+
|
|
472
|
+
def get_value(self) -> Optional[str]:
|
|
473
|
+
"""Get the form field value."""
|
|
474
|
+
return self.value
|
pdfdancer/paragraph_builder.py
CHANGED
|
@@ -4,14 +4,11 @@ Closely mirrors the Java ParagraphBuilder class with Python conventions.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Optional, Union
|
|
7
|
+
from typing import Optional, Union
|
|
8
8
|
|
|
9
9
|
from .exceptions import ValidationException
|
|
10
10
|
from .models import Paragraph, Font, Color, Position
|
|
11
11
|
|
|
12
|
-
if TYPE_CHECKING:
|
|
13
|
-
from .client_v1 import ClientV1
|
|
14
|
-
|
|
15
12
|
|
|
16
13
|
class ParagraphBuilder:
|
|
17
14
|
"""
|
|
@@ -19,7 +16,7 @@ class ParagraphBuilder:
|
|
|
19
16
|
Mirrors the Java ParagraphBuilder class exactly.
|
|
20
17
|
"""
|
|
21
18
|
|
|
22
|
-
def __init__(self, client: 'ClientV1'):
|
|
19
|
+
def __init__(self, client: 'PDFDancer'):
|
|
23
20
|
"""
|
|
24
21
|
Initialize the paragraph builder with a client reference.
|
|
25
22
|
|
|
@@ -37,7 +34,7 @@ class ParagraphBuilder:
|
|
|
37
34
|
self._ttf_file: Optional[Path] = None
|
|
38
35
|
self._font: Optional[Font] = None
|
|
39
36
|
|
|
40
|
-
def
|
|
37
|
+
def text(self, text: str, color: Optional[Color] = None) -> 'ParagraphBuilder':
|
|
41
38
|
"""
|
|
42
39
|
Set the text content for the paragraph.
|
|
43
40
|
Equivalent to fromString() methods in Java ParagraphBuilder.
|
|
@@ -63,13 +60,14 @@ class ParagraphBuilder:
|
|
|
63
60
|
|
|
64
61
|
return self
|
|
65
62
|
|
|
66
|
-
def
|
|
63
|
+
def font(self, font_name: str, font_size: float) -> 'ParagraphBuilder':
|
|
67
64
|
"""
|
|
68
65
|
Set the font for the paragraph using an existing Font object.
|
|
69
66
|
Equivalent to withFont(Font) in Java ParagraphBuilder.
|
|
70
67
|
|
|
71
68
|
Args:
|
|
72
|
-
|
|
69
|
+
font_name: The Font to use
|
|
70
|
+
font_size: The font size
|
|
73
71
|
|
|
74
72
|
Returns:
|
|
75
73
|
Self for method chaining
|
|
@@ -77,6 +75,7 @@ class ParagraphBuilder:
|
|
|
77
75
|
Raises:
|
|
78
76
|
ValidationException: If font is None
|
|
79
77
|
"""
|
|
78
|
+
font = Font(font_name, font_size)
|
|
80
79
|
if font is None:
|
|
81
80
|
raise ValidationException("Font cannot be null")
|
|
82
81
|
|
|
@@ -84,7 +83,7 @@ class ParagraphBuilder:
|
|
|
84
83
|
self._ttf_file = None # Clear TTF file when using existing font
|
|
85
84
|
return self
|
|
86
85
|
|
|
87
|
-
def
|
|
86
|
+
def font_file(self, ttf_file: Union[Path, str], font_size: float) -> 'ParagraphBuilder':
|
|
88
87
|
"""
|
|
89
88
|
Set the font for the paragraph using a TTF file.
|
|
90
89
|
Equivalent to withFont(File, double) in Java ParagraphBuilder.
|
|
@@ -125,7 +124,7 @@ class ParagraphBuilder:
|
|
|
125
124
|
self._font = self._register_ttf(ttf_path, font_size)
|
|
126
125
|
return self
|
|
127
126
|
|
|
128
|
-
def
|
|
127
|
+
def line_spacing(self, spacing: float) -> 'ParagraphBuilder':
|
|
129
128
|
"""
|
|
130
129
|
Set the line spacing for the paragraph.
|
|
131
130
|
Equivalent to withLineSpacing() in Java ParagraphBuilder.
|
|
@@ -145,7 +144,7 @@ class ParagraphBuilder:
|
|
|
145
144
|
self._line_spacing = spacing
|
|
146
145
|
return self
|
|
147
146
|
|
|
148
|
-
def
|
|
147
|
+
def color(self, color: Color) -> 'ParagraphBuilder':
|
|
149
148
|
"""
|
|
150
149
|
Set the text color for the paragraph.
|
|
151
150
|
Equivalent to withColor() in Java ParagraphBuilder.
|
|
@@ -165,7 +164,7 @@ class ParagraphBuilder:
|
|
|
165
164
|
self._text_color = color
|
|
166
165
|
return self
|
|
167
166
|
|
|
168
|
-
def
|
|
167
|
+
def at(self, page_index: int, x: float, y: float) -> 'ParagraphBuilder':
|
|
169
168
|
"""
|
|
170
169
|
Set the position for the paragraph.
|
|
171
170
|
Equivalent to withPosition() in Java ParagraphBuilder.
|
|
@@ -179,6 +178,7 @@ class ParagraphBuilder:
|
|
|
179
178
|
Raises:
|
|
180
179
|
ValidationException: If position is None
|
|
181
180
|
"""
|
|
181
|
+
position = Position.at_page_coordinates(page_index, x, y)
|
|
182
182
|
if position is None:
|
|
183
183
|
raise ValidationException("Position cannot be null")
|
|
184
184
|
|
|
@@ -265,3 +265,6 @@ class ParagraphBuilder:
|
|
|
265
265
|
lines = ['']
|
|
266
266
|
|
|
267
267
|
return lines
|
|
268
|
+
|
|
269
|
+
def add(self):
|
|
270
|
+
self._client._add_paragraph(self.build())
|
|
@@ -11,6 +11,7 @@ from typing import List, Optional, Union, BinaryIO
|
|
|
11
11
|
|
|
12
12
|
import requests
|
|
13
13
|
|
|
14
|
+
from . import ParagraphBuilder
|
|
14
15
|
from .exceptions import (
|
|
15
16
|
PdfDancerException,
|
|
16
17
|
FontNotFoundException,
|
|
@@ -18,24 +19,111 @@ from .exceptions import (
|
|
|
18
19
|
SessionException,
|
|
19
20
|
ValidationException
|
|
20
21
|
)
|
|
22
|
+
from .image_builder import ImageBuilder
|
|
21
23
|
from .models import (
|
|
22
|
-
ObjectRef, Position, ObjectType, Font, Image, Paragraph,
|
|
23
|
-
FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest,
|
|
24
|
+
ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef,
|
|
25
|
+
FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest, ChangeFormFieldRequest,
|
|
26
|
+
ShapeType, PositionMode
|
|
24
27
|
)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
from .types import PathObject, ParagraphObject, TextLineObject, ImageObject, FormObject, FormFieldObject, PageObject
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PageClient:
|
|
32
|
+
def __init__(self, page_index: int, root: "PDFDancer"):
|
|
33
|
+
self.page_index = page_index
|
|
34
|
+
self.root = root
|
|
35
|
+
|
|
36
|
+
def select_paths_at(self, x: float, y: float) -> List[PathObject]:
|
|
37
|
+
# noinspection PyProtectedMember
|
|
38
|
+
return self.root._to_path_objects(self.root._find_paths(Position.at_page_coordinates(self.page_index, x, y)))
|
|
39
|
+
|
|
40
|
+
def select_paragraphs(self) -> List[ParagraphObject]:
|
|
41
|
+
# noinspection PyProtectedMember
|
|
42
|
+
return self.root._to_paragraph_objects(self.root._find_paragraphs(Position.at_page(self.page_index)))
|
|
43
|
+
|
|
44
|
+
def select_paragraphs_starting_with(self, text: str) -> List[ParagraphObject]:
|
|
45
|
+
position = Position.at_page(self.page_index)
|
|
46
|
+
position.with_text_starts(text)
|
|
47
|
+
# noinspection PyProtectedMember
|
|
48
|
+
return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
|
|
49
|
+
|
|
50
|
+
def select_paragraphs_at(self, x: float, y: float) -> List[ParagraphObject]:
|
|
51
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
52
|
+
# noinspection PyProtectedMember
|
|
53
|
+
return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
|
|
54
|
+
|
|
55
|
+
def select_text_lines(self) -> List[TextLineObject]:
|
|
56
|
+
position = Position.at_page(self.page_index)
|
|
57
|
+
# noinspection PyProtectedMember
|
|
58
|
+
return self.root._to_textline_objects(self.root._find_text_lines(position))
|
|
59
|
+
|
|
60
|
+
def select_text_lines_starting_with(self, text: str) -> List[TextLineObject]:
|
|
61
|
+
position = Position.at_page(self.page_index)
|
|
62
|
+
position.with_text_starts(text)
|
|
63
|
+
# noinspection PyProtectedMember
|
|
64
|
+
return self.root._to_textline_objects(self.root._find_text_lines(position))
|
|
65
|
+
|
|
66
|
+
def select_images(self) -> List[ImageObject]:
|
|
67
|
+
# noinspection PyProtectedMember
|
|
68
|
+
return self.root._to_image_objects(self.root._find_images(Position.at_page(self.page_index)))
|
|
69
|
+
|
|
70
|
+
def select_images_at(self, x: float, y: float) -> List[ImageObject]:
|
|
71
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
72
|
+
# noinspection PyProtectedMember
|
|
73
|
+
return self.root._to_image_objects(self.root._find_images(position))
|
|
74
|
+
|
|
75
|
+
def select_forms(self) -> List[FormObject]:
|
|
76
|
+
position = Position.at_page(self.page_index)
|
|
77
|
+
# noinspection PyProtectedMember
|
|
78
|
+
return self.root._to_form_objects(self.root._find_form_x_objects(position))
|
|
79
|
+
|
|
80
|
+
def select_forms_at(self, x: float, y: float) -> List[FormObject]:
|
|
81
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
82
|
+
# noinspection PyProtectedMember
|
|
83
|
+
return self.root._to_form_objects(self.root._find_form_x_objects(position))
|
|
84
|
+
|
|
85
|
+
def select_form_fields(self) -> List[FormFieldObject]:
|
|
86
|
+
position = Position.at_page(self.page_index)
|
|
87
|
+
# noinspection PyProtectedMember
|
|
88
|
+
return self.root._to_form_field_objects(self.root._find_form_fields(position))
|
|
89
|
+
|
|
90
|
+
def select_form_fields_by_name(self, field_name: str) -> List[FormFieldObject]:
|
|
91
|
+
pos = Position.by_name(field_name)
|
|
92
|
+
pos.page_index = self.page_index
|
|
93
|
+
# noinspection PyProtectedMember
|
|
94
|
+
return self.root._to_form_field_objects(self.root._find_form_fields(pos))
|
|
95
|
+
|
|
96
|
+
def select_form_fields_at(self, x: float, y: float) -> List[FormFieldObject]:
|
|
97
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
98
|
+
# noinspection PyProtectedMember
|
|
99
|
+
return self.root._to_form_field_objects(self.root._find_form_fields(position))
|
|
100
|
+
|
|
101
|
+
def get(self) -> PageObject:
|
|
102
|
+
return self.root._to_page_object(self.root._get_page(self.page_index))
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class PDFDancer:
|
|
28
106
|
"""
|
|
29
107
|
REST API client for interacting with the PDFDancer PDF manipulation service.
|
|
30
108
|
This client provides a convenient Python interface for performing PDF operations
|
|
31
109
|
including session management, object searching, manipulation, and retrieval.
|
|
32
110
|
Handles authentication, session lifecycle, and HTTP communication transparently.
|
|
33
|
-
|
|
34
|
-
Mirrors the Java Client class functionality exactly.
|
|
35
111
|
"""
|
|
36
112
|
|
|
113
|
+
# --------------------------------------------------------------
|
|
114
|
+
# CLASS METHOD ENTRY POINT
|
|
115
|
+
# --------------------------------------------------------------
|
|
116
|
+
@classmethod
|
|
117
|
+
def open(cls,
|
|
118
|
+
pdf_data: Union[bytes, Path, str, BinaryIO],
|
|
119
|
+
token: str,
|
|
120
|
+
base_url: str = "http://localhost:8080",
|
|
121
|
+
timeout: float = 30.0) -> "PDFDancer":
|
|
122
|
+
|
|
123
|
+
return PDFDancer(token, pdf_data, base_url, timeout)
|
|
124
|
+
|
|
37
125
|
def __init__(self, token: str, pdf_data: Union[bytes, Path, str, BinaryIO],
|
|
38
|
-
base_url: str
|
|
126
|
+
base_url: str, read_timeout: float = 0):
|
|
39
127
|
"""
|
|
40
128
|
Creates a new client with PDF data.
|
|
41
129
|
This constructor initializes the client, uploads the PDF data to create
|
|
@@ -75,7 +163,6 @@ class ClientV1:
|
|
|
75
163
|
def _process_pdf_data(self, pdf_data: Union[bytes, Path, str, BinaryIO]) -> bytes:
|
|
76
164
|
"""
|
|
77
165
|
Process PDF data from various input types with strict validation.
|
|
78
|
-
Equivalent to readFile() method in Java client.
|
|
79
166
|
"""
|
|
80
167
|
if pdf_data is None:
|
|
81
168
|
raise ValidationException("PDF data cannot be null")
|
|
@@ -152,7 +239,6 @@ class ClientV1:
|
|
|
152
239
|
def _create_session(self) -> str:
|
|
153
240
|
"""
|
|
154
241
|
Creates a new PDF processing session by uploading the PDF data.
|
|
155
|
-
Equivalent to createSession() method in Java client.
|
|
156
242
|
"""
|
|
157
243
|
try:
|
|
158
244
|
files = {
|
|
@@ -182,7 +268,6 @@ class ClientV1:
|
|
|
182
268
|
params: Optional[dict] = None) -> requests.Response:
|
|
183
269
|
"""
|
|
184
270
|
Make HTTP request with session headers and error handling.
|
|
185
|
-
Equivalent to retrieve() method pattern in Java client.
|
|
186
271
|
"""
|
|
187
272
|
headers = {
|
|
188
273
|
'X-Session-Id': self._session_id,
|
|
@@ -218,7 +303,7 @@ class ClientV1:
|
|
|
218
303
|
|
|
219
304
|
# Search Operations - matching Java client exactly
|
|
220
305
|
|
|
221
|
-
def
|
|
306
|
+
def _find(self, object_type: Optional[ObjectType] = None, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
222
307
|
"""
|
|
223
308
|
Searches for PDF objects matching the specified criteria.
|
|
224
309
|
This method provides flexible search capabilities across all PDF content,
|
|
@@ -238,56 +323,120 @@ class ClientV1:
|
|
|
238
323
|
objects_data = response.json()
|
|
239
324
|
return [self._parse_object_ref(obj_data) for obj_data in objects_data]
|
|
240
325
|
|
|
241
|
-
def
|
|
326
|
+
def select_paragraphs(self) -> List[ParagraphObject]:
|
|
327
|
+
"""
|
|
328
|
+
Searches for paragraph objects at the specified position.
|
|
329
|
+
"""
|
|
330
|
+
return self._to_paragraph_objects(self._find(ObjectType.PARAGRAPH, None))
|
|
331
|
+
|
|
332
|
+
def _find_paragraphs(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
242
333
|
"""
|
|
243
334
|
Searches for paragraph objects at the specified position.
|
|
244
|
-
Equivalent to findParagraphs() in Java client.
|
|
245
335
|
"""
|
|
246
|
-
return self.
|
|
336
|
+
return self._find(ObjectType.PARAGRAPH, position)
|
|
247
337
|
|
|
248
|
-
def
|
|
338
|
+
def _find_images(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
249
339
|
"""
|
|
250
340
|
Searches for image objects at the specified position.
|
|
251
|
-
Equivalent to findImages() in Java client.
|
|
252
341
|
"""
|
|
253
|
-
return self.
|
|
342
|
+
return self._find(ObjectType.IMAGE, position)
|
|
254
343
|
|
|
255
|
-
def
|
|
344
|
+
def select_images(self) -> List[ImageObject]:
|
|
345
|
+
"""
|
|
346
|
+
Searches for image objects in the whole document
|
|
347
|
+
"""
|
|
348
|
+
return self._to_image_objects(self._find(ObjectType.IMAGE, None))
|
|
349
|
+
|
|
350
|
+
def select_forms(self) -> List[FormObject]:
|
|
351
|
+
"""
|
|
352
|
+
Searches for form field objects in the whole document.
|
|
353
|
+
"""
|
|
354
|
+
return self._to_form_objects(self._find(ObjectType.FORM_X_OBJECT, None))
|
|
355
|
+
|
|
356
|
+
def _find_form_x_objects(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
256
357
|
"""
|
|
257
358
|
Searches for form field objects at the specified position.
|
|
258
|
-
Equivalent to findForms() in Java client.
|
|
259
359
|
"""
|
|
260
|
-
return self.
|
|
360
|
+
return self._find(ObjectType.FORM_X_OBJECT, position)
|
|
361
|
+
|
|
362
|
+
def select_form_fields(self) -> List[FormFieldObject]:
|
|
363
|
+
"""
|
|
364
|
+
Searches for form field objects in the whole document.
|
|
365
|
+
"""
|
|
366
|
+
return self._to_form_field_objects(self._find_form_fields(None))
|
|
367
|
+
|
|
368
|
+
def select_form_fields_by_name(self, field_name: str) -> List[FormFieldObject]:
|
|
369
|
+
"""
|
|
370
|
+
Searches for form field objects in the whole document.
|
|
371
|
+
"""
|
|
372
|
+
return self._to_form_field_objects(self._find_form_fields(Position.by_name(field_name)))
|
|
373
|
+
|
|
374
|
+
def _find_form_fields(self, position: Optional[Position] = None) -> List[FormFieldRef]:
|
|
375
|
+
"""
|
|
376
|
+
Searches for form fields at the specified position.
|
|
377
|
+
Returns FormFieldRef objects with name and value properties.
|
|
378
|
+
"""
|
|
379
|
+
request_data = FindRequest(ObjectType.FORM_FIELD, position).to_dict()
|
|
380
|
+
response = self._make_request('POST', '/pdf/find', data=request_data)
|
|
381
|
+
|
|
382
|
+
# Parse response into ObjectRef objects
|
|
383
|
+
objects_data = response.json()
|
|
384
|
+
return [self._parse_form_field_ref(obj_data) for obj_data in objects_data]
|
|
385
|
+
|
|
386
|
+
def _change_form_field(self, form_field_ref: FormFieldRef, new_value: str) -> bool:
|
|
387
|
+
"""
|
|
388
|
+
Changes the value of a form field.
|
|
389
|
+
"""
|
|
390
|
+
if form_field_ref is None:
|
|
391
|
+
raise ValidationException("Form field reference cannot be null")
|
|
392
|
+
|
|
393
|
+
request_data = ChangeFormFieldRequest(form_field_ref, new_value).to_dict()
|
|
394
|
+
response = self._make_request('PUT', '/pdf/modify/formField', data=request_data)
|
|
395
|
+
return response.json()
|
|
396
|
+
|
|
397
|
+
def select_paths(self) -> List[ObjectRef]:
|
|
398
|
+
"""
|
|
399
|
+
Searches for vector path objects at the specified position.
|
|
400
|
+
"""
|
|
401
|
+
return self._find(ObjectType.PATH, None)
|
|
261
402
|
|
|
262
|
-
def
|
|
403
|
+
def _find_paths(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
263
404
|
"""
|
|
264
405
|
Searches for vector path objects at the specified position.
|
|
265
|
-
Equivalent to findPaths() in Java client.
|
|
266
406
|
"""
|
|
267
|
-
return self.
|
|
407
|
+
return self._find(ObjectType.PATH, position)
|
|
268
408
|
|
|
269
|
-
def
|
|
409
|
+
def _find_text_lines(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
270
410
|
"""
|
|
271
411
|
Searches for text line objects at the specified position.
|
|
272
|
-
Equivalent to findTextLines() in Java client.
|
|
273
412
|
"""
|
|
274
|
-
return self.
|
|
413
|
+
return self._find(ObjectType.TEXT_LINE, position)
|
|
414
|
+
|
|
415
|
+
def select_text_lines(self) -> List[TextLineObject]:
|
|
416
|
+
"""
|
|
417
|
+
Searches for text line objects at the specified position.
|
|
418
|
+
"""
|
|
419
|
+
return self._to_textline_objects(self._find(ObjectType.TEXT_LINE, None))
|
|
420
|
+
|
|
421
|
+
def page(self, page_index: int) -> PageClient:
|
|
422
|
+
return PageClient(page_index, self)
|
|
275
423
|
|
|
276
424
|
# Page Operations
|
|
277
425
|
|
|
278
|
-
def
|
|
426
|
+
def pages(self) -> List[PageObject]:
|
|
427
|
+
return self._to_page_objects(self._get_pages())
|
|
428
|
+
|
|
429
|
+
def _get_pages(self) -> List[ObjectRef]:
|
|
279
430
|
"""
|
|
280
431
|
Retrieves references to all pages in the PDF document.
|
|
281
|
-
Equivalent to getPages() in Java client.
|
|
282
432
|
"""
|
|
283
433
|
response = self._make_request('POST', '/pdf/page/find')
|
|
284
434
|
pages_data = response.json()
|
|
285
435
|
return [self._parse_object_ref(page_data) for page_data in pages_data]
|
|
286
436
|
|
|
287
|
-
def
|
|
437
|
+
def _get_page(self, page_index: int) -> Optional[ObjectRef]:
|
|
288
438
|
"""
|
|
289
439
|
Retrieves a reference to a specific page by its page index.
|
|
290
|
-
Equivalent to getPage() in Java client.
|
|
291
440
|
|
|
292
441
|
Args:
|
|
293
442
|
page_index: The page index to retrieve (1-based indexing)
|
|
@@ -307,10 +456,9 @@ class ClientV1:
|
|
|
307
456
|
|
|
308
457
|
return self._parse_object_ref(pages_data[0])
|
|
309
458
|
|
|
310
|
-
def
|
|
459
|
+
def _delete_page(self, page_ref: ObjectRef) -> bool:
|
|
311
460
|
"""
|
|
312
461
|
Deletes a page from the PDF document.
|
|
313
|
-
Equivalent to deletePage() in Java client.
|
|
314
462
|
|
|
315
463
|
Args:
|
|
316
464
|
page_ref: Reference to the page to be deleted
|
|
@@ -328,10 +476,9 @@ class ClientV1:
|
|
|
328
476
|
|
|
329
477
|
# Manipulation Operations
|
|
330
478
|
|
|
331
|
-
def
|
|
479
|
+
def _delete(self, object_ref: ObjectRef) -> bool:
|
|
332
480
|
"""
|
|
333
481
|
Deletes the specified PDF object from the document.
|
|
334
|
-
Equivalent to delete() in Java client.
|
|
335
482
|
|
|
336
483
|
Args:
|
|
337
484
|
object_ref: Reference to the object to be deleted
|
|
@@ -346,10 +493,9 @@ class ClientV1:
|
|
|
346
493
|
response = self._make_request('DELETE', '/pdf/delete', data=request_data)
|
|
347
494
|
return response.json()
|
|
348
495
|
|
|
349
|
-
def
|
|
496
|
+
def _move(self, object_ref: ObjectRef, position: Position) -> bool:
|
|
350
497
|
"""
|
|
351
498
|
Moves a PDF object to a new position within the document.
|
|
352
|
-
Equivalent to move() in Java client.
|
|
353
499
|
|
|
354
500
|
Args:
|
|
355
501
|
object_ref: Reference to the object to be moved
|
|
@@ -369,10 +515,9 @@ class ClientV1:
|
|
|
369
515
|
|
|
370
516
|
# Add Operations
|
|
371
517
|
|
|
372
|
-
def
|
|
518
|
+
def _add_image(self, image: Image, position: Optional[Position] = None) -> bool:
|
|
373
519
|
"""
|
|
374
520
|
Adds an image to the PDF document.
|
|
375
|
-
Equivalent to addImage() methods in Java client.
|
|
376
521
|
|
|
377
522
|
Args:
|
|
378
523
|
image: The image object to add
|
|
@@ -392,10 +537,9 @@ class ClientV1:
|
|
|
392
537
|
|
|
393
538
|
return self._add_object(image)
|
|
394
539
|
|
|
395
|
-
def
|
|
540
|
+
def _add_paragraph(self, paragraph: Paragraph) -> bool:
|
|
396
541
|
"""
|
|
397
542
|
Adds a paragraph to the PDF document.
|
|
398
|
-
Equivalent to addParagraph() in Java client with validation.
|
|
399
543
|
|
|
400
544
|
Args:
|
|
401
545
|
paragraph: The paragraph object to add
|
|
@@ -417,18 +561,21 @@ class ClientV1:
|
|
|
417
561
|
def _add_object(self, pdf_object) -> bool:
|
|
418
562
|
"""
|
|
419
563
|
Internal method to add any PDF object.
|
|
420
|
-
Equivalent to addObject() in Java client.
|
|
421
564
|
"""
|
|
422
565
|
request_data = AddRequest(pdf_object).to_dict()
|
|
423
566
|
response = self._make_request('POST', '/pdf/add', data=request_data)
|
|
424
567
|
return response.json()
|
|
425
568
|
|
|
426
|
-
|
|
569
|
+
def new_paragraph(self) -> ParagraphBuilder:
|
|
570
|
+
return ParagraphBuilder(self)
|
|
571
|
+
|
|
572
|
+
def new_image(self) -> ImageBuilder:
|
|
573
|
+
return ImageBuilder(self)
|
|
427
574
|
|
|
428
|
-
|
|
575
|
+
# Modify Operations
|
|
576
|
+
def _modify_paragraph(self, object_ref: ObjectRef, new_paragraph: Union[Paragraph, str]) -> bool:
|
|
429
577
|
"""
|
|
430
578
|
Modifies a paragraph object or its text content.
|
|
431
|
-
Equivalent to modifyParagraph() methods in Java client.
|
|
432
579
|
|
|
433
580
|
Args:
|
|
434
581
|
object_ref: Reference to the paragraph to modify
|
|
@@ -453,10 +600,9 @@ class ClientV1:
|
|
|
453
600
|
|
|
454
601
|
return response.json()
|
|
455
602
|
|
|
456
|
-
def
|
|
603
|
+
def _modify_text_line(self, object_ref: ObjectRef, new_text: str) -> bool:
|
|
457
604
|
"""
|
|
458
605
|
Modifies a text line object.
|
|
459
|
-
Equivalent to modifyTextLine() in Java client.
|
|
460
606
|
|
|
461
607
|
Args:
|
|
462
608
|
object_ref: Reference to the text line to modify
|
|
@@ -479,7 +625,6 @@ class ClientV1:
|
|
|
479
625
|
def find_fonts(self, font_name: str, font_size: int) -> List[Font]:
|
|
480
626
|
"""
|
|
481
627
|
Finds available fonts matching the specified name and size.
|
|
482
|
-
Equivalent to findFonts() in Java client.
|
|
483
628
|
|
|
484
629
|
Args:
|
|
485
630
|
font_name: Name of the font to search for
|
|
@@ -502,7 +647,6 @@ class ClientV1:
|
|
|
502
647
|
def register_font(self, ttf_file: Union[Path, str, bytes, BinaryIO]) -> str:
|
|
503
648
|
"""
|
|
504
649
|
Registers a custom font for use in PDF operations.
|
|
505
|
-
Equivalent to registerFont() in Java client.
|
|
506
650
|
|
|
507
651
|
Args:
|
|
508
652
|
ttf_file: TTF font file as Path, filename, bytes, or file-like object
|
|
@@ -577,7 +721,6 @@ class ClientV1:
|
|
|
577
721
|
def get_pdf_file(self) -> bytes:
|
|
578
722
|
"""
|
|
579
723
|
Downloads the current state of the PDF document with all modifications applied.
|
|
580
|
-
Equivalent to getPDFFile() in Java client.
|
|
581
724
|
|
|
582
725
|
Returns:
|
|
583
726
|
PDF file data as bytes with all session modifications applied
|
|
@@ -585,10 +728,9 @@ class ClientV1:
|
|
|
585
728
|
response = self._make_request('GET', f'/session/{self._session_id}/pdf')
|
|
586
729
|
return response.content
|
|
587
730
|
|
|
588
|
-
def
|
|
731
|
+
def save(self, file_path: Union[str, Path]) -> None:
|
|
589
732
|
"""
|
|
590
733
|
Saves the current PDF to a file.
|
|
591
|
-
Equivalent to savePDF() in Java client.
|
|
592
734
|
|
|
593
735
|
Args:
|
|
594
736
|
file_path: Path where to save the PDF file
|
|
@@ -628,7 +770,23 @@ class ClientV1:
|
|
|
628
770
|
type=object_type
|
|
629
771
|
)
|
|
630
772
|
|
|
631
|
-
def
|
|
773
|
+
def _parse_form_field_ref(self, obj_data: dict) -> FormFieldRef:
    """
    Parse JSON object data into a FormFieldRef instance.

    Args:
        obj_data: Decoded JSON object. The 'type' key is required; 'position',
            'internalId', 'name' and 'value' are optional.

    Returns:
        A FormFieldRef whose optional fields are None when the matching key
        is absent (position is also None when the 'position' entry is empty).

    Raises:
        KeyError: If 'type' is missing from obj_data.
    """
    position_data = obj_data.get('position', {})
    # An empty or missing position entry is reported as "no position".
    position = self._parse_position(position_data) if position_data else None

    return FormFieldRef(
        internal_id=obj_data.get('internalId'),
        position=position,
        type=ObjectType(obj_data['type']),
        name=obj_data.get('name'),
        value=obj_data.get('value'),
    )
|
|
787
|
+
|
|
788
|
+
@staticmethod
|
|
789
|
+
def _parse_position(pos_data: dict) -> Position:
|
|
632
790
|
"""Parse JSON position data into Position instance."""
|
|
633
791
|
position = Position()
|
|
634
792
|
position.page_index = pos_data.get('pageIndex')
|
|
@@ -656,8 +814,6 @@ class ClientV1:
|
|
|
656
814
|
def paragraph_builder(self) -> 'ParagraphBuilder':
|
|
657
815
|
"""
|
|
658
816
|
Creates a new ParagraphBuilder for fluent paragraph construction.
|
|
659
|
-
Equivalent to paragraphBuilder() in Java client.
|
|
660
|
-
|
|
661
817
|
Returns:
|
|
662
818
|
A new ParagraphBuilder instance
|
|
663
819
|
"""
|
|
@@ -673,3 +829,28 @@ class ClientV1:
|
|
|
673
829
|
"""Context manager exit - cleanup if needed."""
|
|
674
830
|
# Could add session cleanup here if API supports it
|
|
675
831
|
pass
|
|
832
|
+
|
|
833
|
+
def _to_path_objects(self, path_refs: List[ObjectRef]) -> List[PathObject]:
    """Wrap every reference in a PathObject that carries this client."""
    return [
        PathObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
835
|
+
|
|
836
|
+
def _to_paragraph_objects(self, path_refs: List[ObjectRef]) -> List[ParagraphObject]:
    """Wrap every reference in a ParagraphObject that carries this client."""
    return [
        ParagraphObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
838
|
+
|
|
839
|
+
def _to_textline_objects(self, path_refs: List[ObjectRef]) -> List[TextLineObject]:
    """Wrap every reference in a TextLineObject that carries this client."""
    return [
        TextLineObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
841
|
+
|
|
842
|
+
def _to_image_objects(self, path_refs: List[ObjectRef]) -> List[ImageObject]:
    """Wrap every reference in an ImageObject that carries this client."""
    return [
        ImageObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
844
|
+
|
|
845
|
+
def _to_form_objects(self, path_refs: List[ObjectRef]) -> List[FormObject]:
    """Wrap every reference in a FormObject that carries this client."""
    return [
        FormObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
847
|
+
|
|
848
|
+
def _to_form_field_objects(self, path_refs: List[FormFieldRef]) -> List[FormFieldObject]:
    """Wrap every form field reference in a FormFieldObject, keeping its name and value."""
    return [
        FormFieldObject(
            self,
            item.internal_id,
            item.type,
            item.position,
            item.name,
            item.value,
        )
        for item in path_refs
    ]
|
|
851
|
+
|
|
852
|
+
def _to_page_objects(self, path_refs: List[ObjectRef]) -> List[PageObject]:
    """Wrap every reference in a PageObject that carries this client."""
    return [
        PageObject(self, item.internal_id, item.type, item.position)
        for item in path_refs
    ]
|
|
854
|
+
|
|
855
|
+
def _to_page_object(self, ref: ObjectRef) -> PageObject:
    """Wrap a single reference in a PageObject that carries this client."""
    page = PageObject(self, ref.internal_id, ref.type, ref.position)
    return page
|
pdfdancer/types.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from . import ObjectType, Position, ObjectRef, Point, Paragraph, Font, Color, FormFieldRef
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class BoundingRect:
    """Rectangle described by x and y plus an optional width and height."""

    # Required coordinates.
    x: float
    y: float
    # Extent; both default to None when not supplied.
    width: Optional[float] = None
    height: Optional[float] = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class UnsupportedOperation(Exception):
    """Error carrying a single message, raised when a requested edit is not supported."""

    def __init__(self, msg: str):
        Exception.__init__(self, msg)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PDFObjectBase:
    """
    Base class for all PDF objects (paths, paragraphs, text lines, etc.)
    providing shared behavior such as position, deletion, and movement.

    Attributes:
        position: Position of the object, as passed to the constructor.
        internal_id: Internal PDFDancer object identifier, e.g. 'PATH_000023'.
        object_type: Enum value representing the PDF object type.
    """

    def __init__(self, client: 'PDFDancer', internal_id: str, object_type: ObjectType, position: Position):
        self._client = client
        # These are plain instance attributes. Methods with the same names
        # would be shadowed by them on every instance (and could never be
        # called), so none are defined on the class.
        self.position = position
        self.internal_id = internal_id
        self.object_type = object_type

    # --------------------------------------------------------------
    # Core properties
    # --------------------------------------------------------------
    def type(self) -> ObjectType:
        """Enum value representing the PDF object type."""
        return self.object_type

    @property
    def page_index(self) -> int:
        """Page index where this object resides."""
        return self.position.page_index

    def object_ref(self) -> ObjectRef:
        """Build an ObjectRef from this object's id, position and type."""
        return ObjectRef(self.internal_id, self.position, self.object_type)

    # --------------------------------------------------------------
    # Common actions
    # --------------------------------------------------------------
    def delete(self) -> bool:
        """Delete this object from the PDF document."""
        # noinspection PyProtectedMember
        return self._client._delete(self.object_ref())

    def move_to(self, x: float, y: float) -> bool:
        """Move this object to a new position on its current page."""
        # noinspection PyProtectedMember
        return self._client._move(
            self.object_ref(),
            Position.at_page_coordinates(self.position.page_index, x, y)
        )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# -------------------------------------------------------------------
|
|
73
|
+
# Subclasses
|
|
74
|
+
# -------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
class PathObject(PDFObjectBase):
    """A vector path object located on a PDF page."""

    @property
    def bounding_box(self) -> Optional[BoundingRect]:
        """Bounding rectangle taken from this object's position, if available."""
        rect = self.position.bounding_rect
        return rect
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ImageObject(PDFObjectBase):
    """PDF object wrapper for images; adds no behaviour beyond PDFObjectBase."""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class FormObject(PDFObjectBase):
    """PDF object wrapper for forms; adds no behaviour beyond PDFObjectBase."""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _process_text_lines(text: str) -> list[str]:
|
|
94
|
+
"""
|
|
95
|
+
Process text into lines for the paragraph.
|
|
96
|
+
This is a simplified version - the full implementation would handle
|
|
97
|
+
word wrapping, line breaks, and other text formatting based on the font
|
|
98
|
+
and paragraph width. TODO
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
text: The input text to process
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
List of text lines for the paragraph
|
|
105
|
+
"""
|
|
106
|
+
# Handle escaped newlines (\\n) as actual newlines
|
|
107
|
+
processed_text = text.replace('\\n', '\n')
|
|
108
|
+
|
|
109
|
+
# Simple implementation - split on newlines
|
|
110
|
+
# In the full version, this would implement proper text layout
|
|
111
|
+
lines = processed_text.split('\n')
|
|
112
|
+
|
|
113
|
+
# Remove empty lines at the end but preserve intentional line breaks
|
|
114
|
+
while lines and not lines[-1].strip():
|
|
115
|
+
lines.pop()
|
|
116
|
+
|
|
117
|
+
# Ensure at least one line
|
|
118
|
+
if not lines:
|
|
119
|
+
lines = ['']
|
|
120
|
+
|
|
121
|
+
return lines
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Fallbacks used by ParagraphEdit.apply() when no explicit value was set.
DEFAULT_LINE_SPACING = 1.2
# presumably black (all components zero) - confirm against Color's definition
DEFAULT_COLOR = Color(0, 0, 0)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class BaseTextEdit:
    """
    Common base for text-like editable objects (Paragraph, TextLine, etc.)

    Collects the requested changes through chainable setters. Used as a
    context manager, apply() is called on exit unless an exception is
    propagating.
    """

    def __init__(self, target_obj, object_ref):
        self._target_obj = target_obj
        self._object_ref = object_ref
        # Pending changes; None means "not requested".
        self._new_text = None
        self._font_name = None
        self._font_size = None
        self._line_spacing = None
        self._position = None
        self._color = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            # Leave the edit unapplied; the exception keeps propagating.
            return
        self.apply()

    # --- Common fluent configuration methods ---

    def replace(self, text: str):
        """Record replacement text and return self."""
        self._new_text = text
        return self

    def font(self, font_name: str, font_size: float):
        """Record font name and size and return self."""
        self._font_name, self._font_size = font_name, font_size
        return self

    def color(self, color):
        """Record the color and return self."""
        self._color = color
        return self

    def line_spacing(self, line_spacing: float):
        """Record the line spacing and return self."""
        self._line_spacing = line_spacing
        return self

    def move_to(self, x: float, y: float):
        """Record a target position built from the given coordinates and return self."""
        anchor = Position()
        self._position = anchor.at_coordinates(Point(x, y))
        return self

    # --- Abstract method: implemented by subclass ---
    def apply(self):
        """Send the recorded changes; subclasses provide the implementation."""
        raise NotImplementedError("Subclasses must implement apply()")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class ParagraphEdit(BaseTextEdit):
    """Edit session for a paragraph: replaces its text and, optionally, its styling."""

    def apply(self) -> bool:
        """
        Send the recorded changes to the client.

        With only replacement text recorded, the text is passed to the client
        as is. As soon as a position, line spacing, font or color was
        recorded, a full Paragraph is built and sent instead; line spacing and
        color fall back to DEFAULT_LINE_SPACING and DEFAULT_COLOR.

        Returns:
            The result of the client's _modify_paragraph call.

        Raises:
            ValueError: If styling was recorded but replace() was never
                called, since the Paragraph cannot be built without text.
        """
        if (
                self._position is None
                and self._line_spacing is None
                and self._font_size is None
                and self._font_name is None
                and self._color is None
        ):
            # noinspection PyProtectedMember
            return self._target_obj._client._modify_paragraph(self._object_ref, self._new_text)

        if self._new_text is None:
            # Without this guard the text processing below fails with an
            # opaque AttributeError on None.
            raise ValueError(
                "ParagraphEdit needs replacement text when changing position, "
                "line spacing, font or color; call replace() before apply()"
            )

        new_paragraph = Paragraph(
            position=self._position,
            line_spacing=self._line_spacing if self._line_spacing is not None else DEFAULT_LINE_SPACING,
            font=Font(name=self._font_name, size=self._font_size),
            text_lines=_process_text_lines(self._new_text),
            color=self._color if self._color is not None else DEFAULT_COLOR,
        )
        # noinspection PyProtectedMember
        return self._target_obj._client._modify_paragraph(self._object_ref, new_paragraph)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class TextLineEdit(BaseTextEdit):
    """Edit session for a single text line; only text replacement is supported."""

    def apply(self) -> bool:
        """
        Send the replacement text to the client.

        Returns:
            The result of the client's _modify_text_line call.

        Raises:
            UnsupportedOperation: If a position, line spacing, font or color
                change was recorded on this edit.
        """
        requested_styling = (
            self._position,
            self._line_spacing,
            self._font_size,
            self._font_name,
            self._color,
        )
        if any(setting is not None for setting in requested_styling):
            raise UnsupportedOperation(
                "Text lines only support text replacement; position, line spacing, "
                "font and color changes are not supported"
            )
        # noinspection PyProtectedMember
        return self._target_obj._client._modify_text_line(self._object_ref, self._new_text)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class ParagraphObject(PDFObjectBase):
    """A paragraph text block located on a PDF page."""

    def edit(self) -> ParagraphEdit:
        """Start a ParagraphEdit that targets this paragraph."""
        ref = self.object_ref()
        return ParagraphEdit(self, ref)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class TextLineObject(PDFObjectBase):
    """A single line of text located on a PDF page."""

    def edit(self) -> TextLineEdit:
        """Start a TextLineEdit that targets this text line."""
        ref = self.object_ref()
        return TextLineEdit(self, ref)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class FormFieldEdit:
    """Chainable editor for the value of one form field."""

    def __init__(self, form_field: 'FormFieldObject', object_ref: FormFieldRef):
        self.object_ref = object_ref
        self.form_field = form_field

    def value(self, new_value: str) -> 'FormFieldEdit':
        """Store new_value on the wrapped form field immediately and return self."""
        field = self.form_field
        field.value = new_value
        return self

    def apply(self) -> bool:
        """Pass the stored reference and the field's current value to the client."""
        field = self.form_field
        # noinspection PyProtectedMember
        return field._client._change_form_field(self.object_ref, field.value)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class FormFieldObject(PDFObjectBase):
    """PDF object for a form field; carries the field's name and value as well."""

    def __init__(self, client: 'PDFDancer', internal_id: str, object_type: ObjectType, position: Position,
                 field_name: str, field_value: str):
        super().__init__(client, internal_id, object_type, position)
        self.value = field_value
        self.name = field_name

    def edit(self) -> FormFieldEdit:
        """Start a FormFieldEdit that targets this field."""
        ref = self.object_ref()
        return FormFieldEdit(self, ref)

    def object_ref(self) -> FormFieldRef:
        """Build a FormFieldRef from this field's id, position, type, name and value."""
        field_ref = FormFieldRef(self.internal_id, self.position, self.object_type)
        field_ref.name = self.name
        field_ref.value = self.value
        return field_ref
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class PageObject(PDFObjectBase):
    """PDF object for a page; deletion goes through the client's page-deletion call."""

    def delete(self) -> bool:
        """Delete this page via the client's _delete_page and return its result."""
        client = self._client
        # noinspection PyProtectedMember
        return client._delete_page(self.object_ref())
|
{pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.2.dist-info}/METADATA
RENAMED
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdfdancer-client-python
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Python client for PDFDancer API
|
|
5
|
-
Author-email:
|
|
5
|
+
Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
|
|
6
6
|
License: MIT
|
|
7
|
-
Project-URL: Homepage, https://
|
|
8
|
-
Project-URL: Repository, https://github.com/
|
|
7
|
+
Project-URL: Homepage, https://www.pdfdancer.com/
|
|
8
|
+
Project-URL: Repository, https://github.com/MenschMachine/pdfdancer-client-python
|
|
9
9
|
Classifier: Development Status :: 4 - Beta
|
|
10
10
|
Classifier: Intended Audience :: Developers
|
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
-
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
14
12
|
Classifier: Programming Language :: Python :: 3.9
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
16
|
Description-Content-Type: text/markdown
|
|
18
17
|
Requires-Dist: requests>=2.25.0
|
|
19
18
|
Requires-Dist: pydantic>=1.8.0
|
|
@@ -60,8 +59,8 @@ paragraphs = client.find_paragraphs(None)
|
|
|
60
59
|
images = client.find_images(Position.at_page(0))
|
|
61
60
|
|
|
62
61
|
# Manipulation operations (mirrors Java client methods)
|
|
63
|
-
client.
|
|
64
|
-
client.
|
|
62
|
+
client._delete(paragraphs[0])
|
|
63
|
+
client._move(images[0], Position.at_page_coordinates(0, 100, 200))
|
|
65
64
|
|
|
66
65
|
# Builder pattern (mirrors Java ParagraphBuilder)
|
|
67
66
|
paragraph = (client.paragraph_builder()
|
|
@@ -71,7 +70,7 @@ paragraph = (client.paragraph_builder()
|
|
|
71
70
|
.with_position(Position.at_page(0))
|
|
72
71
|
.build())
|
|
73
72
|
|
|
74
|
-
client.
|
|
73
|
+
client._add_paragraph(paragraph)
|
|
75
74
|
|
|
76
75
|
# Save result (mirrors Java savePDF)
|
|
77
76
|
client.save_pdf("output.pdf")
|
|
@@ -85,7 +84,7 @@ from pdfdancer import ClientV1
|
|
|
85
84
|
# Automatic resource management
|
|
86
85
|
with ClientV1(token="jwt-token", pdf_data="input.pdf") as client:
|
|
87
86
|
paragraphs = client.find_paragraphs(None)
|
|
88
|
-
client.
|
|
87
|
+
client._delete(paragraphs[0])
|
|
89
88
|
client.save_pdf("output.pdf")
|
|
90
89
|
# Session automatically cleaned up
|
|
91
90
|
```
|
|
@@ -105,37 +104,39 @@ client = ClientV1(token="jwt-token", pdf_data=pdf_file, base_url="https://api.se
|
|
|
105
104
|
```
|
|
106
105
|
|
|
107
106
|
### Find Operations
|
|
107
|
+
|
|
108
108
|
```python
|
|
109
109
|
# Generic find (Java: client.find())
|
|
110
|
-
objects = client.
|
|
110
|
+
objects = client._find(ObjectType.PARAGRAPH, position)
|
|
111
111
|
|
|
112
112
|
# Specific finders (Java: client.findParagraphs(), etc.)
|
|
113
|
-
paragraphs = client.
|
|
114
|
-
images = client.
|
|
115
|
-
forms = client.
|
|
116
|
-
paths = client.
|
|
117
|
-
text_lines = client.
|
|
113
|
+
paragraphs = client._find_paragraphs(position)
|
|
114
|
+
images = client._find_images(position)
|
|
115
|
+
forms = client._find_form_x_objects(position)
|
|
116
|
+
paths = client._find_paths(position)
|
|
117
|
+
text_lines = client._find_text_lines(position)
|
|
118
118
|
|
|
119
119
|
# Page operations (Java: client.getPages(), client.getPage())
|
|
120
120
|
pages = client.get_pages()
|
|
121
|
-
page = client.
|
|
121
|
+
page = client._get_page(1) # 1-based indexing
|
|
122
122
|
```
|
|
123
123
|
|
|
124
124
|
### Manipulation Operations
|
|
125
|
+
|
|
125
126
|
```python
|
|
126
127
|
# Delete (Java: client.delete(), client.deletePage())
|
|
127
|
-
result = client.
|
|
128
|
-
result = client.
|
|
128
|
+
result = client._delete(object_ref)
|
|
129
|
+
result = client._delete_page(page_ref)
|
|
129
130
|
|
|
130
131
|
# Move (Java: client.move())
|
|
131
|
-
result = client.
|
|
132
|
+
result = client._move(object_ref, new_position)
|
|
132
133
|
|
|
133
134
|
# Add (Java: client.addImage(), client.addParagraph())
|
|
134
|
-
result = client.
|
|
135
|
-
result = client.
|
|
135
|
+
result = client._add_image(image, position)
|
|
136
|
+
result = client._add_paragraph(paragraph)
|
|
136
137
|
|
|
137
138
|
# Modify (Java: client.modifyParagraph(), client.modifyTextLine())
|
|
138
|
-
result = client.
|
|
139
|
+
result = client._modify_paragraph(ref, new_paragraph)
|
|
139
140
|
result = client.modify_text_line(ref, "new text")
|
|
140
141
|
```
|
|
141
142
|
|
|
@@ -171,8 +172,8 @@ position = Position.at_page(0)
|
|
|
171
172
|
position = Position.at_page_coordinates(0, 100, 200)
|
|
172
173
|
|
|
173
174
|
# Coordinate access (Java: position.getX(), position.getY())
|
|
174
|
-
x = position.
|
|
175
|
-
y = position.
|
|
175
|
+
x = position.x()
|
|
176
|
+
y = position.y()
|
|
176
177
|
|
|
177
178
|
# Movement (Java: position.moveX(), position.moveY())
|
|
178
179
|
position.move_x(50.0)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
pdfdancer/__init__.py,sha256=5HVIUIEESA_GQHKJhItdperrx079oFZM028ciSt2Z9o,1004
|
|
2
|
+
pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
|
|
3
|
+
pdfdancer/image_builder.py,sha256=Omxc2LcieJ1MbvWBXR5_sfia--eAucTUe0KWgr22HYo,842
|
|
4
|
+
pdfdancer/models.py,sha256=SmkKScr47uVs6FCWUAVIg6rucYrYHvbIxZngyA50XyI,15498
|
|
5
|
+
pdfdancer/paragraph_builder.py,sha256=bAfwX9U2YT1UGX9EKkPnGYvGK3SQP3X1ocxlgyLE_rU,8872
|
|
6
|
+
pdfdancer/pdfdancer_v1.py,sha256=Jsr1ZnqUoeNd3So-9ZhygDdwXSCN-m3G_9HUZ3cU_Cc,33196
|
|
7
|
+
pdfdancer/types.py,sha256=lcYnqCFgnrGpplSPCxKh3X9AZ3-9t-lJqMH5ZLew_I4,8188
|
|
8
|
+
pdfdancer_client_python-0.2.2.dist-info/METADATA,sha256=5XM20sBrcRra-BCRaJZL1L4bKgVzvmdOjdM1CmkwSCw,9242
|
|
9
|
+
pdfdancer_client_python-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
+
pdfdancer_client_python-0.2.2.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
|
|
11
|
+
pdfdancer_client_python-0.2.2.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
pdfdancer/__init__.py,sha256=uSIbj9rI81_o_asKd6Er15rI_Fa-TcMl1N7BEq7P_Gc,964
|
|
2
|
-
pdfdancer/client_v1.py,sha256=M4A_q8iZ2d9uUrmDSRrVRw1LADpM-MInti_FKsBRAnM,25147
|
|
3
|
-
pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
|
|
4
|
-
pdfdancer/models.py,sha256=MHp5_iFpvHhJC7-kN71D5xc2NHhFj0PMSF9afmxdjL8,14064
|
|
5
|
-
pdfdancer/paragraph_builder.py,sha256=uBMSNhL3b5DgbCJWf5VFWxgm3RpsQyQukk67FDd86Bs,8727
|
|
6
|
-
pdfdancer_client_python-0.1.2.dist-info/METADATA,sha256=nv1xrRZG_kIIL1-2iFSX8N5l0UKiIITqUZsmBnlPOWo,9248
|
|
7
|
-
pdfdancer_client_python-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
pdfdancer_client_python-0.1.2.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
|
|
9
|
-
pdfdancer_client_python-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
{pdfdancer_client_python-0.1.2.dist-info → pdfdancer_client_python-0.2.2.dist-info}/top_level.txt
RENAMED
|
File without changes
|