pdfdancer-client-python 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdfdancer-client-python might be problematic. Click here for more details.
- pdfdancer/__init__.py +2 -1
- pdfdancer/models.py +50 -18
- pdfdancer/pdfdancer_v1.py +120 -37
- pdfdancer/types.py +111 -12
- {pdfdancer_client_python-0.2.10.dist-info → pdfdancer_client_python-0.2.12.dist-info}/METADATA +1 -1
- pdfdancer_client_python-0.2.12.dist-info/RECORD +11 -0
- pdfdancer_client_python-0.2.10.dist-info/RECORD +0 -11
- {pdfdancer_client_python-0.2.10.dist-info → pdfdancer_client_python-0.2.12.dist-info}/WHEEL +0 -0
- {pdfdancer_client_python-0.2.10.dist-info → pdfdancer_client_python-0.2.12.dist-info}/top_level.txt +0 -0
pdfdancer/__init__.py
CHANGED
|
@@ -11,7 +11,7 @@ from .exceptions import (
|
|
|
11
11
|
HttpClientException, SessionException
|
|
12
12
|
)
|
|
13
13
|
from .models import (
|
|
14
|
-
ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph, FormFieldRef,
|
|
14
|
+
ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph, FormFieldRef, TextObjectRef,
|
|
15
15
|
PositionMode, ShapeType, Point, StandardFonts
|
|
16
16
|
)
|
|
17
17
|
from .paragraph_builder import ParagraphBuilder
|
|
@@ -29,6 +29,7 @@ __all__ = [
|
|
|
29
29
|
"BoundingRect",
|
|
30
30
|
"Paragraph",
|
|
31
31
|
"FormFieldRef",
|
|
32
|
+
"TextObjectRef",
|
|
32
33
|
"PositionMode",
|
|
33
34
|
"ShapeType",
|
|
34
35
|
"Point",
|
pdfdancer/models.py
CHANGED
|
@@ -121,6 +121,7 @@ class Position:
|
|
|
121
121
|
mode: Optional[PositionMode] = None
|
|
122
122
|
bounding_rect: Optional[BoundingRect] = None
|
|
123
123
|
text_starts_with: Optional[str] = None
|
|
124
|
+
text_pattern: Optional[str] = None
|
|
124
125
|
name: Optional[str] = None
|
|
125
126
|
|
|
126
127
|
@staticmethod
|
|
@@ -185,21 +186,6 @@ class Position:
|
|
|
185
186
|
"""Returns the Y coordinate of this position."""
|
|
186
187
|
return self.bounding_rect.get_y() if self.bounding_rect else None
|
|
187
188
|
|
|
188
|
-
def copy(self) -> 'Position':
|
|
189
|
-
"""Creates a copy of this position."""
|
|
190
|
-
# Create deep copy of bounding_rect if it exists
|
|
191
|
-
bounding_rect_copy = None
|
|
192
|
-
if self.bounding_rect:
|
|
193
|
-
bounding_rect_copy = BoundingRect(
|
|
194
|
-
self.bounding_rect.x,
|
|
195
|
-
self.bounding_rect.y,
|
|
196
|
-
self.bounding_rect.width,
|
|
197
|
-
self.bounding_rect.height
|
|
198
|
-
)
|
|
199
|
-
|
|
200
|
-
pos = Position(self.page_index, self.shape, self.mode, bounding_rect_copy, self.text_starts_with)
|
|
201
|
-
return pos
|
|
202
|
-
|
|
203
189
|
|
|
204
190
|
@dataclass
|
|
205
191
|
class ObjectRef:
|
|
@@ -325,7 +311,8 @@ class FindRequest:
|
|
|
325
311
|
"""Convert Position to dictionary for JSON serialization."""
|
|
326
312
|
result = {
|
|
327
313
|
"pageIndex": position.page_index,
|
|
328
|
-
"textStartsWith": position.text_starts_with
|
|
314
|
+
"textStartsWith": position.text_starts_with,
|
|
315
|
+
"textPattern": position.text_pattern
|
|
329
316
|
}
|
|
330
317
|
if position.name:
|
|
331
318
|
result["name"] = position.name
|
|
@@ -419,7 +406,8 @@ class AddRequest:
|
|
|
419
406
|
text_element = {
|
|
420
407
|
"text": line,
|
|
421
408
|
"font": {"name": obj.font.name, "size": obj.font.size} if obj.font else None,
|
|
422
|
-
"color": {"
|
|
409
|
+
"color": {"red": obj.color.r, "green": obj.color.g, "blue": obj.color.b,
|
|
410
|
+
"alpha": obj.color.a} if obj.color else None,
|
|
423
411
|
"position": FindRequest._position_to_dict(obj.position) if obj.position else None
|
|
424
412
|
}
|
|
425
413
|
text_line = {
|
|
@@ -427,7 +415,8 @@ class AddRequest:
|
|
|
427
415
|
}
|
|
428
416
|
# TextLine has color and position
|
|
429
417
|
if obj.color:
|
|
430
|
-
text_line["color"] = {"
|
|
418
|
+
text_line["color"] = {"red": obj.color.r, "green": obj.color.g, "blue": obj.color.b,
|
|
419
|
+
"alpha": obj.color.a}
|
|
431
420
|
if obj.position:
|
|
432
421
|
text_line["position"] = FindRequest._position_to_dict(obj.position)
|
|
433
422
|
lines.append(text_line)
|
|
@@ -515,3 +504,46 @@ class FormFieldRef(ObjectRef):
|
|
|
515
504
|
def get_value(self) -> Optional[str]:
|
|
516
505
|
"""Get the form field value."""
|
|
517
506
|
return self.value
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
class TextObjectRef(ObjectRef):
|
|
510
|
+
"""
|
|
511
|
+
Represents a text object reference with additional text-specific properties.
|
|
512
|
+
Extends ObjectRef to include text content, font information, and hierarchical structure.
|
|
513
|
+
"""
|
|
514
|
+
|
|
515
|
+
def __init__(self, internal_id: str, position: Position, object_type: ObjectType,
|
|
516
|
+
text: Optional[str] = None, font_name: Optional[str] = None,
|
|
517
|
+
font_size: Optional[float] = None, line_spacings: Optional[List[float]] = None,
|
|
518
|
+
color: Optional[Color] = None):
|
|
519
|
+
super().__init__(internal_id, position, object_type)
|
|
520
|
+
self.text = text
|
|
521
|
+
self.font_name = font_name
|
|
522
|
+
self.font_size = font_size
|
|
523
|
+
self.line_spacings = line_spacings
|
|
524
|
+
self.color = color
|
|
525
|
+
self.children: List['TextObjectRef'] = []
|
|
526
|
+
|
|
527
|
+
def get_text(self) -> Optional[str]:
|
|
528
|
+
"""Get the text content."""
|
|
529
|
+
return self.text
|
|
530
|
+
|
|
531
|
+
def get_font_name(self) -> Optional[str]:
|
|
532
|
+
"""Get the font name."""
|
|
533
|
+
return self.font_name
|
|
534
|
+
|
|
535
|
+
def get_font_size(self) -> Optional[float]:
|
|
536
|
+
"""Get the font size."""
|
|
537
|
+
return self.font_size
|
|
538
|
+
|
|
539
|
+
def get_line_spacings(self) -> Optional[List[float]]:
|
|
540
|
+
"""Get the line spacings."""
|
|
541
|
+
return self.line_spacings
|
|
542
|
+
|
|
543
|
+
def get_color(self) -> Optional[Color]:
|
|
544
|
+
"""Get the color."""
|
|
545
|
+
return self.color
|
|
546
|
+
|
|
547
|
+
def get_children(self) -> List['TextObjectRef']:
|
|
548
|
+
"""Get the child text objects."""
|
|
549
|
+
return self.children
|
pdfdancer/pdfdancer_v1.py
CHANGED
|
@@ -22,7 +22,7 @@ from .exceptions import (
|
|
|
22
22
|
)
|
|
23
23
|
from .image_builder import ImageBuilder
|
|
24
24
|
from .models import (
|
|
25
|
-
ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef,
|
|
25
|
+
ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef, TextObjectRef,
|
|
26
26
|
FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest, ChangeFormFieldRequest,
|
|
27
27
|
ShapeType, PositionMode
|
|
28
28
|
)
|
|
@@ -52,6 +52,18 @@ class PageClient:
|
|
|
52
52
|
# noinspection PyProtectedMember
|
|
53
53
|
return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
|
|
54
54
|
|
|
55
|
+
def select_paragraphs_matching(self, pattern):
|
|
56
|
+
position = Position.at_page(self.page_index)
|
|
57
|
+
position.text_pattern = pattern
|
|
58
|
+
# noinspection PyProtectedMember
|
|
59
|
+
return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
|
|
60
|
+
|
|
61
|
+
def select_text_lines_matching(self, pattern: str) -> List[TextLineObject]:
|
|
62
|
+
position = Position.at_page(self.page_index)
|
|
63
|
+
position.text_pattern = pattern
|
|
64
|
+
# noinspection PyProtectedMember
|
|
65
|
+
return self.root._to_textline_objects(self.root._find_text_lines(position))
|
|
66
|
+
|
|
55
67
|
def select_paragraphs_at(self, x: float, y: float) -> List[ParagraphObject]:
|
|
56
68
|
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
57
69
|
# noinspection PyProtectedMember
|
|
@@ -68,6 +80,11 @@ class PageClient:
|
|
|
68
80
|
# noinspection PyProtectedMember
|
|
69
81
|
return self.root._to_textline_objects(self.root._find_text_lines(position))
|
|
70
82
|
|
|
83
|
+
def select_text_lines_at(self, x, y) -> List[TextLineObject]:
|
|
84
|
+
position = Position.at_page_coordinates(self.page_index, x, y)
|
|
85
|
+
# noinspection PyProtectedMember
|
|
86
|
+
return self.root._to_textline_objects(self.root._find_text_lines(position))
|
|
87
|
+
|
|
71
88
|
def select_images(self) -> List[ImageObject]:
|
|
72
89
|
# noinspection PyProtectedMember
|
|
73
90
|
return self.root._to_image_objects(self.root._find_images(Position.at_page(self.page_index)))
|
|
@@ -149,6 +166,21 @@ class PDFDancer:
|
|
|
149
166
|
Returns:
|
|
150
167
|
A ready-to-use `PDFDancer` client instance.
|
|
151
168
|
"""
|
|
169
|
+
resolved_token = cls._resolve_token(token)
|
|
170
|
+
resolved_base_url = cls._resolve_base_url(base_url)
|
|
171
|
+
|
|
172
|
+
return PDFDancer(resolved_token, pdf_data, resolved_base_url, timeout)
|
|
173
|
+
|
|
174
|
+
@classmethod
|
|
175
|
+
def _resolve_base_url(cls, base_url: Optional[str]) -> Optional[str]:
|
|
176
|
+
env_base_url = os.getenv("PDFDANCER_BASE_URL")
|
|
177
|
+
resolved_base_url = base_url or (env_base_url.strip() if env_base_url and env_base_url.strip() else None)
|
|
178
|
+
if resolved_base_url is None:
|
|
179
|
+
resolved_base_url = "https://api.pdfdancer.com"
|
|
180
|
+
return resolved_base_url
|
|
181
|
+
|
|
182
|
+
@classmethod
|
|
183
|
+
def _resolve_token(cls, token: Optional[str]) -> Optional[str]:
|
|
152
184
|
resolved_token = token.strip() if token and token.strip() else None
|
|
153
185
|
if resolved_token is None:
|
|
154
186
|
env_token = os.getenv("PDFDANCER_TOKEN")
|
|
@@ -159,13 +191,18 @@ class PDFDancer:
|
|
|
159
191
|
"Missing PDFDancer API token. Pass a token via the `token` argument "
|
|
160
192
|
"or set the PDFDANCER_TOKEN environment variable."
|
|
161
193
|
)
|
|
194
|
+
return resolved_token
|
|
162
195
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
196
|
+
@classmethod
|
|
197
|
+
def new(cls,
|
|
198
|
+
token: Optional[str] = None,
|
|
199
|
+
base_url: Optional[str] = None,
|
|
200
|
+
timeout: float = 30.0) -> "PDFDancer":
|
|
167
201
|
|
|
168
|
-
|
|
202
|
+
resolved_token = cls._resolve_token(token)
|
|
203
|
+
resolved_base_url = cls._resolve_base_url(base_url)
|
|
204
|
+
|
|
205
|
+
raise Exception("Unsupported Operation Exception: TODO")
|
|
169
206
|
|
|
170
207
|
def __init__(self, token: str, pdf_data: Union[bytes, Path, str, BinaryIO],
|
|
171
208
|
base_url: str, read_timeout: float = 0):
|
|
@@ -205,7 +242,8 @@ class PDFDancer:
|
|
|
205
242
|
# Create session - equivalent to Java constructor behavior
|
|
206
243
|
self._session_id = self._create_session()
|
|
207
244
|
|
|
208
|
-
|
|
245
|
+
@staticmethod
|
|
246
|
+
def _process_pdf_data(pdf_data: Union[bytes, Path, str, BinaryIO]) -> bytes:
|
|
209
247
|
"""
|
|
210
248
|
Process PDF data from various input types with strict validation.
|
|
211
249
|
"""
|
|
@@ -347,7 +385,7 @@ class PDFDancer:
|
|
|
347
385
|
timeout=self._read_timeout if self._read_timeout > 0 else None
|
|
348
386
|
)
|
|
349
387
|
|
|
350
|
-
# Handle FontNotFoundException
|
|
388
|
+
# Handle FontNotFoundException
|
|
351
389
|
if response.status_code == 404:
|
|
352
390
|
try:
|
|
353
391
|
error_data = response.json()
|
|
@@ -366,8 +404,6 @@ class PDFDancer:
|
|
|
366
404
|
raise HttpClientException(f"API request failed: {error_message}", response=getattr(e, 'response', None),
|
|
367
405
|
cause=e) from None
|
|
368
406
|
|
|
369
|
-
# Search Operations - matching Java client exactly
|
|
370
|
-
|
|
371
407
|
def _find(self, object_type: Optional[ObjectType] = None, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
372
408
|
"""
|
|
373
409
|
Searches for PDF objects matching the specified criteria.
|
|
@@ -388,17 +424,21 @@ class PDFDancer:
|
|
|
388
424
|
objects_data = response.json()
|
|
389
425
|
return [self._parse_object_ref(obj_data) for obj_data in objects_data]
|
|
390
426
|
|
|
391
|
-
def select_paragraphs(self) -> List[
|
|
427
|
+
def select_paragraphs(self) -> List[TextObjectRef]:
|
|
392
428
|
"""
|
|
393
|
-
Searches for paragraph objects
|
|
429
|
+
Searches for paragraph objects returning TextObjectRef with hierarchical structure.
|
|
394
430
|
"""
|
|
395
|
-
return self.
|
|
431
|
+
return self._find_paragraphs(None)
|
|
396
432
|
|
|
397
|
-
def _find_paragraphs(self, position: Optional[Position] = None) -> List[
|
|
433
|
+
def _find_paragraphs(self, position: Optional[Position] = None) -> List[TextObjectRef]:
|
|
398
434
|
"""
|
|
399
|
-
Searches for paragraph objects
|
|
435
|
+
Searches for paragraph objects returning TextObjectRef with hierarchical structure.
|
|
400
436
|
"""
|
|
401
|
-
|
|
437
|
+
request_data = FindRequest(ObjectType.PARAGRAPH, position).to_dict()
|
|
438
|
+
response = self._make_request('POST', '/pdf/find', data=request_data)
|
|
439
|
+
|
|
440
|
+
objects_data = response.json()
|
|
441
|
+
return [self._parse_text_object_ref(obj_data) for obj_data in objects_data]
|
|
402
442
|
|
|
403
443
|
def _find_images(self, position: Optional[Position] = None) -> List[ObjectRef]:
|
|
404
444
|
"""
|
|
@@ -471,17 +511,21 @@ class PDFDancer:
|
|
|
471
511
|
"""
|
|
472
512
|
return self._find(ObjectType.PATH, position)
|
|
473
513
|
|
|
474
|
-
def _find_text_lines(self, position: Optional[Position] = None) -> List[
|
|
514
|
+
def _find_text_lines(self, position: Optional[Position] = None) -> List[TextObjectRef]:
|
|
475
515
|
"""
|
|
476
|
-
Searches for text line objects
|
|
516
|
+
Searches for text line objects returning TextObjectRef with hierarchical structure.
|
|
477
517
|
"""
|
|
478
|
-
|
|
518
|
+
request_data = FindRequest(ObjectType.TEXT_LINE, position).to_dict()
|
|
519
|
+
response = self._make_request('POST', '/pdf/find', data=request_data)
|
|
520
|
+
|
|
521
|
+
objects_data = response.json()
|
|
522
|
+
return [self._parse_text_object_ref(obj_data) for obj_data in objects_data]
|
|
479
523
|
|
|
480
524
|
def select_text_lines(self) -> List[TextLineObject]:
|
|
481
525
|
"""
|
|
482
|
-
Searches for text line objects
|
|
526
|
+
Searches for text line objects returning TextLineObject wrappers.
|
|
483
527
|
"""
|
|
484
|
-
return self._to_textline_objects(self.
|
|
528
|
+
return self._to_textline_objects(self._find_text_lines(None))
|
|
485
529
|
|
|
486
530
|
def page(self, page_index: int) -> PageClient:
|
|
487
531
|
return PageClient(page_index, self)
|
|
@@ -835,7 +879,7 @@ class PDFDancer:
|
|
|
835
879
|
type=object_type
|
|
836
880
|
)
|
|
837
881
|
|
|
838
|
-
def _parse_form_field_ref(self, obj_data: dict) ->
|
|
882
|
+
def _parse_form_field_ref(self, obj_data: dict) -> FormFieldRef:
|
|
839
883
|
"""Parse JSON object data into ObjectRef instance."""
|
|
840
884
|
position_data = obj_data.get('position', {})
|
|
841
885
|
position = self._parse_position(position_data) if position_data else None
|
|
@@ -874,6 +918,45 @@ class PDFDancer:
|
|
|
874
918
|
|
|
875
919
|
return position
|
|
876
920
|
|
|
921
|
+
def _parse_text_object_ref(self, obj_data: dict, fallback_id: Optional[str] = None) -> TextObjectRef:
|
|
922
|
+
"""Parse JSON object data into TextObjectRef instance with hierarchical structure."""
|
|
923
|
+
position_data = obj_data.get('position', {})
|
|
924
|
+
position = self._parse_position(position_data) if position_data else Position()
|
|
925
|
+
|
|
926
|
+
object_type = ObjectType(obj_data.get('type', 'TEXT_LINE'))
|
|
927
|
+
line_spacings = obj_data.get('lineSpacings') if isinstance(obj_data.get('lineSpacings'), list) else None
|
|
928
|
+
internal_id = obj_data.get('internalId', fallback_id or '')
|
|
929
|
+
|
|
930
|
+
color = None
|
|
931
|
+
color_data = obj_data.get('color')
|
|
932
|
+
if isinstance(color_data, dict):
|
|
933
|
+
from .models import Color
|
|
934
|
+
red = color_data.get('red')
|
|
935
|
+
green = color_data.get('green')
|
|
936
|
+
blue = color_data.get('blue')
|
|
937
|
+
alpha = color_data.get('alpha', 255)
|
|
938
|
+
if all(isinstance(v, int) for v in [red, green, blue]):
|
|
939
|
+
color = Color(red, green, blue, alpha)
|
|
940
|
+
|
|
941
|
+
text_object = TextObjectRef(
|
|
942
|
+
internal_id=internal_id,
|
|
943
|
+
position=position,
|
|
944
|
+
object_type=object_type,
|
|
945
|
+
text=obj_data.get('text') if isinstance(obj_data.get('text'), str) else None,
|
|
946
|
+
font_name=obj_data.get('fontName') if isinstance(obj_data.get('fontName'), str) else None,
|
|
947
|
+
font_size=obj_data.get('fontSize') if isinstance(obj_data.get('fontSize'), (int, float)) else None,
|
|
948
|
+
line_spacings=line_spacings,
|
|
949
|
+
color=color
|
|
950
|
+
)
|
|
951
|
+
|
|
952
|
+
if isinstance(obj_data.get('children'), list) and len(obj_data['children']) > 0:
|
|
953
|
+
text_object.children = [
|
|
954
|
+
self._parse_text_object_ref(child_data, f"{internal_id or 'child'}-{index}")
|
|
955
|
+
for index, child_data in enumerate(obj_data['children'])
|
|
956
|
+
]
|
|
957
|
+
|
|
958
|
+
return text_object
|
|
959
|
+
|
|
877
960
|
# Builder Pattern Support
|
|
878
961
|
|
|
879
962
|
def _paragraph_builder(self) -> 'ParagraphBuilder':
|
|
@@ -892,30 +975,30 @@ class PDFDancer:
|
|
|
892
975
|
|
|
893
976
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
894
977
|
"""Context manager exit - cleanup if needed."""
|
|
895
|
-
# Could add session cleanup here if API supports it
|
|
978
|
+
# TODO Could add session cleanup here if API supports it. Cleanup on the server
|
|
896
979
|
pass
|
|
897
980
|
|
|
898
|
-
def _to_path_objects(self,
|
|
899
|
-
return [PathObject(self, ref.internal_id, ref.type, ref.position) for ref in
|
|
981
|
+
def _to_path_objects(self, refs: List[ObjectRef]) -> List[PathObject]:
|
|
982
|
+
return [PathObject(self, ref.internal_id, ref.type, ref.position) for ref in refs]
|
|
900
983
|
|
|
901
|
-
def _to_paragraph_objects(self,
|
|
902
|
-
return [ParagraphObject(self, ref
|
|
984
|
+
def _to_paragraph_objects(self, refs: List[TextObjectRef]) -> List[ParagraphObject]:
|
|
985
|
+
return [ParagraphObject(self, ref) for ref in refs]
|
|
903
986
|
|
|
904
|
-
def _to_textline_objects(self,
|
|
905
|
-
return [TextLineObject(self, ref
|
|
987
|
+
def _to_textline_objects(self, refs: List[TextObjectRef]) -> List[TextLineObject]:
|
|
988
|
+
return [TextLineObject(self, ref) for ref in refs]
|
|
906
989
|
|
|
907
|
-
def _to_image_objects(self,
|
|
908
|
-
return [ImageObject(self, ref.internal_id, ref.type, ref.position) for ref in
|
|
990
|
+
def _to_image_objects(self, refs: List[ObjectRef]) -> List[ImageObject]:
|
|
991
|
+
return [ImageObject(self, ref.internal_id, ref.type, ref.position) for ref in refs]
|
|
909
992
|
|
|
910
|
-
def _to_form_objects(self,
|
|
911
|
-
return [FormObject(self, ref.internal_id, ref.type, ref.position) for ref in
|
|
993
|
+
def _to_form_objects(self, refs: List[ObjectRef]) -> List[FormObject]:
|
|
994
|
+
return [FormObject(self, ref.internal_id, ref.type, ref.position) for ref in refs]
|
|
912
995
|
|
|
913
|
-
def _to_form_field_objects(self,
|
|
996
|
+
def _to_form_field_objects(self, refs: List[FormFieldRef]) -> List[FormFieldObject]:
|
|
914
997
|
return [FormFieldObject(self, ref.internal_id, ref.type, ref.position, ref.name, ref.value) for ref in
|
|
915
|
-
|
|
998
|
+
refs]
|
|
916
999
|
|
|
917
|
-
def _to_page_objects(self,
|
|
918
|
-
return [PageClient.from_ref(self, ref) for ref in
|
|
1000
|
+
def _to_page_objects(self, refs: List[ObjectRef]) -> List[PageClient]:
|
|
1001
|
+
return [PageClient.from_ref(self, ref) for ref in refs]
|
|
919
1002
|
|
|
920
1003
|
def _to_page_object(self, ref: ObjectRef) -> PageClient:
|
|
921
1004
|
return PageClient.from_ref(self, ref)
|
pdfdancer/types.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import statistics
|
|
3
4
|
from dataclasses import dataclass
|
|
4
|
-
from typing import Optional
|
|
5
|
+
from typing import Optional, List
|
|
5
6
|
|
|
6
|
-
from . import ObjectType, Position, ObjectRef, Point, Paragraph, Font, Color, FormFieldRef
|
|
7
|
+
from . import ObjectType, Position, ObjectRef, Point, Paragraph, Font, Color, FormFieldRef, TextObjectRef
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
@dataclass
|
|
@@ -66,16 +67,33 @@ class PathObject(PDFObjectBase):
|
|
|
66
67
|
"""Optional bounding rectangle (if available)."""
|
|
67
68
|
return self.position.bounding_rect
|
|
68
69
|
|
|
70
|
+
def __eq__(self, other):
|
|
71
|
+
if not isinstance(other, PathObject):
|
|
72
|
+
return False
|
|
73
|
+
return (self.internal_id == other.internal_id and
|
|
74
|
+
self.object_type == other.object_type and
|
|
75
|
+
self.position == other.position)
|
|
76
|
+
|
|
69
77
|
|
|
70
78
|
class ImageObject(PDFObjectBase):
|
|
71
|
-
|
|
79
|
+
def __eq__(self, other):
|
|
80
|
+
if not isinstance(other, ImageObject):
|
|
81
|
+
return False
|
|
82
|
+
return (self.internal_id == other.internal_id and
|
|
83
|
+
self.object_type == other.object_type and
|
|
84
|
+
self.position == other.position)
|
|
72
85
|
|
|
73
86
|
|
|
74
87
|
class FormObject(PDFObjectBase):
|
|
75
|
-
|
|
88
|
+
def __eq__(self, other):
|
|
89
|
+
if not isinstance(other, FormObject):
|
|
90
|
+
return False
|
|
91
|
+
return (self.internal_id == other.internal_id and
|
|
92
|
+
self.object_type == other.object_type and
|
|
93
|
+
self.position == other.position)
|
|
76
94
|
|
|
77
95
|
|
|
78
|
-
def _process_text_lines(text: str) ->
|
|
96
|
+
def _process_text_lines(text: str) -> List[str]:
|
|
79
97
|
"""
|
|
80
98
|
Process text into lines for the paragraph.
|
|
81
99
|
This is a simplified version - the full implementation would handle
|
|
@@ -171,21 +189,53 @@ class ParagraphEdit(BaseTextEdit):
|
|
|
171
189
|
return self._target_obj._client._modify_paragraph(self._object_ref, self._new_text)
|
|
172
190
|
else:
|
|
173
191
|
new_paragraph = Paragraph(
|
|
174
|
-
position=self._position,
|
|
175
|
-
line_spacing=self.
|
|
176
|
-
font=
|
|
177
|
-
text_lines=
|
|
178
|
-
color=self.
|
|
192
|
+
position=self._position if self._position is not None else self._object_ref.position,
|
|
193
|
+
line_spacing=self._get_line_spacing(),
|
|
194
|
+
font=self._get_font(),
|
|
195
|
+
text_lines=self._get_text_lines(),
|
|
196
|
+
color=self._get_color(),
|
|
179
197
|
)
|
|
180
198
|
# noinspection PyProtectedMember
|
|
181
199
|
return self._target_obj._client._modify_paragraph(self._object_ref, new_paragraph)
|
|
182
200
|
|
|
201
|
+
def _get_line_spacing(self) -> float:
|
|
202
|
+
if self._line_spacing is not None:
|
|
203
|
+
return self._line_spacing
|
|
204
|
+
elif self._object_ref.line_spacings is not None:
|
|
205
|
+
return statistics.mean(self._object_ref.line_spacings)
|
|
206
|
+
else:
|
|
207
|
+
return DEFAULT_LINE_SPACING
|
|
208
|
+
|
|
209
|
+
def _get_font(self):
|
|
210
|
+
if self._font_name is not None and self._font_size is not None:
|
|
211
|
+
return Font(name=self._font_name, size=self._font_size)
|
|
212
|
+
elif self._object_ref.font_name is not None and self._object_ref.font_size is not None:
|
|
213
|
+
return Font(name=self._object_ref.font_name, size=self._object_ref.font_size)
|
|
214
|
+
else:
|
|
215
|
+
raise Exception("Font is none")
|
|
216
|
+
|
|
217
|
+
def _get_text_lines(self):
|
|
218
|
+
if self._new_text is not None:
|
|
219
|
+
return _process_text_lines(self._new_text)
|
|
220
|
+
elif self._object_ref.text is not None:
|
|
221
|
+
# TODO this actually messes up existing text line internals
|
|
222
|
+
return _process_text_lines(self._object_ref.text)
|
|
223
|
+
else:
|
|
224
|
+
raise Exception("Paragraph has no text")
|
|
225
|
+
|
|
226
|
+
def _get_color(self):
|
|
227
|
+
if self._color is not None:
|
|
228
|
+
return self._color
|
|
229
|
+
elif self._object_ref.color is not None:
|
|
230
|
+
return self._object_ref.color
|
|
231
|
+
else:
|
|
232
|
+
return DEFAULT_COLOR
|
|
233
|
+
|
|
183
234
|
|
|
184
235
|
class TextLineEdit(BaseTextEdit):
|
|
185
236
|
def apply(self) -> bool:
|
|
186
237
|
if (
|
|
187
|
-
self.
|
|
188
|
-
and self._line_spacing is None
|
|
238
|
+
self._line_spacing is None
|
|
189
239
|
and self._font_size is None
|
|
190
240
|
and self._font_name is None
|
|
191
241
|
and self._color is None
|
|
@@ -199,16 +249,56 @@ class TextLineEdit(BaseTextEdit):
|
|
|
199
249
|
class ParagraphObject(PDFObjectBase):
|
|
200
250
|
"""Represents a paragraph text block inside a PDF page."""
|
|
201
251
|
|
|
252
|
+
def __init__(self, client: 'PDFDancer', object_ref: TextObjectRef):
|
|
253
|
+
super().__init__(client, object_ref.internal_id, object_ref.type, object_ref.position)
|
|
254
|
+
self._object_ref = object_ref
|
|
255
|
+
|
|
202
256
|
def edit(self) -> ParagraphEdit:
|
|
203
257
|
return ParagraphEdit(self, self.object_ref())
|
|
204
258
|
|
|
259
|
+
def object_ref(self) -> TextObjectRef:
|
|
260
|
+
return self._object_ref
|
|
261
|
+
|
|
262
|
+
def __eq__(self, other):
|
|
263
|
+
if not isinstance(other, ParagraphObject):
|
|
264
|
+
return False
|
|
265
|
+
return (self.internal_id == other.internal_id and
|
|
266
|
+
self.object_type == other.object_type and
|
|
267
|
+
self.position == other.position and
|
|
268
|
+
self._object_ref.text == other._object_ref.text and
|
|
269
|
+
self._object_ref.font_name == other._object_ref.font_name and
|
|
270
|
+
self._object_ref.font_size == other._object_ref.font_size and
|
|
271
|
+
self._object_ref.line_spacings == other._object_ref.line_spacings and
|
|
272
|
+
self._object_ref.color == other._object_ref.color and
|
|
273
|
+
self._object_ref.children == other._object_ref.children)
|
|
274
|
+
|
|
205
275
|
|
|
206
276
|
class TextLineObject(PDFObjectBase):
|
|
207
277
|
"""Represents a single line of text inside a PDF page."""
|
|
208
278
|
|
|
279
|
+
def __init__(self, client: 'PDFDancer', object_ref: TextObjectRef):
|
|
280
|
+
super().__init__(client, object_ref.internal_id, object_ref.type, object_ref.position)
|
|
281
|
+
self._object_ref = object_ref
|
|
282
|
+
|
|
209
283
|
def edit(self) -> TextLineEdit:
|
|
210
284
|
return TextLineEdit(self, self.object_ref())
|
|
211
285
|
|
|
286
|
+
def object_ref(self) -> TextObjectRef:
|
|
287
|
+
return self._object_ref
|
|
288
|
+
|
|
289
|
+
def __eq__(self, other):
|
|
290
|
+
if not isinstance(other, TextLineObject):
|
|
291
|
+
return False
|
|
292
|
+
return (self.internal_id == other.internal_id and
|
|
293
|
+
self.object_type == other.object_type and
|
|
294
|
+
self.position == other.position and
|
|
295
|
+
self._object_ref.text == other._object_ref.text and
|
|
296
|
+
self._object_ref.font_name == other._object_ref.font_name and
|
|
297
|
+
self._object_ref.font_size == other._object_ref.font_size and
|
|
298
|
+
self._object_ref.line_spacings == other._object_ref.line_spacings and
|
|
299
|
+
self._object_ref.color == other._object_ref.color and
|
|
300
|
+
self._object_ref.children == other._object_ref.children)
|
|
301
|
+
|
|
212
302
|
|
|
213
303
|
class FormFieldEdit:
|
|
214
304
|
def __init__(self, form_field: 'FormFieldObject', object_ref: FormFieldRef):
|
|
@@ -239,3 +329,12 @@ class FormFieldObject(PDFObjectBase):
|
|
|
239
329
|
ref.name = self.name
|
|
240
330
|
ref.value = self.value
|
|
241
331
|
return ref
|
|
332
|
+
|
|
333
|
+
def __eq__(self, other):
|
|
334
|
+
if not isinstance(other, FormFieldObject):
|
|
335
|
+
return False
|
|
336
|
+
return (self.internal_id == other.internal_id and
|
|
337
|
+
self.object_type == other.object_type and
|
|
338
|
+
self.position == other.position and
|
|
339
|
+
self.name == other.name and
|
|
340
|
+
self.value == other.value)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
pdfdancer/__init__.py,sha256=71HwLjHHPsCQMTUtLHYAwzslhF3PqN5g1QwMr4HbKSQ,1076
|
|
2
|
+
pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
|
|
3
|
+
pdfdancer/image_builder.py,sha256=Omxc2LcieJ1MbvWBXR5_sfia--eAucTUe0KWgr22HYo,842
|
|
4
|
+
pdfdancer/models.py,sha256=ZoB5ZP1jaZsubqzhMr9W9nsIUirVUty_FkRiPZWq8vY,18276
|
|
5
|
+
pdfdancer/paragraph_builder.py,sha256=mjV36-XOqcYATfIjSOy7_SBO0EKXjsAtMqYL8IaowGU,9218
|
|
6
|
+
pdfdancer/pdfdancer_v1.py,sha256=XgcyKHPOBMI5vNM86ZRhRvJFA3wB7DTcWwDt6tHQxpI,39851
|
|
7
|
+
pdfdancer/types.py,sha256=SOmYP49XPVy6DZ4JXSJrfy0Aww-Tv7QjZCDnOB8VTT4,11860
|
|
8
|
+
pdfdancer_client_python-0.2.12.dist-info/METADATA,sha256=XHEG0LuL-bi7MQyYVUX2RWrq6mutyVlLcYSlqiUzMAg,6770
|
|
9
|
+
pdfdancer_client_python-0.2.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
+
pdfdancer_client_python-0.2.12.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
|
|
11
|
+
pdfdancer_client_python-0.2.12.dist-info/RECORD,,
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
pdfdancer/__init__.py,sha256=4sMkf0d6GvQ-6L1cPzEDyWuSt0b-ek8ZlyvashOXqZ4,1040
|
|
2
|
-
pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
|
|
3
|
-
pdfdancer/image_builder.py,sha256=Omxc2LcieJ1MbvWBXR5_sfia--eAucTUe0KWgr22HYo,842
|
|
4
|
-
pdfdancer/models.py,sha256=MmiW1xEU9WwpZV6cHNotNcbm-GoxLfeoRt_CurNGnjc,17105
|
|
5
|
-
pdfdancer/paragraph_builder.py,sha256=mjV36-XOqcYATfIjSOy7_SBO0EKXjsAtMqYL8IaowGU,9218
|
|
6
|
-
pdfdancer/pdfdancer_v1.py,sha256=fUfXdc-WCwYHcoK779nxwgV171TR7yGUgv2-Xl9fM8A,36104
|
|
7
|
-
pdfdancer/types.py,sha256=TtXbOqa8febqTUmYF97tq-y6i-jNuO6UHvmAmSJHz20,7466
|
|
8
|
-
pdfdancer_client_python-0.2.10.dist-info/METADATA,sha256=zzVq_3CxC0YG5Wb5vNpSY4LqoC-XXdLNVyLsJPDDwFA,6770
|
|
9
|
-
pdfdancer_client_python-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
-
pdfdancer_client_python-0.2.10.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
|
|
11
|
-
pdfdancer_client_python-0.2.10.dist-info/RECORD,,
|
|
File without changes
|
{pdfdancer_client_python-0.2.10.dist-info → pdfdancer_client_python-0.2.12.dist-info}/top_level.txt
RENAMED
|
File without changes
|