pdfdancer-client-python 0.2.10__tar.gz → 0.2.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {pdfdancer_client_python-0.2.10/src/pdfdancer_client_python.egg-info → pdfdancer_client_python-0.2.12}/PKG-INFO +1 -1
  2. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/pyproject.toml +1 -1
  3. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer/__init__.py +2 -1
  4. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer/models.py +50 -18
  5. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer/pdfdancer_v1.py +120 -37
  6. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer/types.py +111 -12
  7. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12/src/pdfdancer_client_python.egg-info}/PKG-INFO +1 -1
  8. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer_client_python.egg-info/SOURCES.txt +3 -0
  9. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/__init__.py +9 -4
  10. pdfdancer_client_python-0.2.12/tests/e2e/pdf_assertions.py +100 -0
  11. pdfdancer_client_python-0.2.12/tests/e2e/test_paragraph.py +422 -0
  12. pdfdancer_client_python-0.2.12/tests/fixtures/Empty.pdf +0 -0
  13. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/test_models.py +0 -18
  14. pdfdancer_client_python-0.2.12/tests/test_pdf_object_equality.py +264 -0
  15. pdfdancer_client_python-0.2.10/tests/e2e/test_paragraph.py +0 -227
  16. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/.github/workflows/ci.yml +0 -0
  17. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/.gitignore +0 -0
  18. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/CLAUDE.md +0 -0
  19. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/README.md +0 -0
  20. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/docs/openapi.yml +0 -0
  21. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/release.py +0 -0
  22. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/requirements-dev.txt +0 -0
  23. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/requirements.txt +0 -0
  24. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/setup.cfg +0 -0
  25. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer/exceptions.py +0 -0
  26. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer/image_builder.py +0 -0
  27. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer/paragraph_builder.py +0 -0
  28. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer_client_python.egg-info/dependency_links.txt +0 -0
  29. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer_client_python.egg-info/requires.txt +0 -0
  30. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/src/pdfdancer_client_python.egg-info/top_level.txt +0 -0
  31. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/__init__.py +0 -0
  32. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/conftest.py +0 -0
  33. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/test_acroform.py +0 -0
  34. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/test_form_x_objects.py +0 -0
  35. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/test_image.py +0 -0
  36. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/test_line.py +0 -0
  37. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/test_page.py +0 -0
  38. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/test_path.py +0 -0
  39. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/e2e/test_pdfdancer.py +0 -0
  40. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/fixtures/DancingScript-Regular.ttf +0 -0
  41. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/fixtures/JetBrainsMono-Regular.ttf +0 -0
  42. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/fixtures/ObviouslyAwesome.pdf +0 -0
  43. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/fixtures/basic-paths.pdf +0 -0
  44. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/fixtures/form-xobject-example.pdf +0 -0
  45. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/fixtures/logo-80.png +0 -0
  46. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/fixtures/mixed-form-types.pdf +0 -0
  47. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/test_openapi_compliance.py +0 -0
  48. {pdfdancer_client_python-0.2.10 → pdfdancer_client_python-0.2.12}/tests/test_standard_fonts.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdfdancer-client-python
3
- Version: 0.2.10
3
+ Version: 0.2.12
4
4
  Summary: Python client for PDFDancer API
5
5
  Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pdfdancer-client-python"
7
- version = "0.2.10"
7
+ version = "0.2.12"
8
8
  description = "Python client for PDFDancer API"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -11,7 +11,7 @@ from .exceptions import (
11
11
  HttpClientException, SessionException
12
12
  )
13
13
  from .models import (
14
- ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph, FormFieldRef,
14
+ ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph, FormFieldRef, TextObjectRef,
15
15
  PositionMode, ShapeType, Point, StandardFonts
16
16
  )
17
17
  from .paragraph_builder import ParagraphBuilder
@@ -29,6 +29,7 @@ __all__ = [
29
29
  "BoundingRect",
30
30
  "Paragraph",
31
31
  "FormFieldRef",
32
+ "TextObjectRef",
32
33
  "PositionMode",
33
34
  "ShapeType",
34
35
  "Point",
@@ -121,6 +121,7 @@ class Position:
121
121
  mode: Optional[PositionMode] = None
122
122
  bounding_rect: Optional[BoundingRect] = None
123
123
  text_starts_with: Optional[str] = None
124
+ text_pattern: Optional[str] = None
124
125
  name: Optional[str] = None
125
126
 
126
127
  @staticmethod
@@ -185,21 +186,6 @@ class Position:
185
186
  """Returns the Y coordinate of this position."""
186
187
  return self.bounding_rect.get_y() if self.bounding_rect else None
187
188
 
188
- def copy(self) -> 'Position':
189
- """Creates a copy of this position."""
190
- # Create deep copy of bounding_rect if it exists
191
- bounding_rect_copy = None
192
- if self.bounding_rect:
193
- bounding_rect_copy = BoundingRect(
194
- self.bounding_rect.x,
195
- self.bounding_rect.y,
196
- self.bounding_rect.width,
197
- self.bounding_rect.height
198
- )
199
-
200
- pos = Position(self.page_index, self.shape, self.mode, bounding_rect_copy, self.text_starts_with)
201
- return pos
202
-
203
189
 
204
190
  @dataclass
205
191
  class ObjectRef:
@@ -325,7 +311,8 @@ class FindRequest:
325
311
  """Convert Position to dictionary for JSON serialization."""
326
312
  result = {
327
313
  "pageIndex": position.page_index,
328
- "textStartsWith": position.text_starts_with
314
+ "textStartsWith": position.text_starts_with,
315
+ "textPattern": position.text_pattern
329
316
  }
330
317
  if position.name:
331
318
  result["name"] = position.name
@@ -419,7 +406,8 @@ class AddRequest:
419
406
  text_element = {
420
407
  "text": line,
421
408
  "font": {"name": obj.font.name, "size": obj.font.size} if obj.font else None,
422
- "color": {"r": obj.color.r, "g": obj.color.g, "b": obj.color.b} if obj.color else None,
409
+ "color": {"red": obj.color.r, "green": obj.color.g, "blue": obj.color.b,
410
+ "alpha": obj.color.a} if obj.color else None,
423
411
  "position": FindRequest._position_to_dict(obj.position) if obj.position else None
424
412
  }
425
413
  text_line = {
@@ -427,7 +415,8 @@ class AddRequest:
427
415
  }
428
416
  # TextLine has color and position
429
417
  if obj.color:
430
- text_line["color"] = {"r": obj.color.r, "g": obj.color.g, "b": obj.color.b}
418
+ text_line["color"] = {"red": obj.color.r, "green": obj.color.g, "blue": obj.color.b,
419
+ "alpha": obj.color.a}
431
420
  if obj.position:
432
421
  text_line["position"] = FindRequest._position_to_dict(obj.position)
433
422
  lines.append(text_line)
@@ -515,3 +504,46 @@ class FormFieldRef(ObjectRef):
515
504
  def get_value(self) -> Optional[str]:
516
505
  """Get the form field value."""
517
506
  return self.value
507
+
508
+
509
+ class TextObjectRef(ObjectRef):
510
+ """
511
+ Represents a text object reference with additional text-specific properties.
512
+ Extends ObjectRef to include text content, font information, and hierarchical structure.
513
+ """
514
+
515
+ def __init__(self, internal_id: str, position: Position, object_type: ObjectType,
516
+ text: Optional[str] = None, font_name: Optional[str] = None,
517
+ font_size: Optional[float] = None, line_spacings: Optional[List[float]] = None,
518
+ color: Optional[Color] = None):
519
+ super().__init__(internal_id, position, object_type)
520
+ self.text = text
521
+ self.font_name = font_name
522
+ self.font_size = font_size
523
+ self.line_spacings = line_spacings
524
+ self.color = color
525
+ self.children: List['TextObjectRef'] = []
526
+
527
+ def get_text(self) -> Optional[str]:
528
+ """Get the text content."""
529
+ return self.text
530
+
531
+ def get_font_name(self) -> Optional[str]:
532
+ """Get the font name."""
533
+ return self.font_name
534
+
535
+ def get_font_size(self) -> Optional[float]:
536
+ """Get the font size."""
537
+ return self.font_size
538
+
539
+ def get_line_spacings(self) -> Optional[List[float]]:
540
+ """Get the line spacings."""
541
+ return self.line_spacings
542
+
543
+ def get_color(self) -> Optional[Color]:
544
+ """Get the color."""
545
+ return self.color
546
+
547
+ def get_children(self) -> List['TextObjectRef']:
548
+ """Get the child text objects."""
549
+ return self.children
@@ -22,7 +22,7 @@ from .exceptions import (
22
22
  )
23
23
  from .image_builder import ImageBuilder
24
24
  from .models import (
25
- ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef,
25
+ ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef, TextObjectRef,
26
26
  FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest, ChangeFormFieldRequest,
27
27
  ShapeType, PositionMode
28
28
  )
@@ -52,6 +52,18 @@ class PageClient:
52
52
  # noinspection PyProtectedMember
53
53
  return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
54
54
 
55
+ def select_paragraphs_matching(self, pattern):
56
+ position = Position.at_page(self.page_index)
57
+ position.text_pattern = pattern
58
+ # noinspection PyProtectedMember
59
+ return self.root._to_paragraph_objects(self.root._find_paragraphs(position))
60
+
61
+ def select_text_lines_matching(self, pattern: str) -> List[TextLineObject]:
62
+ position = Position.at_page(self.page_index)
63
+ position.text_pattern = pattern
64
+ # noinspection PyProtectedMember
65
+ return self.root._to_textline_objects(self.root._find_text_lines(position))
66
+
55
67
  def select_paragraphs_at(self, x: float, y: float) -> List[ParagraphObject]:
56
68
  position = Position.at_page_coordinates(self.page_index, x, y)
57
69
  # noinspection PyProtectedMember
@@ -68,6 +80,11 @@ class PageClient:
68
80
  # noinspection PyProtectedMember
69
81
  return self.root._to_textline_objects(self.root._find_text_lines(position))
70
82
 
83
+ def select_text_lines_at(self, x, y) -> List[TextLineObject]:
84
+ position = Position.at_page_coordinates(self.page_index, x, y)
85
+ # noinspection PyProtectedMember
86
+ return self.root._to_textline_objects(self.root._find_text_lines(position))
87
+
71
88
  def select_images(self) -> List[ImageObject]:
72
89
  # noinspection PyProtectedMember
73
90
  return self.root._to_image_objects(self.root._find_images(Position.at_page(self.page_index)))
@@ -149,6 +166,21 @@ class PDFDancer:
149
166
  Returns:
150
167
  A ready-to-use `PDFDancer` client instance.
151
168
  """
169
+ resolved_token = cls._resolve_token(token)
170
+ resolved_base_url = cls._resolve_base_url(base_url)
171
+
172
+ return PDFDancer(resolved_token, pdf_data, resolved_base_url, timeout)
173
+
174
+ @classmethod
175
+ def _resolve_base_url(cls, base_url: Optional[str]) -> Optional[str]:
176
+ env_base_url = os.getenv("PDFDANCER_BASE_URL")
177
+ resolved_base_url = base_url or (env_base_url.strip() if env_base_url and env_base_url.strip() else None)
178
+ if resolved_base_url is None:
179
+ resolved_base_url = "https://api.pdfdancer.com"
180
+ return resolved_base_url
181
+
182
+ @classmethod
183
+ def _resolve_token(cls, token: Optional[str]) -> Optional[str]:
152
184
  resolved_token = token.strip() if token and token.strip() else None
153
185
  if resolved_token is None:
154
186
  env_token = os.getenv("PDFDANCER_TOKEN")
@@ -159,13 +191,18 @@ class PDFDancer:
159
191
  "Missing PDFDancer API token. Pass a token via the `token` argument "
160
192
  "or set the PDFDANCER_TOKEN environment variable."
161
193
  )
194
+ return resolved_token
162
195
 
163
- env_base_url = os.getenv("PDFDANCER_BASE_URL")
164
- resolved_base_url = base_url or (env_base_url.strip() if env_base_url and env_base_url.strip() else None)
165
- if resolved_base_url is None:
166
- resolved_base_url = "https://api.pdfdancer.com"
196
+ @classmethod
197
+ def new(cls,
198
+ token: Optional[str] = None,
199
+ base_url: Optional[str] = None,
200
+ timeout: float = 30.0) -> "PDFDancer":
167
201
 
168
- return PDFDancer(resolved_token, pdf_data, resolved_base_url, timeout)
202
+ resolved_token = cls._resolve_token(token)
203
+ resolved_base_url = cls._resolve_base_url(base_url)
204
+
205
+ raise Exception("Unsupported Operation Exception: TODO")
169
206
 
170
207
  def __init__(self, token: str, pdf_data: Union[bytes, Path, str, BinaryIO],
171
208
  base_url: str, read_timeout: float = 0):
@@ -205,7 +242,8 @@ class PDFDancer:
205
242
  # Create session - equivalent to Java constructor behavior
206
243
  self._session_id = self._create_session()
207
244
 
208
- def _process_pdf_data(self, pdf_data: Union[bytes, Path, str, BinaryIO]) -> bytes:
245
+ @staticmethod
246
+ def _process_pdf_data(pdf_data: Union[bytes, Path, str, BinaryIO]) -> bytes:
209
247
  """
210
248
  Process PDF data from various input types with strict validation.
211
249
  """
@@ -347,7 +385,7 @@ class PDFDancer:
347
385
  timeout=self._read_timeout if self._read_timeout > 0 else None
348
386
  )
349
387
 
350
- # Handle FontNotFoundException specifically like Java client
388
+ # Handle FontNotFoundException
351
389
  if response.status_code == 404:
352
390
  try:
353
391
  error_data = response.json()
@@ -366,8 +404,6 @@ class PDFDancer:
366
404
  raise HttpClientException(f"API request failed: {error_message}", response=getattr(e, 'response', None),
367
405
  cause=e) from None
368
406
 
369
- # Search Operations - matching Java client exactly
370
-
371
407
  def _find(self, object_type: Optional[ObjectType] = None, position: Optional[Position] = None) -> List[ObjectRef]:
372
408
  """
373
409
  Searches for PDF objects matching the specified criteria.
@@ -388,17 +424,21 @@ class PDFDancer:
388
424
  objects_data = response.json()
389
425
  return [self._parse_object_ref(obj_data) for obj_data in objects_data]
390
426
 
391
- def select_paragraphs(self) -> List[ParagraphObject]:
427
+ def select_paragraphs(self) -> List[TextObjectRef]:
392
428
  """
393
- Searches for paragraph objects at the specified position.
429
+ Searches for paragraph objects returning TextObjectRef with hierarchical structure.
394
430
  """
395
- return self._to_paragraph_objects(self._find(ObjectType.PARAGRAPH, None))
431
+ return self._find_paragraphs(None)
396
432
 
397
- def _find_paragraphs(self, position: Optional[Position] = None) -> List[ObjectRef]:
433
+ def _find_paragraphs(self, position: Optional[Position] = None) -> List[TextObjectRef]:
398
434
  """
399
- Searches for paragraph objects at the specified position.
435
+ Searches for paragraph objects returning TextObjectRef with hierarchical structure.
400
436
  """
401
- return self._find(ObjectType.PARAGRAPH, position)
437
+ request_data = FindRequest(ObjectType.PARAGRAPH, position).to_dict()
438
+ response = self._make_request('POST', '/pdf/find', data=request_data)
439
+
440
+ objects_data = response.json()
441
+ return [self._parse_text_object_ref(obj_data) for obj_data in objects_data]
402
442
 
403
443
  def _find_images(self, position: Optional[Position] = None) -> List[ObjectRef]:
404
444
  """
@@ -471,17 +511,21 @@ class PDFDancer:
471
511
  """
472
512
  return self._find(ObjectType.PATH, position)
473
513
 
474
- def _find_text_lines(self, position: Optional[Position] = None) -> List[ObjectRef]:
514
+ def _find_text_lines(self, position: Optional[Position] = None) -> List[TextObjectRef]:
475
515
  """
476
- Searches for text line objects at the specified position.
516
+ Searches for text line objects returning TextObjectRef with hierarchical structure.
477
517
  """
478
- return self._find(ObjectType.TEXT_LINE, position)
518
+ request_data = FindRequest(ObjectType.TEXT_LINE, position).to_dict()
519
+ response = self._make_request('POST', '/pdf/find', data=request_data)
520
+
521
+ objects_data = response.json()
522
+ return [self._parse_text_object_ref(obj_data) for obj_data in objects_data]
479
523
 
480
524
  def select_text_lines(self) -> List[TextLineObject]:
481
525
  """
482
- Searches for text line objects at the specified position.
526
+ Searches for text line objects returning TextLineObject wrappers.
483
527
  """
484
- return self._to_textline_objects(self._find(ObjectType.TEXT_LINE, None))
528
+ return self._to_textline_objects(self._find_text_lines(None))
485
529
 
486
530
  def page(self, page_index: int) -> PageClient:
487
531
  return PageClient(page_index, self)
@@ -835,7 +879,7 @@ class PDFDancer:
835
879
  type=object_type
836
880
  )
837
881
 
838
- def _parse_form_field_ref(self, obj_data: dict) -> ObjectRef:
882
+ def _parse_form_field_ref(self, obj_data: dict) -> FormFieldRef:
839
883
  """Parse JSON object data into ObjectRef instance."""
840
884
  position_data = obj_data.get('position', {})
841
885
  position = self._parse_position(position_data) if position_data else None
@@ -874,6 +918,45 @@ class PDFDancer:
874
918
 
875
919
  return position
876
920
 
921
+ def _parse_text_object_ref(self, obj_data: dict, fallback_id: Optional[str] = None) -> TextObjectRef:
922
+ """Parse JSON object data into TextObjectRef instance with hierarchical structure."""
923
+ position_data = obj_data.get('position', {})
924
+ position = self._parse_position(position_data) if position_data else Position()
925
+
926
+ object_type = ObjectType(obj_data.get('type', 'TEXT_LINE'))
927
+ line_spacings = obj_data.get('lineSpacings') if isinstance(obj_data.get('lineSpacings'), list) else None
928
+ internal_id = obj_data.get('internalId', fallback_id or '')
929
+
930
+ color = None
931
+ color_data = obj_data.get('color')
932
+ if isinstance(color_data, dict):
933
+ from .models import Color
934
+ red = color_data.get('red')
935
+ green = color_data.get('green')
936
+ blue = color_data.get('blue')
937
+ alpha = color_data.get('alpha', 255)
938
+ if all(isinstance(v, int) for v in [red, green, blue]):
939
+ color = Color(red, green, blue, alpha)
940
+
941
+ text_object = TextObjectRef(
942
+ internal_id=internal_id,
943
+ position=position,
944
+ object_type=object_type,
945
+ text=obj_data.get('text') if isinstance(obj_data.get('text'), str) else None,
946
+ font_name=obj_data.get('fontName') if isinstance(obj_data.get('fontName'), str) else None,
947
+ font_size=obj_data.get('fontSize') if isinstance(obj_data.get('fontSize'), (int, float)) else None,
948
+ line_spacings=line_spacings,
949
+ color=color
950
+ )
951
+
952
+ if isinstance(obj_data.get('children'), list) and len(obj_data['children']) > 0:
953
+ text_object.children = [
954
+ self._parse_text_object_ref(child_data, f"{internal_id or 'child'}-{index}")
955
+ for index, child_data in enumerate(obj_data['children'])
956
+ ]
957
+
958
+ return text_object
959
+
877
960
  # Builder Pattern Support
878
961
 
879
962
  def _paragraph_builder(self) -> 'ParagraphBuilder':
@@ -892,30 +975,30 @@ class PDFDancer:
892
975
 
893
976
  def __exit__(self, exc_type, exc_val, exc_tb):
894
977
  """Context manager exit - cleanup if needed."""
895
- # Could add session cleanup here if API supports it
978
+ # TODO Could add session cleanup here if API supports it. Cleanup on the server
896
979
  pass
897
980
 
898
- def _to_path_objects(self, path_refs: List[ObjectRef]) -> List[PathObject]:
899
- return [PathObject(self, ref.internal_id, ref.type, ref.position) for ref in path_refs]
981
+ def _to_path_objects(self, refs: List[ObjectRef]) -> List[PathObject]:
982
+ return [PathObject(self, ref.internal_id, ref.type, ref.position) for ref in refs]
900
983
 
901
- def _to_paragraph_objects(self, path_refs: List[ObjectRef]) -> List[ParagraphObject]:
902
- return [ParagraphObject(self, ref.internal_id, ref.type, ref.position) for ref in path_refs]
984
+ def _to_paragraph_objects(self, refs: List[TextObjectRef]) -> List[ParagraphObject]:
985
+ return [ParagraphObject(self, ref) for ref in refs]
903
986
 
904
- def _to_textline_objects(self, path_refs: List[ObjectRef]) -> List[TextLineObject]:
905
- return [TextLineObject(self, ref.internal_id, ref.type, ref.position) for ref in path_refs]
987
+ def _to_textline_objects(self, refs: List[TextObjectRef]) -> List[TextLineObject]:
988
+ return [TextLineObject(self, ref) for ref in refs]
906
989
 
907
- def _to_image_objects(self, path_refs: List[ObjectRef]) -> List[ImageObject]:
908
- return [ImageObject(self, ref.internal_id, ref.type, ref.position) for ref in path_refs]
990
+ def _to_image_objects(self, refs: List[ObjectRef]) -> List[ImageObject]:
991
+ return [ImageObject(self, ref.internal_id, ref.type, ref.position) for ref in refs]
909
992
 
910
- def _to_form_objects(self, path_refs: List[ObjectRef]) -> List[FormObject]:
911
- return [FormObject(self, ref.internal_id, ref.type, ref.position) for ref in path_refs]
993
+ def _to_form_objects(self, refs: List[ObjectRef]) -> List[FormObject]:
994
+ return [FormObject(self, ref.internal_id, ref.type, ref.position) for ref in refs]
912
995
 
913
- def _to_form_field_objects(self, path_refs: List[FormFieldRef]) -> List[FormFieldObject]:
996
+ def _to_form_field_objects(self, refs: List[FormFieldRef]) -> List[FormFieldObject]:
914
997
  return [FormFieldObject(self, ref.internal_id, ref.type, ref.position, ref.name, ref.value) for ref in
915
- path_refs]
998
+ refs]
916
999
 
917
- def _to_page_objects(self, path_refs: List[ObjectRef]) -> List[PageClient]:
918
- return [PageClient.from_ref(self, ref) for ref in path_refs]
1000
+ def _to_page_objects(self, refs: List[ObjectRef]) -> List[PageClient]:
1001
+ return [PageClient.from_ref(self, ref) for ref in refs]
919
1002
 
920
1003
  def _to_page_object(self, ref: ObjectRef) -> PageClient:
921
1004
  return PageClient.from_ref(self, ref)
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import statistics
3
4
  from dataclasses import dataclass
4
- from typing import Optional
5
+ from typing import Optional, List
5
6
 
6
- from . import ObjectType, Position, ObjectRef, Point, Paragraph, Font, Color, FormFieldRef
7
+ from . import ObjectType, Position, ObjectRef, Point, Paragraph, Font, Color, FormFieldRef, TextObjectRef
7
8
 
8
9
 
9
10
  @dataclass
@@ -66,16 +67,33 @@ class PathObject(PDFObjectBase):
66
67
  """Optional bounding rectangle (if available)."""
67
68
  return self.position.bounding_rect
68
69
 
70
+ def __eq__(self, other):
71
+ if not isinstance(other, PathObject):
72
+ return False
73
+ return (self.internal_id == other.internal_id and
74
+ self.object_type == other.object_type and
75
+ self.position == other.position)
76
+
69
77
 
70
78
  class ImageObject(PDFObjectBase):
71
- pass
79
+ def __eq__(self, other):
80
+ if not isinstance(other, ImageObject):
81
+ return False
82
+ return (self.internal_id == other.internal_id and
83
+ self.object_type == other.object_type and
84
+ self.position == other.position)
72
85
 
73
86
 
74
87
  class FormObject(PDFObjectBase):
75
- pass
88
+ def __eq__(self, other):
89
+ if not isinstance(other, FormObject):
90
+ return False
91
+ return (self.internal_id == other.internal_id and
92
+ self.object_type == other.object_type and
93
+ self.position == other.position)
76
94
 
77
95
 
78
- def _process_text_lines(text: str) -> list[str]:
96
+ def _process_text_lines(text: str) -> List[str]:
79
97
  """
80
98
  Process text into lines for the paragraph.
81
99
  This is a simplified version - the full implementation would handle
@@ -171,21 +189,53 @@ class ParagraphEdit(BaseTextEdit):
171
189
  return self._target_obj._client._modify_paragraph(self._object_ref, self._new_text)
172
190
  else:
173
191
  new_paragraph = Paragraph(
174
- position=self._position,
175
- line_spacing=self._line_spacing if self._line_spacing is not None else DEFAULT_LINE_SPACING,
176
- font=Font(name=self._font_name, size=self._font_size),
177
- text_lines=_process_text_lines(self._new_text),
178
- color=self._color if self._color is not None else DEFAULT_COLOR,
192
+ position=self._position if self._position is not None else self._object_ref.position,
193
+ line_spacing=self._get_line_spacing(),
194
+ font=self._get_font(),
195
+ text_lines=self._get_text_lines(),
196
+ color=self._get_color(),
179
197
  )
180
198
  # noinspection PyProtectedMember
181
199
  return self._target_obj._client._modify_paragraph(self._object_ref, new_paragraph)
182
200
 
201
+ def _get_line_spacing(self) -> float:
202
+ if self._line_spacing is not None:
203
+ return self._line_spacing
204
+ elif self._object_ref.line_spacings is not None:
205
+ return statistics.mean(self._object_ref.line_spacings)
206
+ else:
207
+ return DEFAULT_LINE_SPACING
208
+
209
+ def _get_font(self):
210
+ if self._font_name is not None and self._font_size is not None:
211
+ return Font(name=self._font_name, size=self._font_size)
212
+ elif self._object_ref.font_name is not None and self._object_ref.font_size is not None:
213
+ return Font(name=self._object_ref.font_name, size=self._object_ref.font_size)
214
+ else:
215
+ raise Exception("Font is none")
216
+
217
+ def _get_text_lines(self):
218
+ if self._new_text is not None:
219
+ return _process_text_lines(self._new_text)
220
+ elif self._object_ref.text is not None:
221
+ # TODO this actually messes up existing text line internals
222
+ return _process_text_lines(self._object_ref.text)
223
+ else:
224
+ raise Exception("Paragraph has no text")
225
+
226
+ def _get_color(self):
227
+ if self._color is not None:
228
+ return self._color
229
+ elif self._object_ref.color is not None:
230
+ return self._object_ref.color
231
+ else:
232
+ return DEFAULT_COLOR
233
+
183
234
 
184
235
  class TextLineEdit(BaseTextEdit):
185
236
  def apply(self) -> bool:
186
237
  if (
187
- self._position is None
188
- and self._line_spacing is None
238
+ self._line_spacing is None
189
239
  and self._font_size is None
190
240
  and self._font_name is None
191
241
  and self._color is None
@@ -199,16 +249,56 @@ class TextLineEdit(BaseTextEdit):
199
249
  class ParagraphObject(PDFObjectBase):
200
250
  """Represents a paragraph text block inside a PDF page."""
201
251
 
252
+ def __init__(self, client: 'PDFDancer', object_ref: TextObjectRef):
253
+ super().__init__(client, object_ref.internal_id, object_ref.type, object_ref.position)
254
+ self._object_ref = object_ref
255
+
202
256
  def edit(self) -> ParagraphEdit:
203
257
  return ParagraphEdit(self, self.object_ref())
204
258
 
259
+ def object_ref(self) -> TextObjectRef:
260
+ return self._object_ref
261
+
262
+ def __eq__(self, other):
263
+ if not isinstance(other, ParagraphObject):
264
+ return False
265
+ return (self.internal_id == other.internal_id and
266
+ self.object_type == other.object_type and
267
+ self.position == other.position and
268
+ self._object_ref.text == other._object_ref.text and
269
+ self._object_ref.font_name == other._object_ref.font_name and
270
+ self._object_ref.font_size == other._object_ref.font_size and
271
+ self._object_ref.line_spacings == other._object_ref.line_spacings and
272
+ self._object_ref.color == other._object_ref.color and
273
+ self._object_ref.children == other._object_ref.children)
274
+
205
275
 
206
276
  class TextLineObject(PDFObjectBase):
207
277
  """Represents a single line of text inside a PDF page."""
208
278
 
279
+ def __init__(self, client: 'PDFDancer', object_ref: TextObjectRef):
280
+ super().__init__(client, object_ref.internal_id, object_ref.type, object_ref.position)
281
+ self._object_ref = object_ref
282
+
209
283
  def edit(self) -> TextLineEdit:
210
284
  return TextLineEdit(self, self.object_ref())
211
285
 
286
+ def object_ref(self) -> TextObjectRef:
287
+ return self._object_ref
288
+
289
+ def __eq__(self, other):
290
+ if not isinstance(other, TextLineObject):
291
+ return False
292
+ return (self.internal_id == other.internal_id and
293
+ self.object_type == other.object_type and
294
+ self.position == other.position and
295
+ self._object_ref.text == other._object_ref.text and
296
+ self._object_ref.font_name == other._object_ref.font_name and
297
+ self._object_ref.font_size == other._object_ref.font_size and
298
+ self._object_ref.line_spacings == other._object_ref.line_spacings and
299
+ self._object_ref.color == other._object_ref.color and
300
+ self._object_ref.children == other._object_ref.children)
301
+
212
302
 
213
303
  class FormFieldEdit:
214
304
  def __init__(self, form_field: 'FormFieldObject', object_ref: FormFieldRef):
@@ -239,3 +329,12 @@ class FormFieldObject(PDFObjectBase):
239
329
  ref.name = self.name
240
330
  ref.value = self.value
241
331
  return ref
332
+
333
+ def __eq__(self, other):
334
+ if not isinstance(other, FormFieldObject):
335
+ return False
336
+ return (self.internal_id == other.internal_id and
337
+ self.object_type == other.object_type and
338
+ self.position == other.position and
339
+ self.name == other.name and
340
+ self.value == other.value)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdfdancer-client-python
3
- Version: 0.2.10
3
+ Version: 0.2.12
4
4
  Summary: Python client for PDFDancer API
5
5
  Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
6
6
  License: MIT
@@ -23,8 +23,10 @@ tests/__init__.py
23
23
  tests/conftest.py
24
24
  tests/test_models.py
25
25
  tests/test_openapi_compliance.py
26
+ tests/test_pdf_object_equality.py
26
27
  tests/test_standard_fonts.py
27
28
  tests/e2e/__init__.py
29
+ tests/e2e/pdf_assertions.py
28
30
  tests/e2e/test_acroform.py
29
31
  tests/e2e/test_form_x_objects.py
30
32
  tests/e2e/test_image.py
@@ -34,6 +36,7 @@ tests/e2e/test_paragraph.py
34
36
  tests/e2e/test_path.py
35
37
  tests/e2e/test_pdfdancer.py
36
38
  tests/fixtures/DancingScript-Regular.ttf
39
+ tests/fixtures/Empty.pdf
37
40
  tests/fixtures/JetBrainsMono-Regular.ttf
38
41
  tests/fixtures/ObviouslyAwesome.pdf
39
42
  tests/fixtures/basic-paths.pdf
@@ -34,13 +34,18 @@ def _server_up(base_url: str) -> bool:
34
34
 
35
35
 
36
36
  def _require_env_and_fixture(pdf_filename: str) -> tuple[str, str, Path]:
37
+ base_url, token = _require_env()
38
+ pdf_path = Path(__file__).resolve().parent.parent / 'fixtures' / pdf_filename
39
+ if not pdf_path.exists():
40
+ pytest.fail(f"{pdf_filename} fixture not found")
41
+ return base_url, token, pdf_path
42
+
43
+
44
+ def _require_env() -> tuple[str, str | None]:
37
45
  base_url = _get_base_url()
38
46
  token = _read_token()
39
47
  if not _server_up(base_url):
40
48
  pytest.fail(f"PDFDancer server not reachable at {base_url}; set PDFDANCER_BASE_URL or start server")
41
49
  if not token:
42
50
  pytest.fail("PDFDANCER_TOKEN not set and no token file found; set env or place jwt-token-*.txt in repo")
43
- pdf_path = Path(__file__).resolve().parent.parent / 'fixtures' / pdf_filename
44
- if not pdf_path.exists():
45
- pytest.fail(f"{pdf_filename} fixture not found")
46
- return base_url, token, pdf_path
51
+ return base_url, token