pdfdancer-client-python 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pdfdancer/__init__.py CHANGED
@@ -12,7 +12,7 @@ from .exceptions import (
12
12
  )
13
13
  from .models import (
14
14
  ObjectRef, Position, ObjectType, Font, Color, Image, BoundingRect, Paragraph, FormFieldRef, TextObjectRef,
15
- PositionMode, ShapeType, Point, StandardFonts
15
+ PageRef, PositionMode, ShapeType, Point, StandardFonts, PageSize, Orientation
16
16
  )
17
17
  from .paragraph_builder import ParagraphBuilder
18
18
 
@@ -30,10 +30,13 @@ __all__ = [
30
30
  "Paragraph",
31
31
  "FormFieldRef",
32
32
  "TextObjectRef",
33
+ "PageRef",
33
34
  "PositionMode",
34
35
  "ShapeType",
35
36
  "Point",
36
37
  "StandardFonts",
38
+ "PageSize",
39
+ "Orientation",
37
40
  "PdfDancerException",
38
41
  "FontNotFoundException",
39
42
  "ValidationException",
pdfdancer/models.py CHANGED
@@ -1,11 +1,113 @@
1
1
  """
2
2
  Model classes for the PDFDancer Python client.
3
- Closely mirrors the Java model classes with Python conventions.
4
3
  """
5
4
 
6
5
  from dataclasses import dataclass
7
6
  from enum import Enum
8
- from typing import Optional, List, Any
7
+ from typing import Optional, List, Any, Dict, Mapping, Tuple, ClassVar, Union
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class PageSize:
12
+ """Represents a page size specification, covering both standard and custom dimensions."""
13
+
14
+ name: Optional[str]
15
+ width: float
16
+ height: float
17
+
18
+ _STANDARD_SIZES: ClassVar[Dict[str, Tuple[float, float]]] = {
19
+ "A4": (595.0, 842.0),
20
+ "LETTER": (612.0, 792.0),
21
+ "LEGAL": (612.0, 1008.0),
22
+ "TABLOID": (792.0, 1224.0),
23
+ "A3": (842.0, 1191.0),
24
+ "A5": (420.0, 595.0),
25
+ }
26
+
27
+ # Convenience aliases populated after class definition; annotated for type checkers.
28
+ A4: ClassVar['PageSize']
29
+ LETTER: ClassVar['PageSize']
30
+ LEGAL: ClassVar['PageSize']
31
+ TABLOID: ClassVar['PageSize']
32
+ A3: ClassVar['PageSize']
33
+ A5: ClassVar['PageSize']
34
+
35
+ def __post_init__(self) -> None:
36
+ if not isinstance(self.width, (int, float)) or not isinstance(self.height, (int, float)):
37
+ raise TypeError("Page width and height must be numeric")
38
+ if self.width <= 0 or self.height <= 0:
39
+ raise ValueError("Page width and height must be positive values")
40
+
41
+ width = float(self.width)
42
+ height = float(self.height)
43
+ object.__setattr__(self, 'width', width)
44
+ object.__setattr__(self, 'height', height)
45
+
46
+ if self.name is not None:
47
+ if not isinstance(self.name, str):
48
+ raise TypeError("Page size name must be a string when provided")
49
+ normalized_name = self.name.strip().upper()
50
+ object.__setattr__(self, 'name', normalized_name if normalized_name else None)
51
+
52
+ def to_dict(self) -> dict:
53
+ """Convert to dictionary for JSON serialization."""
54
+ return {
55
+ "name": self.name,
56
+ "width": self.width,
57
+ "height": self.height,
58
+ }
59
+
60
+ @classmethod
61
+ def from_name(cls, name: str) -> 'PageSize':
62
+ """Create a page size from a known standard name."""
63
+ if not name or not isinstance(name, str):
64
+ raise ValueError("Page size name must be a non-empty string")
65
+ normalized = name.strip().upper()
66
+ if normalized not in cls._STANDARD_SIZES:
67
+ raise ValueError(f"Unknown page size name: {name}")
68
+ width, height = cls._STANDARD_SIZES[normalized]
69
+ return cls(name=normalized, width=width, height=height)
70
+
71
+ @classmethod
72
+ def from_dict(cls, data: Mapping[str, Any]) -> 'PageSize':
73
+ """Create a page size from a dictionary-like object."""
74
+ width = data.get('width') if isinstance(data, Mapping) else None
75
+ height = data.get('height') if isinstance(data, Mapping) else None
76
+ if width is None or height is None:
77
+ raise ValueError("Page size dictionary must contain 'width' and 'height'")
78
+ name = data.get('name') if isinstance(data, Mapping) else None
79
+ return cls(name=name, width=width, height=height)
80
+
81
+ @classmethod
82
+ def coerce(cls, value: Union['PageSize', str, Mapping[str, Any]]) -> 'PageSize':
83
+ """Normalize various page size inputs into a PageSize instance."""
84
+ if isinstance(value, cls):
85
+ return value
86
+ if isinstance(value, str):
87
+ return cls.from_name(value)
88
+ if isinstance(value, Mapping):
89
+ return cls.from_dict(value)
90
+ raise TypeError(f"Cannot convert type {type(value)} to PageSize")
91
+
92
+ @classmethod
93
+ def standard_names(cls) -> List[str]:
94
+ """Return a list of supported standard page size names."""
95
+ return sorted(cls._STANDARD_SIZES.keys())
96
+
97
+
98
+ # Populate convenience constants for standard sizes.
99
+ PageSize.A4 = PageSize.from_name("A4")
100
+ PageSize.LETTER = PageSize.from_name("LETTER")
101
+ PageSize.LEGAL = PageSize.from_name("LEGAL")
102
+ PageSize.TABLOID = PageSize.from_name("TABLOID")
103
+ PageSize.A3 = PageSize.from_name("A3")
104
+ PageSize.A5 = PageSize.from_name("A5")
105
+
106
+
107
+ class Orientation(Enum):
108
+ """Page orientation options."""
109
+ PORTRAIT = "PORTRAIT"
110
+ LANDSCAPE = "LANDSCAPE"
9
111
 
10
112
 
11
113
  class StandardFonts(Enum):
@@ -365,6 +467,19 @@ class MoveRequest:
365
467
  }
366
468
 
367
469
 
470
+ @dataclass
471
+ class PageMoveRequest:
472
+ """Request object for moving pages within the document."""
473
+ from_page_index: int
474
+ to_page_index: int
475
+
476
+ def to_dict(self) -> dict:
477
+ return {
478
+ "fromPageIndex": self.from_page_index,
479
+ "toPageIndex": self.to_page_index
480
+ }
481
+
482
+
368
483
  @dataclass
369
484
  class AddRequest:
370
485
  """Request object for add operations."""
@@ -547,3 +662,21 @@ class TextObjectRef(ObjectRef):
547
662
  def get_children(self) -> List['TextObjectRef']:
548
663
  """Get the child text objects."""
549
664
  return self.children
665
+
666
+
667
+ @dataclass
668
+ class PageRef(ObjectRef):
669
+ """
670
+ Represents a page reference with additional page-specific properties.
671
+ Extends ObjectRef to include page size and orientation.
672
+ """
673
+ page_size: Optional[PageSize]
674
+ orientation: Optional[Orientation]
675
+
676
+ def get_page_size(self) -> Optional[PageSize]:
677
+ """Get the page size."""
678
+ return self.page_size
679
+
680
+ def get_orientation(self) -> Optional[Orientation]:
681
+ """Get the page orientation."""
682
+ return self.orientation
@@ -6,6 +6,7 @@ Closely mirrors the Java ParagraphBuilder class with Python conventions.
6
6
  from pathlib import Path
7
7
  from typing import Optional, Union
8
8
 
9
+ from . import StandardFonts
9
10
  from .exceptions import ValidationException
10
11
  from .models import Paragraph, Font, Color, Position
11
12
 
@@ -60,7 +61,7 @@ class ParagraphBuilder:
60
61
 
61
62
  return self
62
63
 
63
- def font(self, font_name: str, font_size: float) -> 'ParagraphBuilder':
64
+ def font(self, font_name: str | StandardFonts, font_size: float) -> 'ParagraphBuilder':
64
65
  """
65
66
  Set the font for the paragraph from a font name (or StandardFonts member) and a font size.
66
67
  Equivalent to withFont(Font) in Java ParagraphBuilder.
@@ -75,6 +76,10 @@ class ParagraphBuilder:
75
76
  Raises:
76
77
  ValidationException: If font is None
77
78
  """
79
+ # If font_name is an enum member, use its value
80
+ if isinstance(font_name, StandardFonts):
81
+ font_name = font_name.value
82
+
78
83
  font = Font(font_name, font_size)
79
84
  if font is None:
80
85
  raise ValidationException("Font cannot be null")
@@ -185,7 +190,7 @@ class ParagraphBuilder:
185
190
  self._paragraph.set_position(position)
186
191
  return self
187
192
 
188
- def build(self) -> Paragraph:
193
+ def _build(self) -> Paragraph:
189
194
  """
190
195
  Build and return the final Paragraph object.
191
196
  Equivalent to build() in Java ParagraphBuilder.
@@ -267,7 +272,7 @@ class ParagraphBuilder:
267
272
  return lines
268
273
 
269
274
  def add(self):
270
- self._client._add_paragraph(self.build())
275
+ self._client._add_paragraph(self._build())
271
276
 
272
277
 
273
278
  class ParagraphPageBuilder(ParagraphBuilder):
pdfdancer/pdfdancer_v1.py CHANGED
@@ -8,7 +8,7 @@ Provides session-based PDF manipulation operations with strict validation.
8
8
  import json
9
9
  import os
10
10
  from pathlib import Path
11
- from typing import List, Optional, Union, BinaryIO
11
+ from typing import List, Optional, Union, BinaryIO, Mapping, Any
12
12
 
13
13
  import requests
14
14
 
@@ -22,21 +22,32 @@ from .exceptions import (
22
22
  )
23
23
  from .image_builder import ImageBuilder
24
24
  from .models import (
25
- ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef, TextObjectRef,
26
- FindRequest, DeleteRequest, MoveRequest, AddRequest, ModifyRequest, ModifyTextRequest, ChangeFormFieldRequest,
27
- ShapeType, PositionMode
25
+ ObjectRef, Position, ObjectType, Font, Image, Paragraph, FormFieldRef, TextObjectRef, PageRef,
26
+ FindRequest, DeleteRequest, MoveRequest, PageMoveRequest, AddRequest, ModifyRequest, ModifyTextRequest,
27
+ ChangeFormFieldRequest,
28
+ ShapeType, PositionMode, PageSize, Orientation
28
29
  )
29
30
  from .paragraph_builder import ParagraphPageBuilder
30
31
  from .types import PathObject, ParagraphObject, TextLineObject, ImageObject, FormObject, FormFieldObject
31
32
 
32
33
 
33
34
  class PageClient:
34
- def __init__(self, page_index: int, root: "PDFDancer"):
35
+ def __init__(self, page_index: int, root: "PDFDancer", page_size: Optional[PageSize] = None,
36
+ orientation: Optional[Union[Orientation, str]] = Orientation.PORTRAIT):
35
37
  self.page_index = page_index
36
38
  self.root = root
37
39
  self.object_type = ObjectType.PAGE
38
40
  self.position = Position.at_page(page_index)
39
41
  self.internal_id = f"PAGE-{page_index}"
42
+ self.page_size = page_size
43
+ if isinstance(orientation, str):
44
+ normalized = orientation.strip().upper()
45
+ try:
46
+ self.orientation = Orientation(normalized)
47
+ except ValueError:
48
+ self.orientation = normalized
49
+ else:
50
+ self.orientation = orientation
40
51
 
41
52
  def select_paths_at(self, x: float, y: float) -> List[PathObject]:
42
53
  # noinspection PyProtectedMember
@@ -121,20 +132,71 @@ class PageClient:
121
132
  return self.root._to_form_field_objects(self.root._find_form_fields(position))
122
133
 
123
134
  @classmethod
124
- def from_ref(cls, root: 'PDFDancer', object_ref: ObjectRef) -> 'PageClient':
125
- page_client = PageClient(page_index=object_ref.position.page_index, root=root)
135
+ def from_ref(cls, root: 'PDFDancer', page_ref: PageRef) -> 'PageClient':
136
+ page_client = PageClient(
137
+ page_index=page_ref.position.page_index,
138
+ root=root,
139
+ page_size=page_ref.page_size,
140
+ orientation=page_ref.orientation
141
+ )
142
+ page_client.internal_id = page_ref.internal_id
143
+ if page_ref.position is not None:
144
+ page_client.position = page_ref.position
145
+ page_client.page_index = page_ref.position.page_index
126
146
  return page_client
127
147
 
128
148
  def delete(self) -> bool:
129
149
  # noinspection PyProtectedMember
130
150
  return self.root._delete_page(self._ref())
131
151
 
152
+ def move_to(self, target_page_index: int) -> bool:
153
+ """Move this page to a different index within the document."""
154
+ if target_page_index is None or target_page_index < 0:
155
+ raise ValidationException(f"Target page index must be >= 0, got {target_page_index}")
156
+
157
+ # noinspection PyProtectedMember
158
+ moved = self.root._move_page(self.page_index, target_page_index)
159
+ if moved:
160
+ self.page_index = target_page_index
161
+ self.position = Position.at_page(target_page_index)
162
+ return moved
163
+
132
164
  def _ref(self):
133
165
  return ObjectRef(internal_id=self.internal_id, position=self.position, type=self.object_type)
134
166
 
135
167
  def new_paragraph(self):
136
168
  return ParagraphPageBuilder(self.root, self.page_index)
137
169
 
170
+ def select_paths(self):
171
+ # noinspection PyProtectedMember
172
+ return self.root._to_path_objects(self.root._find_paths(Position.at_page(self.page_index)))
173
+
174
+ def select_elements(self):
175
+ """
176
+ Select all elements (paragraphs, text lines, images, paths, forms, form fields) on this page.
177
+
178
+ Returns:
179
+ List of all PDF objects on this page
180
+ """
181
+ result = []
182
+ result.extend(self.select_paragraphs())
183
+ result.extend(self.select_text_lines())
184
+ result.extend(self.select_images())
185
+ result.extend(self.select_paths())
186
+ result.extend(self.select_forms())
187
+ result.extend(self.select_form_fields())
188
+ return result
189
+
190
+ @property
191
+ def size(self):
192
+ """Property alias for page size."""
193
+ return self.page_size
194
+
195
+ @property
196
+ def page_orientation(self):
197
+ """Property alias for orientation."""
198
+ return self.orientation
199
+
138
200
 
139
201
  class PDFDancer:
140
202
  """
@@ -172,7 +234,7 @@ class PDFDancer:
172
234
  return PDFDancer(resolved_token, pdf_data, resolved_base_url, timeout)
173
235
 
174
236
  @classmethod
175
- def _resolve_base_url(cls, base_url: str | None) -> str | None:
237
+ def _resolve_base_url(cls, base_url: Optional[str]) -> Optional[str]:
176
238
  env_base_url = os.getenv("PDFDANCER_BASE_URL")
177
239
  resolved_base_url = base_url or (env_base_url.strip() if env_base_url and env_base_url.strip() else None)
178
240
  if resolved_base_url is None:
@@ -180,7 +242,7 @@ class PDFDancer:
180
242
  return resolved_base_url
181
243
 
182
244
  @classmethod
183
- def _resolve_token(cls, token: str | None) -> str | None:
245
+ def _resolve_token(cls, token: Optional[str]) -> Optional[str]:
184
246
  resolved_token = token.strip() if token and token.strip() else None
185
247
  if resolved_token is None:
186
248
  env_token = os.getenv("PDFDANCER_TOKEN")
@@ -197,12 +259,57 @@ class PDFDancer:
197
259
  def new(cls,
198
260
  token: Optional[str] = None,
199
261
  base_url: Optional[str] = None,
200
- timeout: float = 30.0) -> "PDFDancer":
262
+ timeout: float = 30.0,
263
+ page_size: Optional[Union[PageSize, str, Mapping[str, Any]]] = None,
264
+ orientation: Optional[Union[Orientation, str]] = None,
265
+ initial_page_count: int = 1) -> "PDFDancer":
266
+ """
267
+ Create a new blank PDF document with optional configuration.
268
+
269
+ Args:
270
+ token: Override for the API token; falls back to `PDFDANCER_TOKEN` environment variable.
271
+ base_url: Override for the API base URL; falls back to `PDFDANCER_BASE_URL`
272
+ or defaults to `https://api.pdfdancer.com`.
273
+ timeout: HTTP read timeout in seconds.
274
+ page_size: Page size for the PDF (default: A4). Accepts `PageSize`, a standard name string, or a
275
+ mapping with `width`/`height` values.
276
+ orientation: Page orientation (default: PORTRAIT). Can be Orientation enum or string.
277
+ initial_page_count: Number of initial blank pages (default: 1).
201
278
 
279
+ Returns:
280
+ A ready-to-use `PDFDancer` client instance with a blank PDF.
281
+ """
202
282
  resolved_token = cls._resolve_token(token)
203
283
  resolved_base_url = cls._resolve_base_url(base_url)
204
284
 
205
- raise Exception("Unsupported Operation Exception: TODO")
285
+ # Create a new instance that will call _create_blank_pdf_session
286
+ instance = object.__new__(cls)
287
+
288
+ # Initialize instance variables
289
+ if not resolved_token or not resolved_token.strip():
290
+ raise ValidationException("Authentication token cannot be null or empty")
291
+
292
+ instance._token = resolved_token.strip()
293
+ instance._base_url = resolved_base_url.rstrip('/')
294
+ instance._read_timeout = timeout
295
+
296
+ # Create HTTP session for connection reuse
297
+ instance._session = requests.Session()
298
+ instance._session.headers.update({
299
+ 'Authorization': f'Bearer {instance._token}'
300
+ })
301
+
302
+ # Create blank PDF session
303
+ instance._session_id = instance._create_blank_pdf_session(
304
+ page_size=page_size,
305
+ orientation=orientation,
306
+ initial_page_count=initial_page_count
307
+ )
308
+
309
+ # Set pdf_bytes to None since we don't have the PDF bytes yet
310
+ instance._pdf_bytes = None
311
+
312
+ return instance
206
313
 
207
314
  def __init__(self, token: str, pdf_data: Union[bytes, Path, str, BinaryIO],
208
315
  base_url: str, read_timeout: float = 0):
@@ -335,6 +442,22 @@ class PDFDancer:
335
442
  f"Server response: {details}"
336
443
  )
337
444
 
445
+ @staticmethod
446
+ def _cleanup_url_path(base_url: str, path: str) -> str:
447
+ """
448
+ Combine base_url and path, ensuring no double slashes.
449
+
450
+ Args:
451
+ base_url: Base URL (may or may not have trailing slash)
452
+ path: Path segment (may or may not have leading slash)
453
+
454
+ Returns:
455
+ Combined URL with no double slashes
456
+ """
457
+ base = base_url.rstrip('/')
458
+ path = path.lstrip('/')
459
+ return f"{base}/{path}"
460
+
338
461
  def _create_session(self) -> str:
339
462
  """
340
463
  Creates a new PDF processing session by uploading the PDF data.
@@ -345,7 +468,7 @@ class PDFDancer:
345
468
  }
346
469
 
347
470
  response = self._session.post(
348
- f"{self._base_url}/session/create",
471
+ self._cleanup_url_path(self._base_url, "/session/create"),
349
472
  files=files,
350
473
  timeout=self._read_timeout if self._read_timeout > 0 else None
351
474
  )
@@ -365,6 +488,76 @@ class PDFDancer:
365
488
  raise HttpClientException(f"Failed to create session: {error_message}",
366
489
  response=getattr(e, 'response', None), cause=e) from None
367
490
 
491
+ def _create_blank_pdf_session(self,
492
+ page_size: Optional[Union[PageSize, str, Mapping[str, Any]]] = None,
493
+ orientation: Optional[Union[Orientation, str]] = None,
494
+ initial_page_count: int = 1) -> str:
495
+ """
496
+ Creates a new PDF processing session with a blank PDF document.
497
+
498
+ Args:
499
+ page_size: Page size (default: A4). Accepts `PageSize`, a standard name string, or a
500
+ mapping with `width`/`height` values.
501
+ orientation: Page orientation (default: PORTRAIT). Can be Orientation enum or string.
502
+ initial_page_count: Number of initial pages (default: 1)
503
+
504
+ Returns:
505
+ Session ID for the newly created blank PDF
506
+
507
+ Raises:
508
+ SessionException: If session creation fails
509
+ HttpClientException: If HTTP communication fails
510
+ """
511
+ try:
512
+ # Build request payload
513
+ request_data = {}
514
+
515
+ # Handle page_size - convert to type-safe object with dimensions
516
+ if page_size is not None:
517
+ try:
518
+ request_data['pageSize'] = PageSize.coerce(page_size).to_dict()
519
+ except ValueError as exc:
520
+ raise ValidationException(str(exc)) from exc
521
+ except TypeError:
522
+ raise ValidationException(f"Invalid page_size type: {type(page_size)}")
523
+
524
+ # Handle orientation
525
+ if orientation is not None:
526
+ if isinstance(orientation, Orientation):
527
+ request_data['orientation'] = orientation.value
528
+ elif isinstance(orientation, str):
529
+ request_data['orientation'] = orientation
530
+ else:
531
+ raise ValidationException(f"Invalid orientation type: {type(orientation)}")
532
+
533
+ # Handle initial_page_count with validation
534
+ if initial_page_count < 1:
535
+ raise ValidationException(f"Initial page count must be at least 1, got {initial_page_count}")
536
+ request_data['initialPageCount'] = initial_page_count
537
+
538
+ headers = {'Content-Type': 'application/json'}
539
+ response = self._session.post(
540
+ self._cleanup_url_path(self._base_url, "/session/new"),
541
+ json=request_data,
542
+ headers=headers,
543
+ timeout=self._read_timeout if self._read_timeout > 0 else None
544
+ )
545
+
546
+ self._handle_authentication_error(response)
547
+ response.raise_for_status()
548
+ session_id = response.text.strip()
549
+
550
+ if not session_id:
551
+ raise SessionException("Server returned empty session ID")
552
+
553
+ return session_id
554
+
555
+ except requests.exceptions.RequestException as e:
556
+ self._handle_authentication_error(getattr(e, 'response', None))
557
+ error_message = self._extract_error_message(getattr(e, 'response', None))
558
+ raise HttpClientException(f"Failed to create blank PDF session: {error_message}",
559
+ response=getattr(e, 'response', None), cause=e) from None
560
+
368
561
  def _make_request(self, method: str, path: str, data: Optional[dict] = None,
369
562
  params: Optional[dict] = None) -> requests.Response:
370
563
  """
@@ -378,7 +571,7 @@ class PDFDancer:
378
571
  try:
379
572
  response = self._session.request(
380
573
  method=method,
381
- url=f"{self._base_url}{path}",
574
+ url=self._cleanup_url_path(self._base_url, path),
382
575
  json=data,
383
576
  params=params,
384
577
  headers=headers,
@@ -528,22 +721,36 @@ class PDFDancer:
528
721
  return self._to_textline_objects(self._find_text_lines(None))
529
722
 
530
723
  def page(self, page_index: int) -> PageClient:
531
- return PageClient(page_index, self)
724
+ """
725
+ Get a specific page by index, fetching page properties from the server.
726
+
727
+ Args:
728
+ page_index: The 0-based page index
729
+
730
+ Returns:
731
+ PageClient with page properties populated
732
+ """
733
+ page_ref = self._get_page(page_index)
734
+ if page_ref:
735
+ return PageClient.from_ref(self, page_ref)
736
+ else:
737
+ # Fallback to basic PageClient if page not found
738
+ return PageClient(page_index, self)
532
739
 
533
740
  # Page Operations
534
741
 
535
742
  def pages(self) -> List[PageClient]:
536
743
  return self._to_page_objects(self._get_pages())
537
744
 
538
- def _get_pages(self) -> List[ObjectRef]:
745
+ def _get_pages(self) -> List[PageRef]:
539
746
  """
540
747
  Retrieves references to all pages in the PDF document.
541
748
  """
542
749
  response = self._make_request('POST', '/pdf/page/find')
543
750
  pages_data = response.json()
544
- return [self._parse_object_ref(page_data) for page_data in pages_data]
751
+ return [self._parse_page_ref(page_data) for page_data in pages_data]
545
752
 
546
- def _get_page(self, page_index: int) -> Optional[ObjectRef]:
753
+ def _get_page(self, page_index: int) -> Optional[PageRef]:
547
754
  """
548
755
  Retrieves a reference to a specific page by its page index.
549
756
 
@@ -551,7 +758,7 @@ class PDFDancer:
551
758
  page_index: The page index to retrieve (0-based indexing)
552
759
 
553
760
  Returns:
554
- Object reference for the specified page, or None if not found
761
+ Page reference for the specified page, or None if not found
555
762
  """
556
763
  if page_index < 0:
557
764
  raise ValidationException(f"Page index must be >= 0, got {page_index}")
@@ -563,7 +770,7 @@ class PDFDancer:
563
770
  if not pages_data:
564
771
  return None
565
772
 
566
- return self._parse_object_ref(pages_data[0])
773
+ return self._parse_page_ref(pages_data[0])
567
774
 
568
775
  def _delete_page(self, page_ref: ObjectRef) -> bool:
569
776
  """
@@ -583,6 +790,25 @@ class PDFDancer:
583
790
  response = self._make_request('DELETE', '/pdf/page/delete', data=request_data)
584
791
  return response.json()
585
792
 
793
+ def move_page(self, from_page_index: int, to_page_index: int) -> bool:
794
+ """Move a page to a different index within the document."""
795
+ return self._move_page(from_page_index, to_page_index)
796
+
797
+ def _move_page(self, from_page_index: int, to_page_index: int) -> bool:
798
+ """Internal helper to perform the page move operation."""
799
+ for value, label in ((from_page_index, "from_page_index"), (to_page_index, "to_page_index")):
800
+ if value is None:
801
+ raise ValidationException(f"{label} cannot be null")
802
+ if not isinstance(value, int):
803
+ raise ValidationException(f"{label} must be an integer, got {type(value)}")
804
+ if value < 0:
805
+ raise ValidationException(f"{label} must be >= 0, got {value}")
806
+
807
+ request_data = PageMoveRequest(from_page_index, to_page_index).to_dict()
808
+ response = self._make_request('PUT', '/pdf/page/move', data=request_data)
809
+ result = response.json()
810
+ return bool(result)
811
+
586
812
  # Manipulation Operations
587
813
 
588
814
  def _delete(self, object_ref: ObjectRef) -> bool:
@@ -678,6 +904,10 @@ class PDFDancer:
678
904
  def new_paragraph(self) -> ParagraphBuilder:
679
905
  return ParagraphBuilder(self)
680
906
 
907
+ def new_page(self):
908
+ response = self._make_request('POST', '/pdf/page/add', data=None)
909
+ return self._parse_page_ref(response.json())
910
+
681
911
  def new_image(self) -> ImageBuilder:
682
912
  return ImageBuilder(self)
683
913
 
@@ -809,7 +1039,7 @@ class PDFDancer:
809
1039
 
810
1040
  headers = {'X-Session-Id': self._session_id}
811
1041
  response = self._session.post(
812
- f"{self._base_url}/font/register",
1042
+ self._cleanup_url_path(self._base_url, "/font/register"),
813
1043
  files=files,
814
1044
  headers=headers,
815
1045
  timeout=30
@@ -957,6 +1187,42 @@ class PDFDancer:
957
1187
 
958
1188
  return text_object
959
1189
 
1190
+ def _parse_page_ref(self, obj_data: dict) -> PageRef:
1191
+ """Parse JSON object data into PageRef instance with page-specific properties."""
1192
+ position_data = obj_data.get('position', {})
1193
+ position = self._parse_position(position_data) if position_data else None
1194
+
1195
+ object_type = ObjectType(obj_data['type'])
1196
+
1197
+ # Parse page size if present
1198
+ page_size = None
1199
+ if 'pageSize' in obj_data and isinstance(obj_data['pageSize'], dict):
1200
+ page_size_data = obj_data['pageSize']
1201
+ try:
1202
+ page_size = PageSize.from_dict(page_size_data)
1203
+ except ValueError:
1204
+ page_size = None
1205
+
1206
+ # Parse orientation if present
1207
+ orientation_value = obj_data.get('orientation')
1208
+ orientation = None
1209
+ if isinstance(orientation_value, str):
1210
+ normalized = orientation_value.strip().upper()
1211
+ try:
1212
+ orientation = Orientation(normalized)
1213
+ except ValueError:
1214
+ orientation = None
1215
+ elif isinstance(orientation_value, Orientation):
1216
+ orientation = orientation_value
1217
+
1218
+ return PageRef(
1219
+ internal_id=obj_data.get('internalId'),
1220
+ position=position,
1221
+ type=object_type,
1222
+ page_size=page_size,
1223
+ orientation=orientation
1224
+ )
1225
+
960
1226
  # Builder Pattern Support
961
1227
 
962
1228
  def _paragraph_builder(self) -> 'ParagraphBuilder':
@@ -997,8 +1263,59 @@ class PDFDancer:
997
1263
  return [FormFieldObject(self, ref.internal_id, ref.type, ref.position, ref.name, ref.value) for ref in
998
1264
  refs]
999
1265
 
1000
- def _to_page_objects(self, refs: List[ObjectRef]) -> List[PageClient]:
1266
+ def _to_page_objects(self, refs: List[PageRef]) -> List[PageClient]:
1001
1267
  return [PageClient.from_ref(self, ref) for ref in refs]
1002
1268
 
1003
- def _to_page_object(self, ref: ObjectRef) -> PageClient:
1269
+ def _to_page_object(self, ref: PageRef) -> PageClient:
1004
1270
  return PageClient.from_ref(self, ref)
1271
+
1272
+ def _to_mixed_objects(self, refs: List[ObjectRef]) -> List:
1273
+ """
1274
+ Convert a list of ObjectRefs to their appropriate object types.
1275
+ Handles mixed object types by checking the type of each ref.
1276
+ """
1277
+ result = []
1278
+ for ref in refs:
1279
+ if ref.type == ObjectType.PARAGRAPH:
1280
+ # Need to convert to TextObjectRef first
1281
+ if isinstance(ref, TextObjectRef):
1282
+ result.append(ParagraphObject(self, ref))
1283
+ else:
1284
+ # Re-fetch with proper type
1285
+ text_refs = self._find_paragraphs(ref.position)
1286
+ result.extend(self._to_paragraph_objects(text_refs))
1287
+ elif ref.type == ObjectType.TEXT_LINE:
1288
+ if isinstance(ref, TextObjectRef):
1289
+ result.append(TextLineObject(self, ref))
1290
+ else:
1291
+ text_refs = self._find_text_lines(ref.position)
1292
+ result.extend(self._to_textline_objects(text_refs))
1293
+ elif ref.type == ObjectType.IMAGE:
1294
+ result.append(ImageObject(self, ref.internal_id, ref.type, ref.position))
1295
+ elif ref.type == ObjectType.PATH:
1296
+ result.append(PathObject(self, ref.internal_id, ref.type, ref.position))
1297
+ elif ref.type == ObjectType.FORM_X_OBJECT:
1298
+ result.append(FormObject(self, ref.internal_id, ref.type, ref.position))
1299
+ elif ref.type == ObjectType.FORM_FIELD:
1300
+ if isinstance(ref, FormFieldRef):
1301
+ result.append(FormFieldObject(self, ref.internal_id, ref.type, ref.position, ref.name, ref.value))
1302
+ else:
1303
+ form_refs = self._find_form_fields(ref.position)
1304
+ result.extend(self._to_form_field_objects(form_refs))
1305
+ return result
1306
+
1307
+ def select_elements(self):
1308
+ """
1309
+ Select all elements (paragraphs, text lines, images, paths, forms, form fields) in the document.
1310
+
1311
+ Returns:
1312
+ List of all PDF objects in the document
1313
+ """
1314
+ result = []
1315
+ result.extend(self.select_paragraphs())
1316
+ result.extend(self.select_text_lines())
1317
+ result.extend(self.select_images())
1318
+ result.extend(self.select_paths())
1319
+ result.extend(self.select_forms())
1320
+ result.extend(self.select_form_fields())
1321
+ return result
pdfdancer/types.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import statistics
4
4
  from dataclasses import dataclass
5
- from typing import Optional
5
+ from typing import Optional, List
6
6
 
7
7
  from . import ObjectType, Position, ObjectRef, Point, Paragraph, Font, Color, FormFieldRef, TextObjectRef
8
8
 
@@ -93,7 +93,7 @@ class FormObject(PDFObjectBase):
93
93
  self.position == other.position)
94
94
 
95
95
 
96
- def _process_text_lines(text: str) -> list[str]:
96
+ def _process_text_lines(text: str) -> List[str]:
97
97
  """
98
98
  Process text into lines for the paragraph.
99
99
  This is a simplified version - the full implementation would handle
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdfdancer-client-python
3
- Version: 0.2.11
3
+ Version: 0.2.13
4
4
  Summary: Python client for PDFDancer API
5
5
  Author-email: "The Famous Cat Ltd." <hi@thefamouscat.com>
6
6
  License: MIT
@@ -9,10 +9,11 @@ Project-URL: Repository, https://github.com/MenschMachine/pdfdancer-client-pytho
9
9
  Classifier: Development Status :: 4 - Beta
10
10
  Classifier: Intended Audience :: Developers
11
11
  Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Programming Language :: Python :: 3.9
13
12
  Classifier: Programming Language :: Python :: 3.10
14
13
  Classifier: Programming Language :: Python :: 3.11
15
14
  Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Python: >=3.10
16
17
  Description-Content-Type: text/markdown
17
18
  Requires-Dist: requests>=2.25.0
18
19
  Requires-Dist: pydantic>=1.8.0
@@ -26,11 +27,16 @@ Requires-Dist: mypy>=1.0; extra == "dev"
26
27
 
27
28
  # PDFDancer Python Client
28
29
 
29
- Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically in just a few lines.
30
+ Automate PDF clean-up, redaction, form filling, and content injection against the PDFDancer API from Python. The client
31
+ gives you page-scoped selectors, fluent editors, and builders so you can read, modify, and export PDFs programmatically
32
+ in just a few lines.
33
+
34
+ Latest schema version available at https://bucket.pdfdancer.com/api-doc/development-0.0.yml.
30
35
 
31
36
  ## Highlights
32
37
 
33
- - Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page, coordinates, or text prefixes
38
+ - Locate anything inside a PDF—paragraphs, text lines, images, vector paths, pages, AcroForm fields—by page,
39
+ coordinates, or text prefixes
34
40
  - Edit or delete existing content with fluent paragraph/text editors and safe apply-on-exit context managers
35
41
  - Fill or update form fields and propagate the changes back to the document instantly
36
42
  - Add brand-new content with paragraph/image builders, custom fonts, and precise page positioning
@@ -47,7 +53,7 @@ Automate PDF clean-up, redaction, form filling, and content injection against th
47
53
 
48
54
  ## Requirements
49
55
 
50
- - Python 3.9 or newer
56
+ - Python 3.10 or newer
51
57
  - A PDFDancer API token (set `PDFDANCER_TOKEN` or pass `token=...`)
52
58
  - Network access to a PDFDancer service (defaults to `https://api.pdfdancer.com`; override with `PDFDANCER_BASE_URL`)
53
59
 
@@ -67,21 +73,21 @@ from pathlib import Path
67
73
  from pdfdancer import Color, PDFDancer
68
74
 
69
75
  with PDFDancer.open(
70
- pdf_data=Path("input.pdf"),
71
- token="your-api-token", # optional when PDFDANCER_TOKEN is set
72
- base_url="https://api.pdfdancer.com",
76
+ pdf_data=Path("input.pdf"),
77
+ token="your-api-token", # optional when PDFDANCER_TOKEN is set
78
+ base_url="https://api.pdfdancer.com",
73
79
  ) as pdf:
74
80
  # Locate existing content
75
81
  heading = pdf.page(0).select_paragraphs_starting_with("Executive Summary")[0]
76
82
  heading.edit().replace("Overview").apply()
77
83
 
78
84
  # Add a new paragraph using the fluent builder
79
- pdf.new_paragraph() \
80
- .text("Generated with PDFDancer") \
81
- .font("Helvetica", 12) \
82
- .color(Color(70, 70, 70)) \
83
- .line_spacing(1.4) \
84
- .at(page_index=0, x=72, y=520) \
85
+ pdf.new_paragraph() \
86
+ .text("Generated with PDFDancer") \
87
+ .font("Helvetica", 12) \
88
+ .color(Color(70, 70, 70)) \
89
+ .line_spacing(1.4) \
90
+ .at(page_index=0, x=72, y=520) \
85
91
  .add()
86
92
 
87
93
  # Persist the modified document
@@ -107,7 +113,8 @@ with PDFDancer.open("report.pdf") as pdf: # environment variables provide token
107
113
  print(page.internal_id, page.position.bounding_rect)
108
114
  ```
109
115
 
110
- Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
116
+ Selectors return rich objects (`ParagraphObject`, `TextLineObject`, `ImageObject`, `FormFieldObject`, etc.) with helpers
117
+ such as `delete()`, `move_to(x, y)`, or `edit()` depending on the object type.
111
118
 
112
119
  ## Editing Text and Forms
113
120
 
@@ -116,11 +123,11 @@ with PDFDancer.open("report.pdf") as pdf:
116
123
  paragraph = pdf.page(0).select_paragraphs_starting_with("Disclaimer")[0]
117
124
 
118
125
  # Chain updates explicitly…
119
- paragraph.edit() \
120
- .replace("Updated disclaimer text") \
121
- .font("Roboto-Regular", 11) \
122
- .line_spacing(1.1) \
123
- .move_to(72, 140) \
126
+ paragraph.edit() \
127
+ .replace("Updated disclaimer text") \
128
+ .font("Roboto-Regular", 11) \
129
+ .line_spacing(1.1) \
130
+ .move_to(72, 140) \
124
131
  .apply()
125
132
 
126
133
  # …or use the context manager to auto-apply on success
@@ -141,16 +148,16 @@ with PDFDancer.open("report.pdf") as pdf:
141
148
  pdf.register_font("/path/to/custom.ttf")
142
149
 
143
150
  # Paragraphs
144
- pdf.new_paragraph() \
145
- .text("Greetings from PDFDancer!") \
146
- .font(fonts[0].name, fonts[0].size) \
147
- .at(page_index=0, x=220, y=480) \
151
+ pdf.new_paragraph() \
152
+ .text("Greetings from PDFDancer!") \
153
+ .font(fonts[0].name, fonts[0].size) \
154
+ .at(page_index=0, x=220, y=480) \
148
155
  .add()
149
156
 
150
157
  # Raster images
151
- pdf.new_image() \
152
- .from_file(Path("logo.png")) \
153
- .at(page=0, x=48, y=700) \
158
+ pdf.new_image() \
159
+ .from_file(Path("logo.png")) \
160
+ .at(page=0, x=48, y=700) \
154
161
  .add()
155
162
  ```
156
163
 
@@ -0,0 +1,11 @@
1
+ pdfdancer/__init__.py,sha256=STOBUkVrBG7SbgoT6wM6tfwBVbjUiQ9JTpmznJwBF94,1158
2
+ pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
3
+ pdfdancer/image_builder.py,sha256=Omxc2LcieJ1MbvWBXR5_sfia--eAucTUe0KWgr22HYo,842
4
+ pdfdancer/models.py,sha256=yhatfgMWxYareL7J20Wz_6-V7oCzrqX35oZdNJ8UFJM,22984
5
+ pdfdancer/paragraph_builder.py,sha256=pgFTkyhYrx4VQDKy4Vhp-042OMlJOD8D0MW9flkvC7Y,9410
6
+ pdfdancer/pdfdancer_v1.py,sha256=_-twUSJ7IgIJbnQdK8eL8pgqSRb-psnd4KEXUL2oIEI,53250
7
+ pdfdancer/types.py,sha256=SOmYP49XPVy6DZ4JXSJrfy0Aww-Tv7QjZCDnOB8VTT4,11860
8
+ pdfdancer_client_python-0.2.13.dist-info/METADATA,sha256=nuaEmk4zdhsmAUw3jPu7Sn1vnUFSfvYKDxhVNXC0sus,6868
9
+ pdfdancer_client_python-0.2.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ pdfdancer_client_python-0.2.13.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
11
+ pdfdancer_client_python-0.2.13.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- pdfdancer/__init__.py,sha256=71HwLjHHPsCQMTUtLHYAwzslhF3PqN5g1QwMr4HbKSQ,1076
2
- pdfdancer/exceptions.py,sha256=Y5zwNVZprsv2hvKX304cXWobJt11nrEhCzLklu2wiO8,1567
3
- pdfdancer/image_builder.py,sha256=Omxc2LcieJ1MbvWBXR5_sfia--eAucTUe0KWgr22HYo,842
4
- pdfdancer/models.py,sha256=ZoB5ZP1jaZsubqzhMr9W9nsIUirVUty_FkRiPZWq8vY,18276
5
- pdfdancer/paragraph_builder.py,sha256=mjV36-XOqcYATfIjSOy7_SBO0EKXjsAtMqYL8IaowGU,9218
6
- pdfdancer/pdfdancer_v1.py,sha256=ICLALD5QxhXBZti8nQmq8pF1Ig9EulnD6SclCjRqThA,39839
7
- pdfdancer/types.py,sha256=jlsThSR4VYu1VLfTJO0sImSfvqlARLDxpk0x5ycFLsI,11854
8
- pdfdancer_client_python-0.2.11.dist-info/METADATA,sha256=Mk1d756dVlHKrpSoJPFfagXjR7NHWKJED0v0amCucZs,6770
9
- pdfdancer_client_python-0.2.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- pdfdancer_client_python-0.2.11.dist-info/top_level.txt,sha256=ICwSVRpcCKrdBF9QlaX9Y0e_N3Nk1p7QVxadGOnbxeY,10
11
- pdfdancer_client_python-0.2.11.dist-info/RECORD,,