chunkr-ai 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. chunkr_ai-0.0.4/PKG-INFO +204 -0
  2. chunkr_ai-0.0.4/README.md +187 -0
  3. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/pyproject.toml +4 -4
  4. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai/api/auth.py +0 -2
  5. chunkr_ai-0.0.4/src/chunkr_ai/api/base.py +173 -0
  6. chunkr_ai-0.0.4/src/chunkr_ai/api/chunkr.py +108 -0
  7. chunkr_ai-0.0.4/src/chunkr_ai/api/chunkr_async.py +105 -0
  8. chunkr_ai-0.0.4/src/chunkr_ai/api/config.py +131 -0
  9. chunkr_ai-0.0.4/src/chunkr_ai/api/protocol.py +19 -0
  10. chunkr_ai-0.0.4/src/chunkr_ai/api/task.py +131 -0
  11. chunkr_ai-0.0.4/src/chunkr_ai/models.py +48 -0
  12. chunkr_ai-0.0.4/src/chunkr_ai.egg-info/PKG-INFO +204 -0
  13. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai.egg-info/SOURCES.txt +6 -1
  14. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai.egg-info/requires.txt +1 -2
  15. chunkr_ai-0.0.4/tests/test_chunkr.py +158 -0
  16. chunkr_ai-0.0.2/PKG-INFO +0 -16
  17. chunkr_ai-0.0.2/src/chunkr_ai/api/chunkr.py +0 -125
  18. chunkr_ai-0.0.2/src/chunkr_ai/api/chunkr_async.py +0 -39
  19. chunkr_ai-0.0.2/src/chunkr_ai/api/models.py +0 -231
  20. chunkr_ai-0.0.2/src/chunkr_ai.egg-info/PKG-INFO +0 -16
  21. chunkr_ai-0.0.2/tests/test_chunkr.py +0 -69
  22. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/LICENSE +0 -0
  23. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/setup.cfg +0 -0
  24. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai/__init__.py +0 -0
  25. /chunkr_ai-0.0.2/README.md → /chunkr_ai-0.0.4/src/chunkr_ai/api/__init__.py +0 -0
  26. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai/api/api.py +0 -0
  27. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai/main.py +0 -0
  28. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  29. {chunkr_ai-0.0.2 → chunkr_ai-0.0.4}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.2
2
+ Name: chunkr-ai
3
+ Version: 0.0.4
4
+ Summary: Python client for Chunkr: open source document intelligence
5
+ Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
+ Project-URL: Homepage, https://chunkr.ai
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: httpx>=0.28.1
10
+ Requires-Dist: pillow>=11.1.0
11
+ Requires-Dist: pydantic>=2.10.4
12
+ Requires-Dist: python-dotenv>=1.0.1
13
+ Requires-Dist: requests>=2.32.3
14
+ Provides-Extra: test
15
+ Requires-Dist: pytest>=8.3.4; extra == "test"
16
+ Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
17
+
18
+ # Chunkr Python Client
19
+
20
+ This package provides a simple Python interface for interacting with the Chunkr API.
21
+
22
+ ## Getting Started
23
+
24
+ You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
25
+
26
+ For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install chunkr-ai
32
+ ```
33
+
34
+ ## Usage
35
+
36
+ We provide two clients: `Chunkr` for synchronous operations and `ChunkrAsync` for asynchronous operations.
37
+
38
+ ### Synchronous Usage
39
+
40
+ ```python
41
+ from chunkr_ai import Chunkr
42
+
43
+ # Initialize client
44
+ chunkr = Chunkr()
45
+
46
+ # Upload a file and wait for processing
47
+ task = chunkr.upload("document.pdf")
48
+
49
+ # Print the response
50
+ print(task)
51
+
52
+ # Get output from task
53
+ output = task.output
54
+
55
+ # If you want to upload without waiting for processing
56
+ task = chunkr.start_upload("document.pdf")
57
+ # ... do other things ...
58
+ task.poll() # Check status when needed
59
+ ```
60
+
61
+ ### Asynchronous Usage
62
+
63
+ ```python
64
+ from chunkr_ai import ChunkrAsync
65
+
66
+ async def process_document():
67
+ # Initialize client
68
+ chunkr = ChunkrAsync()
69
+
70
+ # Upload a file and wait for processing
71
+ task = await chunkr.upload("document.pdf")
72
+
73
+ # Print the response
74
+ print(task)
75
+
76
+ # Get output from task
77
+ output = task.output
78
+
79
+ # If you want to upload without waiting for processing
80
+ task = await chunkr.start_upload("document.pdf")
81
+ # ... do other things ...
82
+ await task.poll_async() # Check status when needed
83
+ ```
84
+
85
+ ### Additional Features
86
+
87
+ Both clients support various input types:
88
+
89
+ ```python
90
+ # Upload from file path
91
+ chunkr.upload("document.pdf")
92
+
93
+ # Upload from opened file
94
+ with open("document.pdf", "rb") as f:
95
+ chunkr.upload(f)
96
+
97
+ # Upload from URL
98
+ chunkr.upload("https://example.com/document.pdf")
99
+
100
+ # Upload from base64 string
101
+ chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
102
+
103
+ # Upload an image
104
+ from PIL import Image
105
+ img = Image.open("photo.jpg")
106
+ chunkr.upload(img)
107
+ ```
108
+
109
+ ### Configuration
110
+
111
+ You can customize the processing behavior by passing a `Configuration` object:
112
+
113
+ ```python
114
+ from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
115
+
116
+ # Basic configuration
117
+ config = Configuration(
118
+ ocr_strategy=OcrStrategy.AUTO,
119
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
120
+ high_resolution=True,
121
+ expires_in=3600, # seconds
122
+ )
123
+
124
+ # Upload with configuration
125
+ task = chunkr.upload("document.pdf", config)
126
+ ```
127
+
128
+ #### Available Configuration Examples
129
+
130
+ - **Chunk Processing**
131
+ ```python
132
+ from chunkr_ai.models import ChunkProcessing
133
+ config = Configuration(
134
+ chunk_processing=ChunkProcessing(target_length=1024)
135
+ )
136
+ ```
137
+ - **Expires In**
138
+ ```python
139
+ config = Configuration(expires_in=3600)
140
+ ```
141
+
142
+ - **High Resolution**
143
+ ```python
144
+ config = Configuration(high_resolution=True)
145
+ ```
146
+
147
+ - **JSON Schema**
148
+ ```python
149
+ config = Configuration(json_schema=JsonSchema(
150
+ title="Sales Data",
151
+ properties=[
152
+ Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
153
+ Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
154
+ ]
155
+ ))
156
+ ```
157
+
158
+ - **OCR Strategy**
159
+ ```python
160
+ config = Configuration(ocr_strategy=OcrStrategy.AUTO)
161
+ ```
162
+
163
+ - **Segment Processing**
164
+ ```python
165
+ from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
166
+ config = Configuration(
167
+ segment_processing=SegmentProcessing(
168
+ page=GenerationConfig(
169
+ html=GenerationStrategy.LLM,
170
+ markdown=GenerationStrategy.LLM
171
+ )
172
+ )
173
+ )
174
+ ```
175
+
176
+ - **Segmentation Strategy**
177
+ ```python
178
+ config = Configuration(
179
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS # or SegmentationStrategy.PAGE
180
+ )
181
+ ```
182
+
183
+ ## Environment setup
184
+
185
+ You can provide your API key and URL in several ways:
186
+ 1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
187
+ 2. `.env` file
188
+ 3. Direct initialization:
189
+ ```python
190
+ chunkr = Chunkr(
191
+ api_key="your-api-key",
192
+ url="https://api.chunkr.ai"
193
+ )
194
+ ```
195
+
196
+ ## Run tests
197
+
198
+ ```bash
199
+ # Install dependencies
200
+ uv pip install -e ".[test]"
201
+
202
+ # Run tests
203
+ uv run pytest
204
+ ```
@@ -0,0 +1,187 @@
1
+ # Chunkr Python Client
2
+
3
+ This package provides a simple Python interface for interacting with the Chunkr API.
4
+
5
+ ## Getting Started
6
+
7
+ You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
8
+
9
+ For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install chunkr-ai
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ We provide two clients: `Chunkr` for synchronous operations and `ChunkrAsync` for asynchronous operations.
20
+
21
+ ### Synchronous Usage
22
+
23
+ ```python
24
+ from chunkr_ai import Chunkr
25
+
26
+ # Initialize client
27
+ chunkr = Chunkr()
28
+
29
+ # Upload a file and wait for processing
30
+ task = chunkr.upload("document.pdf")
31
+
32
+ # Print the response
33
+ print(task)
34
+
35
+ # Get output from task
36
+ output = task.output
37
+
38
+ # If you want to upload without waiting for processing
39
+ task = chunkr.start_upload("document.pdf")
40
+ # ... do other things ...
41
+ task.poll() # Check status when needed
42
+ ```
43
+
44
+ ### Asynchronous Usage
45
+
46
+ ```python
47
+ from chunkr_ai import ChunkrAsync
48
+
49
+ async def process_document():
50
+ # Initialize client
51
+ chunkr = ChunkrAsync()
52
+
53
+ # Upload a file and wait for processing
54
+ task = await chunkr.upload("document.pdf")
55
+
56
+ # Print the response
57
+ print(task)
58
+
59
+ # Get output from task
60
+ output = task.output
61
+
62
+ # If you want to upload without waiting for processing
63
+ task = await chunkr.start_upload("document.pdf")
64
+ # ... do other things ...
65
+ await task.poll_async() # Check status when needed
66
+ ```
67
+
68
+ ### Additional Features
69
+
70
+ Both clients support various input types:
71
+
72
+ ```python
73
+ # Upload from file path
74
+ chunkr.upload("document.pdf")
75
+
76
+ # Upload from opened file
77
+ with open("document.pdf", "rb") as f:
78
+ chunkr.upload(f)
79
+
80
+ # Upload from URL
81
+ chunkr.upload("https://example.com/document.pdf")
82
+
83
+ # Upload from base64 string
84
+ chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
85
+
86
+ # Upload an image
87
+ from PIL import Image
88
+ img = Image.open("photo.jpg")
89
+ chunkr.upload(img)
90
+ ```
91
+
92
+ ### Configuration
93
+
94
+ You can customize the processing behavior by passing a `Configuration` object:
95
+
96
+ ```python
97
+ from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
98
+
99
+ # Basic configuration
100
+ config = Configuration(
101
+ ocr_strategy=OcrStrategy.AUTO,
102
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
103
+ high_resolution=True,
104
+ expires_in=3600, # seconds
105
+ )
106
+
107
+ # Upload with configuration
108
+ task = chunkr.upload("document.pdf", config)
109
+ ```
110
+
111
+ #### Available Configuration Examples
112
+
113
+ - **Chunk Processing**
114
+ ```python
115
+ from chunkr_ai.models import ChunkProcessing
116
+ config = Configuration(
117
+ chunk_processing=ChunkProcessing(target_length=1024)
118
+ )
119
+ ```
120
+ - **Expires In**
121
+ ```python
122
+ config = Configuration(expires_in=3600)
123
+ ```
124
+
125
+ - **High Resolution**
126
+ ```python
127
+ config = Configuration(high_resolution=True)
128
+ ```
129
+
130
+ - **JSON Schema**
131
+ ```python
132
+ config = Configuration(json_schema=JsonSchema(
133
+ title="Sales Data",
134
+ properties=[
135
+ Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
136
+ Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
137
+ ]
138
+ ))
139
+ ```
140
+
141
+ - **OCR Strategy**
142
+ ```python
143
+ config = Configuration(ocr_strategy=OcrStrategy.AUTO)
144
+ ```
145
+
146
+ - **Segment Processing**
147
+ ```python
148
+ from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
149
+ config = Configuration(
150
+ segment_processing=SegmentProcessing(
151
+ page=GenerationConfig(
152
+ html=GenerationStrategy.LLM,
153
+ markdown=GenerationStrategy.LLM
154
+ )
155
+ )
156
+ )
157
+ ```
158
+
159
+ - **Segmentation Strategy**
160
+ ```python
161
+ config = Configuration(
162
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS # or SegmentationStrategy.PAGE
163
+ )
164
+ ```
165
+
166
+ ## Environment setup
167
+
168
+ You can provide your API key and URL in several ways:
169
+ 1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
170
+ 2. `.env` file
171
+ 3. Direct initialization:
172
+ ```python
173
+ chunkr = Chunkr(
174
+ api_key="your-api-key",
175
+ url="https://api.chunkr.ai"
176
+ )
177
+ ```
178
+
179
+ ## Run tests
180
+
181
+ ```bash
182
+ # Install dependencies
183
+ uv pip install -e ".[test]"
184
+
185
+ # Run tests
186
+ uv run pytest
187
+ ```
@@ -4,22 +4,22 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.2"
7
+ version = "0.0.4"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
- description = "Python client for chunkr: open source document intelligence"
9
+ description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
11
11
  license = {"file" = "LICENSE"}
12
+ urls = {Homepage = "https://chunkr.ai"}
12
13
  dependencies = [
13
- "build>=1.2.2.post1",
14
14
  "httpx>=0.28.1",
15
15
  "pillow>=11.1.0",
16
16
  "pydantic>=2.10.4",
17
17
  "python-dotenv>=1.0.1",
18
18
  "requests>=2.32.3",
19
- "twine>=6.0.1",
20
19
  ]
21
20
 
22
21
  [project.optional-dependencies]
23
22
  test = [
24
23
  "pytest>=8.3.4",
24
+ "pytest-xdist>=3.6.1",
25
25
  ]
@@ -1,5 +1,3 @@
1
- from typing import Optional
2
-
3
1
  class HeadersMixin:
4
2
  """Mixin class for handling authorization headers"""
5
3
 
@@ -0,0 +1,173 @@
1
+ from .config import Configuration
2
+ from .task import TaskResponse
3
+ from .auth import HeadersMixin
4
+ from abc import abstractmethod
5
+ from dotenv import load_dotenv
6
+ import io
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from PIL import Image
11
+ import requests
12
+ from typing import BinaryIO, Tuple, Union
13
+
14
class ChunkrBase(HeadersMixin):
    """Base class with shared functionality for Chunkr API clients.

    Resolves the API URL and key, and converts the supported upload inputs
    into the ``files``/``data`` dictionaries used for the multipart request.
    Subclasses implement the actual HTTP calls.
    """

    # MIME types accepted in a base64 data header, mapped to the file
    # extension used for the generated upload filename.
    _MIME_TO_EXT = {
        'application/pdf': 'pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
        'application/vnd.ms-powerpoint': 'ppt',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
        'application/vnd.ms-excel': 'xls',
        'image/jpeg': 'jpg',
        'image/png': 'png',
        'image/jpg': 'jpg',
    }

    def __init__(self, url: Union[str, None] = None, api_key: Union[str, None] = None):
        """Resolve the API URL and API key.

        Each value is taken from the explicit argument first, then from the
        environment (after loading a ``.env`` file). The URL falls back to
        ``https://api.chunkr.ai``.

        Args:
            url: Base URL of the Chunkr API. Optional.
            api_key: API key used for authorization. Optional.

        Raises:
            ValueError: If no API key is given and CHUNKR_API_KEY is not set.
        """
        load_dotenv()
        self.url = url or os.getenv('CHUNKR_URL') or 'https://api.chunkr.ai'
        self._api_key = api_key or os.getenv('CHUNKR_API_KEY')
        if not self._api_key:
            raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")

        # Endpoints are built as f"{self.url}/api/...", so drop trailing slashes.
        self.url = self.url.rstrip("/")

    def _prepare_file(
        self,
        file: Union[str, Path, BinaryIO, Image.Image]
    ) -> Tuple[str, BinaryIO]:
        """Convert various file types into a tuple of (filename, file-like object).

        Args:
            file: Input file, can be:
                - String or Path to a file
                - URL string starting with http:// or https://
                - Base64 string with a MIME type header
                  (e.g. ``data:application/pdf;base64,...``)
                - Opened binary file (mode='rb')
                - PIL/Pillow Image object

        Returns:
            Tuple[str, BinaryIO]: (filename, file-like object) ready for upload.
            For file paths the returned object is a newly opened file handle;
            the caller is responsible for closing it.

        Raises:
            FileNotFoundError: If the file path doesn't exist
            TypeError: If the file type is not supported
            ValueError: If the base64 payload cannot be decoded
            ValueError: If the MIME type is unsupported
            requests.RequestException: If the URL cannot be downloaded
        """
        # Handle URLs
        if isinstance(file, str) and file.startswith(('http://', 'https://')):
            from urllib.parse import urlparse

            # NOTE(review): no timeout is passed, so a stalled server blocks
            # this call indefinitely; consider adding one.
            response = requests.get(file)
            response.raise_for_status()
            # Take the name from the URL path only, so query strings and
            # fragments do not end up in the uploaded filename.
            filename = Path(urlparse(file).path).name or 'downloaded_file'
            return filename, io.BytesIO(response.content)

        # Handle base64 strings
        if isinstance(file, str) and ';base64,' in file:
            import base64

            header, _, base64_data = file.partition(';base64,')
            mime_type = header.split(':')[-1].split(';')[0].lower()

            # Validate the MIME type before decoding so this error is reported
            # as-is instead of being re-labelled as a base64 problem.
            extension = self._MIME_TO_EXT.get(mime_type)
            if extension is None:
                raise ValueError(f"Unsupported MIME type: {mime_type}")

            try:
                file_bytes = base64.b64decode(base64_data)
            except ValueError as e:  # binascii.Error is a ValueError subclass
                raise ValueError(f"Invalid base64 string: {str(e)}") from e

            return f"file.{extension}", io.BytesIO(file_bytes)

        # Handle file paths
        if isinstance(file, (str, Path)):
            path = Path(file).resolve()
            if not path.exists():
                raise FileNotFoundError(f"File not found: {file}")
            # The handle is intentionally left open: it is streamed by the
            # upload request. The caller must close it afterwards.
            return path.name, open(path, 'rb')

        # Handle PIL Images
        if isinstance(file, Image.Image):
            buffer = io.BytesIO()
            # Images created in memory have no format; default to PNG.
            image_format = file.format or 'PNG'
            file.save(buffer, format=image_format)
            buffer.seek(0)
            return f"image.{image_format.lower()}", buffer

        # Handle file-like objects
        if hasattr(file, 'read') and hasattr(file, 'seek'):
            # Use the object's name when it is a usable path; some file
            # objects expose a non-string name (e.g. an integer descriptor).
            name = getattr(file, 'name', None)
            if not isinstance(name, (str, Path)):
                name = 'document'
            return Path(name).name or 'document', file

        raise TypeError(f"Unsupported file type: {type(file)}")

    def _prepare_upload_data(
        self,
        file: Union[str, Path, BinaryIO, Image.Image],
        config: Configuration = None
    ) -> Tuple[dict, dict]:
        """Prepare files and data dictionaries for upload.

        Args:
            file: The file to upload
            config: Optional configuration settings

        Returns:
            Tuple[dict, dict]: (files dict, data dict) ready for upload.
            ``files["file"]`` is the ``(filename, file object)`` pair; nested
            configuration values are added to ``files`` as JSON parts and
            scalar values go into ``data``.
        """
        filename, file_obj = self._prepare_file(file)
        files = {"file": (filename, file_obj)}
        data = {}

        if config is not None:
            config_dict = config.model_dump(mode="json", exclude_none=True)
            for key, value in config_dict.items():
                if isinstance(value, dict):
                    # Nested settings are sent as their own JSON form part.
                    files[key] = (None, json.dumps(value), 'application/json')
                else:
                    data[key] = value

        return files, data

    # NOTE(review): this class does not visibly use ABCMeta, so unless
    # HeadersMixin supplies it, @abstractmethod is documentation only and
    # does not prevent instantiating ChunkrBase directly.

    @abstractmethod
    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Must be implemented by subclasses.
        """

    @abstractmethod
    def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response.

        Must be implemented by subclasses.
        """

    @abstractmethod
    def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Must be implemented by subclasses.
        """
@@ -0,0 +1,108 @@
1
+ from .base import ChunkrBase
2
+ from .config import Configuration
3
+ from .task import TaskResponse
4
+ from pathlib import Path
5
+ from PIL import Image
6
+ import requests
7
+ from typing import Union, BinaryIO
8
+
9
class Chunkr(ChunkrBase):
    """Chunkr API client (synchronous).

    Holds a ``requests.Session`` for all API calls. Call ``close()`` or use
    the client as a context manager to release the session when done.
    """

    def __init__(self, url: Union[str, None] = None, api_key: Union[str, None] = None):
        """Create the client and its HTTP session.

        Args:
            url: Base URL of the Chunkr API. Optional.
            api_key: API key used for authorization. Optional.
        """
        super().__init__(url, api_key)
        self._session = requests.Session()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self._session.close()

    def __enter__(self) -> "Chunkr":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Args:
            file: The file to upload.
            config: Configuration options for processing. Optional.

        Examples:
        ```
        # Upload from file path
        chunkr.upload("document.pdf")

        # Upload from URL
        chunkr.upload("https://example.com/document.pdf")

        # Upload from base64 string (must include MIME type header)
        chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")

        # Upload from opened file
        with open("document.pdf", "rb") as f:
            chunkr.upload(f)

        # Upload an image
        from PIL import Image
        img = Image.open("photo.jpg")
        chunkr.upload(img)
        ```

        Returns:
            TaskResponse: The completed task response
        """
        task = self.start_upload(file, config)
        return task.poll()

    def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response.

        It will not wait for processing to complete. To wait for the full
        processing to complete, use `task.poll()`.

        Args:
            file: The file to upload.
            config: Configuration options for processing. Optional.

        Examples:
        ```
        # Upload from file path
        task = chunkr.start_upload("document.pdf")

        # Upload from opened file
        with open("document.pdf", "rb") as f:
            task = chunkr.start_upload(f)

        # Upload from URL
        task = chunkr.start_upload("https://example.com/document.pdf")

        # Upload from base64 string (must include MIME type header)
        task = chunkr.start_upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")

        # Upload an image
        from PIL import Image
        img = Image.open("photo.jpg")
        task = chunkr.start_upload(img)

        # Wait for the task to complete - this can be done when needed
        task.poll()
        ```

        Returns:
            TaskResponse: The initial task response

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        files, data = self._prepare_upload_data(file, config)
        file_obj = files["file"][1]
        try:
            r = self._session.post(
                f"{self.url}/api/v1/task",
                files=files,
                data=data,
                headers=self._headers()
            )
        finally:
            # Close handles/buffers created while preparing the upload (file
            # paths, URLs, base64, images). A file object supplied by the
            # caller is passed through unchanged and stays open for them.
            if file_obj is not file:
                file_obj.close()
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)

    def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Args:
            task_id: The ID of the task to get

        Returns:
            TaskResponse: The task response

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        r = self._session.get(
            f"{self.url}/api/v1/task/{task_id}",
            headers=self._headers()
        )
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)
108
+