chunkr-ai 0.1.0__py3-none-any.whl → 0.1.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. chunkr_ai/__init__.py +89 -2
  2. chunkr_ai/_base_client.py +1995 -0
  3. chunkr_ai/_client.py +403 -0
  4. chunkr_ai/_compat.py +219 -0
  5. chunkr_ai/_constants.py +14 -0
  6. chunkr_ai/_exceptions.py +108 -0
  7. chunkr_ai/_files.py +123 -0
  8. chunkr_ai/_models.py +829 -0
  9. chunkr_ai/_qs.py +150 -0
  10. chunkr_ai/_resource.py +43 -0
  11. chunkr_ai/_response.py +830 -0
  12. chunkr_ai/_streaming.py +333 -0
  13. chunkr_ai/_types.py +219 -0
  14. chunkr_ai/_utils/__init__.py +57 -0
  15. chunkr_ai/_utils/_logs.py +25 -0
  16. chunkr_ai/_utils/_proxy.py +65 -0
  17. chunkr_ai/_utils/_reflection.py +42 -0
  18. chunkr_ai/_utils/_resources_proxy.py +24 -0
  19. chunkr_ai/_utils/_streams.py +12 -0
  20. chunkr_ai/_utils/_sync.py +86 -0
  21. chunkr_ai/_utils/_transform.py +447 -0
  22. chunkr_ai/_utils/_typing.py +151 -0
  23. chunkr_ai/_utils/_utils.py +422 -0
  24. chunkr_ai/_version.py +4 -0
  25. chunkr_ai/lib/.keep +4 -0
  26. chunkr_ai/pagination.py +71 -0
  27. chunkr_ai/resources/__init__.py +33 -0
  28. chunkr_ai/resources/health.py +136 -0
  29. chunkr_ai/resources/task/__init__.py +33 -0
  30. chunkr_ai/resources/task/parse.py +616 -0
  31. chunkr_ai/resources/task/task.py +664 -0
  32. chunkr_ai/types/__init__.py +8 -0
  33. chunkr_ai/types/health_check_response.py +7 -0
  34. chunkr_ai/types/task/__init__.py +7 -0
  35. chunkr_ai/types/task/parse_create_params.py +806 -0
  36. chunkr_ai/types/task/parse_update_params.py +806 -0
  37. chunkr_ai/types/task/task.py +1186 -0
  38. chunkr_ai/types/task_get_params.py +18 -0
  39. chunkr_ai/types/task_list_params.py +37 -0
  40. chunkr_ai-0.1.0a2.dist-info/METADATA +504 -0
  41. chunkr_ai-0.1.0a2.dist-info/RECORD +44 -0
  42. {chunkr_ai-0.1.0.dist-info → chunkr_ai-0.1.0a2.dist-info}/WHEEL +1 -2
  43. chunkr_ai-0.1.0a2.dist-info/licenses/LICENSE +201 -0
  44. chunkr_ai/api/auth.py +0 -13
  45. chunkr_ai/api/chunkr.py +0 -103
  46. chunkr_ai/api/chunkr_base.py +0 -185
  47. chunkr_ai/api/configuration.py +0 -313
  48. chunkr_ai/api/decorators.py +0 -101
  49. chunkr_ai/api/misc.py +0 -139
  50. chunkr_ai/api/protocol.py +0 -14
  51. chunkr_ai/api/task_response.py +0 -208
  52. chunkr_ai/models.py +0 -55
  53. chunkr_ai-0.1.0.dist-info/METADATA +0 -268
  54. chunkr_ai-0.1.0.dist-info/RECORD +0 -16
  55. chunkr_ai-0.1.0.dist-info/licenses/LICENSE +0 -21
  56. chunkr_ai-0.1.0.dist-info/top_level.txt +0 -1
  57. /chunkr_ai/{api/__init__.py → py.typed} +0 -0
chunkr_ai/api/configuration.py DELETED
@@ -1,313 +0,0 @@
- from pydantic import BaseModel, Field, ConfigDict
- from enum import Enum
- from typing import Any, List, Optional, Union
- from pydantic import field_validator, field_serializer
-
- class CroppingStrategy(str, Enum):
-     ALL = "All"
-     AUTO = "Auto"
-
- class SegmentFormat(str, Enum):
-     HTML = "Html"
-     MARKDOWN = "Markdown"
-
- class EmbedSource(str, Enum):
-     CONTENT = "Content"
-     HTML = "HTML" # Deprecated
-     MARKDOWN = "Markdown" # Deprecated
-     LLM = "LLM"
-
- class GenerationStrategy(str, Enum):
-     LLM = "LLM"
-     AUTO = "Auto"
-
- class GenerationConfig(BaseModel):
-     format: Optional[SegmentFormat] = None
-     strategy: Optional[GenerationStrategy] = None
-     llm: Optional[str] = None
-     crop_image: Optional[CroppingStrategy] = None
-     embed_sources: Optional[List[EmbedSource]] = None
-     extended_context: Optional[bool] = None
-     # Deprecated fields for backwards compatibility
-     html: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.HTML and strategy instead
-     markdown: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.MARKDOWN and strategy instead
-
- class SegmentProcessing(BaseModel):
-     model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
-
-     caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
-     footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
-     formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
-     list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
-     page: Optional[GenerationConfig] = Field(default=None, alias="Page")
-     page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
-     page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
-     picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
-     section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
-     table: Optional[GenerationConfig] = Field(default=None, alias="Table")
-     text: Optional[GenerationConfig] = Field(default=None, alias="Text")
-     title: Optional[GenerationConfig] = Field(default=None, alias="Title")
-
- class Tokenizer(str, Enum):
-     WORD = "Word"
-     CL100K_BASE = "Cl100kBase"
-     XLM_ROBERTA_BASE = "XlmRobertaBase"
-     BERT_BASE_UNCASED = "BertBaseUncased"
-
- class TokenizerType(BaseModel):
-     enum_value: Optional[Tokenizer] = None
-     string_value: Optional[str] = None
-
-     @classmethod
-     def from_enum(cls, enum_value: Tokenizer) -> "TokenizerType":
-         return cls(enum_value=enum_value)
-
-     @classmethod
-     def from_string(cls, string_value: str) -> "TokenizerType":
-         return cls(string_value=string_value)
-
-     def __str__(self) -> str:
-         if self.enum_value is not None:
-             return f"enum:{self.enum_value.value}"
-         elif self.string_value is not None:
-             return f"string:{self.string_value}"
-         return ""
-
-     model_config = ConfigDict()
-
-     def model_dump(self, **kwargs):
-         if self.enum_value is not None:
-             return {"Enum": self.enum_value.value}
-         elif self.string_value is not None:
-             return {"String": self.string_value}
-         return {}
-
- class ChunkProcessing(BaseModel):
-     ignore_headers_and_footers: Optional[bool] = True
-     target_length: Optional[int] = None
-     tokenizer: Optional[Union[TokenizerType, Tokenizer, str]] = None
-
-     model_config = ConfigDict(
-         arbitrary_types_allowed=True,
-     )
-
-     @field_serializer('tokenizer')
-     def serialize_tokenizer(self, tokenizer: Optional[TokenizerType], _info):
-         if tokenizer is None:
-             return None
-         return tokenizer.model_dump()
-
-     @field_validator('tokenizer', mode='before')
-     def validate_tokenizer(cls, v):
-         if v is None:
-             return None
-
-         if isinstance(v, TokenizerType):
-             return v
-
-         if isinstance(v, Tokenizer):
-             return TokenizerType(enum_value=v)
-
-         if isinstance(v, dict):
-             if "Enum" in v:
-                 try:
-                     return TokenizerType(enum_value=Tokenizer(v["Enum"]))
-                 except ValueError:
-                     return TokenizerType(string_value=v["Enum"])
-             elif "String" in v:
-                 return TokenizerType(string_value=v["String"])
-
-         if isinstance(v, str):
-             try:
-                 return TokenizerType(enum_value=Tokenizer(v))
-             except ValueError:
-                 return TokenizerType(string_value=v)
-
-         raise ValueError(f"Cannot convert {v} to TokenizerType")
-
- class OcrStrategy(str, Enum):
-     ALL = "All"
-     AUTO = "Auto"
-
- class SegmentationStrategy(str, Enum):
-     LAYOUT_ANALYSIS = "LayoutAnalysis"
-     PAGE = "Page"
-
- class ErrorHandlingStrategy(str, Enum):
-     FAIL = "Fail"
-     CONTINUE = "Continue"
-
- class FallbackStrategy(BaseModel):
-     type: str
-     model_id: Optional[str] = None
-
-     @classmethod
-     def none(cls) -> "FallbackStrategy":
-         return cls(type="None")
-
-     @classmethod
-     def default(cls) -> "FallbackStrategy":
-         return cls(type="Default")
-
-     @classmethod
-     def model(cls, model_id: str) -> "FallbackStrategy":
-         return cls(type="Model", model_id=model_id)
-
-     def __str__(self) -> str:
-         if self.type == "Model":
-             return f"Model({self.model_id})"
-         return self.type
-
-     def model_dump(self, **kwargs):
-         if self.type == "Model":
-             return {"Model": self.model_id}
-         return self.type
-
-     @field_validator('type')
-     def validate_type(cls, v):
-         if v not in ["None", "Default", "Model"]:
-             raise ValueError(f"Invalid fallback strategy: {v}")
-         return v
-
-     model_config = ConfigDict()
-
-     @classmethod
-     def model_validate(cls, obj):
-         # Handle string values like "None" or "Default"
-         if isinstance(obj, str):
-             if obj in ["None", "Default"]:
-                 return cls(type=obj)
-             # Try to parse as Enum value if it's not a direct match
-             try:
-                 return cls(type=obj)
-             except ValueError:
-                 pass # Let it fall through to normal validation
-
-         # Handle dictionary format like {"Model": "model-id"}
-         elif isinstance(obj, dict) and len(obj) == 1:
-             if "Model" in obj:
-                 return cls(type="Model", model_id=obj["Model"])
-
-         # Fall back to normal validation
-         return super().model_validate(obj)
-
- class LlmProcessing(BaseModel):
-     model_id: Optional[str] = None
-     fallback_strategy: FallbackStrategy = Field(default_factory=FallbackStrategy.default)
-     max_completion_tokens: Optional[int] = None
-     temperature: float = 0.0
-
-     model_config = ConfigDict()
-
-     @field_serializer('fallback_strategy')
-     def serialize_fallback_strategy(self, fallback_strategy: FallbackStrategy, _info):
-         return fallback_strategy.model_dump()
-
-     @field_validator('fallback_strategy', mode='before')
-     def validate_fallback_strategy(cls, v):
-         if isinstance(v, str):
-             if v == "None":
-                 return FallbackStrategy.none()
-             elif v == "Default":
-                 return FallbackStrategy.default()
-             # Try to parse as a model ID if it's not None or Default
-             try:
-                 return FallbackStrategy.model(v)
-             except ValueError:
-                 pass # Let it fall through to normal validation
-         # Handle dictionary format like {"Model": "model-id"}
-         elif isinstance(v, dict) and len(v) == 1:
-             if "Model" in v:
-                 return FallbackStrategy.model(v["Model"])
-             elif "None" in v or v.get("None") is None:
-                 return FallbackStrategy.none()
-             elif "Default" in v or v.get("Default") is None:
-                 return FallbackStrategy.default()
-
-         return v
-
- class BoundingBox(BaseModel):
-     left: float
-     top: float
-     width: float
-     height: float
-
- class OCRResult(BaseModel):
-     bbox: BoundingBox
-     text: str
-     confidence: Optional[float]
-
- class SegmentType(str, Enum):
-     CAPTION = "Caption"
-     FOOTNOTE = "Footnote"
-     FORMULA = "Formula"
-     LIST_ITEM = "ListItem"
-     PAGE = "Page"
-     PAGE_FOOTER = "PageFooter"
-     PAGE_HEADER = "PageHeader"
-     PICTURE = "Picture"
-     SECTION_HEADER = "SectionHeader"
-     TABLE = "Table"
-     TEXT = "Text"
-     TITLE = "Title"
-
- class Segment(BaseModel):
-     bbox: BoundingBox
-     content: str = ""
-     page_height: float
-     llm: Optional[str] = None
-     html: Optional[str] = None
-     image: Optional[str] = None
-     markdown: Optional[str] = None
-     ocr: Optional[List[OCRResult]] = Field(default_factory=list)
-     page_number: int
-     page_width: float
-     segment_id: str
-     segment_type: SegmentType
-     confidence: Optional[float]
-     text: str = ""
-
- class Chunk(BaseModel):
-     chunk_id: str
-     chunk_length: int
-     segments: List[Segment]
-     embed: Optional[str] = None
-
- class OutputResponse(BaseModel):
-     chunks: List[Chunk]
-     file_name: Optional[str]
-     page_count: Optional[int]
-     pdf_url: Optional[str]
-
- class Model(str, Enum):
-     FAST = "Fast"
-     HIGH_QUALITY = "HighQuality"
-
- class Pipeline(str, Enum):
-     AZURE = "Azure"
-     CHUNKR = "Chunkr"
-
- class Configuration(BaseModel):
-     chunk_processing: Optional[ChunkProcessing] = None
-     expires_in: Optional[int] = None
-     error_handling: Optional[ErrorHandlingStrategy] = None
-     high_resolution: Optional[bool] = None
-     ocr_strategy: Optional[OcrStrategy] = None
-     segment_processing: Optional[SegmentProcessing] = None
-     segmentation_strategy: Optional[SegmentationStrategy] = None
-     pipeline: Optional[Pipeline] = None
-     llm_processing: Optional[LlmProcessing] = None
-
- class OutputConfiguration(Configuration):
-     input_file_url: Optional[str] = None
-     # Deprecated
-     json_schema: Optional[Any] = None
-     model: Optional[Model] = None
-     target_chunk_length: Optional[int] = None
-
- class Status(str, Enum):
-     STARTING = "Starting"
-     PROCESSING = "Processing"
-     SUCCEEDED = "Succeeded"
-     FAILED = "Failed"
-     CANCELLED = "Cancelled"
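For context on what this removed module provided, here is a minimal usage sketch of how the old pydantic models were typically combined before 0.1.0a2. It assumes the pre-0.1.0a2 import path shown above; the model id "gpt-4o-mini", the target length, and the chosen strategies are illustrative placeholders, not values taken from this diff.

```python
# Sketch only: exercises the removed models; the concrete values are placeholders.
from chunkr_ai.api.configuration import (  # pre-0.1.0a2 module, deleted in this release
    ChunkProcessing,
    Configuration,
    FallbackStrategy,
    GenerationConfig,
    GenerationStrategy,
    LlmProcessing,
    SegmentProcessing,
    Tokenizer,
)

config = Configuration(
    chunk_processing=ChunkProcessing(target_length=512, tokenizer=Tokenizer.CL100K_BASE),
    llm_processing=LlmProcessing(fallback_strategy=FallbackStrategy.model("gpt-4o-mini")),
    segment_processing=SegmentProcessing(
        Table=GenerationConfig(strategy=GenerationStrategy.LLM),  # populated via Title-cased alias
    ),
)

# The custom validators/serializers above shape the wire format:
payload = config.model_dump(mode="json", exclude_none=True)
# payload["chunk_processing"]["tokenizer"] -> {"Enum": "Cl100kBase"}
# payload["llm_processing"]["fallback_strategy"] -> {"Model": "gpt-4o-mini"}
```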
chunkr_ai/api/decorators.py DELETED
@@ -1,101 +0,0 @@
- import asyncio
- import functools
- import httpx
- import nest_asyncio
- from typing import Callable, Any, TypeVar, Awaitable, Union, overload
- try:
-     from typing import ParamSpec
- except ImportError:
-     from typing_extensions import ParamSpec
-
- T = TypeVar('T')
- P = ParamSpec('P')
-
- _sync_loop = None
-
- def anywhere() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Union[Awaitable[T], T]]]:
-     """Decorator that allows an async function to run anywhere - sync or async context."""
-     def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Union[Awaitable[T], T]]:
-         @functools.wraps(async_func)
-         def wrapper(*args: P.args, **kwargs: P.kwargs) -> Union[Awaitable[T], T]:
-             global _sync_loop
-
-             try:
-                 nest_asyncio.apply()
-             except ImportError:
-                 pass
-
-             try:
-                 asyncio.get_running_loop()
-                 return async_func(*args, **kwargs)
-             except RuntimeError:
-                 if _sync_loop is None:
-                     _sync_loop = asyncio.new_event_loop()
-                 asyncio.set_event_loop(_sync_loop)
-                 try:
-                     return _sync_loop.run_until_complete(async_func(*args, **kwargs))
-                 finally:
-                     asyncio.set_event_loop(None)
-         return wrapper
-     return decorator
-
- def ensure_client() -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]:
-     """Decorator that ensures a valid httpx.AsyncClient exists before executing the method"""
-     def decorator(async_func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
-         @functools.wraps(async_func)
-         async def wrapper(self: Any, *args: Any, **kwargs: Any) -> T:
-             if not self._client or self._client.is_closed:
-                 self._client = httpx.AsyncClient()
-             return await async_func(self, *args, **kwargs)
-         return wrapper
-     return decorator
-
- def require_task() -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]:
-     """Decorator that ensures task has required attributes and valid client before execution"""
-     def decorator(async_func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
-         @functools.wraps(async_func)
-         async def wrapper(self: Any, *args: Any, **kwargs: Any) -> T:
-             if not self.task_url:
-                 raise ValueError("Task URL not found")
-             if not self._client:
-                 raise ValueError("Client not found")
-             if not self._client._client or self._client._client.is_closed:
-                 self._client._client = httpx.AsyncClient()
-             return await async_func(self, *args, **kwargs)
-         return wrapper
-     return decorator
-
- def retry_on_429(max_retries: int = 3, initial_delay: float = 0.5) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
-     """Decorator that retries the request when encountering 429 Too Many Requests errors.
-
-     Args:
-         max_retries: Maximum number of retry attempts (default: 3)
-         initial_delay: Initial delay in seconds, will be exponentially increased with jitter (default: 0.5)
-     """
-     def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
-         @functools.wraps(async_func)
-         async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
-             import random
-             retries = 0
-             while True:
-                 try:
-                     return await async_func(*args, **kwargs)
-                 except httpx.HTTPStatusError as e:
-                     if e.response.status_code != 429:
-                         raise e
-                     if retries >= max_retries:
-                         print("Max retries reached")
-                         raise e
-                     retries += 1
-                     delay = initial_delay * (2 ** retries)
-                     # Use Retry-After header if available
-                     retry_after = e.response.headers.get('Retry-After')
-                     if retry_after:
-                         try:
-                             delay = float(retry_after)
-                         except (ValueError, TypeError):
-                             pass
-                     jitter = random.uniform(0, 0.25) * delay
-                     await asyncio.sleep(delay + jitter)
-         return wrapper
-     return decorator
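A hedged usage sketch of the removed decorators follows. The `fetch_task` function and its URL are illustrative stand-ins introduced for this example, not part of the old SDK.

```python
# Sketch only: composes the removed decorators; the function and URL are made up.
import httpx

from chunkr_ai.api.decorators import anywhere, retry_on_429  # pre-0.1.0a2 module

@anywhere()
@retry_on_429(max_retries=3, initial_delay=0.5)
async def fetch_task(url: str) -> dict:
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
        # raise_for_status() raises httpx.HTTPStatusError, which retry_on_429 inspects for 429s
        response.raise_for_status()
        return response.json()

# From synchronous code, anywhere() runs the coroutine on an internally managed event loop;
# inside an already-running loop it returns an awaitable for the caller to await.
if __name__ == "__main__":
    task = fetch_task("https://example.com/task/123")  # placeholder URL
```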
chunkr_ai/api/misc.py DELETED
@@ -1,139 +0,0 @@
- from .configuration import Configuration
- import base64
- import io
- from pathlib import Path
- from PIL import Image
- from typing import Union, Tuple, BinaryIO, Optional, Any
-
- async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image, bytes, bytearray, memoryview]) -> Tuple[Optional[str], str]:
-     """Convert various file types into a tuple of (filename, file content).
-
-     Args:
-         file: Input file, can be:
-             - URL string starting with http:// or https://
-             - Base64 string
-             - Local file path (will be converted to base64)
-             - Opened binary file (will be converted to base64)
-             - PIL/Pillow Image object (will be converted to base64)
-             - Bytes object (will be converted to base64)
-
-     Returns:
-         Tuple[Optional[str], str]: (filename, content) where content is either a URL or base64 string
-             The filename may be None for URLs, base64 strings, and PIL Images
-
-     Raises:
-         FileNotFoundError: If the file path doesn't exist
-         TypeError: If the file type is not supported
-         ValueError: If the URL is invalid or unreachable
-         ValueError: If the MIME type is unsupported
-     """
-     # Handle bytes-like objects
-     if isinstance(file, (bytes, bytearray, memoryview)):
-         # Convert to bytes first if it's not already
-         file_bytes = bytes(file)
-
-         # Check if this might be an already-encoded base64 string in bytes form
-         try:
-             # Try to decode the bytes to a string and see if it's valid base64
-             potential_base64 = file_bytes.decode('utf-8', errors='strict')
-             base64.b64decode(potential_base64)
-             # If we get here, it was a valid base64 string in bytes form
-             return None, potential_base64
-         except:
-             # Not a base64 string in bytes form, encode it as base64
-             base64_str = base64.b64encode(file_bytes).decode()
-             return None, base64_str
-
-     # Handle strings - urls or paths or base64
-     if isinstance(file, str):
-         # Handle URLs
-         if file.startswith(('http://', 'https://')):
-             return None, file
-
-         # Handle data URLs
-         if file.startswith('data:'):
-             return None, file
-
-         # Try to handle as a file path
-         try:
-             path = Path(file)
-             if path.exists():
-                 # It's a valid file path, convert to Path object and continue processing
-                 file = path
-             else:
-                 # If not a valid file path, try treating as base64
-                 try:
-                     # Just test if it's valid base64, don't store the result
-                     base64.b64decode(file)
-                     return None, file
-                 except:
-                     raise ValueError(f"File not found: {file} and it's not a valid base64 string")
-         except Exception as e:
-             # If string can't be converted to Path or decoded as base64, it might still be a base64 string
-             try:
-                 base64.b64decode(file)
-                 return None, file
-             except:
-                 raise ValueError(f"Unable to process file: {e}")
-
-     # Handle file paths - convert to base64
-     if isinstance(file, Path):
-         path = Path(file).resolve()
-         if not path.exists():
-             raise FileNotFoundError(f"File not found: {file}")
-
-         with open(path, "rb") as f:
-             file_content = f.read()
-             file_ext = path.suffix.lower().lstrip('.')
-             if not file_ext:
-                 raise ValueError("File must have an extension")
-             base64_str = base64.b64encode(file_content).decode()
-             return path.name, base64_str
-
-     # Handle PIL Images - convert to base64
-     if isinstance(file, Image.Image):
-         img_byte_arr = io.BytesIO()
-         format = file.format or "PNG"
-         file.save(img_byte_arr, format=format)
-         img_byte_arr.seek(0)
-         base64_str = base64.b64encode(img_byte_arr.getvalue()).decode()
-         return None, base64_str
-
-     # Handle file-like objects - convert to base64
-     if hasattr(file, "read") and hasattr(file, "seek"):
-         file.seek(0)
-         file_content = file.read()
-         name = getattr(file, "name", "document")
-         if not name or not isinstance(name, str):
-             name = None
-         base64_str = base64.b64encode(file_content).decode()
-         return name, base64_str
-
-     raise TypeError(f"Unsupported file type: {type(file)}")
-
-
- async def prepare_upload_data(
-     file: Optional[Union[str, Path, BinaryIO, Image.Image, bytes, bytearray, memoryview]] = None,
-     filename: Optional[str] = None,
-     config: Optional[Configuration] = None,
- ) -> dict:
-     """Prepare data dictionary for upload.
-
-     Args:
-         file: The file to upload
-         filename: Optional filename to use (overrides any filename from the file)
-         config: Optional configuration settings
-
-     Returns:
-         dict: JSON-serializable data dictionary ready for upload
-     """
-     data = {}
-     if file:
-         processed_filename, processed_file = await prepare_file(file)
-         data["file"] = processed_file
-         data["file_name"] = filename or processed_filename
-
-     if config:
-         data.update(config.model_dump(mode="json", exclude_none=True))
-
-     return data
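For reference, a minimal sketch of how the removed helpers were used together. The file name and the configuration values are placeholders chosen for the example.

```python
# Sketch only: "invoice.pdf" is a placeholder path, not a file referenced by this diff.
import asyncio
from pathlib import Path

from chunkr_ai.api.configuration import Configuration, OcrStrategy  # removed module, see above
from chunkr_ai.api.misc import prepare_upload_data

async def main() -> None:
    # Local paths are read and base64-encoded; http(s) URLs and data: URIs pass through unchanged.
    data = await prepare_upload_data(
        file=Path("invoice.pdf"),
        config=Configuration(ocr_strategy=OcrStrategy.AUTO),
    )
    # data -> {"file": "<base64>", "file_name": "invoice.pdf", "ocr_strategy": "Auto"}

if __name__ == "__main__":
    asyncio.run(main())
```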
chunkr_ai/api/protocol.py DELETED
@@ -1,14 +0,0 @@
- from typing import Optional, runtime_checkable, Protocol
- from httpx import AsyncClient
-
-
- @runtime_checkable
- class ChunkrClientProtocol(Protocol):
-     """Protocol defining the interface for Chunkr clients"""
-
-     raise_on_failure: bool = True
-     _client: Optional[AsyncClient] = None
-
-     def _headers(self) -> dict:
-         """Return headers required for API requests"""
-         ...
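A minimal sketch of a class that satisfies the removed protocol. `MinimalClient`, its constructor, and the header format are assumptions made for illustration; they are not the old SDK's client.

```python
# Sketch only: MinimalClient and its header shape are illustrative, not the old SDK's client.
from typing import Optional

from httpx import AsyncClient

from chunkr_ai.api.protocol import ChunkrClientProtocol  # pre-0.1.0a2 module

class MinimalClient:
    raise_on_failure: bool = True
    _client: Optional[AsyncClient] = None

    def __init__(self, api_key: str) -> None:
        self._api_key = api_key
        self._client = AsyncClient()

    def _headers(self) -> dict:
        # Header name/format is an assumption for the sketch.
        return {"Authorization": self._api_key}

# Because the protocol is @runtime_checkable, structural conformance can be checked at runtime:
print(isinstance(MinimalClient("api-key"), ChunkrClientProtocol))  # True
```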