chunkr-ai 0.0.50__py3-none-any.whl → 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. chunkr_ai/__init__.py +89 -2
  2. chunkr_ai/_base_client.py +1995 -0
  3. chunkr_ai/_client.py +402 -0
  4. chunkr_ai/_compat.py +219 -0
  5. chunkr_ai/_constants.py +14 -0
  6. chunkr_ai/_exceptions.py +108 -0
  7. chunkr_ai/_files.py +123 -0
  8. chunkr_ai/_models.py +829 -0
  9. chunkr_ai/_qs.py +150 -0
  10. chunkr_ai/_resource.py +43 -0
  11. chunkr_ai/_response.py +830 -0
  12. chunkr_ai/_streaming.py +333 -0
  13. chunkr_ai/_types.py +219 -0
  14. chunkr_ai/_utils/__init__.py +57 -0
  15. chunkr_ai/_utils/_logs.py +25 -0
  16. chunkr_ai/_utils/_proxy.py +65 -0
  17. chunkr_ai/_utils/_reflection.py +42 -0
  18. chunkr_ai/_utils/_resources_proxy.py +24 -0
  19. chunkr_ai/_utils/_streams.py +12 -0
  20. chunkr_ai/_utils/_sync.py +86 -0
  21. chunkr_ai/_utils/_transform.py +447 -0
  22. chunkr_ai/_utils/_typing.py +151 -0
  23. chunkr_ai/_utils/_utils.py +422 -0
  24. chunkr_ai/_version.py +4 -0
  25. chunkr_ai/lib/.keep +4 -0
  26. chunkr_ai/pagination.py +71 -0
  27. chunkr_ai/resources/__init__.py +33 -0
  28. chunkr_ai/resources/health.py +136 -0
  29. chunkr_ai/resources/task.py +1166 -0
  30. chunkr_ai/types/__init__.py +27 -0
  31. chunkr_ai/types/auto_generation_config.py +39 -0
  32. chunkr_ai/types/auto_generation_config_param.py +39 -0
  33. chunkr_ai/types/bounding_box.py +19 -0
  34. chunkr_ai/types/chunk_processing.py +40 -0
  35. chunkr_ai/types/chunk_processing_param.py +42 -0
  36. chunkr_ai/types/health_check_response.py +7 -0
  37. chunkr_ai/types/ignore_generation_config.py +39 -0
  38. chunkr_ai/types/ignore_generation_config_param.py +39 -0
  39. chunkr_ai/types/llm_generation_config.py +39 -0
  40. chunkr_ai/types/llm_generation_config_param.py +39 -0
  41. chunkr_ai/types/llm_processing.py +36 -0
  42. chunkr_ai/types/llm_processing_param.py +36 -0
  43. chunkr_ai/types/picture_generation_config.py +39 -0
  44. chunkr_ai/types/picture_generation_config_param.py +39 -0
  45. chunkr_ai/types/segment_processing.py +280 -0
  46. chunkr_ai/types/segment_processing_param.py +281 -0
  47. chunkr_ai/types/table_generation_config.py +39 -0
  48. chunkr_ai/types/table_generation_config_param.py +39 -0
  49. chunkr_ai/types/task.py +379 -0
  50. chunkr_ai/types/task_get_params.py +18 -0
  51. chunkr_ai/types/task_list_params.py +37 -0
  52. chunkr_ai/types/task_parse_params.py +90 -0
  53. chunkr_ai/types/task_update_params.py +90 -0
  54. chunkr_ai-0.1.0a1.dist-info/METADATA +504 -0
  55. chunkr_ai-0.1.0a1.dist-info/RECORD +58 -0
  56. {chunkr_ai-0.0.50.dist-info → chunkr_ai-0.1.0a1.dist-info}/WHEEL +1 -2
  57. chunkr_ai-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  58. chunkr_ai/api/auth.py +0 -13
  59. chunkr_ai/api/chunkr.py +0 -103
  60. chunkr_ai/api/chunkr_base.py +0 -185
  61. chunkr_ai/api/configuration.py +0 -305
  62. chunkr_ai/api/decorators.py +0 -101
  63. chunkr_ai/api/misc.py +0 -139
  64. chunkr_ai/api/protocol.py +0 -14
  65. chunkr_ai/api/task_response.py +0 -208
  66. chunkr_ai/models.py +0 -53
  67. chunkr_ai-0.0.50.dist-info/METADATA +0 -268
  68. chunkr_ai-0.0.50.dist-info/RECORD +0 -16
  69. chunkr_ai-0.0.50.dist-info/licenses/LICENSE +0 -21
  70. chunkr_ai-0.0.50.dist-info/top_level.txt +0 -1
  71. /chunkr_ai/{api/__init__.py → py.typed} +0 -0
@@ -1,305 +0,0 @@
1
- from pydantic import BaseModel, Field, ConfigDict
2
- from enum import Enum
3
- from typing import Any, List, Optional, Union
4
- from pydantic import field_validator, field_serializer
5
-
6
class GenerationStrategy(str, Enum):
    """String-valued enum of generation strategies.

    Used as the type of the ``html`` and ``markdown`` options on
    ``GenerationConfig``.
    """

    LLM = "LLM"
    AUTO = "Auto"
9
-
10
class CroppingStrategy(str, Enum):
    """String-valued enum of cropping strategies (type of ``GenerationConfig.crop_image``)."""

    ALL = "All"
    AUTO = "Auto"
13
-
14
class EmbedSource(str, Enum):
    """String-valued enum of the sources listed in ``GenerationConfig.embed_sources``."""

    HTML = "HTML"
    MARKDOWN = "Markdown"
    LLM = "LLM"
    CONTENT = "Content"
19
-
20
class GenerationConfig(BaseModel):
    """Generation options applied to one segment type.

    Every option is optional and defaults to ``None`` except
    ``embed_sources``, which defaults to a fresh ``[EmbedSource.MARKDOWN]``
    list per instance.
    """

    html: Optional[GenerationStrategy] = None
    llm: Optional[str] = None
    markdown: Optional[GenerationStrategy] = None
    crop_image: Optional[CroppingStrategy] = None
    # default_factory builds a new list for each model instance.
    embed_sources: Optional[List[EmbedSource]] = Field(
        default_factory=lambda: [EmbedSource.MARKDOWN]
    )
    extended_context: Optional[bool] = None
27
-
28
class SegmentProcessing(BaseModel):
    """Optional ``GenerationConfig`` for each segment type.

    Each field declares an explicit PascalCase alias, and
    ``populate_by_name=True`` allows population by either the snake_case
    field name or the alias.
    """

    model_config = ConfigDict(alias_generator=str.title, populate_by_name=True)

    caption: Optional[GenerationConfig] = Field(alias="Caption", default=None)
    footnote: Optional[GenerationConfig] = Field(alias="Footnote", default=None)
    formula: Optional[GenerationConfig] = Field(alias="Formula", default=None)
    list_item: Optional[GenerationConfig] = Field(alias="ListItem", default=None)
    page: Optional[GenerationConfig] = Field(alias="Page", default=None)
    page_footer: Optional[GenerationConfig] = Field(alias="PageFooter", default=None)
    page_header: Optional[GenerationConfig] = Field(alias="PageHeader", default=None)
    picture: Optional[GenerationConfig] = Field(alias="Picture", default=None)
    section_header: Optional[GenerationConfig] = Field(alias="SectionHeader", default=None)
    table: Optional[GenerationConfig] = Field(alias="Table", default=None)
    text: Optional[GenerationConfig] = Field(alias="Text", default=None)
    title: Optional[GenerationConfig] = Field(alias="Title", default=None)
43
-
44
class Tokenizer(str, Enum):
    """String-valued enum of the predefined tokenizer identifiers."""

    WORD = "Word"
    CL100K_BASE = "Cl100kBase"
    XLM_ROBERTA_BASE = "XlmRobertaBase"
    BERT_BASE_UNCASED = "BertBaseUncased"
49
-
50
class TokenizerType(BaseModel):
    """Tokenizer selector holding a predefined ``Tokenizer`` or a free-form string.

    ``enum_value`` takes precedence over ``string_value`` in both ``__str__``
    and ``model_dump``.
    """

    model_config = ConfigDict()

    enum_value: Optional[Tokenizer] = None
    string_value: Optional[str] = None

    @classmethod
    def from_enum(cls, enum_value: Tokenizer) -> "TokenizerType":
        """Build a selector wrapping a predefined tokenizer."""
        return cls(enum_value=enum_value)

    @classmethod
    def from_string(cls, string_value: str) -> "TokenizerType":
        """Build a selector wrapping a free-form tokenizer name."""
        return cls(string_value=string_value)

    def __str__(self) -> str:
        """Render as ``enum:<value>``, ``string:<value>``, or ``""`` when unset."""
        if self.enum_value is not None:
            return f"enum:{self.enum_value.value}"
        if self.string_value is not None:
            return f"string:{self.string_value}"
        return ""

    def model_dump(self, **kwargs):
        """Return ``{"Enum": ...}``, ``{"String": ...}``, or ``{}`` when unset.

        ``kwargs`` are accepted but not used.
        """
        if self.enum_value is not None:
            return {"Enum": self.enum_value.value}
        if self.string_value is not None:
            return {"String": self.string_value}
        return {}
77
-
78
class ChunkProcessing(BaseModel):
    """Chunking options: header/footer handling, target length, and tokenizer.

    ``tokenizer`` accepts a ``TokenizerType``, a ``Tokenizer`` member, a plain
    string, or a ``{"Enum": ...}`` / ``{"String": ...}`` dict; the ``before``
    validator normalises all of them to ``TokenizerType`` (or ``None``).
    """

    ignore_headers_and_footers: Optional[bool] = True
    target_length: Optional[int] = None
    tokenizer: Optional[Union[TokenizerType, Tokenizer, str]] = None

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_serializer("tokenizer")
    def serialize_tokenizer(self, tokenizer: Optional[TokenizerType], _info):
        """Serialise the tokenizer through ``TokenizerType.model_dump``; ``None`` stays ``None``."""
        return None if tokenizer is None else tokenizer.model_dump()

    @field_validator("tokenizer", mode="before")
    def validate_tokenizer(cls, v):
        """Coerce any accepted tokenizer input to ``TokenizerType``.

        Raises:
            ValueError: If ``v`` is not one of the supported shapes.
        """
        if v is None:
            return None
        if isinstance(v, TokenizerType):
            return v
        # Checked before the generic ``str`` case: Tokenizer members are strings too.
        if isinstance(v, Tokenizer):
            return TokenizerType(enum_value=v)

        if isinstance(v, dict):
            if "Enum" in v:
                raw = v["Enum"]
                try:
                    return TokenizerType(enum_value=Tokenizer(raw))
                except ValueError:
                    # Unknown enum value: keep it as a free-form string.
                    return TokenizerType(string_value=raw)
            if "String" in v:
                return TokenizerType(string_value=v["String"])

        if isinstance(v, str):
            try:
                return TokenizerType(enum_value=Tokenizer(v))
            except ValueError:
                return TokenizerType(string_value=v)

        raise ValueError(f"Cannot convert {v} to TokenizerType")
120
-
121
class OcrStrategy(str, Enum):
    """String-valued enum of OCR strategies (type of ``Configuration.ocr_strategy``)."""

    ALL = "All"
    AUTO = "Auto"
124
-
125
class SegmentationStrategy(str, Enum):
    """String-valued enum of segmentation strategies (type of ``Configuration.segmentation_strategy``)."""

    LAYOUT_ANALYSIS = "LayoutAnalysis"
    PAGE = "Page"
128
-
129
class ErrorHandlingStrategy(str, Enum):
    """String-valued enum of error-handling strategies (type of ``Configuration.error_handling``)."""

    FAIL = "Fail"
    CONTINUE = "Continue"
132
-
133
class FallbackStrategy(BaseModel):
    """Fallback strategy: ``"None"``, ``"Default"``, or ``"Model"`` with a model id.

    The wire format produced by ``model_dump`` is the bare string for
    ``"None"`` / ``"Default"`` and ``{"Model": <model_id>}`` for ``"Model"``.
    ``model_validate`` accepts the same shapes.
    """

    model_config = ConfigDict()

    type: str
    model_id: Optional[str] = None

    @classmethod
    def none(cls) -> "FallbackStrategy":
        """Return the ``"None"`` strategy."""
        return cls(type="None")

    @classmethod
    def default(cls) -> "FallbackStrategy":
        """Return the ``"Default"`` strategy."""
        return cls(type="Default")

    @classmethod
    def model(cls, model_id: str) -> "FallbackStrategy":
        """Return a ``"Model"`` strategy bound to ``model_id``."""
        return cls(type="Model", model_id=model_id)

    def __str__(self) -> str:
        """Render as ``Model(<model_id>)`` or the bare type name."""
        if self.type == "Model":
            return f"Model({self.model_id})"
        return self.type

    def model_dump(self, **kwargs):
        """Return the wire form: ``{"Model": model_id}`` or the bare type string.

        ``kwargs`` are accepted but not used.
        """
        if self.type == "Model":
            return {"Model": self.model_id}
        return self.type

    @field_validator("type")
    def validate_type(cls, v):
        """Reject any ``type`` other than ``"None"``, ``"Default"`` or ``"Model"``."""
        if v not in ("None", "Default", "Model"):
            raise ValueError(f"Invalid fallback strategy: {v}")
        return v

    @classmethod
    def model_validate(cls, obj, **kwargs):
        """Validate ``obj``, additionally accepting the compact wire shapes.

        Args:
            obj: A type string, a single-key ``{"Model": "<id>"}`` dict, or
                anything ``BaseModel.model_validate`` accepts.
            **kwargs: Forwarded to ``BaseModel.model_validate`` (for example
                ``strict``, ``from_attributes``, ``context``) so the override
                stays call-compatible with the base class.
        """
        if isinstance(obj, str):
            try:
                return cls(type=obj)
            except ValueError:
                # Unknown type string: fall through so the base validation
                # reports the error.
                pass
        elif isinstance(obj, dict) and len(obj) == 1 and "Model" in obj:
            return cls(type="Model", model_id=obj["Model"])

        return super().model_validate(obj, **kwargs)
186
-
187
class LlmProcessing(BaseModel):
    """LLM options: model id, fallback strategy, completion-token cap, temperature."""

    model_id: Optional[str] = None
    fallback_strategy: FallbackStrategy = Field(default_factory=FallbackStrategy.default)
    max_completion_tokens: Optional[int] = None
    temperature: float = 0.0

    model_config = ConfigDict()

    @field_serializer("fallback_strategy")
    def serialize_fallback_strategy(self, fallback_strategy: FallbackStrategy, _info):
        """Serialise through ``FallbackStrategy.model_dump`` (string or ``{"Model": id}``)."""
        return fallback_strategy.model_dump()

    @field_validator("fallback_strategy", mode="before")
    def validate_fallback_strategy(cls, v):
        """Coerce compact wire shapes into ``FallbackStrategy`` before validation.

        * ``"None"`` / ``"Default"`` map to the matching strategy.
        * Any other string is treated as a model id.
        * A single-key dict keyed ``"Model"``, ``"None"`` or ``"Default"`` maps
          to the matching strategy.
        * Everything else is returned unchanged for normal validation.
        """
        if isinstance(v, str):
            if v == "None":
                return FallbackStrategy.none()
            if v == "Default":
                return FallbackStrategy.default()
            # Any other string is interpreted as a model id.
            try:
                return FallbackStrategy.model(v)
            except ValueError:
                pass  # fall through to normal validation
        elif isinstance(v, dict) and len(v) == 1:
            if "Model" in v:
                return FallbackStrategy.model(v["Model"])
            # Test key membership only. The previous
            # ``or v.get("None") is None`` was true for every dict lacking a
            # "None" key, so {"Default": ...} and {"type": ...} were silently
            # turned into the "None" strategy.
            if "None" in v:
                return FallbackStrategy.none()
            if "Default" in v:
                return FallbackStrategy.default()

        return v
221
-
222
class BoundingBox(BaseModel):
    """Rectangle given by its ``left``/``top`` origin and ``width``/``height``."""

    left: float
    top: float
    width: float
    height: float
227
-
228
class OCRResult(BaseModel):
    """One OCR result: bounding box, recognised text, and confidence."""

    bbox: BoundingBox
    text: str
    # No default is declared, so the field must be supplied, though it may be None.
    confidence: Optional[float]
232
-
233
class SegmentType(str, Enum):
    """String-valued enum of segment types.

    The values match the aliases declared on ``SegmentProcessing``.
    """

    CAPTION = "Caption"
    FOOTNOTE = "Footnote"
    FORMULA = "Formula"
    LIST_ITEM = "ListItem"
    PAGE = "Page"
    PAGE_FOOTER = "PageFooter"
    PAGE_HEADER = "PageHeader"
    PICTURE = "Picture"
    SECTION_HEADER = "SectionHeader"
    TABLE = "Table"
    TEXT = "Text"
    TITLE = "Title"
246
-
247
class Segment(BaseModel):
    """A single segment: geometry, content renderings, OCR results, and type."""

    bbox: BoundingBox
    content: str
    page_height: float
    llm: Optional[str] = None
    html: Optional[str] = None
    image: Optional[str] = None
    markdown: Optional[str] = None
    # Defaults to a fresh empty list per instance.
    ocr: Optional[List[OCRResult]] = Field(default_factory=list)
    page_number: int
    page_width: float
    segment_id: str
    segment_type: SegmentType
    # No default is declared, so the field must be supplied, though it may be None.
    confidence: Optional[float]
261
-
262
class Chunk(BaseModel):
    """A chunk: id, length, its segments, and an optional ``embed`` string."""

    chunk_id: str
    chunk_length: int
    segments: List[Segment]
    embed: Optional[str] = None
267
-
268
class OutputResponse(BaseModel):
    """Output payload: the chunks plus file name, page count, and PDF URL.

    The three ``Optional`` fields declare no default, so they must be present
    in the input even when their value is ``None``.
    """

    chunks: List[Chunk]
    file_name: Optional[str]
    page_count: Optional[int]
    pdf_url: Optional[str]
273
-
274
class Model(str, Enum):
    """String-valued enum used by ``OutputConfiguration.model``."""

    FAST = "Fast"
    HIGH_QUALITY = "HighQuality"
277
-
278
class Pipeline(str, Enum):
    """String-valued enum used by ``Configuration.pipeline``."""

    AZURE = "Azure"
    CHUNKR = "Chunkr"
281
-
282
class Configuration(BaseModel):
    """Task configuration; every option is optional and defaults to ``None``."""

    chunk_processing: Optional[ChunkProcessing] = None
    expires_in: Optional[int] = None
    error_handling: Optional[ErrorHandlingStrategy] = None
    high_resolution: Optional[bool] = None
    ocr_strategy: Optional[OcrStrategy] = None
    segment_processing: Optional[SegmentProcessing] = None
    segmentation_strategy: Optional[SegmentationStrategy] = None
    pipeline: Optional[Pipeline] = None
    llm_processing: Optional[LlmProcessing] = None
292
-
293
class OutputConfiguration(Configuration):
    """``Configuration`` extended with the input file URL and deprecated fields."""

    input_file_url: Optional[str] = None
    # Deprecated
    json_schema: Optional[Any] = None
    model: Optional[Model] = None
    target_chunk_length: Optional[int] = None
299
-
300
class Status(str, Enum):
    """String-valued enum of task statuses."""

    STARTING = "Starting"
    PROCESSING = "Processing"
    SUCCEEDED = "Succeeded"
    FAILED = "Failed"
    CANCELLED = "Cancelled"
@@ -1,101 +0,0 @@
1
- import asyncio
2
- import functools
3
- import httpx
4
- import nest_asyncio
5
- from typing import Callable, Any, TypeVar, Awaitable, Union, overload
6
- try:
7
- from typing import ParamSpec
8
- except ImportError:
9
- from typing_extensions import ParamSpec
10
-
11
- T = TypeVar('T')
12
- P = ParamSpec('P')
13
-
14
- _sync_loop = None
15
-
16
def anywhere() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Union[Awaitable[T], T]]]:
    """Build a decorator that lets an async function be called from sync or async code.

    With a running event loop the wrapper returns the un-awaited coroutine for
    the caller to await. Without one, it runs the coroutine to completion on a
    module-level loop (``_sync_loop``, created on first use) and returns the
    result.
    """

    def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Union[Awaitable[T], T]]:
        @functools.wraps(async_func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> Union[Awaitable[T], T]:
            global _sync_loop

            try:
                nest_asyncio.apply()
            except ImportError:
                pass

            try:
                # Raises RuntimeError when no loop is running in this thread.
                asyncio.get_running_loop()
                return async_func(*args, **kwargs)
            except RuntimeError:
                if _sync_loop is None:
                    _sync_loop = asyncio.new_event_loop()
                asyncio.set_event_loop(_sync_loop)
                try:
                    pending = async_func(*args, **kwargs)
                    return _sync_loop.run_until_complete(pending)
                finally:
                    # Detach the shared loop again once the call is done.
                    asyncio.set_event_loop(None)

        return wrapper

    return decorator
41
-
42
def ensure_client() -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]:
    """Build a decorator that gives ``self`` a usable ``httpx.AsyncClient`` before the call.

    If ``self._client`` is falsy or already closed, it is replaced with a new
    ``httpx.AsyncClient()`` and then the wrapped coroutine is awaited.
    """

    def decorator(async_func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        @functools.wraps(async_func)
        async def wrapper(self: Any, *args: Any, **kwargs: Any) -> T:
            client_missing = not self._client
            if client_missing or self._client.is_closed:
                self._client = httpx.AsyncClient()
            result = await async_func(self, *args, **kwargs)
            return result

        return wrapper

    return decorator
52
-
53
def require_task() -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]:
    """Build a decorator that checks task state before running the wrapped coroutine.

    The wrapper raises ``ValueError`` when ``self.task_url`` or ``self._client``
    is falsy, and replaces a missing or closed ``self._client._client`` with a
    new ``httpx.AsyncClient()``.
    """

    def decorator(async_func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        @functools.wraps(async_func)
        async def wrapper(self: Any, *args: Any, **kwargs: Any) -> T:
            if not self.task_url:
                raise ValueError("Task URL not found")
            if not self._client:
                raise ValueError("Client not found")

            owner = self._client
            http_missing = not owner._client
            if http_missing or owner._client.is_closed:
                owner._client = httpx.AsyncClient()

            result = await async_func(self, *args, **kwargs)
            return result

        return wrapper

    return decorator
67
-
68
def retry_on_429(max_retries: int = 3, initial_delay: float = 0.5) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
    """Build a decorator that retries a coroutine on HTTP 429 Too Many Requests.

    Args:
        max_retries: Maximum number of retry attempts (default: 3).
        initial_delay: Delay in seconds before the first retry; doubled on each
            subsequent retry, with up to 25% random jitter added (default: 0.5).

    The wrapped coroutine re-raises ``httpx.HTTPStatusError`` unchanged for any
    status other than 429, or once ``max_retries`` retries are used up. A
    numeric ``Retry-After`` response header overrides the computed delay.
    """

    def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
        @functools.wraps(async_func)
        async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
            import logging
            import random

            retries = 0
            while True:
                try:
                    return await async_func(*args, **kwargs)
                except httpx.HTTPStatusError as e:
                    if e.response.status_code != 429:
                        raise
                    if retries >= max_retries:
                        # Report through logging rather than printing to stdout.
                        logging.getLogger(__name__).warning("Max retries reached")
                        raise

                    # Compute the back-off before incrementing so the first
                    # retry waits ``initial_delay`` as documented (it used to
                    # wait twice that).
                    delay = initial_delay * (2 ** retries)
                    retries += 1

                    # Use Retry-After when it is a plain number of seconds;
                    # other forms (e.g. an HTTP date) keep the computed delay.
                    retry_after = e.response.headers.get("Retry-After")
                    if retry_after:
                        try:
                            delay = float(retry_after)
                        except (ValueError, TypeError):
                            pass

                    delay = max(delay, 0.0)
                    jitter = random.uniform(0, 0.25) * delay
                    await asyncio.sleep(delay + jitter)

        return wrapper

    return decorator
chunkr_ai/api/misc.py DELETED
@@ -1,139 +0,0 @@
1
- from .configuration import Configuration
2
- import base64
3
- import io
4
- from pathlib import Path
5
- from PIL import Image
6
- from typing import Union, Tuple, BinaryIO, Optional, Any
7
-
8
def _decodes_as_base64(text: str) -> bool:
    """Return True when ``base64.b64decode`` accepts *text* (default, non-strict mode)."""
    try:
        base64.b64decode(text)
    except ValueError:  # binascii.Error is a ValueError subclass
        return False
    return True


async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image, bytes, bytearray, memoryview]) -> Tuple[Optional[str], str]:
    """Convert various file inputs into a ``(filename, content)`` tuple.

    Args:
        file: Input file, one of:
            - URL string starting with http:// or https:// (returned as-is)
            - ``data:`` URL string (returned as-is)
            - Base64 string (returned as-is)
            - Local file path, as ``str`` or ``Path`` (read and base64-encoded)
            - Opened binary file (read from the start and base64-encoded)
            - PIL/Pillow Image object (saved and base64-encoded)
            - Bytes-like object (base64-encoded, unless it already decodes
              as base64 text, in which case that text is returned)

    Returns:
        ``(filename, content)`` where content is a URL or a base64 string.
        The filename is None for URLs, base64 strings, bytes and PIL images.

    Raises:
        FileNotFoundError: If a ``Path`` input does not exist.
        TypeError: If the input type is not supported.
        ValueError: If a string is neither an existing path nor valid base64,
            or if a file path has no extension.

    Note:
        URLs are not fetched or validated here.
    """
    # Bytes-like objects
    if isinstance(file, (bytes, bytearray, memoryview)):
        file_bytes = bytes(file)
        # The payload may already be base64 text in bytes form.
        try:
            as_text = file_bytes.decode("utf-8", errors="strict")
        except UnicodeDecodeError:
            as_text = None
        if as_text is not None and _decodes_as_base64(as_text):
            return None, as_text
        return None, base64.b64encode(file_bytes).decode()

    # Strings: URL, data URL, file path, or base64
    if isinstance(file, str):
        if file.startswith(("http://", "https://", "data:")):
            return None, file

        candidate = Path(file)
        path_error = None
        try:
            is_existing_path = candidate.exists()
        except (OSError, ValueError) as exc:
            # For example, a long base64 string is not a usable file name.
            is_existing_path = False
            path_error = exc

        if is_existing_path:
            # Continue with the Path handling below.
            file = candidate
        elif _decodes_as_base64(file):
            return None, file
        elif path_error is not None:
            raise ValueError(f"Unable to process file: {path_error}")
        else:
            raise ValueError(
                f"Unable to process file: File not found: {file} and it's not a valid base64 string"
            )

    # File paths: read and base64-encode
    if isinstance(file, Path):
        path = Path(file).resolve()
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file}")

        with open(path, "rb") as f:
            file_content = f.read()
        if not path.suffix.lower().lstrip("."):
            raise ValueError("File must have an extension")
        return path.name, base64.b64encode(file_content).decode()

    # PIL images: save to memory and base64-encode
    if isinstance(file, Image.Image):
        buffer = io.BytesIO()
        image_format = file.format or "PNG"
        file.save(buffer, format=image_format)
        return None, base64.b64encode(buffer.getvalue()).decode()

    # File-like objects: read from the start and base64-encode
    if hasattr(file, "read") and hasattr(file, "seek"):
        file.seek(0)
        file_content = file.read()
        name = getattr(file, "name", "document")
        if not name or not isinstance(name, str):
            name = None
        return name, base64.b64encode(file_content).decode()

    raise TypeError(f"Unsupported file type: {type(file)}")
113
-
114
-
115
async def prepare_upload_data(
    file: Optional[Union[str, Path, BinaryIO, Image.Image, bytes, bytearray, memoryview]] = None,
    filename: Optional[str] = None,
    config: Optional[Configuration] = None,
) -> dict:
    """Assemble the upload payload dictionary.

    Args:
        file: The file to upload; skipped when falsy.
        filename: Optional name that overrides the one derived from ``file``.
        config: Optional configuration; its non-None fields are merged into
            the payload in JSON mode.

    Returns:
        dict: Payload with ``file`` / ``file_name`` (when a file is given) plus
        the dumped configuration fields.
    """
    payload = {}

    if file:
        derived_name, content = await prepare_file(file)
        payload["file"] = content
        payload["file_name"] = filename or derived_name

    if config:
        config_fields = config.model_dump(mode="json", exclude_none=True)
        payload.update(config_fields)

    return payload
chunkr_ai/api/protocol.py DELETED
@@ -1,14 +0,0 @@
1
- from typing import Optional, runtime_checkable, Protocol
2
- from httpx import AsyncClient
3
-
4
-
5
@runtime_checkable
class ChunkrClientProtocol(Protocol):
    """Structural interface expected of Chunkr clients.

    Being ``runtime_checkable``, it can be used with ``isinstance``, which
    checks only that the members exist.
    """

    raise_on_failure: bool = True
    _client: Optional[AsyncClient] = None

    def _headers(self) -> dict:
        """Return the headers required for API requests."""
        ...