chunkr-ai 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/chunkr.py +1 -3
- chunkr_ai/api/chunkr_async.py +1 -1
- chunkr_ai/api/config.py +0 -1
- chunkr_ai/api/misc.py +56 -3
- {chunkr_ai-0.0.9.dist-info → chunkr_ai-0.0.10.dist-info}/METADATA +2 -2
- chunkr_ai-0.0.10.dist-info/RECORD +19 -0
- chunkr_ai/api/api.py +0 -0
- chunkr_ai-0.0.9.dist-info/RECORD +0 -20
- /chunkr_ai/api/{base.py → chunkr_base.py} +0 -0
- {chunkr_ai-0.0.9.dist-info → chunkr_ai-0.0.10.dist-info}/LICENSE +0 -0
- {chunkr_ai-0.0.9.dist-info → chunkr_ai-0.0.10.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.9.dist-info → chunkr_ai-0.0.10.dist-info}/top_level.txt +0 -0
chunkr_ai/api/chunkr.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from .base import ChunkrBase
|
1
|
+
from .chunkr_base import ChunkrBase
|
2
2
|
from .config import Configuration
|
3
3
|
from .task import TaskResponse
|
4
4
|
from pathlib import Path
|
@@ -163,5 +163,3 @@ class Chunkr(ChunkrBase):
|
|
163
163
|
headers=self._headers()
|
164
164
|
)
|
165
165
|
r.raise_for_status()
|
166
|
-
|
167
|
-
|
chunkr_ai/api/chunkr_async.py
CHANGED
chunkr_ai/api/config.py
CHANGED
chunkr_ai/api/misc.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
+
from .config import Configuration, Property, JsonSchema
|
1
2
|
import io
|
2
3
|
import json
|
3
4
|
from pathlib import Path
|
4
5
|
from PIL import Image
|
5
6
|
import requests
|
6
7
|
from typing import Union, Tuple, BinaryIO, Optional
|
7
|
-
from
|
8
|
-
|
8
|
+
from pydantic import BaseModel
|
9
9
|
|
10
10
|
def prepare_file(
|
11
11
|
file: Union[str, Path, BinaryIO, Image.Image]
|
@@ -15,8 +15,31 @@ def prepare_file(
|
|
15
15
|
if isinstance(file, str) and (file.startswith('http://') or file.startswith('https://')):
|
16
16
|
response = requests.get(file)
|
17
17
|
response.raise_for_status()
|
18
|
+
|
19
|
+
# Try to get filename from Content-Disposition header first
|
20
|
+
filename = None
|
21
|
+
content_disposition = response.headers.get('Content-Disposition')
|
22
|
+
if content_disposition and 'filename=' in content_disposition:
|
23
|
+
filename = content_disposition.split('filename=')[-1].strip('"\'')
|
24
|
+
|
25
|
+
# If no Content-Disposition, try to get clean filename from URL path
|
26
|
+
if not filename:
|
27
|
+
from urllib.parse import urlparse, unquote
|
28
|
+
parsed_url = urlparse(file)
|
29
|
+
path = unquote(parsed_url.path)
|
30
|
+
filename = Path(path).name if path else None
|
31
|
+
|
32
|
+
# Fallback to default name if we couldn't extract one
|
33
|
+
filename = filename or 'downloaded_file'
|
34
|
+
|
35
|
+
# Sanitize filename: remove invalid characters and limit length
|
36
|
+
import re
|
37
|
+
filename = re.sub(r'[<>:"/\\|?*%]', '_', filename) # Replace invalid chars with underscore
|
38
|
+
filename = re.sub(r'\s+', '_', filename) # Replace whitespace with underscore
|
39
|
+
filename = filename.strip('._') # Remove leading/trailing dots and underscores
|
40
|
+
filename = filename[:255] # Limit length to 255 characters
|
41
|
+
|
18
42
|
file_obj = io.BytesIO(response.content)
|
19
|
-
filename = Path(file.split('/')[-1]).name or 'downloaded_file'
|
20
43
|
return filename, file_obj
|
21
44
|
|
22
45
|
# Handle base64 strings
|
@@ -104,3 +127,33 @@ def prepare_upload_data(
|
|
104
127
|
files[key] = (None, json.dumps(value), 'application/json')
|
105
128
|
|
106
129
|
return files
|
130
|
+
|
131
|
+
def from_pydantic(pydantic: BaseModel) -> dict:
|
132
|
+
"""Convert a Pydantic model to a Chunk json schema.
|
133
|
+
|
134
|
+
Args:
|
135
|
+
pydantic: A Pydantic BaseModel class or instance
|
136
|
+
|
137
|
+
Returns:
|
138
|
+
dict: A JSON schema compatible with Chunk's format
|
139
|
+
"""
|
140
|
+
model = pydantic if isinstance(pydantic, type) else pydantic.__class__
|
141
|
+
schema = model.model_json_schema()
|
142
|
+
print(schema)
|
143
|
+
properties = []
|
144
|
+
for name, details in schema.get('properties', {}).items():
|
145
|
+
prop = Property(
|
146
|
+
name=name,
|
147
|
+
title=details.get('title'),
|
148
|
+
prop_type=details.get('type', 'string'),
|
149
|
+
description=details.get('description'),
|
150
|
+
default=str(details.get('default')) if details.get('default') is not None else None
|
151
|
+
)
|
152
|
+
properties.append(prop)
|
153
|
+
|
154
|
+
json_schema = JsonSchema(
|
155
|
+
title=schema.get('title', model.__name__),
|
156
|
+
properties=properties
|
157
|
+
)
|
158
|
+
|
159
|
+
return json_schema.model_dump(mode="json", exclude_none=True)
|
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.0.9
|
3
|
+
Version: 0.0.10
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
Project-URL: Homepage, https://chunkr.ai
|
7
7
|
Description-Content-Type: text/markdown
|
8
8
|
License-File: LICENSE
|
9
|
-
Requires-Dist: httpx>=0.
|
9
|
+
Requires-Dist: httpx>=0.25.0
|
10
10
|
Requires-Dist: pillow>=10.0.0
|
11
11
|
Requires-Dist: pydantic>=2.0.0
|
12
12
|
Requires-Dist: pytest-asyncio>=0.21.0
|
@@ -0,0 +1,19 @@
|
|
1
|
+
chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
|
2
|
+
chunkr_ai/main.py,sha256=_MT1lcnNiXjVW9ZkZYl28SB_f6M9g_IOgZxvhodTzAo,394
|
3
|
+
chunkr_ai/models.py,sha256=T8_F-Y1US21ZJVzLIaroqp-Hd0_ZFbdkbEOxr63-PNE,827
|
4
|
+
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
|
6
|
+
chunkr_ai/api/chunkr.py,sha256=0qpV9b1hOpDhA9EuKkXW9X_laUmw5NY3ZYq0cUOTbww,5190
|
7
|
+
chunkr_ai/api/chunkr_async.py,sha256=ZkLBrn4cqzu3sqMfS8cfZZgSvpdyQuWZP95lfGxuHx0,4900
|
8
|
+
chunkr_ai/api/chunkr_base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
|
9
|
+
chunkr_ai/api/config.py,sha256=eu7a28UjlNaM3QRrzElRTQXqMPBynAvlusVSIAMNXUY,4203
|
10
|
+
chunkr_ai/api/misc.py,sha256=DiY-BV5nPMDVKAiHTcND8w-8mSB-dENxrOhxnkyEoRA,6034
|
11
|
+
chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
|
12
|
+
chunkr_ai/api/task.py,sha256=EB6RK8ms7EaNj57tNJZoNgNMHGWKXFhkQ1WC7gk5ht4,6059
|
13
|
+
chunkr_ai/api/task_async.py,sha256=Dd-Fenie0Q6GxXce7OlXvuQ14NQ58F_0b9P7AGKWyYA,3833
|
14
|
+
chunkr_ai/api/task_base.py,sha256=Tkk7dhIeB3ic5M9g_b-MVRdNv4XQTvajpaUy8JylQ8A,526
|
15
|
+
chunkr_ai-0.0.10.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
+
chunkr_ai-0.0.10.dist-info/METADATA,sha256=W8PCDpT4hN5tpn_9fyVrjEbd0abG0ReP5reG4_9Glp8,4845
|
17
|
+
chunkr_ai-0.0.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
18
|
+
chunkr_ai-0.0.10.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
19
|
+
chunkr_ai-0.0.10.dist-info/RECORD,,
|
chunkr_ai/api/api.py
DELETED
File without changes
|
chunkr_ai-0.0.9.dist-info/RECORD
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
|
2
|
-
chunkr_ai/main.py,sha256=_MT1lcnNiXjVW9ZkZYl28SB_f6M9g_IOgZxvhodTzAo,394
|
3
|
-
chunkr_ai/models.py,sha256=T8_F-Y1US21ZJVzLIaroqp-Hd0_ZFbdkbEOxr63-PNE,827
|
4
|
-
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
|
7
|
-
chunkr_ai/api/base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
|
8
|
-
chunkr_ai/api/chunkr.py,sha256=PmrK37HbK2T1KUPitKnt4wZqIujL61Jo12qW9DEpNMI,5186
|
9
|
-
chunkr_ai/api/chunkr_async.py,sha256=2yYyAO9-j2xKQYH0fJb2S6gL26hgbtL4QyqlG9l0QBY,4893
|
10
|
-
chunkr_ai/api/config.py,sha256=XIqXZ_8q7U_BEmY5wyIC9mbQGZBw1956EN9yhC4svD0,4235
|
11
|
-
chunkr_ai/api/misc.py,sha256=tScsUUcrqeVh_bZv1YlbmjGkQSTDQN8NyKxoNwAG6XA,3792
|
12
|
-
chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
|
13
|
-
chunkr_ai/api/task.py,sha256=EB6RK8ms7EaNj57tNJZoNgNMHGWKXFhkQ1WC7gk5ht4,6059
|
14
|
-
chunkr_ai/api/task_async.py,sha256=Dd-Fenie0Q6GxXce7OlXvuQ14NQ58F_0b9P7AGKWyYA,3833
|
15
|
-
chunkr_ai/api/task_base.py,sha256=Tkk7dhIeB3ic5M9g_b-MVRdNv4XQTvajpaUy8JylQ8A,526
|
16
|
-
chunkr_ai-0.0.9.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
chunkr_ai-0.0.9.dist-info/METADATA,sha256=XFGPjuDARO1VYvdcyMOHhxZK1FYjEr0_ySI0Ni6tWMc,4844
|
18
|
-
chunkr_ai-0.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
19
|
-
chunkr_ai-0.0.9.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
20
|
-
chunkr_ai-0.0.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|