chunkr-ai 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/chunkr.py CHANGED
@@ -1,4 +1,4 @@
1
- from .base import ChunkrBase
1
+ from .chunkr_base import ChunkrBase
2
2
  from .config import Configuration
3
3
  from .task import TaskResponse
4
4
  from pathlib import Path
@@ -163,5 +163,3 @@ class Chunkr(ChunkrBase):
163
163
  headers=self._headers()
164
164
  )
165
165
  r.raise_for_status()
166
-
167
-
@@ -1,4 +1,4 @@
1
- from .base import ChunkrBase
1
+ from .chunkr_base import ChunkrBase
2
2
  from .task import TaskResponse
3
3
  from .config import Configuration
4
4
  import httpx
chunkr_ai/api/config.py CHANGED
@@ -40,7 +40,6 @@ class ChunkProcessing(BaseModel):
40
40
 
41
41
  class Property(BaseModel):
42
42
  name: str
43
- title: Optional[str] = None
44
43
  prop_type: str
45
44
  description: Optional[str] = None
46
45
  default: Optional[str] = None
chunkr_ai/api/misc.py CHANGED
@@ -1,11 +1,11 @@
1
+ from .config import Configuration, Property, JsonSchema
1
2
  import io
2
3
  import json
3
4
  from pathlib import Path
4
5
  from PIL import Image
5
6
  import requests
6
7
  from typing import Union, Tuple, BinaryIO, Optional
7
- from .config import Configuration
8
-
8
+ from pydantic import BaseModel
9
9
 
10
10
  def prepare_file(
11
11
  file: Union[str, Path, BinaryIO, Image.Image]
@@ -15,8 +15,31 @@ def prepare_file(
15
15
  if isinstance(file, str) and (file.startswith('http://') or file.startswith('https://')):
16
16
  response = requests.get(file)
17
17
  response.raise_for_status()
18
+
19
+ # Try to get filename from Content-Disposition header first
20
+ filename = None
21
+ content_disposition = response.headers.get('Content-Disposition')
22
+ if content_disposition and 'filename=' in content_disposition:
23
+ filename = content_disposition.split('filename=')[-1].strip('"\'')
24
+
25
+ # If no Content-Disposition, try to get clean filename from URL path
26
+ if not filename:
27
+ from urllib.parse import urlparse, unquote
28
+ parsed_url = urlparse(file)
29
+ path = unquote(parsed_url.path)
30
+ filename = Path(path).name if path else None
31
+
32
+ # Fallback to default name if we couldn't extract one
33
+ filename = filename or 'downloaded_file'
34
+
35
+ # Sanitize filename: remove invalid characters and limit length
36
+ import re
37
+ filename = re.sub(r'[<>:"/\\|?*%]', '_', filename) # Replace invalid chars with underscore
38
+ filename = re.sub(r'\s+', '_', filename) # Replace whitespace with underscore
39
+ filename = filename.strip('._') # Remove leading/trailing dots and underscores
40
+ filename = filename[:255] # Limit length to 255 characters
41
+
18
42
  file_obj = io.BytesIO(response.content)
19
- filename = Path(file.split('/')[-1]).name or 'downloaded_file'
20
43
  return filename, file_obj
21
44
 
22
45
  # Handle base64 strings
@@ -104,3 +127,33 @@ def prepare_upload_data(
104
127
  files[key] = (None, json.dumps(value), 'application/json')
105
128
 
106
129
  return files
130
+
131
def from_pydantic(pydantic: Union[BaseModel, type]) -> dict:
    """Convert a Pydantic model to a Chunkr JSON schema dict.

    Args:
        pydantic: A Pydantic ``BaseModel`` subclass, or an instance of one.

    Returns:
        dict: The ``JsonSchema`` built from the model's fields, dumped in
        JSON mode with ``None``-valued entries omitted.
    """
    # Accept either the model class itself or an instance of it.
    model = pydantic if isinstance(pydantic, type) else type(pydantic)
    schema = model.model_json_schema()

    properties = []
    for name, details in schema.get('properties', {}).items():
        default = details.get('default')
        properties.append(
            Property(
                name=name,
                # Schema entries without a top-level "type" key fall back to
                # "string".
                # NOTE(review): presumably this is hit by Optional/union
                # fields, whose schema has no top-level "type" - confirm
                # that "string" is the intended type for those.
                prop_type=details.get('type', 'string'),
                description=details.get('description'),
                # Property.default is declared Optional[str], so any
                # non-None default is stringified.
                default=str(default) if default is not None else None,
            )
        )

    json_schema = JsonSchema(
        title=schema.get('title', model.__name__),
        properties=properties,
    )
    return json_schema.model_dump(mode="json", exclude_none=True)
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: httpx>=0.24.0
9
+ Requires-Dist: httpx>=0.25.0
10
10
  Requires-Dist: pillow>=10.0.0
11
11
  Requires-Dist: pydantic>=2.0.0
12
12
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -0,0 +1,19 @@
1
+ chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
+ chunkr_ai/main.py,sha256=_MT1lcnNiXjVW9ZkZYl28SB_f6M9g_IOgZxvhodTzAo,394
3
+ chunkr_ai/models.py,sha256=T8_F-Y1US21ZJVzLIaroqp-Hd0_ZFbdkbEOxr63-PNE,827
4
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
6
+ chunkr_ai/api/chunkr.py,sha256=0qpV9b1hOpDhA9EuKkXW9X_laUmw5NY3ZYq0cUOTbww,5190
7
+ chunkr_ai/api/chunkr_async.py,sha256=ZkLBrn4cqzu3sqMfS8cfZZgSvpdyQuWZP95lfGxuHx0,4900
8
+ chunkr_ai/api/chunkr_base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
9
+ chunkr_ai/api/config.py,sha256=eu7a28UjlNaM3QRrzElRTQXqMPBynAvlusVSIAMNXUY,4203
10
+ chunkr_ai/api/misc.py,sha256=DiY-BV5nPMDVKAiHTcND8w-8mSB-dENxrOhxnkyEoRA,6034
11
+ chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
12
+ chunkr_ai/api/task.py,sha256=EB6RK8ms7EaNj57tNJZoNgNMHGWKXFhkQ1WC7gk5ht4,6059
13
+ chunkr_ai/api/task_async.py,sha256=Dd-Fenie0Q6GxXce7OlXvuQ14NQ58F_0b9P7AGKWyYA,3833
14
+ chunkr_ai/api/task_base.py,sha256=Tkk7dhIeB3ic5M9g_b-MVRdNv4XQTvajpaUy8JylQ8A,526
15
+ chunkr_ai-0.0.10.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ chunkr_ai-0.0.10.dist-info/METADATA,sha256=W8PCDpT4hN5tpn_9fyVrjEbd0abG0ReP5reG4_9Glp8,4845
17
+ chunkr_ai-0.0.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
18
+ chunkr_ai-0.0.10.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
19
+ chunkr_ai-0.0.10.dist-info/RECORD,,
chunkr_ai/api/api.py DELETED
File without changes
@@ -1,20 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
- chunkr_ai/main.py,sha256=_MT1lcnNiXjVW9ZkZYl28SB_f6M9g_IOgZxvhodTzAo,394
3
- chunkr_ai/models.py,sha256=T8_F-Y1US21ZJVzLIaroqp-Hd0_ZFbdkbEOxr63-PNE,827
4
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
7
- chunkr_ai/api/base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
8
- chunkr_ai/api/chunkr.py,sha256=PmrK37HbK2T1KUPitKnt4wZqIujL61Jo12qW9DEpNMI,5186
9
- chunkr_ai/api/chunkr_async.py,sha256=2yYyAO9-j2xKQYH0fJb2S6gL26hgbtL4QyqlG9l0QBY,4893
10
- chunkr_ai/api/config.py,sha256=XIqXZ_8q7U_BEmY5wyIC9mbQGZBw1956EN9yhC4svD0,4235
11
- chunkr_ai/api/misc.py,sha256=tScsUUcrqeVh_bZv1YlbmjGkQSTDQN8NyKxoNwAG6XA,3792
12
- chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
13
- chunkr_ai/api/task.py,sha256=EB6RK8ms7EaNj57tNJZoNgNMHGWKXFhkQ1WC7gk5ht4,6059
14
- chunkr_ai/api/task_async.py,sha256=Dd-Fenie0Q6GxXce7OlXvuQ14NQ58F_0b9P7AGKWyYA,3833
15
- chunkr_ai/api/task_base.py,sha256=Tkk7dhIeB3ic5M9g_b-MVRdNv4XQTvajpaUy8JylQ8A,526
16
- chunkr_ai-0.0.9.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- chunkr_ai-0.0.9.dist-info/METADATA,sha256=XFGPjuDARO1VYvdcyMOHhxZK1FYjEr0_ySI0Ni6tWMc,4844
18
- chunkr_ai-0.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
19
- chunkr_ai-0.0.9.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
20
- chunkr_ai-0.0.9.dist-info/RECORD,,
File without changes