chunkr-ai 0.0.9__tar.gz → 0.0.10__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (26) hide show
  1. {chunkr_ai-0.0.9/src/chunkr_ai.egg-info → chunkr_ai-0.0.10}/PKG-INFO +2 -2
  2. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/pyproject.toml +2 -2
  3. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/chunkr.py +1 -3
  4. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/chunkr_async.py +1 -1
  5. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/config.py +0 -1
  6. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/misc.py +56 -3
  7. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10/src/chunkr_ai.egg-info}/PKG-INFO +2 -2
  8. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai.egg-info/SOURCES.txt +1 -2
  9. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai.egg-info/requires.txt +1 -1
  10. chunkr_ai-0.0.9/src/chunkr_ai/api/api.py +0 -0
  11. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/LICENSE +0 -0
  12. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/README.md +0 -0
  13. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/setup.cfg +0 -0
  14. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/__init__.py +0 -0
  15. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/__init__.py +0 -0
  16. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/auth.py +0 -0
  17. /chunkr_ai-0.0.9/src/chunkr_ai/api/base.py → /chunkr_ai-0.0.10/src/chunkr_ai/api/chunkr_base.py +0 -0
  18. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/protocol.py +0 -0
  19. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/task.py +0 -0
  20. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/task_async.py +0 -0
  21. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/api/task_base.py +0 -0
  22. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/main.py +0 -0
  23. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai/models.py +0 -0
  24. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  25. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/src/chunkr_ai.egg-info/top_level.txt +0 -0
  26. {chunkr_ai-0.0.9 → chunkr_ai-0.0.10}/tests/test_chunkr.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: httpx>=0.24.0
9
+ Requires-Dist: httpx>=0.25.0
10
10
  Requires-Dist: pillow>=10.0.0
11
11
  Requires-Dist: pydantic>=2.0.0
12
12
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.9"
7
+ version = "0.0.10"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
11
11
  license = {"file" = "LICENSE"}
12
12
  urls = {Homepage = "https://chunkr.ai"}
13
13
  dependencies = [
14
- "httpx>=0.24.0",
14
+ "httpx>=0.25.0",
15
15
  "pillow>=10.0.0",
16
16
  "pydantic>=2.0.0",
17
17
  "pytest-asyncio>=0.21.0",
@@ -1,4 +1,4 @@
1
- from .base import ChunkrBase
1
+ from .chunkr_base import ChunkrBase
2
2
  from .config import Configuration
3
3
  from .task import TaskResponse
4
4
  from pathlib import Path
@@ -163,5 +163,3 @@ class Chunkr(ChunkrBase):
163
163
  headers=self._headers()
164
164
  )
165
165
  r.raise_for_status()
166
-
167
-
@@ -1,4 +1,4 @@
1
- from .base import ChunkrBase
1
+ from .chunkr_base import ChunkrBase
2
2
  from .task import TaskResponse
3
3
  from .config import Configuration
4
4
  import httpx
@@ -40,7 +40,6 @@ class ChunkProcessing(BaseModel):
40
40
 
41
41
  class Property(BaseModel):
42
42
  name: str
43
- title: Optional[str] = None
44
43
  prop_type: str
45
44
  description: Optional[str] = None
46
45
  default: Optional[str] = None
@@ -1,11 +1,11 @@
1
+ from .config import Configuration, Property, JsonSchema
1
2
  import io
2
3
  import json
3
4
  from pathlib import Path
4
5
  from PIL import Image
5
6
  import requests
6
7
  from typing import Union, Tuple, BinaryIO, Optional
7
- from .config import Configuration
8
-
8
+ from pydantic import BaseModel
9
9
 
10
10
  def prepare_file(
11
11
  file: Union[str, Path, BinaryIO, Image.Image]
@@ -15,8 +15,31 @@ def prepare_file(
15
15
  if isinstance(file, str) and (file.startswith('http://') or file.startswith('https://')):
16
16
  response = requests.get(file)
17
17
  response.raise_for_status()
18
+
19
+ # Try to get filename from Content-Disposition header first
20
+ filename = None
21
+ content_disposition = response.headers.get('Content-Disposition')
22
+ if content_disposition and 'filename=' in content_disposition:
23
+ filename = content_disposition.split('filename=')[-1].strip('"\'')
24
+
25
+ # If no Content-Disposition, try to get clean filename from URL path
26
+ if not filename:
27
+ from urllib.parse import urlparse, unquote
28
+ parsed_url = urlparse(file)
29
+ path = unquote(parsed_url.path)
30
+ filename = Path(path).name if path else None
31
+
32
+ # Fallback to default name if we couldn't extract one
33
+ filename = filename or 'downloaded_file'
34
+
35
+ # Sanitize filename: remove invalid characters and limit length
36
+ import re
37
+ filename = re.sub(r'[<>:"/\\|?*%]', '_', filename) # Replace invalid chars with underscore
38
+ filename = re.sub(r'\s+', '_', filename) # Replace whitespace with underscore
39
+ filename = filename.strip('._') # Remove leading/trailing dots and underscores
40
+ filename = filename[:255] # Limit length to 255 characters
41
+
18
42
  file_obj = io.BytesIO(response.content)
19
- filename = Path(file.split('/')[-1]).name or 'downloaded_file'
20
43
  return filename, file_obj
21
44
 
22
45
  # Handle base64 strings
@@ -104,3 +127,33 @@ def prepare_upload_data(
104
127
  files[key] = (None, json.dumps(value), 'application/json')
105
128
 
106
129
  return files
130
+
131
+ def from_pydantic(pydantic: BaseModel) -> dict:
132
+ """Convert a Pydantic model to a Chunk json schema.
133
+
134
+ Args:
135
+ pydantic: A Pydantic BaseModel class or instance
136
+
137
+ Returns:
138
+ dict: A JSON schema compatible with Chunk's format
139
+ """
140
+ model = pydantic if isinstance(pydantic, type) else pydantic.__class__
141
+ schema = model.model_json_schema()
142
+ print(schema)
143
+ properties = []
144
+ for name, details in schema.get('properties', {}).items():
145
+ prop = Property(
146
+ name=name,
147
+ title=details.get('title'),
148
+ prop_type=details.get('type', 'string'),
149
+ description=details.get('description'),
150
+ default=str(details.get('default')) if details.get('default') is not None else None
151
+ )
152
+ properties.append(prop)
153
+
154
+ json_schema = JsonSchema(
155
+ title=schema.get('title', model.__name__),
156
+ properties=properties
157
+ )
158
+
159
+ return json_schema.model_dump(mode="json", exclude_none=True)
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: httpx>=0.24.0
9
+ Requires-Dist: httpx>=0.25.0
10
10
  Requires-Dist: pillow>=10.0.0
11
11
  Requires-Dist: pydantic>=2.0.0
12
12
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -10,11 +10,10 @@ src/chunkr_ai.egg-info/dependency_links.txt
10
10
  src/chunkr_ai.egg-info/requires.txt
11
11
  src/chunkr_ai.egg-info/top_level.txt
12
12
  src/chunkr_ai/api/__init__.py
13
- src/chunkr_ai/api/api.py
14
13
  src/chunkr_ai/api/auth.py
15
- src/chunkr_ai/api/base.py
16
14
  src/chunkr_ai/api/chunkr.py
17
15
  src/chunkr_ai/api/chunkr_async.py
16
+ src/chunkr_ai/api/chunkr_base.py
18
17
  src/chunkr_ai/api/config.py
19
18
  src/chunkr_ai/api/misc.py
20
19
  src/chunkr_ai/api/protocol.py
@@ -1,4 +1,4 @@
1
- httpx>=0.24.0
1
+ httpx>=0.25.0
2
2
  pillow>=10.0.0
3
3
  pydantic>=2.0.0
4
4
  pytest-asyncio>=0.21.0
File without changes
File without changes
File without changes
File without changes