chunkr-ai 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
chunkr_ai/api/auth.py CHANGED
@@ -1,5 +1,3 @@
1
- from typing import Optional
2
-
3
1
  class HeadersMixin:
4
2
  """Mixin class for handling authorization headers"""
5
3
 
chunkr_ai/api/base.py ADDED
@@ -0,0 +1,173 @@
1
+ from .config import Configuration
2
+ from .task import TaskResponse
3
+ from .auth import HeadersMixin
4
+ from abc import abstractmethod
5
+ from dotenv import load_dotenv
6
+ import io
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from PIL import Image
11
+ import requests
12
+ from typing import BinaryIO, Tuple, Union
13
+
14
class ChunkrBase(HeadersMixin):
    """Base class with shared functionality for Chunkr API clients.

    Resolves the API URL and key, and converts the supported upload inputs
    into the ``files``/``data`` dictionaries used for multipart requests.

    NOTE(review): ``upload``, ``start_upload`` and ``get_task`` are marked
    ``@abstractmethod``, which is only enforced when the class hierarchy
    uses ``ABCMeta`` -- confirm whether that is intended here.
    """

    # MIME type of a base64 data URI -> extension used for the upload filename.
    _MIME_TO_EXT = {
        'application/pdf': 'pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
        'application/vnd.ms-powerpoint': 'ppt',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
        'application/vnd.ms-excel': 'xls',
        'image/jpeg': 'jpg',
        'image/png': 'png',
        'image/jpg': 'jpg',
    }

    def __init__(self, url: str = None, api_key: str = None):
        """Resolve the API URL and key.

        Explicit arguments win over the ``CHUNKR_URL`` / ``CHUNKR_API_KEY``
        environment variables (a ``.env`` file is loaded first); the URL
        falls back to ``https://api.chunkr.ai``.

        Raises:
            ValueError: If no API key could be resolved.
        """
        load_dotenv()
        self.url = url or os.getenv('CHUNKR_URL') or 'https://api.chunkr.ai'
        self._api_key = api_key or os.getenv('CHUNKR_API_KEY')
        if not self._api_key:
            raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")

        # Normalise so endpoint paths can be appended with a single "/".
        self.url = self.url.rstrip("/")

    def _prepare_file(
        self,
        file: Union[str, Path, BinaryIO, Image.Image]
    ) -> Tuple[str, BinaryIO]:
        """Convert various file types into a tuple of (filename, file-like object).

        Args:
            file: Input file, can be:
                - String or Path to a file
                - URL string starting with http:// or https://
                - Base64 string (data URI with a ``;base64,`` header)
                - Opened binary file (mode='rb')
                - PIL/Pillow Image object

        Returns:
            Tuple[str, BinaryIO]: (filename, file-like object) ready for upload.
            For a file path the handle is opened here and is not closed by
            this method; the caller is responsible for closing it.

        Raises:
            FileNotFoundError: If the file path doesn't exist
            TypeError: If the file type is not supported
            ValueError: If the base64 payload is invalid
            ValueError: If the MIME type is unsupported
            requests.exceptions.RequestException: If the URL download fails
        """
        # Handle URLs
        if isinstance(file, str) and file.startswith(('http://', 'https://')):
            response = requests.get(file)
            response.raise_for_status()
            # Drop any query string / fragment so it does not end up in the
            # upload filename, then keep the last path segment.
            url_without_query = file.split('?', 1)[0].split('#', 1)[0]
            filename = url_without_query.rsplit('/', 1)[-1] or 'downloaded_file'
            return filename, io.BytesIO(response.content)

        # Handle base64 strings (";base64," already implies a comma is present)
        if isinstance(file, str) and ';base64,' in file:
            header, base64_data = file.split(',', 1)
            mime_type = header.split(':')[-1].split(';')[0].lower()

            # Reject unknown MIME types before decoding, with their own
            # message instead of one wrapped as "Invalid base64 string".
            extension = self._MIME_TO_EXT.get(mime_type)
            if extension is None:
                raise ValueError(f"Unsupported MIME type: {mime_type}")

            import base64
            try:
                file_bytes = base64.b64decode(base64_data)
            except ValueError as e:  # binascii.Error is a ValueError subclass
                raise ValueError(f"Invalid base64 string: {str(e)}") from e
            return f"file.{extension}", io.BytesIO(file_bytes)

        # Handle file paths
        if isinstance(file, (str, Path)):
            path = Path(file).resolve()
            if not path.exists():
                raise FileNotFoundError(f"File not found: {file}")
            return path.name, open(path, 'rb')

        # Handle PIL Images
        if isinstance(file, Image.Image):
            buffer = io.BytesIO()
            image_format = file.format or 'PNG'
            file.save(buffer, format=image_format)
            buffer.seek(0)
            return f"image.{image_format.lower()}", buffer

        # Handle file-like objects
        if hasattr(file, 'read') and hasattr(file, 'seek'):
            # ``name`` may be absent (e.g. BytesIO) or not path-like (e.g. an
            # integer file descriptor); fall back to a generic name then.
            raw_name = getattr(file, 'name', None)
            name = Path(raw_name).name if isinstance(raw_name, (str, Path)) else ''
            return name or 'document', file

        raise TypeError(f"Unsupported file type: {type(file)}")

    def _prepare_upload_data(
        self,
        file: Union[str, Path, BinaryIO, Image.Image],
        config: Configuration = None
    ) -> Tuple[dict, dict]:
        """Prepare files and data dictionaries for upload.

        Args:
            file: The file to upload
            config: Optional configuration settings

        Returns:
            Tuple[dict, dict]: (files dict, data dict) ready for upload.
            ``files["file"]`` is the ``(filename, file object)`` pair;
            dict-valued settings are added to ``files`` as JSON parts and
            all other settings go into ``data``.
        """
        filename, file_obj = self._prepare_file(file)
        files = {"file": (filename, file_obj)}
        data = {}

        if config:
            # exclude_none: settings left unset are not sent at all.
            config_dict = config.model_dump(mode="json", exclude_none=True)
            for key, value in config_dict.items():
                if isinstance(value, dict):
                    files[key] = (None, json.dumps(value), 'application/json')
                else:
                    data[key] = value

        return files, data

    @abstractmethod
    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Must be implemented by subclasses.
        """
        pass
chunkr_ai/api/chunkr.py CHANGED
@@ -1,125 +1,108 @@
1
- from .models import TaskResponse, Configuration
2
- from .auth import HeadersMixin
3
- from dotenv import load_dotenv
4
- import io
5
- import os
1
+ from .base import ChunkrBase
2
+ from .config import Configuration
3
+ from .task import TaskResponse
6
4
  from pathlib import Path
7
5
  from PIL import Image
8
6
  import requests
9
- from typing import Union, BinaryIO, Tuple
7
+ from typing import Union, BinaryIO
10
8
 
11
- class Chunkr(HeadersMixin):
12
- """Client for interacting with the Chunkr API."""
9
class Chunkr(ChunkrBase):
    """Synchronous Chunkr API client backed by one ``requests.Session``.

    Can be used as a context manager so the session is closed on exit.
    """

    def __init__(self, url: str = None, api_key: str = None):
        super().__init__(url, api_key)
        self._session = requests.Session()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self._session.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Args:
            file: The file to upload.
            config: Configuration options for processing. Optional.

        Examples:
        ```
        # Upload from file path
        chunkr.upload("document.pdf")

        # Upload from URL
        chunkr.upload("https://example.com/document.pdf")

        # Upload from base64 string (must include MIME type header)
        chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")

        # Upload from opened file
        with open("document.pdf", "rb") as f:
            chunkr.upload(f)

        # Upload an image
        from PIL import Image
        img = Image.open("photo.jpg")
        chunkr.upload(img)
        ```
        Returns:
            TaskResponse: The completed task response
        """
        task = self.start_upload(file, config)
        return task.poll()

    def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`

        Args:
            file: The file to upload.
            config: Configuration options for processing. Optional.

        Examples:
        ```
        # Upload from file path
        task = chunkr.start_upload("document.pdf")

        # Upload from opened file
        with open("document.pdf", "rb") as f:
            task = chunkr.start_upload(f)

        # Upload from URL
        task = chunkr.start_upload("https://example.com/document.pdf")

        # Upload from base64 string (must include MIME type header)
        task = chunkr.start_upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")

        # Upload an image
        from PIL import Image
        img = Image.open("photo.jpg")
        task = chunkr.start_upload(img)

        # Wait for the task to complete - this can be done when needed
        task.poll()
        ```

        Returns:
            TaskResponse: The initial task response

        Raises:
            requests.exceptions.HTTPError: If the API responds with an error status
        """
        files, data = self._prepare_upload_data(file, config)
        upload_stream = files["file"][1]
        try:
            r = self._session.post(
                f"{self.url}/api/v1/task",
                files=files,
                data=data,
                headers=self._headers()
            )
        finally:
            # A stream that is not the caller's own object was created while
            # preparing the upload (e.g. a handle opened for a path), so
            # release it here. Streams passed in by the caller stay open.
            if upload_stream is not file:
                upload_stream.close()
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)

    def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Args:
            task_id: The ID of the task to get

        Returns:
            TaskResponse: The task response

        Raises:
            requests.exceptions.HTTPError: If the API responds with an error status
        """
        r = self._session.get(
            f"{self.url}/api/v1/task/{task_id}",
            headers=self._headers()
        )
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)
125
108
 
chunkr_ai/api/chunkr_async.py CHANGED
@@ -1,39 +1,105 @@
1
- from .chunkr import Chunkr
2
- from .models import TaskResponse, Configuration
1
+ from .base import ChunkrBase
2
+ from .task import TaskResponse
3
+ from .config import Configuration
3
4
  import httpx
4
- import io
5
+ from pathlib import Path
5
6
  from PIL import Image
6
7
  from typing import Union, BinaryIO
7
8
 
8
- class ChunkrAsync(Chunkr):
9
- """Async client for interacting with the Chunkr API.
9
class ChunkrAsync(ChunkrBase):
    """Asynchronous Chunkr API client backed by one ``httpx.AsyncClient``.

    Can be used as an async context manager so the HTTP client is closed
    on exit.
    """

    def __init__(self, url: str = None, api_key: str = None):
        super().__init__(url, api_key)
        self._client = httpx.AsyncClient()

    async def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Args:
            file: The file to upload.
            config: Configuration options for processing. Optional.

        Examples:
        ```python
        # Upload from file path
        await chunkr.upload("document.pdf")

        # Upload from opened file
        with open("document.pdf", "rb") as f:
            await chunkr.upload(f)

        # Upload from URL
        await chunkr.upload("https://example.com/document.pdf")

        # Upload from base64 string (must include MIME type header)
        await chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")

        # Upload an image
        from PIL import Image
        img = Image.open("photo.jpg")
        await chunkr.upload(img)
        ```
        Returns:
            TaskResponse: The completed task response
        """
        task = await self.start_upload(file, config)
        return await task.poll_async()

    async def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll_async()`.

        Args:
            file: The file to upload.
            config: Configuration options for processing. Optional.

        Examples:
        ```
        # Upload from file path
        task = await chunkr.start_upload("document.pdf")

        # Upload from opened file
        with open("document.pdf", "rb") as f:
            task = await chunkr.start_upload(f)

        # Upload from URL
        task = await chunkr.start_upload("https://example.com/document.pdf")

        # Upload from base64 string (must include MIME type header)
        task = await chunkr.start_upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")

        # Upload an image
        from PIL import Image
        img = Image.open("photo.jpg")
        task = await chunkr.start_upload(img)

        # Wait for the task to complete - this can be done when needed
        await task.poll_async()
        ```

        Returns:
            TaskResponse: The initial task response

        Raises:
            httpx.HTTPStatusError: If the API responds with an error status
        """
        files, data = self._prepare_upload_data(file, config)
        upload_stream = files["file"][1]
        try:
            # Send the scalar settings as form fields next to the multipart
            # parts. A ``json=`` body is not used here: it would not be
            # combined with ``files=`` in one multipart request, and the
            # prepared ``data`` would be left unused.
            r = await self._client.post(
                f"{self.url}/api/v1/task",
                files=files,
                data=data,
                headers=self._headers()
            )
        finally:
            # A stream that is not the caller's own object was created while
            # preparing the upload (e.g. a handle opened for a path), so
            # release it here. Streams passed in by the caller stay open.
            if upload_stream is not file:
                upload_stream.close()
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)

    async def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Args:
            task_id: The ID of the task to get

        Returns:
            TaskResponse: The task response

        Raises:
            httpx.HTTPStatusError: If the API responds with an error status
        """
        r = await self._client.get(
            f"{self.url}/api/v1/task/{task_id}",
            headers=self._headers()
        )
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Release the pooled connections held by the shared client.
        await self._client.aclose()
chunkr_ai/api/config.py ADDED
@@ -0,0 +1,130 @@
1
+ from pydantic import BaseModel, Field
2
+ from enum import Enum
3
+ from typing import Optional, List, Dict
4
+
5
class GenerationStrategy(str, Enum):
    """String enum of generation strategies: ``"LLM"`` or ``"Auto"``."""

    LLM = "LLM"
    AUTO = "Auto"
8
+
9
class CroppingStrategy(str, Enum):
    """String enum of cropping strategies: ``"All"`` or ``"Auto"``."""

    ALL = "All"
    AUTO = "Auto"
12
+
13
class LlmConfig(BaseModel):
    """LLM settings: required ``model`` and ``prompt`` strings, plus a
    ``temperature`` that defaults to ``0.0``."""

    model: str
    prompt: str
    temperature: float = 0.0
17
+
18
class GenerationConfig(BaseModel):
    """Per-segment generation settings; every field is optional and
    defaults to ``None``."""

    html: Optional[GenerationStrategy] = None
    llm: Optional[LlmConfig] = None
    markdown: Optional[GenerationStrategy] = None
    crop_image: Optional[CroppingStrategy] = None
23
+
24
class SegmentProcessing(BaseModel):
    """Optional ``GenerationConfig`` per segment kind.

    Each field is named after one segment kind and defaults to ``None``.
    """

    title: Optional[GenerationConfig] = None
    section_header: Optional[GenerationConfig] = None
    text: Optional[GenerationConfig] = None
    list_item: Optional[GenerationConfig] = None
    table: Optional[GenerationConfig] = None
    picture: Optional[GenerationConfig] = None
    caption: Optional[GenerationConfig] = None
    formula: Optional[GenerationConfig] = None
    footnote: Optional[GenerationConfig] = None
    page_header: Optional[GenerationConfig] = None
    page_footer: Optional[GenerationConfig] = None
    page: Optional[GenerationConfig] = None
37
+
38
class ChunkProcessing(BaseModel):
    """Chunking settings; ``target_length`` is optional and defaults to ``None``."""

    target_length: Optional[int] = None
40
+
41
class Property(BaseModel):
    """One property entry of a ``JsonSchema``.

    ``name`` and ``prop_type`` are required. The ``Optional`` fields default
    to ``None``: under pydantic v2 an ``Optional[...]`` annotation without a
    default is still a required field, so without the explicit defaults
    callers would have to pass ``None`` for each of them.
    """

    name: str
    title: Optional[str] = None
    prop_type: str
    description: Optional[str] = None
    default: Optional[str] = None
47
+
48
class JsonSchema(BaseModel):
    """Schema description made of a ``title`` and a list of ``Property`` items.

    ``schema_type`` defaults to ``None``: under pydantic v2 an
    ``Optional[...]`` annotation without a default is still required.
    """

    title: str
    properties: List[Property]
    schema_type: Optional[str] = None
52
+
53
class OcrStrategy(str, Enum):
    """String enum of OCR strategies: ``"All"`` or ``"Auto"``."""

    ALL = "All"
    AUTO = "Auto"
56
+
57
class SegmentationStrategy(str, Enum):
    """String enum of segmentation strategies: ``"LayoutAnalysis"`` or ``"Page"``."""

    LAYOUT_ANALYSIS = "LayoutAnalysis"
    PAGE = "Page"
60
+
61
class BoundingBox(BaseModel):
    """Rectangle given by its ``left``/``top`` position and ``width``/``height``.

    All four fields are required floats.
    """

    left: float
    top: float
    width: float
    height: float
66
+
67
class OCRResult(BaseModel):
    """One OCR item: a bounding box, its text and an optional confidence.

    ``confidence`` defaults to ``None`` so a payload that omits the key
    still validates (pydantic v2 treats ``Optional[...]`` without a default
    as required).
    """

    bbox: BoundingBox
    text: str
    confidence: Optional[float] = None
71
+
72
class SegmentType(str, Enum):
    """String enum of the twelve segment kinds (values are PascalCase names)."""

    CAPTION = "Caption"
    FOOTNOTE = "Footnote"
    FORMULA = "Formula"
    LIST_ITEM = "ListItem"
    PAGE = "Page"
    PAGE_FOOTER = "PageFooter"
    PAGE_HEADER = "PageHeader"
    PICTURE = "Picture"
    SECTION_HEADER = "SectionHeader"
    TABLE = "Table"
    TEXT = "Text"
    TITLE = "Title"
85
+
86
class Segment(BaseModel):
    """One segment of a chunk: its type, location, text and OCR items.

    ``html``, ``image`` and ``markdown`` default to ``None`` so a payload
    that omits them still validates (pydantic v2 treats ``Optional[...]``
    without a default as required).
    """

    bbox: BoundingBox
    content: str
    page_height: float
    html: Optional[str] = None
    image: Optional[str] = None
    markdown: Optional[str] = None
    ocr: List[OCRResult]
    page_number: int
    page_width: float
    segment_id: str
    segment_type: SegmentType
98
+
99
class Chunk(BaseModel):
    """A chunk: its id, its length and the list of ``Segment`` items it holds."""

    chunk_id: str
    chunk_length: int
    segments: List[Segment]
103
+
104
class ExtractedJson(BaseModel):
    """Wrapper around a single required ``data`` dictionary."""

    data: Dict
106
+
107
class OutputResponse(BaseModel):
    """Task output: the list of chunks and optional extracted JSON.

    ``extracted_json`` defaults to ``None`` so a payload that omits the key
    still validates (pydantic v2 treats ``Optional[...]`` without a default
    as required).
    """

    chunks: List[Chunk] = []
    extracted_json: Optional[ExtractedJson] = None
110
+
111
class Model(str, Enum):
    """String enum of model choices: ``"Fast"`` or ``"HighQuality"``."""

    FAST = "Fast"
    HIGH_QUALITY = "HighQuality"
114
+
115
class Configuration(BaseModel):
    """Processing options for a task.

    Every option is optional and defaults to ``None``.
    """

    chunk_processing: Optional[ChunkProcessing] = None
    expires_in: Optional[int] = None
    high_resolution: Optional[bool] = None
    json_schema: Optional[JsonSchema] = None
    model: Optional[Model] = None
    ocr_strategy: Optional[OcrStrategy] = None
    segment_processing: Optional[SegmentProcessing] = None
    segmentation_strategy: Optional[SegmentationStrategy] = None
    target_chunk_length: Optional[int] = None
125
+
126
class Status(str, Enum):
    """String enum of task states: Starting, Processing, Succeeded, Failed."""

    STARTING = "Starting"
    PROCESSING = "Processing"
    SUCCEEDED = "Succeeded"
    FAILED = "Failed"
chunkr_ai/api/protocol.py ADDED
@@ -0,0 +1,19 @@
1
+ from typing import runtime_checkable, Protocol
2
+ from requests import Session
3
+ from httpx import AsyncClient
4
+
5
@runtime_checkable
class ChunkrClientProtocol(Protocol):
    """Protocol defining the interface for Chunkr clients.

    Declares the attributes and methods a client object is expected to
    expose: the base ``url``, the API key, a ``requests`` session, an
    ``httpx`` async client, and helpers for the key and request headers.

    NOTE(review): the protocol lists both ``_session`` and ``_client``;
    confirm that every client checked with ``isinstance`` really provides
    both, since a runtime-checkable protocol checks for all members.
    """

    url: str
    _api_key: str
    _session: Session
    _client: AsyncClient

    def get_api_key(self) -> str:
        """Get the API key"""
        ...

    def _headers(self) -> dict:
        """Return headers required for API requests"""
        ...
chunkr_ai/api/task.py ADDED
@@ -0,0 +1,124 @@
1
+ from .protocol import ChunkrClientProtocol
2
+ from .config import Configuration, Status, OutputResponse
3
+ import asyncio
4
+ from datetime import datetime
5
+ from pydantic import BaseModel, PrivateAttr
6
+ import time
7
+ from typing import Optional, Union
8
+
9
class TaskResponse(BaseModel):
    """Task record returned by the API, with helpers to poll it and read its output.

    The ``Optional`` fields default to ``None`` so a payload that omits them
    still validates (pydantic v2 treats ``Optional[...]`` without a default
    as required). ``_client`` is the client used for polling and is attached
    with ``with_client``.
    """

    configuration: Configuration
    created_at: datetime
    expires_at: Optional[datetime] = None
    file_name: Optional[str] = None
    finished_at: Optional[datetime] = None
    input_file_url: Optional[str] = None
    message: str
    output: Optional[OutputResponse] = None
    page_count: Optional[int] = None
    pdf_url: Optional[str] = None
    status: Status
    task_id: str
    task_url: Optional[str] = None
    _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)

    def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponse':
        """Attach the client used for polling and return ``self``."""
        self._client = client
        return self

    def _apply_update(self, payload: dict) -> None:
        """Validate a polled payload and copy its fields onto this instance.

        Going through the model keeps nested values typed (``output``
        becomes an ``OutputResponse``, ``status`` a ``Status``) instead of
        storing the raw JSON values on the instance.
        """
        refreshed = type(self)(**payload)
        for field_name in type(self).model_fields:
            setattr(self, field_name, getattr(refreshed, field_name))

    def _poll_request_sync(self) -> dict:
        """Helper method to make polling request with retry logic (synchronous)

        Connection and timeout errors are retried every 0.5 s without an
        upper bound; any other error (including HTTP error statuses)
        propagates.

        Raises:
            ValueError: If the task has no ``task_url``.
        """
        if not self.task_url:
            raise ValueError("Task URL not found in response")

        # Local import: only needed to name the transport errors. requests
        # raises its own exception classes, which do not derive from the
        # builtin ConnectionError/TimeoutError.
        import requests
        retryable = (
            ConnectionError,
            TimeoutError,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        )

        while True:
            try:
                r = self._client._session.get(self.task_url, headers=self._client._headers())
                r.raise_for_status()
                return r.json()
            except retryable:
                print("Connection error while polling the task, retrying...")
                time.sleep(0.5)

    async def _poll_request_async(self) -> dict:
        """Helper method to make polling request with retry logic (asynchronous)

        Connection and timeout errors are retried every 0.5 s without an
        upper bound; any other error (including HTTP error statuses)
        propagates.

        Raises:
            ValueError: If the task has no ``task_url``.
        """
        if not self.task_url:
            raise ValueError("Task URL not found in response")

        # Local import: only needed to name the transport errors. httpx
        # raises its own exception classes, which do not derive from the
        # builtin ConnectionError/TimeoutError.
        import httpx
        retryable = (
            ConnectionError,
            TimeoutError,
            httpx.NetworkError,
            httpx.TimeoutException,
        )

        while True:
            try:
                r = await self._client._client.get(self.task_url, headers=self._client._headers())
                # Only the request itself is awaitable; raise_for_status()
                # and json() are plain synchronous methods of the response.
                r.raise_for_status()
                return r.json()
            except retryable:
                print("Connection error while polling the task, retrying...")
                await asyncio.sleep(0.5)

    def _check_status(self) -> Optional['TaskResponse']:
        """Helper method to check task status and handle completion/failure

        Returns:
            ``self`` once the task is no longer starting or processing,
            otherwise ``None``.

        Raises:
            ValueError: With the task message if the task failed.
        """
        if self.status == Status.FAILED:
            raise ValueError(self.message)
        if self.status not in (Status.STARTING, Status.PROCESSING):
            return self
        return None

    def poll(self) -> 'TaskResponse':
        """Poll the task for completion."""
        while True:
            self._apply_update(self._poll_request_sync())

            if result := self._check_status():
                return result

            time.sleep(0.5)

    async def poll_async(self) -> 'TaskResponse':
        """Poll the task for completion asynchronously."""
        while True:
            self._apply_update(await self._poll_request_async())

            if result := self._check_status():
                return result

            await asyncio.sleep(0.5)

    def _get_content(self, content_type: str) -> str:
        """Helper method to get either HTML, Markdown, or raw content.

        Joins the non-empty ``content_type`` attribute of every segment of
        every chunk with newlines; returns ``""`` when there is no output.
        """
        if not self.output:
            return ""
        return "\n".join(
            text
            for chunk in self.output.chunks
            for segment in chunk.segments
            if (text := getattr(segment, content_type))
        )

    def html(self) -> str:
        """Get full HTML for the task"""
        return self._get_content("html")

    def markdown(self) -> str:
        """Get full markdown for the task"""
        return self._get_content("markdown")

    def content(self) -> str:
        """Get full text for the task"""
        return self._get_content("content")
114
+
115
class TaskPayload(BaseModel):
    """Task payload: configurations, file name, storage locations and ids.

    ``previous_configuration`` defaults to ``None`` so a payload that omits
    it still validates (pydantic v2 treats ``Optional[...]`` without a
    default as required).
    """

    current_configuration: Configuration
    file_name: str
    image_folder_location: str
    input_location: str
    output_location: str
    pdf_location: str
    previous_configuration: Optional[Configuration] = None
    task_id: str
    user_id: str
chunkr_ai/models.py ADDED
@@ -0,0 +1,49 @@
1
+ from .api.config import (
2
+ BoundingBox,
3
+ Chunk,
4
+ ChunkProcessing,
5
+ Configuration,
6
+ CroppingStrategy,
7
+ ExtractedJson,
8
+ GenerationStrategy,
9
+ GenerationConfig,
10
+ JsonSchema,
11
+ LlmConfig,
12
+ Model,
13
+ OCRResult,
14
+ OcrStrategy,
15
+ OutputResponse,
16
+ Property,
17
+ Segment,
18
+ SegmentProcessing,
19
+ SegmentType,
20
+ SegmentationStrategy,
21
+ Status
22
+ )
23
+
24
+ from .api.task import TaskResponse, TaskPayload
25
+
26
# Public names re-exported by this module.
__all__ = [
    "BoundingBox",
    "Chunk",
    "ChunkProcessing",
    "Configuration",
    "CroppingStrategy",
    "ExtractedJson",
    "GenerationConfig",
    "GenerationStrategy",
    "JsonSchema",
    "LlmConfig",
    "Model",
    "OCRResult",
    "OcrStrategy",
    "OutputResponse",
    "Property",
    "Segment",
    "SegmentProcessing",
    "SegmentType",
    "SegmentationStrategy",
    "Status",
    "TaskPayload",
    "TaskResponse",
]
chunkr_ai-0.0.3.dist-info/METADATA ADDED
@@ -0,0 +1,124 @@
1
+ Metadata-Version: 2.2
2
+ Name: chunkr-ai
3
+ Version: 0.0.3
4
+ Summary: Python client for Chunkr: open source document intelligence
5
+ Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
+ Project-URL: Homepage, https://chunkr.ai
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: httpx>=0.28.1
10
+ Requires-Dist: pillow>=11.1.0
11
+ Requires-Dist: pydantic>=2.10.4
12
+ Requires-Dist: python-dotenv>=1.0.1
13
+ Requires-Dist: requests>=2.32.3
14
+ Provides-Extra: test
15
+ Requires-Dist: pytest>=8.3.4; extra == "test"
16
+ Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
17
+
18
+ # Chunkr Python Client
19
+
20
+ This is the Python client for the Chunkr API. It provides a simple interface to interact with Chunkr's services.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install chunkr-ai
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ We provide two clients: `Chunkr` for synchronous operations and `ChunkrAsync` for asynchronous operations.
31
+
32
+ ### Synchronous Usage
33
+
34
+ ```python
35
+ from chunkr_ai import Chunkr
36
+
37
+ # Initialize client
38
+ chunkr = Chunkr()
39
+
40
+ # Upload a file and wait for processing
41
+ task = chunkr.upload("document.pdf")
42
+
43
+ # Print the response
44
+ print(task)
45
+
46
+ # Get output from task
47
+ output = task.output
48
+
49
+ # If you want to upload without waiting for processing
50
+ task = chunkr.start_upload("document.pdf")
51
+ # ... do other things ...
52
+ task.poll() # Check status when needed
53
+ ```
54
+
55
+ ### Asynchronous Usage
56
+
57
+ ```python
58
+ from chunkr_ai import ChunkrAsync
59
+
60
+ async def process_document():
61
+ # Initialize client
62
+ chunkr = ChunkrAsync()
63
+
64
+ # Upload a file and wait for processing
65
+ task = await chunkr.upload("document.pdf")
66
+
67
+ # Print the response
68
+ print(task)
69
+
70
+ # Get output from task
71
+ output = task.output
72
+
73
+ # If you want to upload without waiting for processing
74
+ task = await chunkr.start_upload("document.pdf")
75
+ # ... do other things ...
76
+ await task.poll_async() # Check status when needed
77
+ ```
78
+
79
+ ### Additional Features
80
+
81
+ Both clients support various input types:
82
+
83
+ ```python
84
+ # Upload from file path
85
+ chunkr.upload("document.pdf")
86
+
87
+ # Upload from opened file
88
+ with open("document.pdf", "rb") as f:
89
+ chunkr.upload(f)
90
+
91
+ # Upload from URL
92
+ chunkr.upload("https://example.com/document.pdf")
93
+
94
+ # Upload from base64 string
95
+ chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
96
+
97
+ # Upload an image
98
+ from PIL import Image
99
+ img = Image.open("photo.jpg")
100
+ chunkr.upload(img)
101
+ ```
102
+
103
+ ### Configuration
104
+
105
+ You can provide your API key and URL in several ways:
106
+ 1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
107
+ 2. `.env` file
108
+ 3. Direct initialization:
109
+ ```python
110
+ chunkr = Chunkr(
111
+ api_key="your-api-key",
112
+ url="https://api.chunkr.ai"
113
+ )
114
+ ```
115
+
116
+ ## Run tests
117
+
118
+ ```bash
119
+ # Install dependencies
120
+ uv pip install -e ".[test]"
121
+
122
+ # Run tests
123
+ uv run pytest
124
+ ```
chunkr_ai-0.0.3.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
1
+ chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
+ chunkr_ai/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ chunkr_ai/models.py,sha256=DIKuoLOes6CXIcAQIYDUEZLPUZOT7KKndXu2_ZwNMsk,877
4
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
7
+ chunkr_ai/api/base.py,sha256=WDHx8tU0fl9_-yvYTKL-U0uaxHv-8_bRfiw9Xkl-mWM,6499
8
+ chunkr_ai/api/chunkr.py,sha256=LkBFzGB_T0y3fnBeIn_nwQW6Mb7eZO-iTlzWrmWBoko,3450
9
+ chunkr_ai/api/chunkr_async.py,sha256=B9deRVoe4h3Csh_jEuQxuxQ-DKSuZPdwkanFTyfHmeM,3603
10
+ chunkr_ai/api/config.py,sha256=z7Udwwlw7YD3LahLbgq2fht5v16evK7UBPTgocfxylw,3514
11
+ chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
12
+ chunkr_ai/api/task.py,sha256=5Mg5u3CVJxmKBKPauogOWX8UEG6zv7-mNd7JHioUILk,4242
13
+ chunkr_ai-0.0.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ chunkr_ai-0.0.3.dist-info/METADATA,sha256=vQM4TeWfpaBh3r5ZxVS-S6nROAVoahLnMj2qy3UwgyA,2685
15
+ chunkr_ai-0.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
+ chunkr_ai-0.0.3.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
+ chunkr_ai-0.0.3.dist-info/RECORD,,
chunkr_ai/api/models.py DELETED
@@ -1,231 +0,0 @@
1
- from .auth import HeadersMixin
2
- import asyncio
3
- from datetime import datetime
4
- from enum import Enum
5
- import httpx
6
- from pydantic import BaseModel, Field, PrivateAttr
7
- import requests
8
- import time
9
- from typing import Optional, List, Dict, Union
10
-
11
- class GenerationStrategy(str, Enum):
12
- LLM = "LLM"
13
- AUTO = "Auto"
14
-
15
- class CroppingStrategy(str, Enum):
16
- ALL = "All"
17
- AUTO = "Auto"
18
-
19
- class LlmConfig(BaseModel):
20
- model: str
21
- prompt: str
22
- temperature: float = 0.0
23
-
24
- class AutoGenerationConfig(BaseModel):
25
- html: GenerationStrategy = GenerationStrategy.AUTO
26
- llm: Optional[LlmConfig] = None
27
- markdown: GenerationStrategy = GenerationStrategy.AUTO
28
- crop_image: CroppingStrategy = CroppingStrategy.ALL
29
-
30
- class LlmGenerationConfig(BaseModel):
31
- html: GenerationStrategy = GenerationStrategy.LLM
32
- llm: Optional[LlmConfig] = None
33
- markdown: GenerationStrategy = GenerationStrategy.LLM
34
- crop_image: CroppingStrategy = CroppingStrategy.ALL
35
-
36
- class SegmentProcessing(BaseModel):
37
- title: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
38
- section_header: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
39
- text: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
40
- list_item: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
41
- table: LlmGenerationConfig = Field(default_factory=LlmGenerationConfig)
42
- picture: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
43
- caption: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
44
- formula: LlmGenerationConfig = Field(default_factory=LlmGenerationConfig)
45
- footnote: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
46
- page_header: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
47
- page_footer: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
48
- page: AutoGenerationConfig = Field(default_factory=AutoGenerationConfig)
49
-
50
- class ChunkProcessing(BaseModel):
51
- target_length: int = 512
52
-
53
- class Property(BaseModel):
54
- name: str
55
- title: Optional[str]
56
- prop_type: str
57
- description: Optional[str]
58
- default: Optional[str]
59
-
60
- class JsonSchema(BaseModel):
61
- title: str
62
- properties: List[Property]
63
- schema_type: Optional[str]
64
-
65
- class OcrStrategy(str, Enum):
66
- ALL = "All"
67
- AUTO = "Auto"
68
-
69
- class SegmentationStrategy(str, Enum):
70
- LAYOUT_ANALYSIS = "LayoutAnalysis"
71
- PAGE = "Page"
72
-
73
- class BoundingBox(BaseModel):
74
- left: float
75
- top: float
76
- width: float
77
- height: float
78
-
79
- class OCRResult(BaseModel):
80
- bbox: BoundingBox
81
- text: str
82
- confidence: Optional[float]
83
-
84
- class SegmentType(str, Enum):
85
- CAPTION = "Caption"
86
- FOOTNOTE = "Footnote"
87
- FORMULA = "Formula"
88
- LIST_ITEM = "ListItem"
89
- PAGE = "Page"
90
- PAGE_FOOTER = "PageFooter"
91
- PAGE_HEADER = "PageHeader"
92
- PICTURE = "Picture"
93
- SECTION_HEADER = "SectionHeader"
94
- TABLE = "Table"
95
- TEXT = "Text"
96
- TITLE = "Title"
97
-
98
- class Segment(BaseModel):
99
- bbox: BoundingBox
100
- content: str
101
- page_height: float
102
- html: Optional[str]
103
- image: Optional[str]
104
- markdown: Optional[str]
105
- ocr: List[OCRResult]
106
- page_number: int
107
- page_width: float
108
- segment_id: str
109
- segment_type: SegmentType
110
-
111
- class Chunk(BaseModel):
112
- chunk_id: str
113
- chunk_length: int
114
- segments: List[Segment]
115
-
116
- class ExtractedJson(BaseModel):
117
- data: Dict
118
-
119
- class OutputResponse(BaseModel):
120
- chunks: List[Chunk] = []
121
- extracted_json: Optional[ExtractedJson]
122
-
123
- class Model(str, Enum):
124
- FAST = "Fast"
125
- HIGH_QUALITY = "HighQuality"
126
-
127
- class Configuration(BaseModel):
128
- chunk_processing: ChunkProcessing = Field(default_factory=ChunkProcessing)
129
- expires_in: Optional[int] = None
130
- high_resolution: bool = False
131
- json_schema: Optional[JsonSchema] = None
132
- model: Optional[Model] = Field(None, deprecated=True)
133
- ocr_strategy: OcrStrategy = OcrStrategy.AUTO
134
- segment_processing: SegmentProcessing = Field(default_factory=SegmentProcessing)
135
- segmentation_strategy: SegmentationStrategy = SegmentationStrategy.LAYOUT_ANALYSIS
136
- target_chunk_length: Optional[int] = Field(None, deprecated=True)
137
-
138
-
139
- class Status(str, Enum):
140
- STARTING = "Starting"
141
- PROCESSING = "Processing"
142
- SUCCEEDED = "Succeeded"
143
- FAILED = "Failed"
144
-
145
- class TaskResponse(BaseModel, HeadersMixin):
146
- configuration: Configuration
147
- created_at: datetime
148
- expires_at: Optional[datetime]
149
- file_name: Optional[str]
150
- finished_at: Optional[datetime]
151
- input_file_url: Optional[str]
152
- message: str
153
- output: Optional[OutputResponse]
154
- page_count: Optional[int]
155
- pdf_url: Optional[str]
156
- status: Status
157
- task_id: str
158
- task_url: Optional[str]
159
- _api_key: Optional[str] = PrivateAttr(default=None)
160
-
161
- def with_api_key(self, api_key: str) -> 'TaskResponse':
162
- """Helper function to set api key on a TaskResponse after creation"""
163
- self._api_key = api_key
164
- return self
165
-
166
- def poll(self) -> 'TaskResponse':
167
- """Poll the task for completion"""
168
- if not self.task_url:
169
- raise ValueError("Task URL not found in response")
170
-
171
- while True:
172
- r = requests.get(self.task_url, headers=self._headers())
173
- r.raise_for_status()
174
- self.__dict__.update(r.json())
175
- if self.status == "Failed":
176
- raise ValueError(self.message)
177
- if self.status not in ("Starting", "Processing"):
178
- return self
179
- time.sleep(0.5)
180
-
181
- async def poll_async(self) -> 'TaskResponse':
182
- """Async poll the task for completion"""
183
- if not self.task_url:
184
- raise ValueError("Task URL not found in response")
185
-
186
- async with httpx.AsyncClient() as client:
187
- while True:
188
- r = await client.get(self.task_url, headers=self._headers())
189
- r.raise_for_status()
190
- self.__dict__.update(r.json())
191
- if self.status == "Failed":
192
- raise ValueError(self.message)
193
- if self.status not in ("Starting", "Processing"):
194
- return self
195
- await asyncio.sleep(0.5)
196
-
197
-
198
- def _get_content(self, content_type: str) -> str:
199
- """Helper method to get either HTML, Markdown, or raw content."""
200
- if not self.output:
201
- return ""
202
- parts = []
203
- for c in self.output.chunks:
204
- for s in c.segments:
205
- content = getattr(s, content_type)
206
- if content:
207
- parts.append(content)
208
- return "\n".join(parts)
209
-
210
- def html(self) -> str:
211
- """Get full HTML for the task"""
212
- return self._get_content("html")
213
-
214
- def markdown(self) -> str:
215
- """Get full markdown for the task"""
216
- return self._get_content("markdown")
217
-
218
- def content(self) -> str:
219
- """Get full text for the task"""
220
- return self._get_content("content")
221
-
222
- class TaskPayload(BaseModel):
223
- current_configuration: Configuration
224
- file_name: str
225
- image_folder_location: str
226
- input_location: str
227
- output_location: str
228
- pdf_location: str
229
- previous_configuration: Optional[Configuration]
230
- task_id: str
231
- user_id: str
@@ -1,16 +0,0 @@
1
- Metadata-Version: 2.2
2
- Name: chunkr-ai
3
- Version: 0.0.2
4
- Summary: Python client for chunkr: open source document intelligence
5
- Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
- Description-Content-Type: text/markdown
7
- License-File: LICENSE
8
- Requires-Dist: build>=1.2.2.post1
9
- Requires-Dist: httpx>=0.28.1
10
- Requires-Dist: pillow>=11.1.0
11
- Requires-Dist: pydantic>=2.10.4
12
- Requires-Dist: python-dotenv>=1.0.1
13
- Requires-Dist: requests>=2.32.3
14
- Requires-Dist: twine>=6.0.1
15
- Provides-Extra: test
16
- Requires-Dist: pytest>=8.3.4; extra == "test"
@@ -1,12 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
- chunkr_ai/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chunkr_ai/api/auth.py,sha256=U25WiNQBsrAWYAntuds0zSMvB4gUpAwGoSa5wnQ2LRQ,454
5
- chunkr_ai/api/chunkr.py,sha256=UqFoK8ytCsW1I5F0nM4OD6I4zigy-UHzGuMDtpvMSmE,4454
6
- chunkr_ai/api/chunkr_async.py,sha256=Kfh7_DEon6QTPe-XJops8l9R6rp0zIfJKeh9ZEGFQao,1529
7
- chunkr_ai/api/models.py,sha256=vAVeRHgdSO4SDl009R2Vz75WtuXAwkUZW8ZsVXk9yBA,7221
8
- chunkr_ai-0.0.2.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- chunkr_ai-0.0.2.dist-info/METADATA,sha256=ZK6gdzkukxMEVr1WxodLZ9dZNHar32C00ST1LG9mFl8,519
10
- chunkr_ai-0.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
- chunkr_ai-0.0.2.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
12
- chunkr_ai-0.0.2.dist-info/RECORD,,