chunkr-ai 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/config.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from pydantic import BaseModel, Field, model_validator, ConfigDict
2
2
  from enum import Enum
3
- from typing import Optional, List, Dict
3
+ from typing import Optional, List, Dict, Union, Type
4
+ from .schema import from_pydantic
4
5
 
5
6
  class GenerationStrategy(str, Enum):
6
7
  LLM = "LLM"
@@ -114,7 +115,7 @@ class Configuration(BaseModel):
114
115
  chunk_processing: Optional[ChunkProcessing] = Field(default=None)
115
116
  expires_in: Optional[int] = Field(default=None)
116
117
  high_resolution: Optional[bool] = Field(default=None)
117
- json_schema: Optional[JsonSchema] = Field(default=None)
118
+ json_schema: Optional[Union[JsonSchema, Type[BaseModel], BaseModel]] = Field(default=None)
118
119
  model: Optional[Model] = Field(default=None)
119
120
  ocr_strategy: Optional[OcrStrategy] = Field(default=None)
120
121
  segment_processing: Optional[SegmentProcessing] = Field(default=None)
@@ -128,3 +129,17 @@ class Configuration(BaseModel):
128
129
  values["chunk_processing"] = values.get("chunk_processing", {}) or {}
129
130
  values["chunk_processing"]["target_length"] = target_length
130
131
  return values
132
+
133
+ @model_validator(mode='after')
134
+ def convert_json_schema(self) -> 'Configuration':
135
+ if self.json_schema is not None and not isinstance(self.json_schema, JsonSchema):
136
+ if isinstance(self.json_schema, (BaseModel, type)) and issubclass(getattr(self.json_schema, '__class__', type), BaseModel):
137
+ self.json_schema = JsonSchema(**from_pydantic(self.json_schema))
138
+ return self
139
+
140
+ class Status(str, Enum):
141
+ STARTING = "Starting"
142
+ PROCESSING = "Processing"
143
+ SUCCEEDED = "Succeeded"
144
+ FAILED = "Failed"
145
+ CANCELLED = "Cancelled"
chunkr_ai/api/misc.py CHANGED
@@ -1,11 +1,10 @@
1
- from .config import Configuration, Property, JsonSchema
1
+ from .config import Configuration
2
2
  import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
6
  import requests
7
7
  from typing import Union, Tuple, BinaryIO, Optional
8
- from pydantic import BaseModel
9
8
 
10
9
  def prepare_file(
11
10
  file: Union[str, Path, BinaryIO, Image.Image]
@@ -127,33 +126,3 @@ def prepare_upload_data(
127
126
  files[key] = (None, json.dumps(value), 'application/json')
128
127
 
129
128
  return files
130
-
131
- def from_pydantic(pydantic: BaseModel) -> dict:
132
- """Convert a Pydantic model to a Chunk json schema.
133
-
134
- Args:
135
- pydantic: A Pydantic BaseModel class or instance
136
-
137
- Returns:
138
- dict: A JSON schema compatible with Chunk's format
139
- """
140
- model = pydantic if isinstance(pydantic, type) else pydantic.__class__
141
- schema = model.model_json_schema()
142
- print(schema)
143
- properties = []
144
- for name, details in schema.get('properties', {}).items():
145
- prop = Property(
146
- name=name,
147
- title=details.get('title'),
148
- prop_type=details.get('type', 'string'),
149
- description=details.get('description'),
150
- default=str(details.get('default')) if details.get('default') is not None else None
151
- )
152
- properties.append(prop)
153
-
154
- json_schema = JsonSchema(
155
- title=schema.get('title', model.__name__),
156
- properties=properties
157
- )
158
-
159
- return json_schema.model_dump(mode="json", exclude_none=True)
@@ -0,0 +1,128 @@
1
+ from pydantic import BaseModel
2
+ from typing import Optional, List, Union, Type
3
+ import json
4
+
5
+ class Property(BaseModel):
6
+ name: str
7
+ prop_type: str
8
+ description: Optional[str] = None
9
+ default: Optional[str] = None
10
+
11
+ class JsonSchema(BaseModel):
12
+ title: str
13
+ properties: List[Property]
14
+
15
+ def from_pydantic(pydantic: Union[BaseModel, Type[BaseModel]], current_depth: int = 0) -> dict:
16
+ """Convert a Pydantic model to a Chunk json schema."""
17
+ MAX_DEPTH = 5
18
+ model = pydantic if isinstance(pydantic, type) else pydantic.__class__
19
+ schema = model.model_json_schema()
20
+ properties = []
21
+
22
+ def get_enum_description(details: dict) -> str:
23
+ """Get description including enum values if they exist"""
24
+ description = details.get('description', '')
25
+
26
+ # First check if this is a direct enum
27
+ if 'enum' in details:
28
+ enum_values = details['enum']
29
+ enum_str = '\nAllowed values:\n' + '\n'.join(f'- {val}' for val in enum_values)
30
+ return f"{description}{enum_str}"
31
+
32
+ # Then check if it's a reference to an enum
33
+ if '$ref' in details:
34
+ ref_schema = resolve_ref(details['$ref'], schema.get('$defs', {}))
35
+ if 'enum' in ref_schema:
36
+ enum_values = ref_schema['enum']
37
+ enum_str = '\nAllowed values:\n' + '\n'.join(f'- {val}' for val in enum_values)
38
+ return f"{description}{enum_str}"
39
+
40
+ return description
41
+
42
+ def resolve_ref(ref: str, definitions: dict) -> dict:
43
+ """Resolve a $ref reference to its actual schema"""
44
+ if not ref.startswith('#/$defs/'):
45
+ return {}
46
+ ref_name = ref[len('#/$defs/'):]
47
+ return definitions.get(ref_name, {})
48
+
49
+ def get_nested_schema(field_schema: dict, depth: int) -> dict:
50
+ if depth >= MAX_DEPTH:
51
+ return {}
52
+
53
+ # If there's a $ref, resolve it first
54
+ if '$ref' in field_schema:
55
+ field_schema = resolve_ref(field_schema['$ref'], schema.get('$defs', {}))
56
+
57
+ nested_props = {}
58
+ if field_schema.get('type') == 'object':
59
+ for name, details in field_schema.get('properties', {}).items():
60
+ if details.get('type') == 'object' or '$ref' in details:
61
+ ref_schema = details
62
+ if '$ref' in details:
63
+ ref_schema = resolve_ref(details['$ref'], schema.get('$defs', {}))
64
+ nested_schema = get_nested_schema(ref_schema, depth + 1)
65
+ nested_props[name] = {
66
+ 'type': 'object',
67
+ 'description': get_enum_description(details),
68
+ 'properties': nested_schema
69
+ }
70
+ else:
71
+ nested_props[name] = {
72
+ 'type': details.get('type', 'string'),
73
+ 'description': get_enum_description(details)
74
+ }
75
+ return nested_props
76
+
77
+ for name, details in schema.get('properties', {}).items():
78
+ # Handle arrays
79
+ if details.get('type') == 'array':
80
+ items = details.get('items', {})
81
+ if '$ref' in items:
82
+ items = resolve_ref(items['$ref'], schema.get('$defs', {}))
83
+
84
+ # Get nested schema for array items
85
+ item_schema = get_nested_schema(items, current_depth)
86
+ description = get_enum_description(details)
87
+
88
+ if item_schema:
89
+ description = f"{description}\nList items schema:\n{json.dumps(item_schema, indent=2)}"
90
+
91
+ prop = Property(
92
+ name=name,
93
+ prop_type='list',
94
+ description=description
95
+ )
96
+ # Handle objects and references
97
+ elif details.get('type') == 'object' or '$ref' in details:
98
+ prop_type = 'object'
99
+ ref_schema = details
100
+ if '$ref' in details:
101
+ ref_schema = resolve_ref(details['$ref'], schema.get('$defs', {}))
102
+
103
+ nested_schema = get_nested_schema(ref_schema, current_depth)
104
+
105
+ prop = Property(
106
+ name=name,
107
+ prop_type=prop_type,
108
+ description=get_enum_description(details),
109
+ properties=nested_schema
110
+ )
111
+
112
+ # Handle primitive types
113
+ else:
114
+ prop = Property(
115
+ name=name,
116
+ prop_type=details.get('type', 'string'),
117
+ description=get_enum_description(details),
118
+ default=str(details.get('default')) if details.get('default') is not None else None
119
+ )
120
+
121
+ properties.append(prop)
122
+
123
+ json_schema = JsonSchema(
124
+ title=schema.get('title', model.__name__),
125
+ properties=properties
126
+ )
127
+
128
+ return json_schema.model_dump(mode="json", exclude_none=True)
chunkr_ai/api/task.py CHANGED
@@ -1,46 +1,10 @@
1
- from .protocol import ChunkrClientProtocol
2
- from .config import Configuration, OutputResponse
1
+ from .config import Configuration
3
2
  from .misc import prepare_upload_data
4
- import asyncio
5
- from datetime import datetime
6
- from enum import Enum
7
- from pydantic import BaseModel, PrivateAttr
3
+ from .task_base import TaskBase
8
4
  import time
9
- from typing import Optional, Union
10
-
11
- class Status(str, Enum):
12
- STARTING = "Starting"
13
- PROCESSING = "Processing"
14
- SUCCEEDED = "Succeeded"
15
- FAILED = "Failed"
16
- CANCELLED = "Cancelled"
17
-
18
- class TaskResponse(BaseModel):
19
- configuration: Configuration
20
- created_at: datetime
21
- expires_at: Optional[datetime] = None
22
- file_name: Optional[str] = None
23
- finished_at: Optional[datetime] = None
24
- input_file_url: Optional[str] = None
25
- message: str
26
- output: Optional[OutputResponse] = None
27
- page_count: Optional[int] = None
28
- pdf_url: Optional[str] = None
29
- started_at: Optional[datetime] = None
30
- status: Status
31
- task_id: str
32
- task_url: Optional[str] = None
33
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
34
-
35
- def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponse':
36
- self._client = client
37
- return self
38
-
39
- def _poll_request_sync(self) -> dict:
40
- """Helper method to make polling request with retry logic (synchronous)"""
41
- if not self.task_url:
42
- raise ValueError("Task URL not found in response")
43
5
 
6
+ class TaskResponse(TaskBase):
7
+ def _poll_request(self) -> dict:
44
8
  while True:
45
9
  try:
46
10
  r = self._client._session.get(self.task_url, headers=self._client._headers())
@@ -52,68 +16,20 @@ class TaskResponse(BaseModel):
52
16
  except Exception as e:
53
17
  raise
54
18
 
55
- async def _poll_request_async(self) -> dict:
56
- """Helper method to make polling request with retry logic (asynchronous)"""
19
+ def poll(self) -> 'TaskResponse':
57
20
  if not self.task_url:
58
21
  raise ValueError("Task URL not found in response")
59
-
60
- while True:
61
- try:
62
- r = await self._client._client.get(self.task_url, headers=self._client._headers())
63
- r.raise_for_status()
64
- response = r.json()
65
- return response
66
- except (ConnectionError, TimeoutError) as _:
67
- print("Connection error while polling the task, retrying...")
68
- await asyncio.sleep(0.5)
69
- except Exception as e:
70
- raise
71
-
72
- def _check_status(self) -> Optional['TaskResponse']:
73
- """Helper method to check task status and handle completion/failure"""
74
- if self.status == "Failed":
75
- raise ValueError(self.message)
76
- if self.status not in ("Starting", "Processing"):
77
- return self
78
- return None
79
-
80
- def poll(self) -> 'TaskResponse':
81
- """Poll the task for completion."""
82
22
  while True:
83
23
  response = self._poll_request_sync()
84
24
  updated_task = TaskResponse(**response).with_client(self._client)
85
25
  self.__dict__.update(updated_task.__dict__)
86
-
87
26
  if result := self._check_status():
88
27
  return result
89
-
90
28
  time.sleep(0.5)
91
-
92
- async def poll_async(self) -> 'TaskResponse':
93
- """Poll the task for completion asynchronously."""
94
- while True:
95
- response = await self._poll_request_async()
96
- updated_task = TaskResponse(**response).with_client(self._client)
97
- self.__dict__.update(updated_task.__dict__)
98
-
99
- if result := self._check_status():
100
- return result
101
-
102
- await asyncio.sleep(0.5)
103
-
104
- def _get_content(self, content_type: str) -> str:
105
- """Helper method to get either HTML, Markdown, or raw content."""
106
- if not self.output:
107
- return ""
108
- parts = []
109
- for c in self.output.chunks:
110
- for s in c.segments:
111
- content = getattr(s, content_type)
112
- if content:
113
- parts.append(content)
114
- return "\n".join(parts)
115
29
 
116
30
  def update(self, config: Configuration) -> 'TaskResponse':
31
+ if not self.task_url:
32
+ raise ValueError("Task URL not found")
117
33
  files = prepare_upload_data(None, config)
118
34
  r = self._client._session.patch(
119
35
  f"{self.task_url}",
@@ -121,56 +37,25 @@ class TaskResponse(BaseModel):
121
37
  headers=self._client._headers()
122
38
  )
123
39
  r.raise_for_status()
124
- return TaskResponse(**r.json()).with_client(self._client)
125
-
126
- async def update_async(self, config: Configuration) -> 'TaskResponse':
127
- files = prepare_upload_data(None, config)
128
- r = await self._client._client.patch(
129
- f"{self.task_url}",
130
- files=files,
131
- headers=self._client._headers()
132
- )
133
- r.raise_for_status()
134
- return TaskResponse(**r.json()).with_client(self._client)
40
+ updated = TaskResponse(**r.json()).with_client(self._client)
41
+ self.__dict__.update(updated.__dict__)
42
+ return self.poll()
135
43
 
136
44
  def cancel(self):
45
+ if not self.task_url:
46
+ raise ValueError("Task URL not found")
137
47
  r = self._client._session.get(
138
48
  f"{self.task_url}/cancel",
139
49
  headers=self._client._headers()
140
50
  )
141
51
  r.raise_for_status()
142
52
  self.poll()
143
-
144
- async def cancel_async(self):
145
- r = await self._client._client.get(
146
- f"{self.task_url}/cancel",
147
- headers=self._client._headers()
148
- )
149
- r.raise_for_status()
150
- await self.poll_async()
151
53
 
152
54
  def delete(self):
55
+ if not self.task_url:
56
+ raise ValueError("Task URL not found")
153
57
  r = self._client._session.delete(
154
- f"{self.task_url}",
58
+ self.task_url,
155
59
  headers=self._client._headers()
156
60
  )
157
61
  r.raise_for_status()
158
-
159
- async def delete_async(self):
160
- r = await self._client._client.delete(
161
- f"{self.task_url}",
162
- headers=self._client._headers()
163
- )
164
- r.raise_for_status()
165
-
166
- def html(self) -> str:
167
- """Get full HTML for the task"""
168
- return self._get_content("html")
169
-
170
- def markdown(self) -> str:
171
- """Get full markdown for the task"""
172
- return self._get_content("markdown")
173
-
174
- def content(self) -> str:
175
- """Get full text for the task"""
176
- return self._get_content("content")
@@ -1,42 +1,23 @@
1
- import asyncio
2
- from pydantic import BaseModel, PrivateAttr
3
- from datetime import datetime
4
- from enum import Enum
5
- from typing import Optional, Union
6
- from .task_base import TaskBase
7
- from .protocol import ChunkrClientProtocol
8
- from .config import Configuration, OutputResponse
1
+ from .config import Configuration
9
2
  from .misc import prepare_upload_data
3
+ from .task_base import TaskBase
4
+ import asyncio
10
5
 
11
- class Status(str, Enum):
12
- STARTING = "Starting"
13
- PROCESSING = "Processing"
14
- SUCCEEDED = "Succeeded"
15
- FAILED = "Failed"
16
- CANCELLED = "Cancelled"
17
-
18
- class TaskResponseAsync(BaseModel, TaskBase):
19
- configuration: Configuration
20
- created_at: datetime
21
- expires_at: Optional[datetime]
22
- file_name: Optional[str]
23
- finished_at: Optional[datetime]
24
- input_file_url: Optional[str]
25
- message: str
26
- output: Optional[OutputResponse]
27
- page_count: Optional[int]
28
- pdf_url: Optional[str]
29
- started_at: Optional[datetime]
30
- status: Status
31
- task_id: str
32
- task_url: Optional[str]
33
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
34
-
35
- def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponseAsync':
36
- self._client = client
37
- return self
6
+ class TaskResponseAsync(TaskBase):
7
+ async def _poll_request(self) -> dict:
8
+ try:
9
+ r = await self._client._client.get(self.task_url, headers=self._client._headers())
10
+ r.raise_for_status()
11
+ return r.json()
12
+ except (ConnectionError, TimeoutError) as _:
13
+ print("Connection error while polling the task, retrying...")
14
+ await asyncio.sleep(0.5)
15
+ except Exception as e:
16
+ raise
38
17
 
39
18
  async def poll(self) -> 'TaskResponseAsync':
19
+ if not self.task_url:
20
+ raise ValueError("Task URL not found")
40
21
  while True:
41
22
  j = await self._poll_request()
42
23
  updated = TaskResponseAsync(**j).with_client(self._client)
@@ -45,28 +26,6 @@ class TaskResponseAsync(BaseModel, TaskBase):
45
26
  return res
46
27
  await asyncio.sleep(0.5)
47
28
 
48
- async def _poll_request(self) -> dict:
49
- if not self.task_url:
50
- raise ValueError("Task URL not found")
51
- while True:
52
- try:
53
- r = await self._client._client.get(self.task_url, headers=self._client._headers())
54
- r.raise_for_status()
55
- return r.json()
56
- except Exception as e:
57
- if self.status == Status.FAILED:
58
- raise ValueError(self.message) from e
59
- await asyncio.sleep(0.5)
60
-
61
- def _check_status(self) -> Optional['TaskResponseAsync']:
62
- if self.status == Status.FAILED:
63
- raise ValueError(f"Task failed: {self.message}")
64
- if self.status == Status.CANCELLED:
65
- return self
66
- if self.status not in [Status.STARTING, Status.PROCESSING]:
67
- return self
68
- return None
69
-
70
29
  async def update(self, config: Configuration) -> 'TaskResponseAsync':
71
30
  if not self.task_url:
72
31
  raise ValueError("Task URL not found")
@@ -85,27 +44,7 @@ class TaskResponseAsync(BaseModel, TaskBase):
85
44
  return await self.poll()
86
45
 
87
46
  async def delete(self):
47
+ if not self.task_url:
48
+ raise ValueError("Task URL not found")
88
49
  r = await self._client._client.delete(self.task_url, headers=self._client._headers())
89
- r.raise_for_status()
90
-
91
- def html(self) -> str:
92
- return self._get_content("html")
93
-
94
- def markdown(self) -> str:
95
- return self._get_content("markdown")
96
-
97
- def content(self) -> str:
98
- return self._get_content("content")
99
-
100
- def _get_content(self, t: str) -> str:
101
- if not self.output:
102
- return ""
103
- parts = []
104
- for c in self.output.chunks:
105
- for s in c.segments:
106
- v = getattr(s, t)
107
- if v:
108
- parts.append(v)
109
- return "\n".join(parts)
110
-
111
- # Satisfying TaskBase abstract methods with stubs
50
+ r.raise_for_status()
@@ -1,31 +1,83 @@
1
- from abc import ABC, abstractmethod
2
1
  from .config import Configuration
2
+ from .protocol import ChunkrClientProtocol
3
+ from ..models import Status, OutputResponse
4
+ from abc import ABC, abstractmethod
5
+ from typing import TypeVar, Optional, Generic, Union
6
+ from pydantic import BaseModel, PrivateAttr
7
+ from datetime import datetime
8
+
9
+ T = TypeVar('T', bound='TaskBase')
10
+
11
+ class TaskBase(BaseModel, ABC, Generic[T]):
12
+ configuration: Configuration
13
+ created_at: datetime
14
+ expires_at: Optional[datetime]
15
+ file_name: Optional[str]
16
+ finished_at: Optional[datetime]
17
+ input_file_url: Optional[str]
18
+ message: str
19
+ output: Optional[OutputResponse]
20
+ page_count: Optional[int]
21
+ pdf_url: Optional[str]
22
+ started_at: Optional[datetime]
23
+ status: Status
24
+ task_id: str
25
+ task_url: Optional[str]
26
+ _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
3
27
 
4
- class TaskBase(ABC):
5
28
  @abstractmethod
6
- def poll(self):
29
+ def _poll_request(self) -> dict:
30
+ """Helper method to make polling request with retry logic (synchronous)"""
7
31
  pass
8
32
 
9
33
  @abstractmethod
10
- def update(self, config: Configuration):
34
+ def poll(self) -> T:
35
+ """Poll the task for completion."""
11
36
  pass
12
37
 
13
38
  @abstractmethod
14
- def cancel(self):
39
+ def update(self, config: Configuration) -> T:
40
+ """Update the task configuration."""
15
41
  pass
16
42
 
17
43
  @abstractmethod
18
- def delete(self):
44
+ def cancel(self) -> T:
45
+ """Cancel the task."""
19
46
  pass
20
47
 
21
48
  @abstractmethod
22
- def html(self) -> str:
49
+ def delete(self) -> T:
50
+ """Delete the task."""
23
51
  pass
24
52
 
25
- @abstractmethod
53
+ def with_client(self, client: Union[ChunkrClientProtocol]) -> T:
54
+ self._client = client
55
+ return self
56
+
57
+ def _check_status(self) -> Optional[T]:
58
+ """Helper method to check task status and handle completion/failure"""
59
+ if self.status == "Failed":
60
+ raise ValueError(self.message)
61
+ if self.status not in ("Starting", "Processing"):
62
+ return self
63
+ return None
64
+
65
+ def html(self) -> str:
66
+ return self._get_content("html")
67
+
26
68
  def markdown(self) -> str:
27
- pass
69
+ return self._get_content("markdown")
28
70
 
29
- @abstractmethod
30
71
  def content(self) -> str:
31
- pass
72
+ return self._get_content("content")
73
+
74
+ def _get_content(self, t: str) -> str:
75
+ if not self.output:
76
+ return ""
77
+ parts = []
78
+ for c in self.output.chunks:
79
+ for s in c.segments:
80
+ v = getattr(s, t)
81
+ if v:
82
+ parts.append(v)
83
+ return "\n".join(parts)
chunkr_ai/models.py CHANGED
@@ -17,9 +17,11 @@ from .api.config import (
17
17
  SegmentProcessing,
18
18
  SegmentType,
19
19
  SegmentationStrategy,
20
+ Status,
20
21
  )
21
22
 
22
- from .api.task import TaskResponse, Status
23
+ from .api.task import TaskResponse
24
+ from .api.task_async import TaskResponseAsync
23
25
 
24
26
  __all__ = [
25
27
  'BoundingBox',
@@ -42,5 +44,6 @@ __all__ = [
42
44
  'SegmentType',
43
45
  'SegmentationStrategy',
44
46
  'Status',
45
- 'TaskResponse'
47
+ 'TaskResponse',
48
+ 'TaskResponseAsync',
46
49
  ]
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.25.0
10
+ Requires-Dist: httpx>=0.25.0
10
11
  Requires-Dist: pillow>=10.0.0
11
12
  Requires-Dist: pydantic>=2.0.0
12
13
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -0,0 +1,19 @@
1
+ chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
+ chunkr_ai/models.py,sha256=-dbwtTHTcGhH3LXUdVUPkobbPoeFNXRizeAW8BCGSkE,903
3
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
5
+ chunkr_ai/api/chunkr.py,sha256=0qpV9b1hOpDhA9EuKkXW9X_laUmw5NY3ZYq0cUOTbww,5190
6
+ chunkr_ai/api/chunkr_async.py,sha256=ZkLBrn4cqzu3sqMfS8cfZZgSvpdyQuWZP95lfGxuHx0,4900
7
+ chunkr_ai/api/chunkr_base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
8
+ chunkr_ai/api/config.py,sha256=joTn7jiOlJXTwwza-jHauLV-39CMzaxZVGB9JBm8Cok,4862
9
+ chunkr_ai/api/misc.py,sha256=9vnfrbJ7sFlZqwEIQ4NTMb5rhPOmETT7e1jR-b42PXM,4977
10
+ chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
11
+ chunkr_ai/api/schema.py,sha256=OeLOhBRXeRBgEImg0Q6O9Z10ojT6aSEVvwnDR8UeENo,4971
12
+ chunkr_ai/api/task.py,sha256=4insrdGEVzBHs1ejZvde8bbEetVzgJELa47UjhfBqCA,2116
13
+ chunkr_ai/api/task_async.py,sha256=LqS-LL-mCOgfGsgvuSXhKkSEUM6MMro-EZHl_ZedQQk,1998
14
+ chunkr_ai/api/task_base.py,sha256=iS5UVIDEPIiDoWrn21Oh_dQurkd_hvKQ8ng32j6sGoA,2369
15
+ chunkr_ai-0.0.12.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ chunkr_ai-0.0.12.dist-info/METADATA,sha256=dfo9myRizW2A5W0H6FpIoBzHa4QxmEe3lsedPYhwjXM,4874
17
+ chunkr_ai-0.0.12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
18
+ chunkr_ai-0.0.12.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
19
+ chunkr_ai-0.0.12.dist-info/RECORD,,
chunkr_ai/main.py DELETED
@@ -1,12 +0,0 @@
1
- from chunkr_ai.api.chunkr import Chunkr
2
- from chunkr_ai.models import Configuration
3
- from chunkr_ai.api.config import SegmentationStrategy, ChunkProcessing
4
-
5
- if __name__ == "__main__":
6
- chunkr = Chunkr()
7
- task = chunkr.update_task("556b4fe5-e3f7-48dc-9f56-0fb7fbacdb87", Configuration(
8
- chunk_processing=ChunkProcessing(
9
- target_length=1000
10
- )
11
- ))
12
- print(task)
@@ -1,19 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
- chunkr_ai/main.py,sha256=_MT1lcnNiXjVW9ZkZYl28SB_f6M9g_IOgZxvhodTzAo,394
3
- chunkr_ai/models.py,sha256=T8_F-Y1US21ZJVzLIaroqp-Hd0_ZFbdkbEOxr63-PNE,827
4
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
6
- chunkr_ai/api/chunkr.py,sha256=0qpV9b1hOpDhA9EuKkXW9X_laUmw5NY3ZYq0cUOTbww,5190
7
- chunkr_ai/api/chunkr_async.py,sha256=ZkLBrn4cqzu3sqMfS8cfZZgSvpdyQuWZP95lfGxuHx0,4900
8
- chunkr_ai/api/chunkr_base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
9
- chunkr_ai/api/config.py,sha256=eu7a28UjlNaM3QRrzElRTQXqMPBynAvlusVSIAMNXUY,4203
10
- chunkr_ai/api/misc.py,sha256=DiY-BV5nPMDVKAiHTcND8w-8mSB-dENxrOhxnkyEoRA,6034
11
- chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
12
- chunkr_ai/api/task.py,sha256=EB6RK8ms7EaNj57tNJZoNgNMHGWKXFhkQ1WC7gk5ht4,6059
13
- chunkr_ai/api/task_async.py,sha256=Dd-Fenie0Q6GxXce7OlXvuQ14NQ58F_0b9P7AGKWyYA,3833
14
- chunkr_ai/api/task_base.py,sha256=Tkk7dhIeB3ic5M9g_b-MVRdNv4XQTvajpaUy8JylQ8A,526
15
- chunkr_ai-0.0.10.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- chunkr_ai-0.0.10.dist-info/METADATA,sha256=W8PCDpT4hN5tpn_9fyVrjEbd0abG0ReP5reG4_9Glp8,4845
17
- chunkr_ai-0.0.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
18
- chunkr_ai-0.0.10.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
19
- chunkr_ai-0.0.10.dist-info/RECORD,,