chunkr-ai 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
chunkr_ai/api/schema.py DELETED
@@ -1,136 +0,0 @@
1
- from pydantic import BaseModel
2
- from typing import Optional, List, Union, Type
3
- import json
4
-
5
-
6
class Property(BaseModel):
    """One named entry in the schema produced by ``from_pydantic``."""

    # Field name, taken from the key in the source model's JSON schema.
    name: str
    # Type label for the field: "list", "object", or the JSON-schema
    # primitive type name reported for the field.
    prop_type: str
    # Human-readable description; may carry appended enum values or a
    # serialized list-item schema.
    description: Optional[str] = None
    # Default value rendered as a string, when the source field has one.
    default: Optional[str] = None
11
-
12
-
13
class JsonSchema(BaseModel):
    """Top-level schema container: a title plus its list of properties."""

    # Schema title; ``from_pydantic`` fills it from the model's JSON schema
    # title, falling back to the model class name.
    title: str
    # Properties in the order they appear in the source model's schema.
    properties: List[Property]
16
-
17
-
18
def from_pydantic(
    pydantic: Union[BaseModel, Type[BaseModel]], current_depth: int = 0
) -> dict:
    """Convert a Pydantic model to a Chunk json schema.

    Args:
        pydantic: A Pydantic model class or an instance of one.
        current_depth: Starting depth used when expanding nested object
            schemas; expansion stops once the depth reaches ``MAX_DEPTH``.

    Returns:
        The ``JsonSchema`` dumped to a JSON-compatible dict with ``None``
        values excluded.
    """
    MAX_DEPTH = 5
    REF_PREFIX = "#/$defs/"

    model = pydantic if isinstance(pydantic, type) else pydantic.__class__
    schema = model.model_json_schema()
    definitions = schema.get("$defs", {})

    def resolve_ref(ref: str) -> dict:
        """Resolve a local ``#/$defs/...`` reference; unknown refs give {}."""
        if not ref.startswith(REF_PREFIX):
            return {}
        return definitions.get(ref[len(REF_PREFIX):], {})

    def resolve(details: dict) -> dict:
        """Return the referenced schema if ``details`` is a $ref, else itself."""
        if "$ref" in details:
            return resolve_ref(details["$ref"])
        return details

    def get_enum_description(details: dict) -> str:
        """Get the description, with allowed enum values appended if any."""
        description = details.get("description", "")
        # A direct enum wins; otherwise look through a $ref to an enum.
        source = details if "enum" in details else resolve(details)
        if "enum" not in source:
            return description
        allowed = "\n".join(f"- {val}" for val in source["enum"])
        return f"{description}\nAllowed values:\n{allowed}"

    def is_object(details: dict) -> bool:
        """Tell whether a field should be expanded as a nested object.

        A $ref that resolves to an enum definition is a scalar, not an
        object, so it is excluded here and handled as a primitive.
        """
        if details.get("type") == "object":
            return True
        return "$ref" in details and "enum" not in resolve_ref(details["$ref"])

    def primitive_type(details: dict) -> str:
        """Type name for a non-object field (enum refs use the enum's type)."""
        return resolve(details).get("type", "string")

    def get_nested_schema(field_schema: dict, depth: int) -> dict:
        """Expand an object schema into a plain dict, bounded by MAX_DEPTH."""
        if depth >= MAX_DEPTH:
            return {}

        field_schema = resolve(field_schema)
        if field_schema.get("type") != "object":
            return {}

        nested_props = {}
        for name, details in field_schema.get("properties", {}).items():
            description = get_enum_description(details)
            if is_object(details):
                nested_props[name] = {
                    "type": "object",
                    "description": description,
                    "properties": get_nested_schema(resolve(details), depth + 1),
                }
            else:
                nested_props[name] = {
                    "type": primitive_type(details),
                    "description": description,
                }
        return nested_props

    properties = []
    for name, details in schema.get("properties", {}).items():
        description = get_enum_description(details)

        if details.get("type") == "array":
            # Arrays carry their item schema inside the description text.
            item_schema = get_nested_schema(
                resolve(details.get("items", {})), current_depth
            )
            if item_schema:
                description = f"{description}\nList items schema:\n{json.dumps(item_schema, indent=2)}"
            prop = Property(name=name, prop_type="list", description=description)
        elif is_object(details):
            # NOTE(review): ``Property`` as declared in this file has no
            # ``properties`` field, so with pydantic's default handling of
            # extra keyword arguments this nested schema is presumably
            # dropped from the output. Kept as-is so the payload does not
            # change; confirm whether the field should be added.
            prop = Property(
                name=name,
                prop_type="object",
                description=description,
                properties=get_nested_schema(resolve(details), current_depth),
            )
        else:
            default = details.get("default")
            prop = Property(
                name=name,
                prop_type=primitive_type(details),
                description=description,
                default=None if default is None else str(default),
            )

        properties.append(prop)

    json_schema = JsonSchema(
        title=schema.get("title", model.__name__), properties=properties
    )
    return json_schema.model_dump(mode="json", exclude_none=True)
chunkr_ai/api/task.py DELETED
@@ -1,66 +0,0 @@
1
- from .config import Configuration
2
- from .misc import prepare_upload_data
3
- from .task_base import TaskBase
4
- import time
5
-
6
-
7
class TaskResponse(TaskBase):
    """Synchronous task handle that talks to the API through the client's session."""

    def _session_or_raise(self, url_error: str = "Task URL not found"):
        """Return the client's session after checking the task URL and session.

        Args:
            url_error: Message used when ``task_url`` is missing.

        Raises:
            ValueError: If ``task_url`` or the client session is not set.
        """
        if not self.task_url:
            raise ValueError(url_error)
        session = self._client._session
        if not session:
            raise ValueError("Client session not found")
        return session

    def _poll_request(self) -> dict:
        """Fetch the task's current state, retrying on connection errors.

        Returns:
            The decoded JSON body of the task endpoint.

        Raises:
            ValueError: If the task URL or client session is missing.
        """
        while True:
            try:
                session = self._session_or_raise("Task URL not found in response")
                r = session.get(self.task_url, headers=self._client._headers())
                r.raise_for_status()
                return r.json()
            # NOTE(review): these are the builtin exception types; if the
            # session's HTTP library raises its own connection/timeout
            # classes that do not derive from them, this retry never
            # triggers -- confirm against the client implementation.
            except (ConnectionError, TimeoutError):
                print("Connection error while polling the task, retrying...")
                time.sleep(0.5)

    def poll(self) -> "TaskResponse":
        """Poll until ``_check_status`` reports a result, refreshing this object in place."""
        while True:
            response = self._poll_request()
            updated_task = TaskResponse(**response).with_client(self._client)
            self.__dict__.update(updated_task.__dict__)
            if result := self._check_status():
                return result
            time.sleep(0.5)

    def update(self, config: Configuration) -> "TaskResponse":
        """PATCH the task with a new configuration, then poll for the result.

        Raises:
            ValueError: If the task URL or client session is missing.
        """
        session = self._session_or_raise()
        files = prepare_upload_data(None, config)
        r = session.patch(
            self.task_url, files=files, headers=self._client._headers()
        )
        r.raise_for_status()
        updated = TaskResponse(**r.json()).with_client(self._client)
        self.__dict__.update(updated.__dict__)
        return self.poll()

    def cancel(self) -> "TaskResponse":
        """Request cancellation of the task, then poll for its resulting state.

        Returns the polled task, matching the async variant; callers that
        ignored the previous ``None`` return are unaffected.

        Raises:
            ValueError: If the task URL or client session is missing.
        """
        session = self._session_or_raise()
        r = session.get(
            f"{self.task_url}/cancel", headers=self._client._headers()
        )
        r.raise_for_status()
        return self.poll()

    def delete(self):
        """Issue a DELETE request for the task.

        Raises:
            ValueError: If the task URL or client session is missing.
        """
        session = self._session_or_raise()
        r = session.delete(self.task_url, headers=self._client._headers())
        r.raise_for_status()
@@ -1,69 +0,0 @@
1
- from .config import Configuration
2
- from .misc import prepare_upload_data
3
- from .task_base import TaskBase
4
- import asyncio
5
-
6
-
7
class TaskResponseAsync(TaskBase):
    """Asynchronous task handle that talks to the API through the client's async client."""

    def _http_or_raise(self, require_url: bool = True):
        """Return the underlying async HTTP client after validating state.

        Args:
            require_url: When true, also require ``task_url`` to be set.

        Raises:
            ValueError: If the task URL (when required) or the client is missing.
        """
        if require_url and not self.task_url:
            raise ValueError("Task URL not found")
        http = self._client._client
        if not http:
            raise ValueError("Client not found")
        return http

    async def _poll_request(self) -> dict:
        """Fetch the task's current state, retrying on connection errors.

        The request is retried in a loop: without it, a connection error
        would fall through and return ``None``, which ``poll`` cannot unpack.

        Returns:
            The decoded JSON body of the task endpoint.

        Raises:
            ValueError: If the client is missing.
        """
        while True:
            try:
                http = self._http_or_raise(require_url=False)
                r = await http.get(self.task_url, headers=self._client._headers())
                r.raise_for_status()
                return r.json()
            # NOTE(review): these are the builtin exception types; if the
            # HTTP client raises its own connection/timeout classes that do
            # not derive from them, this retry never triggers -- confirm.
            except (ConnectionError, TimeoutError):
                print("Connection error while polling the task, retrying...")
                await asyncio.sleep(0.5)

    async def poll(self) -> "TaskResponseAsync":
        """Poll until ``_check_status`` reports a result, refreshing this object in place.

        Raises:
            ValueError: If the task URL or client is missing.
        """
        self._http_or_raise()
        while True:
            j = await self._poll_request()
            updated = TaskResponseAsync(**j).with_client(self._client)
            self.__dict__.update(updated.__dict__)
            if res := self._check_status():
                return res
            await asyncio.sleep(0.5)

    async def update(self, config: Configuration) -> "TaskResponseAsync":
        """PATCH the task with a new configuration, then poll for the result.

        Raises:
            ValueError: If the task URL or client is missing.
        """
        http = self._http_or_raise()
        f = prepare_upload_data(None, config)
        r = await http.patch(
            self.task_url, files=f, headers=self._client._headers()
        )
        r.raise_for_status()
        updated = TaskResponseAsync(**r.json()).with_client(self._client)
        self.__dict__.update(updated.__dict__)
        return await self.poll()

    async def cancel(self):
        """Request cancellation of the task, then poll for its resulting state.

        Raises:
            ValueError: If the task URL or client is missing.
        """
        http = self._http_or_raise()
        r = await http.get(
            f"{self.task_url}/cancel", headers=self._client._headers()
        )
        r.raise_for_status()
        return await self.poll()

    async def delete(self):
        """Issue a DELETE request for the task.

        Raises:
            ValueError: If the task URL or client is missing.
        """
        http = self._http_or_raise()
        r = await http.delete(self.task_url, headers=self._client._headers())
        r.raise_for_status()
@@ -1,85 +0,0 @@
1
- from .config import Configuration, Status, OutputResponse
2
- from .protocol import ChunkrClientProtocol
3
- from abc import ABC, abstractmethod
4
- from typing import TypeVar, Optional, Generic
5
- from pydantic import BaseModel, PrivateAttr
6
- from datetime import datetime
7
-
8
T = TypeVar("T", bound="TaskBase")


class TaskBase(BaseModel, ABC, Generic[T]):
    """Shared task model: the task's fields plus content helpers.

    Concrete subclasses supply the request-making methods declared
    abstract here.
    """

    configuration: Configuration
    created_at: datetime
    expires_at: Optional[datetime]
    file_name: Optional[str]
    finished_at: Optional[datetime]
    input_file_url: Optional[str]
    message: str
    output: Optional[OutputResponse]
    page_count: Optional[int]
    pdf_url: Optional[str]
    started_at: Optional[datetime]
    status: Status
    task_id: str
    task_url: Optional[str]
    # Client used by subclasses to issue requests; attached via with_client().
    _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)

    @abstractmethod
    def _poll_request(self) -> dict:
        """Make one polling request (with retry handling) and return its JSON."""
        ...

    @abstractmethod
    def poll(self) -> T:
        """Wait for the task to finish and return it."""
        ...

    @abstractmethod
    def update(self, config: Configuration) -> T:
        """Apply a new configuration to the task."""
        ...

    @abstractmethod
    def cancel(self) -> T:
        """Cancel the task."""
        ...

    @abstractmethod
    def delete(self) -> T:
        """Delete the task."""
        ...

    def with_client(self, client: ChunkrClientProtocol) -> T:
        """Attach ``client`` to this task and return the task itself."""
        self._client = client
        return self

    def _check_status(self) -> Optional[T]:
        """Return self once the task is no longer running; raise if it failed.

        Raises:
            ValueError: With the task's message when the status is "Failed".
        """
        if self.status == "Failed":
            raise ValueError(self.message)
        still_running = self.status in ("Starting", "Processing")
        return None if still_running else self

    def html(self) -> str:
        """Return the HTML of every segment, joined by newlines."""
        return self._get_content("html")

    def markdown(self) -> str:
        """Return the markdown of every segment, joined by newlines."""
        return self._get_content("markdown")

    def content(self) -> str:
        """Return the content of every segment, joined by newlines."""
        return self._get_content("content")

    def _get_content(self, t: str) -> str:
        """Join the non-empty ``t`` attribute of each segment across all chunks.

        Returns an empty string when the task has no output.
        """
        if not self.output:
            return ""
        values = (
            getattr(segment, t)
            for chunk in self.output.chunks
            for segment in chunk.segments
        )
        return "\n".join(value for value in values if value)
@@ -1,21 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=q5YosvCNXPNGjV10pZY1gcvdosqUh38nVQTQA9g8EuM,110
2
- chunkr_ai/models.py,sha256=hahbtxtTyzE_ygFgmlZwbfM6Vj2k5uSDEP02psxDOSQ,924
3
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
- chunkr_ai/api/base.py,sha256=QvHl8FInKHYKPLWDeEPpCchB1uktzOwTW7iPnyXccUc,6449
7
- chunkr_ai/api/chunkr.py,sha256=0extAWVeZtI7B-g14smTfFZD_csdJNCcVNXx2_L69OQ,2617
8
- chunkr_ai/api/chunkr_async.py,sha256=aa0s_tnYoujHBsfe8uLiPpVEnb2l9A3CXwPP34w9Mk8,4127
9
- chunkr_ai/api/chunkr_base.py,sha256=k34Dyt1f21NBWZvZJ3w6Svvpg4SKnzr2ldGQ4ib96Wc,4951
10
- chunkr_ai/api/config.py,sha256=TWl0Az6acKQCS1LIpKD4qr_lQ_63wqQ5M6calpLOlDM,5040
11
- chunkr_ai/api/misc.py,sha256=bQpURc7soT5GL2ZpY7EiYyvPYWEzDM9qaX-UHa-oFeI,4909
12
- chunkr_ai/api/protocol.py,sha256=lxIR_qoCA2a1OXjpq3LrWMdS0jRHct1bEmBlUzV8gvE,526
13
- chunkr_ai/api/schema.py,sha256=yYesvueGgtmRa7Fi_Tpdv8A2bzHlx-B-5DxRAPlaDHo,4926
14
- chunkr_ai/api/task.py,sha256=28J4dR8BDjvtkh3CQjW_YUEkgPXhCHBGu0wH6AQKKuE,2474
15
- chunkr_ai/api/task_async.py,sha256=K5hTEOnmD42snPZg_JtJsVWg6QBUFZ1aBz1Abwv58-A,2529
16
- chunkr_ai/api/task_base.py,sha256=KLiMhvvbCgcilguQKrtEPMlNs8oaatfQUtn8pYt9t6g,2467
17
- chunkr_ai-0.0.17.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- chunkr_ai-0.0.17.dist-info/METADATA,sha256=giy1xeKYXk18W5U-baNoAAlvXciJldhA_EBi87NqKpA,4839
19
- chunkr_ai-0.0.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
20
- chunkr_ai-0.0.17.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
21
- chunkr_ai-0.0.17.dist-info/RECORD,,