inferencesh 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inferencesh/__init__.py CHANGED
@@ -2,4 +2,31 @@
2
2
 
3
3
  __version__ = "0.1.2"
4
4
 
5
- from .sdk import BaseApp, BaseAppInput, BaseAppOutput, File, LLMInput, ContextMessage, ContextMessageWithImage, LLMInputWithImage
5
+ from .models import (
6
+ BaseApp,
7
+ BaseAppInput,
8
+ BaseAppOutput,
9
+ File,
10
+ ContextMessageRole,
11
+ Message,
12
+ ContextMessage,
13
+ ContextMessageWithImage,
14
+ LLMInput,
15
+ LLMInputWithImage,
16
+ )
17
+ from .utils import StorageDir, download
18
+
19
+ __all__ = [
20
+ "BaseApp",
21
+ "BaseAppInput",
22
+ "BaseAppOutput",
23
+ "File",
24
+ "ContextMessageRole",
25
+ "Message",
26
+ "ContextMessage",
27
+ "ContextMessageWithImage",
28
+ "LLMInput",
29
+ "LLMInputWithImage",
30
+ "StorageDir",
31
+ "download",
32
+ ]
@@ -0,0 +1,25 @@
1
+ """Models package for inference.sh SDK."""
2
+
3
+ from .base import BaseApp, BaseAppInput, BaseAppOutput
4
+ from .file import File
5
+ from .llm import (
6
+ ContextMessageRole,
7
+ Message,
8
+ ContextMessage,
9
+ ContextMessageWithImage,
10
+ LLMInput,
11
+ LLMInputWithImage,
12
+ )
13
+
14
+ __all__ = [
15
+ "BaseApp",
16
+ "BaseAppInput",
17
+ "BaseAppOutput",
18
+ "File",
19
+ "ContextMessageRole",
20
+ "Message",
21
+ "ContextMessage",
22
+ "ContextMessageWithImage",
23
+ "LLMInput",
24
+ "LLMInputWithImage",
25
+ ]
@@ -0,0 +1,94 @@
1
+ from typing import Any, Dict, List
2
+ from pydantic import BaseModel, ConfigDict
3
+ import inspect
4
+ import ast
5
+ import textwrap
6
+ from collections import OrderedDict
7
+
8
+
9
class OrderedSchemaModel(BaseModel):
    """A base model that ensures the JSON schema properties and required fields are in the order of field definition."""

    @classmethod
    def model_json_schema(cls, by_alias: bool = True, **kwargs: Any) -> Dict[str, Any]:
        """Return the model's JSON schema with fields in definition order.

        The schema is produced by the parent implementation; afterwards the
        ``properties`` mapping and the ``required`` list are reordered to
        follow ``_get_field_order()``. Entries that are not named in the
        field order keep their relative position and are appended at the end.

        Args:
            by_alias: Forwarded unchanged to the parent implementation.
            **kwargs: Forwarded unchanged to the parent implementation.

        Returns:
            The schema dictionary, reordered in place.
        """
        schema = super().model_json_schema(by_alias=by_alias, **kwargs)

        field_order = cls._get_field_order()
        properties = schema.get('properties')

        # Nothing to reorder: no known order, or a schema without properties.
        if not field_order or properties is None:
            return schema

        # Fields named in field_order first, in that order ...
        ordered_properties = OrderedDict(
            (field_name, properties[field_name])
            for field_name in field_order
            if field_name in properties
        )
        # ... then everything else (e.g. inherited or aliased fields).
        for field_name, field_schema in properties.items():
            if field_name not in ordered_properties:
                ordered_properties[field_name] = field_schema
        schema['properties'] = ordered_properties

        if 'required' in schema:
            required = schema['required']
            ordered_required = [field for field in field_order if field in required]
            ordered_required.extend(field for field in required if field not in ordered_required)
            schema['required'] = ordered_required

        return schema

    @classmethod
    def _get_field_order(cls) -> List[str]:
        """Get the order of fields as they were defined in the class.

        The class source is parsed and the names of its annotated assignments
        are returned in order of appearance. When the source cannot be read or
        parsed (``inspect.getsource`` raises ``OSError``/``TypeError`` for
        classes without a source file), the order of ``cls.model_fields`` is
        used instead of raising.
        """
        try:
            source = textwrap.dedent(inspect.getsource(cls))
        except (OSError, TypeError):
            return list(cls.model_fields)

        try:
            module = ast.parse(source)
        except IndentationError:
            # Still mis-indented after dedent: nest the source inside a dummy
            # class so the parser accepts it, then look at the first inner node.
            wrapped = f"class DummyModule:\n{textwrap.indent(source, ' ')}"
            try:
                module = ast.parse(wrapped)
            except SyntaxError:
                return list(cls.model_fields)
            # noinspection PyUnresolvedReferences
            class_def = module.body[0].body[0]
        else:
            class_def = next(
                (
                    node for node in module.body
                    if isinstance(node, ast.ClassDef) and node.name == cls.__name__
                ),
                None,
            )

        if not isinstance(class_def, ast.ClassDef):
            return list(cls.model_fields)

        # Annotated assignments (``name: type [= value]``) in definition order.
        return [
            node.target.id
            for node in class_def.body
            if isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)
        ]
71
+
72
+
73
class BaseAppInput(OrderedSchemaModel):
    # Base class for app input models; declares no fields of its own and
    # inherits the definition-ordered JSON schema from OrderedSchemaModel.
    ...
75
+
76
+
77
class BaseAppOutput(OrderedSchemaModel):
    # Base class for app output models; declares no fields of its own and
    # inherits the definition-ordered JSON schema from OrderedSchemaModel.
    ...
79
+
80
+
81
class BaseApp(BaseModel):
    # Base class for apps: subclasses override ``run`` and may override the
    # ``setup`` / ``unload`` hooks, which do nothing by default.
    # The model accepts arbitrary field types and keeps unknown attributes.
    model_config = ConfigDict(extra='allow', arbitrary_types_allowed=True)

    async def setup(self):
        """Hook with no default behaviour; returns ``None``."""
        return None

    async def run(self, app_input: BaseAppInput) -> BaseAppOutput:
        """Process ``app_input``; the base implementation always raises.

        Raises:
            NotImplementedError: Always, until a subclass overrides this method.
        """
        raise NotImplementedError("run method must be implemented")

    async def unload(self):
        """Hook with no default behaviour; returns ``None``."""
        return None
@@ -0,0 +1,182 @@
1
+ from typing import Optional, Union, Any
2
+ from pydantic import BaseModel, Field, PrivateAttr, model_validator
3
+ import mimetypes
4
+ import os
5
+ import urllib.request
6
+ import urllib.parse
7
+ import tempfile
8
+ from tqdm import tqdm
9
+
10
+
11
class File(BaseModel):
    """A class representing a file in the inference.sh ecosystem."""
    uri: Optional[str] = Field(default=None)  # Original location (URL or file path)
    path: Optional[str] = None  # Resolved local file path
    content_type: Optional[str] = None  # MIME type of the file
    size: Optional[int] = None  # File size in bytes
    filename: Optional[str] = None  # Original filename if available
    _tmp_path: Optional[str] = PrivateAttr(default=None)  # Internal storage for temporary file path

    def __init__(self, initializer=None, **data):
        """Create a File from a string, another File, or keyword fields.

        Args:
            initializer: Optional positional shortcut. A ``str`` is used as
                ``uri``; a ``File`` is copied via ``model_dump()`` (replacing
                any keyword fields that were passed).
            **data: Field values, used as-is when no initializer is given.

        Raises:
            ValueError: If ``initializer`` is neither a ``str`` nor a ``File``.
        """
        if initializer is not None:
            if isinstance(initializer, str):
                data['uri'] = initializer
            elif isinstance(initializer, File):
                data = initializer.model_dump()
            else:
                raise ValueError(f'Invalid input for File: {initializer}')
        super().__init__(**data)

    @model_validator(mode='before')
    @classmethod
    def convert_str_to_file(cls, values):
        """Accept a bare string as shorthand for ``{"uri": <string>}``.

        Raises:
            ValueError: If the raw input is neither a ``str`` nor a ``dict``.
        """
        if isinstance(values, str):  # Only accept strings
            return {"uri": values}
        elif isinstance(values, dict):
            return values
        raise ValueError(f'Invalid input for File: {values}')

    @model_validator(mode='after')
    def validate_required_fields(self) -> 'File':
        """Validate that either uri or path is provided."""
        if not self.uri and not self.path:
            raise ValueError("Either 'uri' or 'path' must be provided")
        return self

    def model_post_init(self, _: Any) -> None:
        """Initialize file path and metadata after model creation.

        This method handles:
        1. Downloading URLs to local files if uri is a URL
        2. Converting relative paths to absolute paths
        3. Populating file metadata

        Raises:
            ValueError: If no usable ``path`` results from ``uri``/``path``.
            RuntimeError: If ``uri`` is a URL and the download fails.
        """
        # Handle uri if provided
        if self.uri:
            if self._is_url(self.uri):
                self._download_url()
            else:
                # Convert relative paths to absolute, leave absolute paths unchanged
                self.path = os.path.abspath(self.uri)

        # Handle path if provided
        if self.path:
            # Convert relative paths to absolute, leave absolute paths unchanged
            self.path = os.path.abspath(self.path)
            self._populate_metadata()
            return

        raise ValueError("Either 'uri' or 'path' must be provided and be valid")

    def _is_url(self, path: str) -> bool:
        """Check if the path is a URL (http or https scheme only)."""
        parsed = urllib.parse.urlparse(path)
        return parsed.scheme in ('http', 'https')

    def _download_url(self) -> None:
        """Download the URL to a temporary file and update the path.

        On success ``path`` and ``_tmp_path`` both name the downloaded file.
        On failure the temporary file is removed and ``_tmp_path`` is reset.

        Raises:
            RuntimeError: On any failure; the original exception is chained.
        """
        original_url = self.uri
        tmp_file = None
        try:
            # Create a temporary file with a suffix based on the URL path
            suffix = os.path.splitext(urllib.parse.urlparse(original_url).path)[1]
            tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
            self._tmp_path = tmp_file.name
            # Only the reserved name is needed: the data is written through a
            # separate open() below, so release this handle right away instead
            # of leaving it open for the lifetime of the process.
            tmp_file.close()

            # Set up request with user agent
            headers = {
                'User-Agent': (
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                    'AppleWebKit/537.36 (KHTML, like Gecko) '
                    'Chrome/91.0.4472.124 Safari/537.36'
                )
            }
            req = urllib.request.Request(original_url, headers=headers)

            # Download the file with progress bar
            print(f"Downloading URL: {original_url} to {self._tmp_path}")
            try:
                with urllib.request.urlopen(req) as response:
                    total_size = int(response.headers.get('content-length', 0))
                    block_size = 1024  # 1 Kibibyte

                    with tqdm(total=total_size, unit='iB', unit_scale=True) as pbar:
                        with open(self._tmp_path, 'wb') as out_file:
                            while True:
                                buffer = response.read(block_size)
                                if not buffer:
                                    break
                                out_file.write(buffer)
                                pbar.update(len(buffer))

                self.path = self._tmp_path
            except (urllib.error.URLError, urllib.error.HTTPError) as e:
                raise RuntimeError(f"Failed to download URL {original_url}: {str(e)}") from e
            except IOError as e:
                raise RuntimeError(f"Failed to write downloaded file to {self._tmp_path}: {str(e)}") from e
        except Exception as e:
            # Clean up temp file if something went wrong
            if tmp_file is not None and self._tmp_path:
                try:
                    os.unlink(self._tmp_path)
                except OSError:
                    pass
                # The file is gone; make sure __del__ does not try again.
                self._tmp_path = None
            raise RuntimeError(f"Error downloading URL {original_url}: {str(e)}") from e

    def __del__(self):
        """Cleanup temporary file if it exists."""
        if hasattr(self, '_tmp_path') and self._tmp_path:
            try:
                os.unlink(self._tmp_path)
            except (OSError, IOError):
                pass

    def _populate_metadata(self) -> None:
        """Populate file metadata from the path if it exists.

        Only fields that are currently empty/falsy are filled in.
        """
        if os.path.exists(self.path):
            if not self.content_type:
                self.content_type = self._guess_content_type()
            if not self.size:
                self.size = self._get_file_size()
            if not self.filename:
                self.filename = self._get_filename()

    @classmethod
    def from_path(cls, path: Union[str, os.PathLike]) -> 'File':
        """Create a File instance from a file path."""
        return cls(uri=str(path))

    def _guess_content_type(self) -> Optional[str]:
        """Guess the MIME type of the file from its path; ``None`` if unknown."""
        return mimetypes.guess_type(self.path)[0]

    def _get_file_size(self) -> int:
        """Get the size of the file in bytes."""
        return os.path.getsize(self.path)

    def _get_filename(self) -> str:
        """Get the base filename from the path."""
        return os.path.basename(self.path)

    def exists(self) -> bool:
        """Check if the file exists."""
        return os.path.exists(self.path)

    def refresh_metadata(self) -> None:
        """Refresh all metadata from the file, overwriting current values."""
        if os.path.exists(self.path):
            self.content_type = self._guess_content_type()
            self.size = self._get_file_size()  # Always update size
            self.filename = self._get_filename()

    @classmethod
    def model_json_schema(cls, **kwargs):
        """Return a schema accepting either a plain string or the full object."""
        schema = super().model_json_schema(**kwargs)
        schema["$id"] = "/schemas/File"
        # Create a schema that accepts either a string or the full object
        return {
            "oneOf": [
                {"type": "string"},  # Accept string input
                schema  # Accept full object input
            ]
        }
@@ -0,0 +1,356 @@
1
+ from typing import Optional, List, Any, Callable, Dict, Generator
2
+ from enum import Enum
3
+ from pydantic import Field
4
+ from queue import Queue
5
+ from threading import Thread
6
+ import time
7
+ from contextlib import contextmanager
8
+
9
+ from .base import BaseAppInput, BaseAppOutput
10
+ from .file import File
11
+
12
+
13
class ContextMessageRole(str, Enum):
    # Closed set of speaker roles. The ``str`` mixin makes every member
    # compare equal to its plain-string value.
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
17
+
18
+
19
class Message(BaseAppInput):
    # A role paired with plain-text content; both fields are required.
    role: ContextMessageRole
    content: str
22
+
23
+
24
class ContextMessage(BaseAppInput):
    # One earlier turn of a conversation: required role and text, plus an
    # optional image attachment that defaults to None.
    role: ContextMessageRole = Field(description="The role of the message")
    text: str = Field(description="The text content of the message")
    image: Optional[File] = Field(
        default=None,
        description="The image url of the message",
    )
35
+
36
class LLMInput(BaseAppInput):
    # Input for a chat-style LLM call: system prompt, earlier conversation
    # turns, the current user text/image and sampling parameters.
    system_prompt: str = Field(
        description="The system prompt to use for the model",
        default="You are a helpful assistant that can answer questions and help with tasks.",
        examples=[
            "You are a helpful assistant that can answer questions and help with tasks.",
            "You are a certified medical professional who can provide accurate health information.",
            "You are a certified financial advisor who can give sound investment guidance.",
            "You are a certified cybersecurity expert who can explain security best practices.",
            "You are a certified environmental scientist who can discuss climate and sustainability.",
        ]
    )
    # The examples use the ContextMessage shape (role/text) so that each one
    # is a valid value for this field.
    context: List[ContextMessage] = Field(
        description="The context to use for the model",
        examples=[
            [
                {"role": "user", "text": "What is the capital of France?"},
                {"role": "assistant", "text": "The capital of France is Paris."}
            ],
            [
                {"role": "user", "text": "What is the weather like today?"},
                {"role": "assistant", "text": "I apologize, but I don't have access to real-time weather information. You would need to check a weather service or app to get current weather conditions for your location."}
            ],
            [
                {"role": "user", "text": "Can you help me write a poem about spring?"},
                {"role": "assistant", "text": "Here's a short poem about spring:\n\nGreen buds awakening,\nSoft rain gently falling down,\nNew life springs anew.\n\nWarm sun breaks through clouds,\nBirds return with joyful song,\nNature's sweet rebirth."}
            ],
            [
                {"role": "user", "text": "Explain quantum computing in simple terms"},
                {"role": "assistant", "text": "Quantum computing is like having a super-powerful calculator that can solve many problems at once instead of one at a time. While regular computers use bits (0s and 1s), quantum computers use quantum bits or \"qubits\" that can be both 0 and 1 at the same time - kind of like being in two places at once! This allows them to process huge amounts of information much faster than regular computers for certain types of problems."}
            ]
        ],
        default=[]
    )
    text: str = Field(
        description="The user prompt to use for the model",
        examples=[
            "What is the capital of France?",
            "What is the weather like today?",
            "Can you help me write a poem about spring?",
            "Explain quantum computing in simple terms"
        ],
    )
    image: Optional[File] = Field(
        description="The image to use for the model",
        default=None
    )
    # Optional parameters
    temperature: float = Field(default=0.7)
    top_p: float = Field(default=0.95)
    max_tokens: int = Field(default=4096)
    context_size: int = Field(default=4096)

    # Model specific flags
    enable_thinking: bool = Field(default=False)


# Backward-compatible aliases. The package-level ``__init__`` modules import
# ``ContextMessageWithImage`` and ``LLMInputWithImage`` from this module, but
# no such classes are defined here; without these names the import fails.
# ContextMessage and LLMInput both carry an optional ``image`` field.
# NOTE(review): confirm plain aliases match what the removed classes offered.
ContextMessageWithImage = ContextMessage
LLMInputWithImage = LLMInput
91
+
92
class LLMUsage(BaseAppOutput):
    # Usage and timing figures for one generation; every field is optional
    # and defaults to an empty string or zero.
    stop_reason: str = Field(default="")
    time_to_first_token: float = Field(default=0.0)
    tokens_per_second: float = Field(default=0.0)
    prompt_tokens: int = Field(default=0)
    completion_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
99
+
100
+
101
class LLMOutput(BaseAppOutput):
    # Result of a generation: the required response text, plus optional
    # thinking text and usage figures that both default to None.
    response: str
    thinking_content: Optional[str] = Field(default=None)
    usage: Optional[LLMUsage] = Field(default=None)
105
+
106
+
107
@contextmanager
def timing_context():
    """Context manager to track timing information for LLM generation.

    Yields an object with:
      * ``start_time`` - wall-clock time at which the context was entered,
      * ``first_token_time`` - ``None`` until ``mark_first_token()`` is called,
      * ``mark_first_token()`` - records the first-token time once; later
        calls leave the recorded value unchanged,
      * ``stats`` - dict with ``time_to_first_token`` and ``generation_time``
        in seconds, measured up to the moment ``stats`` is read.
    """
    class TimingInfo:
        def __init__(self):
            self.start_time = time.time()
            self.first_token_time = None

        def mark_first_token(self):
            if self.first_token_time is None:
                self.first_token_time = time.time()

        @property
        def stats(self):
            end_time = time.time()
            # With no token seen yet, treat "now" as the first-token time for
            # this reading only. The attribute itself is left untouched so a
            # later mark_first_token() still records the real time.
            first_token_time = self.first_token_time
            if first_token_time is None:
                first_token_time = end_time

            return {
                "time_to_first_token": first_token_time - self.start_time,
                "generation_time": end_time - first_token_time
            }

    yield TimingInfo()
138
+
139
+
140
def build_messages(
    input_data: LLMInput,
    transform_user_message: Optional[Callable[[str], str]] = None
) -> List[Dict[str, Any]]:
    """Build messages for LLaMA.cpp chat completion.

    The result starts with the system prompt, continues with one message per
    context entry and ends with the current user message. Each message's
    content is a list of parts: a text part when the text is non-empty and an
    image part when an image with a path (preferred) or uri is attached.

    Args:
        input_data: The input data
        transform_user_message: Optional function to transform user message text before building messages.
            It is applied to context messages whose role is USER and to the
            final user text.
    """
    def _parts(text, image):
        # Text part first, then the image part, mirroring the message layout.
        parts = []
        if text:
            parts.append({"type": "text", "text": text})
        if image:
            if image.path:
                parts.append({"type": "image_url", "image_url": {"url": image.path}})
            elif image.uri:
                parts.append({"type": "image_url", "image_url": {"url": image.uri}})
        return parts

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": input_data.system_prompt}],
        }
    ]

    # Earlier turns of the conversation.
    for msg in input_data.context:
        msg_text = msg.text
        if transform_user_message and msg.role == ContextMessageRole.USER:
            msg_text = transform_user_message(msg_text)
        messages.append({
            "role": msg.role,
            "content": _parts(msg_text, getattr(msg, 'image', None)),
        })

    # The current user turn.
    user_text = input_data.text
    if transform_user_message:
        user_text = transform_user_message(user_text)
    messages.append({
        "role": "user",
        "content": _parts(user_text, getattr(input_data, 'image', None)),
    })

    return messages
190
+
191
+
192
def stream_generate(
    model: Any,
    messages: List[Dict[str, Any]],
    output_cls: type[LLMOutput],
    temperature: float = 0.7,
    top_p: float = 0.95,
    max_tokens: int = 4096,
    stop: Optional[List[str]] = None,
    handle_thinking: bool = False,
    transform_response: Optional[Callable[[str, str], tuple[str, LLMOutput]]] = None,
) -> Generator[LLMOutput, None, None]:
    """Stream generate from LLaMA.cpp model with timing and usage tracking.

    Generation runs in a background thread that pushes content pieces onto a
    queue; this generator consumes the queue and yields one output per piece,
    followed by a final output that carries the usage statistics.

    Args:
        model: The LLaMA.cpp model instance
        messages: List of messages to send to the model
        output_cls: Output class type to use for responses
        temperature: Sampling temperature
        top_p: Top-p sampling threshold
        max_tokens: Maximum tokens to generate
        stop: Optional list of stop sequences
        handle_thinking: Whether to handle thinking tags
        transform_response: Optional function to transform responses, takes (piece, buffer) and returns (new_buffer, output)

    Yields:
        An output for every received piece, then a final output with ``usage`` set.

    Raises:
        Exception: Whatever the generation thread raised, re-raised here.
    """
    response_queue: Queue[Optional[tuple[str, dict]]] = Queue()
    thread_exception = None
    usage_stats = {
        "prompt_tokens": 0,
        "completion_tokens": 0,
        "total_tokens": 0,
        "stop_reason": ""
    }

    with timing_context() as timing:
        def generation_thread():
            nonlocal thread_exception
            try:
                completion = model.create_chat_completion(
                    messages=messages,
                    stream=True,
                    temperature=temperature,
                    top_p=top_p,
                    max_tokens=max_tokens,
                    stop=stop
                )

                for chunk in completion:
                    # Get usage from root level if present
                    if "usage" in chunk and chunk["usage"] is not None:
                        usage_stats.update(chunk["usage"])

                    # Get content from choices. A chunk may carry an empty
                    # ``choices`` list; treat it like a chunk without choices
                    # instead of failing on ``[0]``.
                    choices = chunk.get("choices") or [{}]
                    delta = choices[0]
                    content = None
                    finish_reason = None

                    if "message" in delta:
                        content = delta["message"].get("content", "")
                        finish_reason = delta.get("finish_reason")
                    elif "delta" in delta:
                        content = delta["delta"].get("content", "")
                        finish_reason = delta.get("finish_reason")

                    if content:
                        if not timing.first_token_time:
                            timing.mark_first_token()
                        response_queue.put((content, {}))

                    if finish_reason:
                        usage_stats["stop_reason"] = finish_reason

            except Exception as e:
                thread_exception = e
            finally:
                # Always send the sentinel (piece=None) so the consumer stops.
                timing_stats = timing.stats
                generation_time = timing_stats["generation_time"]
                tokens_per_second = (usage_stats["completion_tokens"] / generation_time) if generation_time > 0 else 0
                response_queue.put((None, {
                    "time_to_first_token": timing_stats["time_to_first_token"],
                    "tokens_per_second": tokens_per_second
                }))

        thread = Thread(target=generation_thread, daemon=True)
        thread.start()

        buffer = ""
        thinking_content = "" if handle_thinking else None
        in_thinking = handle_thinking

        def absorb(piece: str) -> None:
            """Route one content piece into the thinking text or the response buffer."""
            nonlocal buffer, thinking_content, in_thinking
            if handle_thinking and "</think>" in piece:
                parts = piece.split("</think>")
                if in_thinking:
                    thinking_content += parts[0].replace("<think>", "")
                    buffer = parts[1] if len(parts) > 1 else ""
                    in_thinking = False
                else:
                    buffer += piece
            elif in_thinking:
                thinking_content += piece.replace("<think>", "")
            else:
                buffer += piece

        def snapshot(usage=None):
            """Build an output from the text accumulated so far."""
            fields = {
                "response": buffer.strip(),
                "thinking_content": thinking_content.strip() if thinking_content else None,
            }
            if usage is not None:
                fields["usage"] = usage
            return output_cls(**fields)

        try:
            while True:
                try:
                    result = response_queue.get(timeout=30.0)
                    if thread_exception:
                        raise thread_exception

                    piece, timing_stats = result
                    if piece is None:
                        # Final yield with complete usage stats
                        usage = LLMUsage(
                            stop_reason=usage_stats["stop_reason"],
                            time_to_first_token=timing_stats["time_to_first_token"],
                            tokens_per_second=timing_stats["tokens_per_second"],
                            prompt_tokens=usage_stats["prompt_tokens"],
                            completion_tokens=usage_stats["completion_tokens"],
                            total_tokens=usage_stats["total_tokens"]
                        )

                        if transform_response:
                            buffer, output = transform_response("", buffer)
                            output.usage = usage
                            yield output
                        else:
                            # The sentinel carries no text, so there is nothing
                            # to absorb; feeding ``None`` into the text handling
                            # raised a TypeError that the handler below
                            # swallowed, and this final output was never sent.
                            yield snapshot(usage)
                        break

                    if transform_response:
                        buffer, output = transform_response(piece, buffer)
                        yield output
                    else:
                        absorb(piece)
                        yield snapshot()

                except Exception as e:
                    if thread_exception and isinstance(e, thread_exception.__class__):
                        raise thread_exception
                    # NOTE(review): every other error (including a queue
                    # timeout) ends the stream silently, as before - confirm
                    # this is intended.
                    break
        finally:
            if thread and thread.is_alive():
                thread.join(timeout=2.0)
@@ -0,0 +1,6 @@
1
+ """Utilities package for inference.sh SDK."""
2
+
3
+ from .storage import StorageDir
4
+ from .download import download
5
+
6
+ __all__ = ["StorageDir", "download"]
@@ -0,0 +1,51 @@
1
+ import hashlib
2
+ import os
3
+ import urllib.parse
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import Union
7
+
8
+ from ..models.file import File
9
+ from .storage import StorageDir
10
+
11
+
12
def download(url: str, directory: Union[str, Path, StorageDir]) -> str:
    """Download a file to the specified directory and return its path.

    The file is stored as ``<directory>/<first 12 hex chars of sha256(url)>/<filename>``,
    where ``filename`` is the last path component of the URL (``download``
    when the URL path has none). An existing file is returned without
    downloading again, unless ``directory`` equals ``StorageDir.TEMP``.

    Args:
        url: The URL to download from
        directory: The directory to save the file to. Can be a string path,
                  Path object, or StorageDir enum value.

    Returns:
        str: The path to the downloaded file

    Raises:
        RuntimeError: If the download fails or yields no local file.
    """
    # Convert directory to Path. StorageDir mixes in ``str``, and Path() may
    # stringify such members with str(), which yields "StorageDir.X" rather
    # than the directory itself - so take the enum's value explicitly.
    if isinstance(directory, StorageDir):
        dir_path = Path(directory.value)
    else:
        dir_path = Path(directory)
    # parents=True: the target may sit below directories that do not exist yet.
    dir_path.mkdir(parents=True, exist_ok=True)

    # Create hash directory from URL
    url_hash = hashlib.sha256(url.encode()).hexdigest()[:12]
    hash_dir = dir_path / url_hash
    hash_dir.mkdir(exist_ok=True)

    # Keep original filename
    filename = os.path.basename(urllib.parse.urlparse(url).path) or 'download'
    output_path = hash_dir / filename

    # If file exists in directory and it's not a temp directory, return it
    if output_path.exists() and directory != StorageDir.TEMP:
        return str(output_path)

    # Download the file
    file = File(url)
    if not file.path:
        raise RuntimeError(f"Failed to download {url}")

    tmp_path = file._tmp_path
    if tmp_path and file.path == tmp_path:
        # The File holds a temporary download: move it into place so no
        # orphaned copy is left behind, then clear the marker because there is
        # nothing left for the File instance to delete.
        shutil.move(tmp_path, output_path)
        file._tmp_path = None
    else:
        # ``url`` resolved to an existing local file; leave the source alone.
        shutil.copy2(file.path, output_path)
    return str(output_path)
@@ -0,0 +1,16 @@
1
+ from enum import Enum
2
+ from pathlib import Path
3
+
4
+
5
class StorageDir(str, Enum):
    """Standard storage directories used by the SDK."""
    DATA = "/app/data"  # Persistent storage/cache directory
    TEMP = "/app/tmp"  # Temporary storage directory
    CACHE = "/app/cache"  # Cache directory

    @property
    def path(self) -> Path:
        """Return this directory as a ``Path``, creating it (with parents) if missing."""
        directory = Path(self.value)
        directory.mkdir(parents=True, exist_ok=True)
        return directory
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inferencesh
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: inference.sh Python SDK
5
5
  Author: Inference Shell Inc.
6
6
  Author-email: "Inference Shell Inc." <hello@inference.sh>
@@ -0,0 +1,14 @@
1
+ inferencesh/__init__.py,sha256=H26z9pSgivkYfH2OfO5ff6emzmiV_Tc77umCRpdvg0Y,561
2
+ inferencesh/models/__init__.py,sha256=CI9X3WyiXdRHUxKAwFuKSFCxrHwECPRrVJinJBugdP0,484
3
+ inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
4
+ inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
5
+ inferencesh/models/llm.py,sha256=jcBHgBK7uNTc1blfRqVBnYYfvGT952At7DL3NI_OKGc,14784
6
+ inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
7
+ inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
8
+ inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
9
+ inferencesh-0.2.9.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
10
+ inferencesh-0.2.9.dist-info/METADATA,sha256=1vPRr6hhz0tbMYuY4CXd9AiAKs11DAiPjkvK_5yCqwY,2756
11
+ inferencesh-0.2.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ inferencesh-0.2.9.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
13
+ inferencesh-0.2.9.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
14
+ inferencesh-0.2.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
inferencesh/sdk.py DELETED
@@ -1,415 +0,0 @@
1
- from typing import Optional, Union
2
- from pydantic import BaseModel, ConfigDict, PrivateAttr, model_validator, Field, field_validator
3
- import mimetypes
4
- import os
5
- import urllib.request
6
- import urllib.parse
7
- import tempfile
8
- from typing import Any, Dict, List
9
-
10
- import inspect
11
- import ast
12
- import textwrap
13
- from collections import OrderedDict
14
- from enum import Enum
15
- import shutil
16
- from pathlib import Path
17
- import hashlib
18
- from tqdm import tqdm
19
-
20
-
21
- # inspired by https://github.com/pydantic/pydantic/issues/7580
22
class OrderedSchemaModel(BaseModel):
    """A base model that ensures the JSON schema properties and required fields are in the order of field definition."""

    # Ordering is derived by parsing this class's own source text, so only
    # fields annotated directly in the class body are moved to the front;
    # every other property keeps its relative order and follows them.

    @classmethod
    def model_json_schema(cls, by_alias: bool = True, **kwargs: Any) -> Dict[str, Any]:
        """Return the JSON schema with fields sorted by definition order.

        Args:
            by_alias: Forwarded unchanged to ``BaseModel.model_json_schema``.
            **kwargs: Forwarded unchanged to ``BaseModel.model_json_schema``.

        Returns:
            The schema dict. When the field order cannot be determined the
            schema is returned exactly as the parent class produced it.
        """
        schema = super().model_json_schema(by_alias=by_alias, **kwargs)

        field_order = cls._get_field_order()
        if not field_order:
            return schema

        properties = schema.get('properties')
        if properties is not None:
            # Declared fields first (source order), then whatever is left.
            ordered_properties = {
                name: properties[name] for name in field_order if name in properties
            }
            for name, field_schema in properties.items():
                ordered_properties.setdefault(name, field_schema)
            schema['properties'] = ordered_properties

        required = schema.get('required')
        if required is not None:
            declared = [name for name in field_order if name in required]
            leftovers = [name for name in required if name not in declared]
            schema['required'] = declared + leftovers

        return schema

    @classmethod
    def _get_field_order(cls) -> List[str]:
        """Get the order of fields as they were defined in the class.

        Returns:
            Names of the annotated assignments in the class body, in source
            order. An empty list is returned when the source cannot be
            retrieved or parsed, so that schema generation never fails just
            because ordering information is unavailable.
        """
        try:
            source = textwrap.dedent(inspect.getsource(cls))
        except (OSError, TypeError):
            # No retrievable source (class built dynamically, defined in an
            # interactive session, source file missing, ...).
            return []

        try:
            module = ast.parse(source)
        except IndentationError:
            # If we still get an IndentationError, wrap the class in a dummy
            # class so the indented source becomes a valid nested definition.
            wrapped = f"class DummyModule:\n{textwrap.indent(source, ' ')}"
            try:
                module = ast.parse(wrapped)
            except SyntaxError:
                return []
            outer = module.body[0]
            class_def = outer.body[0] if getattr(outer, 'body', None) else None
        except SyntaxError:
            return []
        else:
            # Find the class definition; default to None instead of letting
            # StopIteration escape when no matching class is present.
            class_def = next(
                (
                    node for node in module.body
                    if isinstance(node, ast.ClassDef) and node.name == cls.__name__
                ),
                None,
            )

        if not isinstance(class_def, ast.ClassDef):
            return []

        # Extract field names in the order they were defined.
        return [
            node.target.id
            for node in class_def.body
            if isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)
        ]
84
-
85
class BaseAppInput(OrderedSchemaModel):
    # Common base for app input models; adds nothing to OrderedSchemaModel.
    ...
87
-
88
class BaseAppOutput(OrderedSchemaModel):
    # Common base for app output models; adds nothing to OrderedSchemaModel.
    ...
90
-
91
class BaseApp(BaseModel):
    # Base class for apps: subclasses must override ``run``; ``setup`` and
    # ``unload`` are optional hooks that do nothing by default.
    # NOTE(review): presumably the hosting runtime calls these in the order
    # setup -> run -> unload; the caller is not visible here.
    model_config = ConfigDict(
        extra='allow',
        arbitrary_types_allowed=True,
    )

    async def setup(self):
        """Optional initialisation hook; the base implementation is a no-op."""
        return None

    async def run(self, app_input: BaseAppInput) -> BaseAppOutput:
        """Process ``app_input`` and return the app's output.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError("run method must be implemented")

    async def unload(self):
        """Optional teardown hook; the base implementation is a no-op."""
        return None
105
-
106
-
107
class File(BaseModel):
    """A class representing a file in the inference.sh ecosystem."""
    uri: Optional[str] = Field(default=None)  # Original location (URL or file path)
    path: Optional[str] = None  # Resolved local file path
    content_type: Optional[str] = None  # MIME type of the file
    size: Optional[int] = None  # File size in bytes
    filename: Optional[str] = None  # Original filename if available
    _tmp_path: Optional[str] = PrivateAttr(default=None)  # Internal storage for temporary file path

    def __init__(self, initializer=None, **data):
        """Build a File from a URI string, another File, or field keywords.

        Args:
            initializer: Optional positional shortcut. A ``str`` is used as
                ``uri``; a ``File`` is copied through ``model_dump()``, which
                replaces any keyword fields passed alongside it.
            **data: Field values forwarded to the pydantic constructor.

        Raises:
            ValueError: If ``initializer`` is not ``None``, a ``str`` or a ``File``.
        """
        if initializer is not None:
            if isinstance(initializer, str):
                data['uri'] = initializer
            elif isinstance(initializer, File):
                data = initializer.model_dump()
            else:
                raise ValueError(f'Invalid input for File: {initializer}')
        super().__init__(**data)

    @model_validator(mode='before')
    @classmethod
    def convert_str_to_file(cls, values):
        """Accept a bare string as shorthand for ``{"uri": <string>}``."""
        if isinstance(values, str):  # Only accept strings
            return {"uri": values}
        elif isinstance(values, dict):
            return values
        raise ValueError(f'Invalid input for File: {values}')

    @model_validator(mode='after')
    def validate_required_fields(self) -> 'File':
        """Validate that either uri or path is provided."""
        if not self.uri and not self.path:
            raise ValueError("Either 'uri' or 'path' must be provided")
        return self

    def model_post_init(self, _: Any) -> None:
        """Initialize file path and metadata after model creation.

        This method handles:
        1. Downloading URLs to local files if uri is a URL
        2. Converting relative paths to absolute paths
        3. Populating file metadata
        """
        # Handle uri if provided
        if self.uri:
            if self._is_url(self.uri):
                self._download_url()
            else:
                # Convert relative paths to absolute, leave absolute paths unchanged
                self.path = os.path.abspath(self.uri)

        # Handle path if provided
        if self.path:
            # Convert relative paths to absolute, leave absolute paths unchanged
            self.path = os.path.abspath(self.path)
            self._populate_metadata()
            return

        raise ValueError("Either 'uri' or 'path' must be provided and be valid")

    def _is_url(self, path: str) -> bool:
        """Check if the path is a URL."""
        parsed = urllib.parse.urlparse(path)
        return parsed.scheme in ('http', 'https')

    def _discard_tmp_file(self) -> None:
        """Best-effort removal of the temporary download, if there is one."""
        tmp_path = getattr(self, '_tmp_path', None)
        if not tmp_path:
            return
        try:
            os.unlink(tmp_path)
        except OSError:
            # Already gone or not removable; nothing useful can be done here.
            pass
        self._tmp_path = None

    def _download_url(self) -> None:
        """Download the URL to a temporary file and update the path.

        Raises:
            RuntimeError: If the request, the transfer or the write to disk
                fails. The partially written temporary file is removed first
                and the original exception is attached as ``__cause__``.
        """
        original_url = self.uri
        # Create a temporary file with a suffix based on the URL path
        suffix = os.path.splitext(urllib.parse.urlparse(original_url).path)[1]
        try:
            # delete=False: the file has to outlive this handle; it is removed
            # later by __del__ (or right below when the download fails).
            tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        except OSError as e:
            raise RuntimeError(f"Error downloading URL {original_url}: {str(e)}") from e
        self._tmp_path = tmp_file.name

        try:
            # Write through the handle we already own and let ``with`` close
            # it, instead of leaking it and opening the path a second time.
            with tmp_file as out_file:
                # Set up request with user agent
                headers = {
                    'User-Agent': (
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                        'AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/91.0.4472.124 Safari/537.36'
                    )
                }
                req = urllib.request.Request(original_url, headers=headers)

                # Download the file with progress bar
                print(f"Downloading URL: {original_url} to {self._tmp_path}")
                with urllib.request.urlopen(req) as response:
                    total_size = int(response.headers.get('content-length', 0))
                    block_size = 1024  # 1 Kibibyte

                    with tqdm(total=total_size, unit='iB', unit_scale=True) as pbar:
                        while buffer := response.read(block_size):
                            out_file.write(buffer)
                            pbar.update(len(buffer))
        except urllib.error.URLError as e:
            # HTTPError is a subclass of URLError, so this covers both.
            self._discard_tmp_file()
            raise RuntimeError(f"Failed to download URL {original_url}: {str(e)}") from e
        except OSError as e:
            failed_path = self._tmp_path
            self._discard_tmp_file()
            raise RuntimeError(f"Failed to write downloaded file to {failed_path}: {str(e)}") from e
        except Exception as e:
            # Clean up temp file if something went wrong
            self._discard_tmp_file()
            raise RuntimeError(f"Error downloading URL {original_url}: {str(e)}") from e

        self.path = self._tmp_path

    def __del__(self):
        """Cleanup temporary file if it exists."""
        self._discard_tmp_file()

    def _populate_metadata(self) -> None:
        """Populate file metadata from the path if it exists."""
        if os.path.exists(self.path):
            if not self.content_type:
                self.content_type = self._guess_content_type()
            if not self.size:
                self.size = self._get_file_size()
            if not self.filename:
                self.filename = self._get_filename()

    @classmethod
    def from_path(cls, path: Union[str, os.PathLike]) -> 'File':
        """Create a File instance from a file path."""
        return cls(uri=str(path))

    def _guess_content_type(self) -> Optional[str]:
        """Guess the MIME type of the file."""
        return mimetypes.guess_type(self.path)[0]

    def _get_file_size(self) -> int:
        """Get the size of the file in bytes."""
        return os.path.getsize(self.path)

    def _get_filename(self) -> str:
        """Get the base filename from the path."""
        return os.path.basename(self.path)

    def exists(self) -> bool:
        """Check if the file exists."""
        return os.path.exists(self.path)

    def refresh_metadata(self) -> None:
        """Refresh all metadata from the file."""
        if os.path.exists(self.path):
            self.content_type = self._guess_content_type()
            self.size = self._get_file_size()  # Always update size
            self.filename = self._get_filename()

    @classmethod
    def model_json_schema(cls, **kwargs):
        """Return a schema accepting either a plain string or the full object."""
        schema = super().model_json_schema(**kwargs)
        schema["$id"] = "/schemas/File"
        # Create a schema that accepts either a string or the full object
        return {
            "oneOf": [
                {"type": "string"},  # Accept string input
                schema  # Accept full object input
            ]
        }
278
-
279
-
280
class ContextMessageRole(str, Enum):
    # Closed set of roles a message can carry. Members are also ``str``
    # instances, so they compare equal to their plain string values.
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
284
-
285
class Message(BaseModel):
    # A single message: who said it (``role``) and what was said (``content``).
    role: ContextMessageRole
    content: str
288
-
289
class ContextMessage(BaseModel):
    # One prior turn of a conversation. Unlike ``Message`` the payload field
    # is named ``text``. Both fields are required (no default is given).
    role: ContextMessageRole = Field(description="The role of the message")
    text: str = Field(description="The text content of the message")
296
-
297
class ContextMessageWithImage(ContextMessage):
    # A context message that may additionally carry one image.
    image: Optional[File] = Field(
        default=None,
        description="The image url of the message",
    )
302
-
303
def _text_turn(role: str, text: str) -> dict:
    """Build one example turn in the shape used by the ``context`` examples."""
    return {"role": role, "content": [{"type": "text", "text": text}]}


def _example_exchange(question: str, answer: str) -> list:
    """Build a two-turn example: a user question and the assistant's reply."""
    return [_text_turn("user", question), _text_turn("assistant", answer)]


class LLMInput(BaseAppInput):
    # Standard text-only input for LLM apps: system prompt, prior context and
    # the new user prompt, declared in that order.
    system_prompt: str = Field(
        default="You are a helpful assistant that can answer questions and help with tasks.",
        description="The system prompt to use for the model",
        examples=[
            "You are a helpful assistant that can answer questions and help with tasks.",
            "You are a certified medical professional who can provide accurate health information.",
            "You are a certified financial advisor who can give sound investment guidance.",
            "You are a certified cybersecurity expert who can explain security best practices.",
            "You are a certified environmental scientist who can discuss climate and sustainability.",
        ],
    )
    # NOTE(review): the examples below use a {"role", "content": [parts]}
    # shape, while ContextMessage declares ``role`` and ``text`` - confirm
    # which shape consumers of the schema expect.
    context: list[ContextMessage] = Field(
        default=[],
        description="The context to use for the model",
        examples=[
            _example_exchange(
                "What is the capital of France?",
                "The capital of France is Paris.",
            ),
            _example_exchange(
                "What is the weather like today?",
                "I apologize, but I don't have access to real-time weather information. You would need to check a weather service or app to get current weather conditions for your location.",
            ),
            _example_exchange(
                "Can you help me write a poem about spring?",
                "Here's a short poem about spring:\n\nGreen buds awakening,\nSoft rain gently falling down,\nNew life springs anew.\n\nWarm sun breaks through clouds,\nBirds return with joyful song,\nNature's sweet rebirth.",
            ),
            _example_exchange(
                "Explain quantum computing in simple terms",
                "Quantum computing is like having a super-powerful calculator that can solve many problems at once instead of one at a time. While regular computers use bits (0s and 1s), quantum computers use quantum bits or \"qubits\" that can be both 0 and 1 at the same time - kind of like being in two places at once! This allows them to process huge amounts of information much faster than regular computers for certain types of problems.",
            ),
        ],
    )
    text: str = Field(
        description="The user prompt to use for the model",
        examples=[
            "What is the capital of France?",
            "What is the weather like today?",
            "Can you help me write a poem about spring?",
            "Explain quantum computing in simple terms",
        ],
    )
346
-
347
class LLMInputWithImage(LLMInput):
    # LLMInput variant whose context turns may carry images and which accepts
    # one top-level image next to the prompt.
    # NOTE(review): the example below uses a {"role", "content": [parts]}
    # shape, while ContextMessageWithImage declares ``role``, ``text`` and
    # ``image`` - confirm which shape consumers of the schema expect.
    context: list[ContextMessageWithImage] = Field(
        default=[],
        description="The context to use for the model",
        examples=[
            [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What is the capital of France?"},
                        {"type": "image", "url": "https://example.com/image.jpg"},
                    ],
                },
                {
                    "role": "assistant",
                    "content": [
                        {"type": "text", "text": "The capital of France is Paris."},
                    ],
                },
            ],
        ],
    )
    image: Optional[File] = Field(
        default=None,
        description="The image to use for the model",
    )
362
-
363
class StorageDir(str, Enum):
    """Standard storage directories used by the SDK."""
    DATA = "/app/data"    # Persistent storage/cache directory
    TEMP = "/app/tmp"     # Temporary storage directory
    CACHE = "/app/cache"  # Cache directory

    @property
    def path(self) -> Path:
        """Return this member's directory as a ``Path``, creating it if needed.

        Missing parent directories are created as well; an already existing
        directory is not an error.
        """
        directory = Path(self.value)
        directory.mkdir(parents=True, exist_ok=True)
        return directory
375
-
376
def download(url: str, directory: Union[str, Path, StorageDir]) -> str:
    """Download a file to the specified directory and return its path.

    The file is stored as ``<directory>/<first 12 hex chars of sha256(url)>/<name>``,
    where ``<name>`` is the last path component of the URL (``download`` when
    the URL has none). A file that is already present is returned without
    downloading again, unless ``directory`` is the temporary storage directory.

    Args:
        url: The URL to download from
        directory: The directory to save the file to. Can be a string path,
            Path object, or StorageDir enum value.

    Returns:
        str: The path to the downloaded file

    Raises:
        RuntimeError: If the download did not produce a local file.
    """
    # Resolve enum members through ``.value``: ``Path()`` may stringify a
    # str-mixin Enum member as "StorageDir.DATA" instead of its value on
    # interpreters whose pathlib casts str subclasses with ``str()``.
    if isinstance(directory, StorageDir):
        dir_path = Path(directory.value)
    else:
        dir_path = Path(directory)

    # Compare as paths so that "/app/tmp", Path("/app/tmp") and
    # StorageDir.TEMP are all recognised as the temp directory.
    is_temp_dir = dir_path == Path(StorageDir.TEMP.value)

    # Create hash directory from URL (parents=True: the base directory, or
    # its ancestors, may not exist yet).
    url_hash = hashlib.sha256(url.encode()).hexdigest()[:12]
    hash_dir = dir_path / url_hash
    hash_dir.mkdir(parents=True, exist_ok=True)

    # Keep original filename
    filename = os.path.basename(urllib.parse.urlparse(url).path) or 'download'
    output_path = hash_dir / filename

    # If file exists in directory and it's not a temp directory, return it
    if output_path.exists() and not is_temp_dir:
        return str(output_path)

    # Download the file
    file = File(url)
    if file.path:
        if file._tmp_path:
            # The File owns a temporary download: move it into place instead
            # of copying it and leaving the temporary copy behind on disk.
            shutil.move(file._tmp_path, output_path)
            # The temporary file no longer exists; nothing left to clean up.
            file._tmp_path = None
        else:
            shutil.copy2(file.path, output_path)
        return str(output_path)

    raise RuntimeError(f"Failed to download {url}")
@@ -1,8 +0,0 @@
1
- inferencesh/__init__.py,sha256=hbKkgHCh0lCdhWyHs3FHHRd8JfLeHkTd1bT4v79Fi8M,192
2
- inferencesh/sdk.py,sha256=raPhrMovMiZ_di0IwT5bgwATe6avxSs0KBUtzes20R4,16704
3
- inferencesh-0.2.7.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
4
- inferencesh-0.2.7.dist-info/METADATA,sha256=UFRubSIB78URLW_ddkZl18xYrl2xkJUiSNFxUCJwFyY,2756
5
- inferencesh-0.2.7.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
6
- inferencesh-0.2.7.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
7
- inferencesh-0.2.7.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
8
- inferencesh-0.2.7.dist-info/RECORD,,