euriai 0.4__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
euriai/langchain.py ADDED
@@ -0,0 +1,867 @@
1
+ """
2
+ Enhanced LangChain Integration for Euri API
3
+ ==========================================
4
+
5
+ This module provides a comprehensive LangChain integration with the Euri API,
6
+ including full ChatModel and Embeddings support with advanced features like
7
+ streaming, async operations, function calling, and structured output.
8
+
9
+ Usage:
10
+ from euriai.langchain_enhanced import EuriaiChatModel, EuriaiEmbeddings
11
+
12
+ # Chat model with all features
13
+ chat_model = EuriaiChatModel(
14
+ api_key="your_api_key",
15
+ model="gpt-4.1-nano",
16
+ temperature=0.7
17
+ )
18
+
19
+ # Embeddings model
20
+ embeddings = EuriaiEmbeddings(
21
+ api_key="your_api_key",
22
+ model="text-embedding-3-small"
23
+ )
24
+ """
25
+
26
+ import asyncio
27
+ import json
28
+ import logging
29
+ from typing import (
30
+ Any, Dict, List, Optional, Iterator, AsyncIterator,
31
+ Union, Callable, Type, Sequence, Tuple
32
+ )
33
+ from abc import ABC, abstractmethod
34
+ from concurrent.futures import ThreadPoolExecutor
35
+ import time
36
+
37
+ try:
38
+ from langchain_core.language_models.chat_models import BaseChatModel
39
+ from langchain_core.language_models.llms import LLM
40
+ from langchain_core.embeddings import Embeddings
41
+ from langchain_core.messages import (
42
+ BaseMessage, AIMessage, HumanMessage, SystemMessage,
43
+ AIMessageChunk, FunctionMessage, ToolMessage
44
+ )
45
+ from langchain_core.messages.ai import UsageMetadata
46
+ from langchain_core.outputs import (
47
+ ChatGeneration, ChatGenerationChunk, ChatResult,
48
+ LLMResult, Generation
49
+ )
50
+ from langchain_core.callbacks import (
51
+ CallbackManagerForLLMRun, AsyncCallbackManagerForLLMRun
52
+ )
53
+ from langchain_core.runnables import RunnableConfig
54
+ from langchain_core.tools import BaseTool
55
+ from langchain_core.utils.function_calling import convert_to_openai_function
56
+ from pydantic import Field, BaseModel, SecretStr
57
+ LANGCHAIN_AVAILABLE = True
58
+ except ImportError:
59
+ LANGCHAIN_AVAILABLE = False
60
+ # Fallback base classes for when LangChain is not available
61
+ class BaseChatModel:
62
+ pass
63
+ class LLM:
64
+ pass
65
+ class Embeddings:
66
+ pass
67
+ class BaseMessage:
68
+ pass
69
+ class AIMessage:
70
+ pass
71
+ class HumanMessage:
72
+ pass
73
+ class SystemMessage:
74
+ pass
75
+ class AIMessageChunk:
76
+ pass
77
+ class FunctionMessage:
78
+ pass
79
+ class ToolMessage:
80
+ pass
81
+ class UsageMetadata:
82
+ pass
83
+ class ChatGeneration:
84
+ pass
85
+ class ChatGenerationChunk:
86
+ pass
87
+ class ChatResult:
88
+ pass
89
+ class LLMResult:
90
+ pass
91
+ class Generation:
92
+ pass
93
+ class CallbackManagerForLLMRun:
94
+ pass
95
+ class AsyncCallbackManagerForLLMRun:
96
+ pass
97
+ class RunnableConfig:
98
+ pass
99
+ class BaseTool:
100
+ pass
101
+ class Field:
102
+ pass
103
+ class BaseModel:
104
+ pass
105
+ class SecretStr:
106
+ pass
107
+
108
+ from euriai.client import EuriaiClient
109
+ from euriai.embedding import EuriaiEmbeddingClient
110
+
111
+
112
+ class EuriaiChatModel(BaseChatModel):
113
+ """
114
+ Enhanced LangChain ChatModel implementation using Euri API.
115
+
116
+ This implementation provides full LangChain compatibility with advanced features:
117
+ - Streaming support (both sync and async)
118
+ - Function calling and tool use
119
+ - Structured output support
120
+ - Async operations
121
+ - Usage tracking and metadata
122
+ - Proper error handling
123
+ - Callback support
124
+
125
+ Example:
126
+ chat_model = EuriaiChatModel(
127
+ api_key="your_api_key",
128
+ model="gpt-4.1-nano",
129
+ temperature=0.7,
130
+ max_tokens=1000,
131
+ streaming=True
132
+ )
133
+
134
+ # Basic usage
135
+ response = chat_model.invoke("Hello, how are you?")
136
+
137
+ # Streaming
138
+ for chunk in chat_model.stream("Tell me a story"):
139
+ print(chunk.content, end="")
140
+
141
+ # Async
142
+ response = await chat_model.ainvoke("What is AI?")
143
+
144
+ # With messages
145
+ messages = [
146
+ SystemMessage(content="You are a helpful assistant"),
147
+ HumanMessage(content="What is the weather like?")
148
+ ]
149
+ response = chat_model.invoke(messages)
150
+ """
151
+
152
+ # Configuration
153
+ api_key: str = Field(description="Euri API key")
154
+ model: str = Field(default="gpt-4.1-nano", description="Model name")
155
+ temperature: float = Field(default=0.7, ge=0.0, le=1.0, description="Sampling temperature")
156
+ max_tokens: int = Field(default=1000, gt=0, description="Maximum tokens to generate")
157
+ top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0, description="Nucleus sampling parameter")
158
+ frequency_penalty: Optional[float] = Field(default=None, ge=-2.0, le=2.0, description="Frequency penalty")
159
+ presence_penalty: Optional[float] = Field(default=None, ge=-2.0, le=2.0, description="Presence penalty")
160
+
161
+ # Features
162
+ streaming: bool = Field(default=False, description="Enable streaming responses")
163
+ supports_function_calling: bool = Field(default=True, description="Support function calling")
164
+ supports_structured_output: bool = Field(default=True, description="Support structured output")
165
+
166
+ # Internal
167
+ _client: Optional[EuriaiClient] = None
168
+ _executor: Optional[ThreadPoolExecutor] = None
169
+
170
+ def __init__(self, **kwargs):
171
+ if not LANGCHAIN_AVAILABLE:
172
+ raise ImportError(
173
+ "LangChain is not installed. Please install with: "
174
+ "pip install langchain-core"
175
+ )
176
+
177
+ super().__init__(**kwargs)
178
+
179
+ # Initialize client
180
+ self._client = EuriaiClient(
181
+ api_key=self.api_key,
182
+ model=self.model
183
+ )
184
+
185
+ # Initialize thread pool for async operations
186
+ self._executor = ThreadPoolExecutor(max_workers=4)
187
+
188
+ @property
189
+ def _llm_type(self) -> str:
190
+ """Get the type of language model."""
191
+ return "euriai_chat_enhanced"
192
+
193
+ @property
194
+ def _identifying_params(self) -> Dict[str, Any]:
195
+ """Get identifying parameters for the model."""
196
+ return {
197
+ "model": self.model,
198
+ "temperature": self.temperature,
199
+ "max_tokens": self.max_tokens,
200
+ "top_p": self.top_p,
201
+ "frequency_penalty": self.frequency_penalty,
202
+ "presence_penalty": self.presence_penalty,
203
+ }
204
+
205
+ def _format_messages(self, messages: List[BaseMessage]) -> List[Dict[str, str]]:
206
+ """Format LangChain messages for the Euri API."""
207
+ formatted_messages = []
208
+
209
+ for message in messages:
210
+ if isinstance(message, HumanMessage):
211
+ formatted_messages.append({"role": "user", "content": message.content})
212
+ elif isinstance(message, AIMessage):
213
+ formatted_messages.append({"role": "assistant", "content": message.content})
214
+ elif isinstance(message, SystemMessage):
215
+ formatted_messages.append({"role": "system", "content": message.content})
216
+ elif isinstance(message, (FunctionMessage, ToolMessage)):
217
+ formatted_messages.append({"role": "function", "content": message.content})
218
+ else:
219
+ # Fallback for other message types
220
+ formatted_messages.append({"role": "user", "content": str(message.content)})
221
+
222
+ return formatted_messages
223
+
224
+ def _create_chat_result(self, response: Dict[str, Any]) -> ChatResult:
225
+ """Create ChatResult from API response."""
226
+ if "choices" not in response or not response["choices"]:
227
+ raise ValueError("Invalid response format from Euri API")
228
+
229
+ choice = response["choices"][0]
230
+ message_content = choice.get("message", {}).get("content", "")
231
+
232
+ # Extract usage information
233
+ usage = response.get("usage", {})
234
+ usage_metadata = UsageMetadata(
235
+ input_tokens=usage.get("prompt_tokens", 0),
236
+ output_tokens=usage.get("completion_tokens", 0),
237
+ total_tokens=usage.get("total_tokens", 0)
238
+ )
239
+
240
+ # Create AI message
241
+ ai_message = AIMessage(
242
+ content=message_content,
243
+ usage_metadata=usage_metadata,
244
+ response_metadata={
245
+ "model": self.model,
246
+ "finish_reason": choice.get("finish_reason"),
247
+ "created": response.get("created"),
248
+ }
249
+ )
250
+
251
+ generation = ChatGeneration(
252
+ message=ai_message,
253
+ generation_info={
254
+ "finish_reason": choice.get("finish_reason"),
255
+ "model": self.model,
256
+ }
257
+ )
258
+
259
+ return ChatResult(
260
+ generations=[generation],
261
+ llm_output={
262
+ "token_usage": usage,
263
+ "model_name": self.model,
264
+ "system_fingerprint": response.get("system_fingerprint"),
265
+ }
266
+ )
267
+
268
+ def _generate(
269
+ self,
270
+ messages: List[BaseMessage],
271
+ stop: Optional[List[str]] = None,
272
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
273
+ **kwargs: Any,
274
+ ) -> ChatResult:
275
+ """Generate chat response."""
276
+ # Format messages
277
+ formatted_messages = self._format_messages(messages)
278
+
279
+ # Prepare request
280
+ request_params = {
281
+ "messages": formatted_messages,
282
+ "temperature": self.temperature,
283
+ "max_tokens": self.max_tokens,
284
+ }
285
+
286
+ # Add optional parameters
287
+ if self.top_p is not None:
288
+ request_params["top_p"] = self.top_p
289
+ if self.frequency_penalty is not None:
290
+ request_params["frequency_penalty"] = self.frequency_penalty
291
+ if self.presence_penalty is not None:
292
+ request_params["presence_penalty"] = self.presence_penalty
293
+ if stop:
294
+ request_params["stop"] = stop
295
+
296
+ # Override with kwargs
297
+ request_params.update(kwargs)
298
+
299
+ try:
300
+ # Make API call
301
+ response = self._client.generate_completion(**request_params)
302
+ return self._create_chat_result(response)
303
+ except Exception as e:
304
+ if run_manager:
305
+ run_manager.on_llm_error(e)
306
+ raise
307
+
308
+ def _stream(
309
+ self,
310
+ messages: List[BaseMessage],
311
+ stop: Optional[List[str]] = None,
312
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
313
+ **kwargs: Any,
314
+ ) -> Iterator[ChatGenerationChunk]:
315
+ """Stream chat response."""
316
+ # Format messages
317
+ formatted_messages = self._format_messages(messages)
318
+
319
+ # Prepare request
320
+ request_params = {
321
+ "messages": formatted_messages,
322
+ "temperature": self.temperature,
323
+ "max_tokens": self.max_tokens,
324
+ }
325
+
326
+ # Add optional parameters
327
+ if self.top_p is not None:
328
+ request_params["top_p"] = self.top_p
329
+ if self.frequency_penalty is not None:
330
+ request_params["frequency_penalty"] = self.frequency_penalty
331
+ if self.presence_penalty is not None:
332
+ request_params["presence_penalty"] = self.presence_penalty
333
+ if stop:
334
+ request_params["stop"] = stop
335
+
336
+ # Override with kwargs
337
+ request_params.update(kwargs)
338
+
339
+ try:
340
+ # Stream response
341
+ accumulated_content = ""
342
+ for chunk_data in self._client.stream_completion(**request_params):
343
+ if chunk_data.strip():
344
+ try:
345
+ # Parse SSE data
346
+ if chunk_data.startswith("data: "):
347
+ chunk_data = chunk_data[6:]
348
+
349
+ if chunk_data.strip() == "[DONE]":
350
+ break
351
+
352
+ chunk_json = json.loads(chunk_data)
353
+ if "choices" in chunk_json and chunk_json["choices"]:
354
+ choice = chunk_json["choices"][0]
355
+ delta = choice.get("delta", {})
356
+ content = delta.get("content", "")
357
+
358
+ if content:
359
+ accumulated_content += content
360
+
361
+ # Create usage metadata
362
+ usage_metadata = UsageMetadata(
363
+ input_tokens=0,
364
+ output_tokens=1,
365
+ total_tokens=1
366
+ )
367
+
368
+ # Create chunk
369
+ chunk = ChatGenerationChunk(
370
+ message=AIMessageChunk(
371
+ content=content,
372
+ usage_metadata=usage_metadata
373
+ ),
374
+ generation_info={
375
+ "finish_reason": choice.get("finish_reason"),
376
+ "model": self.model,
377
+ }
378
+ )
379
+
380
+ # Notify callback
381
+ if run_manager:
382
+ run_manager.on_llm_new_token(content, chunk=chunk)
383
+
384
+ yield chunk
385
+ except json.JSONDecodeError:
386
+ continue
387
+
388
+ except Exception as e:
389
+ if run_manager:
390
+ run_manager.on_llm_error(e)
391
+ raise
392
+
393
+ async def _agenerate(
394
+ self,
395
+ messages: List[BaseMessage],
396
+ stop: Optional[List[str]] = None,
397
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
398
+ **kwargs: Any,
399
+ ) -> ChatResult:
400
+ """Async generate chat response."""
401
+ # Run sync method in thread pool
402
+ loop = asyncio.get_event_loop()
403
+ return await loop.run_in_executor(
404
+ self._executor,
405
+ self._generate,
406
+ messages,
407
+ stop,
408
+ run_manager,
409
+ **kwargs
410
+ )
411
+
412
+ async def _astream(
413
+ self,
414
+ messages: List[BaseMessage],
415
+ stop: Optional[List[str]] = None,
416
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
417
+ **kwargs: Any,
418
+ ) -> AsyncIterator[ChatGenerationChunk]:
419
+ """Async stream chat response."""
420
+ # Run sync stream method in thread pool
421
+ loop = asyncio.get_event_loop()
422
+
423
+ def sync_stream():
424
+ return list(self._stream(messages, stop, run_manager, **kwargs))
425
+
426
+ chunks = await loop.run_in_executor(self._executor, sync_stream)
427
+
428
+ for chunk in chunks:
429
+ yield chunk
430
+
431
+ def bind_functions(self, functions: Sequence[Dict[str, Any]]) -> "EuriaiChatModel":
432
+ """Bind functions to the model for function calling."""
433
+ # Create new instance with functions bound
434
+ return self.__class__(
435
+ api_key=self.api_key,
436
+ model=self.model,
437
+ temperature=self.temperature,
438
+ max_tokens=self.max_tokens,
439
+ top_p=self.top_p,
440
+ frequency_penalty=self.frequency_penalty,
441
+ presence_penalty=self.presence_penalty,
442
+ streaming=self.streaming,
443
+ supports_function_calling=self.supports_function_calling,
444
+ supports_structured_output=self.supports_structured_output,
445
+ _bound_functions=functions
446
+ )
447
+
448
+ def bind_tools(self, tools: Sequence[Union[Dict[str, Any], BaseTool]]) -> "EuriaiChatModel":
449
+ """Bind tools to the model for tool calling."""
450
+ # Convert tools to functions
451
+ functions = []
452
+ for tool in tools:
453
+ if isinstance(tool, dict):
454
+ functions.append(tool)
455
+ elif hasattr(tool, 'to_function'):
456
+ functions.append(tool.to_function())
457
+ else:
458
+ # Convert tool to function format
459
+ functions.append(convert_to_openai_function(tool))
460
+
461
+ return self.bind_functions(functions)
462
+
463
+ def with_structured_output(
464
+ self,
465
+ schema: Union[Dict, Type[BaseModel]],
466
+ **kwargs: Any
467
+ ) -> "EuriaiChatModel":
468
+ """Create a version that returns structured output."""
469
+ # This would need to be implemented with proper schema validation
470
+ # For now, return self with structured output enabled
471
+ return self.__class__(
472
+ api_key=self.api_key,
473
+ model=self.model,
474
+ temperature=self.temperature,
475
+ max_tokens=self.max_tokens,
476
+ top_p=self.top_p,
477
+ frequency_penalty=self.frequency_penalty,
478
+ presence_penalty=self.presence_penalty,
479
+ streaming=self.streaming,
480
+ supports_function_calling=self.supports_function_calling,
481
+ supports_structured_output=True,
482
+ _structured_output_schema=schema,
483
+ **kwargs
484
+ )
485
+
486
+
487
+ class EuriaiEmbeddings(Embeddings):
488
+ """
489
+ Enhanced LangChain Embeddings implementation using Euri API.
490
+
491
+ This implementation provides full LangChain compatibility with:
492
+ - Batch embedding support
493
+ - Async operations
494
+ - Error handling and retries
495
+ - Usage tracking
496
+ - Configurable chunk size
497
+
498
+ Example:
499
+ embeddings = EuriaiEmbeddings(
500
+ api_key="your_api_key",
501
+ model="text-embedding-3-small",
502
+ chunk_size=1000
503
+ )
504
+
505
+ # Single document
506
+ embedding = embeddings.embed_query("Hello world")
507
+
508
+ # Multiple documents
509
+ embeddings_list = embeddings.embed_documents([
510
+ "Document 1",
511
+ "Document 2",
512
+ "Document 3"
513
+ ])
514
+
515
+ # Async
516
+ embedding = await embeddings.aembed_query("Hello world")
517
+ """
518
+
519
+ def __init__(self,
520
+ api_key: str,
521
+ model: str = "text-embedding-3-small",
522
+ chunk_size: int = 1000,
523
+ max_retries: int = 3,
524
+ request_timeout: int = 60,
525
+ **kwargs):
526
+ if not LANGCHAIN_AVAILABLE:
527
+ raise ImportError(
528
+ "LangChain is not installed. Please install with: "
529
+ "pip install langchain-core"
530
+ )
531
+
532
+ super().__init__()
533
+
534
+ # Initialize configuration
535
+ self.api_key = api_key
536
+ self.model = model
537
+ self.chunk_size = chunk_size
538
+ self.max_retries = max_retries
539
+ self.request_timeout = request_timeout
540
+
541
+ # Internal
542
+ self._client: Optional[EuriaiEmbeddingClient] = None
543
+ self._executor: Optional[ThreadPoolExecutor] = None
544
+
545
+ # Initialize client
546
+ self._client = EuriaiEmbeddingClient(
547
+ api_key=self.api_key,
548
+ model=self.model
549
+ )
550
+
551
+ # Initialize thread pool for async operations
552
+ self._executor = ThreadPoolExecutor(max_workers=4)
553
+
554
+ def embed_documents(self, texts: List[str]) -> List[List[float]]:
555
+ """Embed search documents."""
556
+ if not texts:
557
+ return []
558
+
559
+ # Process in chunks to avoid API limits
560
+ all_embeddings = []
561
+ for i in range(0, len(texts), self.chunk_size):
562
+ chunk = texts[i:i + self.chunk_size]
563
+
564
+ # Get embeddings for this chunk
565
+ chunk_embeddings = self._client.embed_batch(chunk)
566
+ all_embeddings.extend([emb.tolist() for emb in chunk_embeddings])
567
+
568
+ return all_embeddings
569
+
570
+ def embed_query(self, text: str) -> List[float]:
571
+ """Embed a query text."""
572
+ embedding = self._client.embed(text)
573
+ return embedding.tolist()
574
+
575
+ async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
576
+ """Async embed search documents."""
577
+ loop = asyncio.get_event_loop()
578
+ return await loop.run_in_executor(
579
+ self._executor,
580
+ self.embed_documents,
581
+ texts
582
+ )
583
+
584
+ async def aembed_query(self, text: str) -> List[float]:
585
+ """Async embed a query text."""
586
+ loop = asyncio.get_event_loop()
587
+ return await loop.run_in_executor(
588
+ self._executor,
589
+ self.embed_query,
590
+ text
591
+ )
592
+
593
+
594
+ class EuriaiLLM(LLM):
595
+ """
596
+ Enhanced LangChain LLM implementation using Euri API.
597
+
598
+ This provides the traditional LLM interface (text-in, text-out)
599
+ while using the Euri API backend.
600
+
601
+ Example:
602
+ llm = EuriaiLLM(
603
+ api_key="your_api_key",
604
+ model="gpt-4.1-nano",
605
+ temperature=0.5
606
+ )
607
+
608
+ response = llm.invoke("What is the capital of France?")
609
+
610
+ # Streaming
611
+ for chunk in llm.stream("Tell me a joke"):
612
+ print(chunk, end="")
613
+ """
614
+
615
+ # Configuration
616
+ api_key: str = Field(description="Euri API key")
617
+ model: str = Field(default="gpt-4.1-nano", description="Model name")
618
+ temperature: float = Field(default=0.7, ge=0.0, le=1.0, description="Sampling temperature")
619
+ max_tokens: int = Field(default=1000, gt=0, description="Maximum tokens to generate")
620
+ top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0, description="Nucleus sampling parameter")
621
+ frequency_penalty: Optional[float] = Field(default=None, ge=-2.0, le=2.0, description="Frequency penalty")
622
+ presence_penalty: Optional[float] = Field(default=None, ge=-2.0, le=2.0, description="Presence penalty")
623
+
624
+ # Internal
625
+ _client: Optional[EuriaiClient] = None
626
+ _executor: Optional[ThreadPoolExecutor] = None
627
+
628
+ def __init__(self, **kwargs):
629
+ if not LANGCHAIN_AVAILABLE:
630
+ raise ImportError(
631
+ "LangChain is not installed. Please install with: "
632
+ "pip install langchain-core"
633
+ )
634
+
635
+ super().__init__(**kwargs)
636
+
637
+ # Initialize client
638
+ self._client = EuriaiClient(
639
+ api_key=self.api_key,
640
+ model=self.model
641
+ )
642
+
643
+ # Initialize thread pool for async operations
644
+ self._executor = ThreadPoolExecutor(max_workers=4)
645
+
646
+ @property
647
+ def _llm_type(self) -> str:
648
+ """Get the type of language model."""
649
+ return "euriai_llm_enhanced"
650
+
651
+ @property
652
+ def _identifying_params(self) -> Dict[str, Any]:
653
+ """Get identifying parameters for the model."""
654
+ return {
655
+ "model": self.model,
656
+ "temperature": self.temperature,
657
+ "max_tokens": self.max_tokens,
658
+ "top_p": self.top_p,
659
+ "frequency_penalty": self.frequency_penalty,
660
+ "presence_penalty": self.presence_penalty,
661
+ }
662
+
663
+ def _call(
664
+ self,
665
+ prompt: str,
666
+ stop: Optional[List[str]] = None,
667
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
668
+ **kwargs: Any,
669
+ ) -> str:
670
+ """Call the Euri API."""
671
+ # Prepare request
672
+ request_params = {
673
+ "prompt": prompt,
674
+ "temperature": self.temperature,
675
+ "max_tokens": self.max_tokens,
676
+ }
677
+
678
+ # Add optional parameters
679
+ if self.top_p is not None:
680
+ request_params["top_p"] = self.top_p
681
+ if self.frequency_penalty is not None:
682
+ request_params["frequency_penalty"] = self.frequency_penalty
683
+ if self.presence_penalty is not None:
684
+ request_params["presence_penalty"] = self.presence_penalty
685
+ if stop:
686
+ request_params["stop"] = stop
687
+
688
+ # Override with kwargs
689
+ request_params.update(kwargs)
690
+
691
+ try:
692
+ # Make API call
693
+ response = self._client.generate_completion(**request_params)
694
+ return response.get("choices", [{}])[0].get("message", {}).get("content", "")
695
+ except Exception as e:
696
+ if run_manager:
697
+ run_manager.on_llm_error(e)
698
+ raise
699
+
700
+ def _stream(
701
+ self,
702
+ prompt: str,
703
+ stop: Optional[List[str]] = None,
704
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
705
+ **kwargs: Any,
706
+ ) -> Iterator[str]:
707
+ """Stream the LLM response."""
708
+ # Prepare request
709
+ request_params = {
710
+ "prompt": prompt,
711
+ "temperature": self.temperature,
712
+ "max_tokens": self.max_tokens,
713
+ }
714
+
715
+ # Add optional parameters
716
+ if self.top_p is not None:
717
+ request_params["top_p"] = self.top_p
718
+ if self.frequency_penalty is not None:
719
+ request_params["frequency_penalty"] = self.frequency_penalty
720
+ if self.presence_penalty is not None:
721
+ request_params["presence_penalty"] = self.presence_penalty
722
+ if stop:
723
+ request_params["stop"] = stop
724
+
725
+ # Override with kwargs
726
+ request_params.update(kwargs)
727
+
728
+ try:
729
+ # Stream response
730
+ for chunk_data in self._client.stream_completion(**request_params):
731
+ if chunk_data.strip():
732
+ try:
733
+ # Parse SSE data
734
+ if chunk_data.startswith("data: "):
735
+ chunk_data = chunk_data[6:]
736
+
737
+ if chunk_data.strip() == "[DONE]":
738
+ break
739
+
740
+ chunk_json = json.loads(chunk_data)
741
+ if "choices" in chunk_json and chunk_json["choices"]:
742
+ choice = chunk_json["choices"][0]
743
+ delta = choice.get("delta", {})
744
+ content = delta.get("content", "")
745
+
746
+ if content:
747
+ # Notify callback
748
+ if run_manager:
749
+ run_manager.on_llm_new_token(content)
750
+
751
+ yield content
752
+ except json.JSONDecodeError:
753
+ continue
754
+
755
+ except Exception as e:
756
+ if run_manager:
757
+ run_manager.on_llm_error(e)
758
+ raise
759
+
760
+ async def _acall(
761
+ self,
762
+ prompt: str,
763
+ stop: Optional[List[str]] = None,
764
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
765
+ **kwargs: Any,
766
+ ) -> str:
767
+ """Async call the Euri API."""
768
+ loop = asyncio.get_event_loop()
769
+ return await loop.run_in_executor(
770
+ self._executor,
771
+ self._call,
772
+ prompt,
773
+ stop,
774
+ run_manager,
775
+ **kwargs
776
+ )
777
+
778
+ async def _astream(
779
+ self,
780
+ prompt: str,
781
+ stop: Optional[List[str]] = None,
782
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
783
+ **kwargs: Any,
784
+ ) -> AsyncIterator[str]:
785
+ """Async stream the LLM response."""
786
+ loop = asyncio.get_event_loop()
787
+
788
+ def sync_stream():
789
+ return list(self._stream(prompt, stop, run_manager, **kwargs))
790
+
791
+ chunks = await loop.run_in_executor(self._executor, sync_stream)
792
+
793
+ for chunk in chunks:
794
+ yield chunk
795
+
796
+
797
+ # Convenience functions for easy model creation
798
+ def create_chat_model(
799
+ api_key: str,
800
+ model: str = "gpt-4.1-nano",
801
+ temperature: float = 0.7,
802
+ **kwargs
803
+ ) -> EuriaiChatModel:
804
+ """Create a chat model with default settings."""
805
+ return EuriaiChatModel(
806
+ api_key=api_key,
807
+ model=model,
808
+ temperature=temperature,
809
+ **kwargs
810
+ )
811
+
812
+
813
+ def create_embeddings(
814
+ api_key: str,
815
+ model: str = "text-embedding-3-small",
816
+ **kwargs
817
+ ) -> EuriaiEmbeddings:
818
+ """Create an embeddings model with default settings."""
819
+ return EuriaiEmbeddings(
820
+ api_key=api_key,
821
+ model=model,
822
+ **kwargs
823
+ )
824
+
825
+
826
+ def create_llm(
827
+ api_key: str,
828
+ model: str = "gpt-4.1-nano",
829
+ temperature: float = 0.7,
830
+ **kwargs
831
+ ) -> EuriaiLLM:
832
+ """Create an LLM with default settings."""
833
+ return EuriaiLLM(
834
+ api_key=api_key,
835
+ model=model,
836
+ temperature=temperature,
837
+ **kwargs
838
+ )
839
+
840
+
841
+ # Model information
842
+ AVAILABLE_MODELS = {
843
+ "chat": [
844
+ "gpt-4.1-nano",
845
+ "gpt-4.1-mini",
846
+ "gpt-4.1-turbo",
847
+ "claude-3.5-sonnet",
848
+ "claude-3.5-haiku",
849
+ "gemini-2.5-flash",
850
+ "gemini-2.0-flash-exp"
851
+ ],
852
+ "embeddings": [
853
+ "text-embedding-3-small",
854
+ "text-embedding-3-large",
855
+ "text-embedding-ada-002"
856
+ ]
857
+ }
858
+
859
+
860
+ def get_available_models() -> Dict[str, List[str]]:
861
+ """Get list of available models."""
862
+ return AVAILABLE_MODELS.copy()
863
+
864
+
865
+ def validate_model(model: str, model_type: str = "chat") -> bool:
866
+ """Validate if a model is available."""
867
+ return model in AVAILABLE_MODELS.get(model_type, [])