sentienceapi 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic.

Files changed (82)
  1. sentience/__init__.py +253 -0
  2. sentience/_extension_loader.py +195 -0
  3. sentience/action_executor.py +215 -0
  4. sentience/actions.py +1020 -0
  5. sentience/agent.py +1181 -0
  6. sentience/agent_config.py +46 -0
  7. sentience/agent_runtime.py +424 -0
  8. sentience/asserts/__init__.py +70 -0
  9. sentience/asserts/expect.py +621 -0
  10. sentience/asserts/query.py +383 -0
  11. sentience/async_api.py +108 -0
  12. sentience/backends/__init__.py +137 -0
  13. sentience/backends/actions.py +343 -0
  14. sentience/backends/browser_use_adapter.py +241 -0
  15. sentience/backends/cdp_backend.py +393 -0
  16. sentience/backends/exceptions.py +211 -0
  17. sentience/backends/playwright_backend.py +194 -0
  18. sentience/backends/protocol.py +216 -0
  19. sentience/backends/sentience_context.py +469 -0
  20. sentience/backends/snapshot.py +427 -0
  21. sentience/base_agent.py +196 -0
  22. sentience/browser.py +1215 -0
  23. sentience/browser_evaluator.py +299 -0
  24. sentience/canonicalization.py +207 -0
  25. sentience/cli.py +130 -0
  26. sentience/cloud_tracing.py +807 -0
  27. sentience/constants.py +6 -0
  28. sentience/conversational_agent.py +543 -0
  29. sentience/element_filter.py +136 -0
  30. sentience/expect.py +188 -0
  31. sentience/extension/background.js +104 -0
  32. sentience/extension/content.js +161 -0
  33. sentience/extension/injected_api.js +914 -0
  34. sentience/extension/manifest.json +36 -0
  35. sentience/extension/pkg/sentience_core.d.ts +51 -0
  36. sentience/extension/pkg/sentience_core.js +323 -0
  37. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  38. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  39. sentience/extension/release.json +115 -0
  40. sentience/formatting.py +15 -0
  41. sentience/generator.py +202 -0
  42. sentience/inspector.py +367 -0
  43. sentience/llm_interaction_handler.py +191 -0
  44. sentience/llm_provider.py +875 -0
  45. sentience/llm_provider_utils.py +120 -0
  46. sentience/llm_response_builder.py +153 -0
  47. sentience/models.py +846 -0
  48. sentience/ordinal.py +280 -0
  49. sentience/overlay.py +222 -0
  50. sentience/protocols.py +228 -0
  51. sentience/query.py +303 -0
  52. sentience/read.py +188 -0
  53. sentience/recorder.py +589 -0
  54. sentience/schemas/trace_v1.json +335 -0
  55. sentience/screenshot.py +100 -0
  56. sentience/sentience_methods.py +86 -0
  57. sentience/snapshot.py +706 -0
  58. sentience/snapshot_diff.py +126 -0
  59. sentience/text_search.py +262 -0
  60. sentience/trace_event_builder.py +148 -0
  61. sentience/trace_file_manager.py +197 -0
  62. sentience/trace_indexing/__init__.py +27 -0
  63. sentience/trace_indexing/index_schema.py +199 -0
  64. sentience/trace_indexing/indexer.py +414 -0
  65. sentience/tracer_factory.py +322 -0
  66. sentience/tracing.py +449 -0
  67. sentience/utils/__init__.py +40 -0
  68. sentience/utils/browser.py +46 -0
  69. sentience/utils/element.py +257 -0
  70. sentience/utils/formatting.py +59 -0
  71. sentience/utils.py +296 -0
  72. sentience/verification.py +380 -0
  73. sentience/visual_agent.py +2058 -0
  74. sentience/wait.py +139 -0
  75. sentienceapi-0.95.0.dist-info/METADATA +984 -0
  76. sentienceapi-0.95.0.dist-info/RECORD +82 -0
  77. sentienceapi-0.95.0.dist-info/WHEEL +5 -0
  78. sentienceapi-0.95.0.dist-info/entry_points.txt +2 -0
  79. sentienceapi-0.95.0.dist-info/licenses/LICENSE +24 -0
  80. sentienceapi-0.95.0.dist-info/licenses/LICENSE-APACHE +201 -0
  81. sentienceapi-0.95.0.dist-info/licenses/LICENSE-MIT +21 -0
  82. sentienceapi-0.95.0.dist-info/top_level.txt +1 -0
sentience/llm_provider.py
@@ -0,0 +1,875 @@
+ """
+ LLM Provider abstraction layer for Sentience SDK
+ Enables "Bring Your Own Brain" (BYOB) pattern - plug in any LLM provider
+ """
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+
+ from .llm_provider_utils import get_api_key_from_env, handle_provider_error, require_package
+ from .llm_response_builder import LLMResponseBuilder
+
+
+ @dataclass
+ class LLMResponse:
+     """Standardized LLM response across all providers"""
+
+     content: str
+     prompt_tokens: int | None = None
+     completion_tokens: int | None = None
+     total_tokens: int | None = None
+     model_name: str | None = None
+     finish_reason: str | None = None
+
+
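# --- Illustrative sketch (editor's note, not part of the packaged file) ---
# LLMResponse is a plain dataclass, so a provider can be faked in tests by
# returning one directly. The field values below are made up for illustration.
from sentience.llm_provider import LLMResponse

fake = LLMResponse(
    content='{"action": "click"}',
    prompt_tokens=120,
    completion_tokens=8,
    total_tokens=128,
    model_name="gpt-4o",
    finish_reason="stop",
)
assert fake.total_tokens == (fake.prompt_tokens or 0) + (fake.completion_tokens or 0)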
+ class LLMProvider(ABC):
+     """
+     Abstract base class for LLM providers.
+
+     Implement this interface to add support for any LLM:
+     - OpenAI (GPT-4, GPT-3.5)
+     - Anthropic (Claude)
+     - Local models (Ollama, LlamaCpp)
+     - Azure OpenAI
+     - Any other completion API
+     """
+
+     def __init__(self, model: str):
+         """
+         Initialize LLM provider with model name.
+
+         Args:
+             model: Model identifier (e.g., "gpt-4o", "claude-3-sonnet")
+         """
+         self._model_name = model
+
+     @abstractmethod
+     def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
+         """
+         Generate a response from the LLM
+
+         Args:
+             system_prompt: System instruction/context
+             user_prompt: User query/request
+             **kwargs: Provider-specific parameters (temperature, max_tokens, etc.)
+
+         Returns:
+             LLMResponse with content and token usage
+         """
+         pass
+
+     @abstractmethod
+     def supports_json_mode(self) -> bool:
+         """
+         Whether this provider supports structured JSON output
+
+         Returns:
+             True if provider has native JSON mode, False otherwise
+         """
+         pass
+
+     @property
+     @abstractmethod
+     def model_name(self) -> str:
+         """
+         Model identifier (e.g., "gpt-4o", "claude-3-sonnet")
+
+         Returns:
+             Model name string
+         """
+         pass
+
+     def supports_vision(self) -> bool:
+         """
+         Whether this provider supports image input for vision tasks.
+
+         Override in subclasses that support vision-capable models.
+
+         Returns:
+             True if provider supports vision, False otherwise
+         """
+         return False
+
+     def generate_with_image(
+         self,
+         system_prompt: str,
+         user_prompt: str,
+         image_base64: str,
+         **kwargs,
+     ) -> LLMResponse:
+         """
+         Generate a response with image input (for vision-capable models).
+
+         This method is used for vision fallback in assertions and visual agents.
+         Override in subclasses that support vision-capable models.
+
+         Args:
+             system_prompt: System instruction/context
+             user_prompt: User query/request
+             image_base64: Base64-encoded image (PNG or JPEG)
+             **kwargs: Provider-specific parameters (temperature, max_tokens, etc.)
+
+         Returns:
+             LLMResponse with content and token usage
+
+         Raises:
+             NotImplementedError: If provider doesn't support vision
+         """
+         raise NotImplementedError(
+             f"{type(self).__name__} does not support vision. "
+             "Use a vision-capable provider like OpenAIProvider with GPT-4o "
+             "or AnthropicProvider with Claude 3."
+         )
+
+
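# --- Illustrative sketch (editor's note, not part of the packaged file) ---
# The "Bring Your Own Brain" pattern only requires subclassing LLMProvider and
# implementing generate(), supports_json_mode(), and model_name. This echo
# provider is a made-up example for offline tests; it calls no external API.
from sentience.llm_provider import LLMProvider, LLMResponse


class EchoProvider(LLMProvider):
    """Hypothetical offline provider that echoes the user prompt back."""

    def __init__(self) -> None:
        super().__init__(model="echo-1")

    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
        # No tokenizer here, so token counts are rough word counts.
        words = len(user_prompt.split())
        return LLMResponse(
            content=user_prompt,
            prompt_tokens=words,
            completion_tokens=words,
            model_name=self._model_name,
            finish_reason="stop",
        )

    def supports_json_mode(self) -> bool:
        return False

    @property
    def model_name(self) -> str:
        return self._model_name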
+ class OpenAIProvider(LLMProvider):
+     """
+     OpenAI provider implementation (GPT-4, GPT-4o, GPT-3.5-turbo, etc.)
+
+     Example:
+         >>> from sentience.llm_provider import OpenAIProvider
+         >>> llm = OpenAIProvider(api_key="sk-...", model="gpt-4o")
+         >>> response = llm.generate("You are a helpful assistant", "Hello!")
+         >>> print(response.content)
+     """
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         model: str = "gpt-4o",
+         base_url: str | None = None,
+         organization: str | None = None,
+     ):
+         """
+         Initialize OpenAI provider
+
+         Args:
+             api_key: OpenAI API key (or set OPENAI_API_KEY env var)
+             model: Model name (gpt-4o, gpt-4-turbo, gpt-3.5-turbo, etc.)
+             base_url: Custom API base URL (for compatible APIs)
+             organization: OpenAI organization ID
+         """
+         super().__init__(model)  # Initialize base class with model name
+
+         OpenAI = require_package(
+             "openai",
+             "openai",
+             "OpenAI",
+             "pip install openai",
+         )
+
+         self.client = OpenAI(api_key=api_key, base_url=base_url, organization=organization)
+
+     def generate(
+         self,
+         system_prompt: str,
+         user_prompt: str,
+         temperature: float = 0.0,
+         max_tokens: int | None = None,
+         json_mode: bool = False,
+         **kwargs,
+     ) -> LLMResponse:
+         """
+         Generate response using OpenAI API
+
+         Args:
+             system_prompt: System instruction
+             user_prompt: User query
+             temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative)
+             max_tokens: Maximum tokens to generate
+             json_mode: Enable JSON response format (requires model support)
+             **kwargs: Additional OpenAI API parameters
+
+         Returns:
+             LLMResponse object
+         """
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         messages.append({"role": "user", "content": user_prompt})
+
+         # Build API parameters
+         api_params = {
+             "model": self._model_name,
+             "messages": messages,
+             "temperature": temperature,
+         }
+
+         if max_tokens:
+             api_params["max_tokens"] = max_tokens
+
+         if json_mode and self.supports_json_mode():
+             api_params["response_format"] = {"type": "json_object"}
+
+         # Merge additional parameters
+         api_params.update(kwargs)
+
+         # Call OpenAI API
+         try:
+             response = self.client.chat.completions.create(**api_params)
+         except Exception as e:
+             handle_provider_error(e, "OpenAI", "generate response")
+
+         choice = response.choices[0]
+         usage = response.usage
+
+         return LLMResponseBuilder.from_openai_format(
+             content=choice.message.content,
+             prompt_tokens=usage.prompt_tokens if usage else None,
+             completion_tokens=usage.completion_tokens if usage else None,
+             total_tokens=usage.total_tokens if usage else None,
+             model_name=response.model,
+             finish_reason=choice.finish_reason,
+         )
+
+     def supports_json_mode(self) -> bool:
+         """OpenAI models support JSON mode (GPT-4, GPT-3.5-turbo)"""
+         model_lower = self._model_name.lower()
+         return any(x in model_lower for x in ["gpt-4", "gpt-3.5"])
+
+     def supports_vision(self) -> bool:
+         """GPT-4o, GPT-4-turbo, and GPT-4-vision support vision."""
+         model_lower = self._model_name.lower()
+         return any(x in model_lower for x in ["gpt-4o", "gpt-4-turbo", "gpt-4-vision"])
+
+     def generate_with_image(
+         self,
+         system_prompt: str,
+         user_prompt: str,
+         image_base64: str,
+         temperature: float = 0.0,
+         max_tokens: int | None = None,
+         **kwargs,
+     ) -> LLMResponse:
+         """
+         Generate response with image input using OpenAI Vision API.
+
+         Args:
+             system_prompt: System instruction
+             user_prompt: User query
+             image_base64: Base64-encoded image (PNG or JPEG)
+             temperature: Sampling temperature (0.0 = deterministic)
+             max_tokens: Maximum tokens to generate
+             **kwargs: Additional OpenAI API parameters
+
+         Returns:
+             LLMResponse object
+
+         Raises:
+             NotImplementedError: If model doesn't support vision
+         """
+         if not self.supports_vision():
+             raise NotImplementedError(
+                 f"Model {self._model_name} does not support vision. "
+                 "Use gpt-4o, gpt-4-turbo, or gpt-4-vision-preview."
+             )
+
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+
+         # Vision message format with image_url
+         messages.append(
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "text", "text": user_prompt},
+                     {
+                         "type": "image_url",
+                         "image_url": {"url": f"data:image/png;base64,{image_base64}"},
+                     },
+                 ],
+             }
+         )
+
+         # Build API parameters
+         api_params = {
+             "model": self._model_name,
+             "messages": messages,
+             "temperature": temperature,
+         }
+
+         if max_tokens:
+             api_params["max_tokens"] = max_tokens
+
+         # Merge additional parameters
+         api_params.update(kwargs)
+
+         # Call OpenAI API
+         try:
+             response = self.client.chat.completions.create(**api_params)
+         except Exception as e:
+             handle_provider_error(e, "OpenAI", "generate response with image")
+
+         choice = response.choices[0]
+         usage = response.usage
+
+         return LLMResponseBuilder.from_openai_format(
+             content=choice.message.content,
+             prompt_tokens=usage.prompt_tokens if usage else None,
+             completion_tokens=usage.completion_tokens if usage else None,
+             total_tokens=usage.total_tokens if usage else None,
+             model_name=response.model,
+             finish_reason=choice.finish_reason,
+         )
+
+     @property
+     def model_name(self) -> str:
+         return self._model_name
+
+
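# --- Illustrative sketch (editor's note, not part of the packaged file) ---
# json_mode is only forwarded when supports_json_mode() returns True, so callers
# should still be prepared to parse plain text. The model name and prompts are
# assumptions for illustration.
import json

from sentience.llm_provider import OpenAIProvider

llm = OpenAIProvider(model="gpt-4o")  # falls back to OPENAI_API_KEY if api_key is None
resp = llm.generate(
    system_prompt="Reply only with JSON.",
    user_prompt='Return {"ok": true} if you can read this.',
    json_mode=True,
)
try:
    payload = json.loads(resp.content)
except json.JSONDecodeError:
    payload = {"raw": resp.content}  # fallback when the model ignores JSON mode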
+ class AnthropicProvider(LLMProvider):
+     """
+     Anthropic provider implementation (Claude 3 Opus, Sonnet, Haiku, etc.)
+
+     Example:
+         >>> from sentience.llm_provider import AnthropicProvider
+         >>> llm = AnthropicProvider(api_key="sk-ant-...", model="claude-3-sonnet-20240229")
+         >>> response = llm.generate("You are a helpful assistant", "Hello!")
+         >>> print(response.content)
+     """
+
+     def __init__(self, api_key: str | None = None, model: str = "claude-3-5-sonnet-20241022"):
+         """
+         Initialize Anthropic provider
+
+         Args:
+             api_key: Anthropic API key (or set ANTHROPIC_API_KEY env var)
+             model: Model name (claude-3-opus, claude-3-sonnet, claude-3-haiku, etc.)
+         """
+         super().__init__(model)  # Initialize base class with model name
+
+         Anthropic = require_package(
+             "anthropic",
+             "anthropic",
+             "Anthropic",
+             "pip install anthropic",
+         )
+
+         self.client = Anthropic(api_key=api_key)
+
+     def generate(
+         self,
+         system_prompt: str,
+         user_prompt: str,
+         temperature: float = 0.0,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """
+         Generate response using Anthropic API
+
+         Args:
+             system_prompt: System instruction
+             user_prompt: User query
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate (required by Anthropic)
+             **kwargs: Additional Anthropic API parameters
+
+         Returns:
+             LLMResponse object
+         """
+         # Build API parameters
+         api_params = {
+             "model": self._model_name,
+             "max_tokens": max_tokens,
+             "temperature": temperature,
+             "messages": [{"role": "user", "content": user_prompt}],
+         }
+
+         if system_prompt:
+             api_params["system"] = system_prompt
+
+         # Merge additional parameters
+         api_params.update(kwargs)
+
+         # Call Anthropic API
+         try:
+             response = self.client.messages.create(**api_params)
+         except Exception as e:
+             handle_provider_error(e, "Anthropic", "generate response")
+
+         content = response.content[0].text if response.content else ""
+
+         return LLMResponseBuilder.from_anthropic_format(
+             content=content,
+             input_tokens=response.usage.input_tokens if hasattr(response, "usage") else None,
+             output_tokens=response.usage.output_tokens if hasattr(response, "usage") else None,
+             model_name=response.model,
+             stop_reason=response.stop_reason,
+         )
+
+     def supports_json_mode(self) -> bool:
+         """Anthropic doesn't have native JSON mode (requires prompt engineering)"""
+         return False
+
+     def supports_vision(self) -> bool:
+         """Claude 3 models (Opus, Sonnet, Haiku) all support vision."""
+         model_lower = self._model_name.lower()
+         return any(x in model_lower for x in ["claude-3", "claude-3.5"])
+
+     def generate_with_image(
+         self,
+         system_prompt: str,
+         user_prompt: str,
+         image_base64: str,
+         temperature: float = 0.0,
+         max_tokens: int = 1024,
+         **kwargs,
+     ) -> LLMResponse:
+         """
+         Generate response with image input using Anthropic Vision API.
+
+         Args:
+             system_prompt: System instruction
+             user_prompt: User query
+             image_base64: Base64-encoded image (PNG or JPEG)
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate (required by Anthropic)
+             **kwargs: Additional Anthropic API parameters
+
+         Returns:
+             LLMResponse object
+
+         Raises:
+             NotImplementedError: If model doesn't support vision
+         """
+         if not self.supports_vision():
+             raise NotImplementedError(
+                 f"Model {self._model_name} does not support vision. "
+                 "Use Claude 3 models (claude-3-opus, claude-3-sonnet, claude-3-haiku)."
+             )
+
+         # Anthropic vision message format
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "image",
+                         "source": {
+                             "type": "base64",
+                             "media_type": "image/png",
+                             "data": image_base64,
+                         },
+                     },
+                     {
+                         "type": "text",
+                         "text": user_prompt,
+                     },
+                 ],
+             }
+         ]
+
+         # Build API parameters
+         api_params = {
+             "model": self._model_name,
+             "max_tokens": max_tokens,
+             "temperature": temperature,
+             "messages": messages,
+         }
+
+         if system_prompt:
+             api_params["system"] = system_prompt
+
+         # Merge additional parameters
+         api_params.update(kwargs)
+
+         # Call Anthropic API
+         try:
+             response = self.client.messages.create(**api_params)
+         except Exception as e:
+             handle_provider_error(e, "Anthropic", "generate response with image")
+
+         content = response.content[0].text if response.content else ""
+
+         return LLMResponseBuilder.from_anthropic_format(
+             content=content,
+             input_tokens=response.usage.input_tokens if hasattr(response, "usage") else None,
+             output_tokens=response.usage.output_tokens if hasattr(response, "usage") else None,
+             model_name=response.model,
+             stop_reason=response.stop_reason,
+         )
+
+     @property
+     def model_name(self) -> str:
+         return self._model_name
+
+
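# --- Illustrative sketch (editor's note, not part of the packaged file) ---
# generate_with_image() expects a base64 string, so a screenshot file has to be
# encoded before the call. The file name and prompts are assumptions.
import base64
from pathlib import Path

from sentience.llm_provider import AnthropicProvider

llm = AnthropicProvider(model="claude-3-5-sonnet-20241022")
screenshot_b64 = base64.b64encode(Path("screenshot.png").read_bytes()).decode("ascii")
resp = llm.generate_with_image(
    system_prompt="You check web page screenshots.",
    user_prompt="Is a login form visible? Answer yes or no.",
    image_base64=screenshot_b64,
)
print(resp.content, resp.model_name)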
501
+ class GLMProvider(LLMProvider):
502
+ """
503
+ Zhipu AI GLM provider implementation (GLM-4, GLM-4-Plus, etc.)
504
+
505
+ Requirements:
506
+ pip install zhipuai
507
+
508
+ Example:
509
+ >>> from sentience.llm_provider import GLMProvider
510
+ >>> llm = GLMProvider(api_key="your-api-key", model="glm-4-plus")
511
+ >>> response = llm.generate("You are a helpful assistant", "Hello!")
512
+ >>> print(response.content)
513
+ """
514
+
515
+ def __init__(self, api_key: str | None = None, model: str = "glm-4-plus"):
516
+ """
517
+ Initialize GLM provider
518
+
519
+ Args:
520
+ api_key: Zhipu AI API key (or set GLM_API_KEY env var)
521
+ model: Model name (glm-4-plus, glm-4, glm-4-air, glm-4-flash, etc.)
522
+ """
523
+ super().__init__(model) # Initialize base class with model name
524
+
525
+ ZhipuAI = require_package(
526
+ "zhipuai",
527
+ "zhipuai",
528
+ "ZhipuAI",
529
+ "pip install zhipuai",
530
+ )
531
+
532
+ self.client = ZhipuAI(api_key=api_key)
533
+
534
+ def generate(
535
+ self,
536
+ system_prompt: str,
537
+ user_prompt: str,
538
+ temperature: float = 0.0,
539
+ max_tokens: int | None = None,
540
+ **kwargs,
541
+ ) -> LLMResponse:
542
+ """
543
+ Generate response using GLM API
544
+
545
+ Args:
546
+ system_prompt: System instruction
547
+ user_prompt: User query
548
+ temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative)
549
+ max_tokens: Maximum tokens to generate
550
+ **kwargs: Additional GLM API parameters
551
+
552
+ Returns:
553
+ LLMResponse object
554
+ """
555
+ messages = []
556
+ if system_prompt:
557
+ messages.append({"role": "system", "content": system_prompt})
558
+ messages.append({"role": "user", "content": user_prompt})
559
+
560
+ # Build API parameters
561
+ api_params = {
562
+ "model": self._model_name,
563
+ "messages": messages,
564
+ "temperature": temperature,
565
+ }
566
+
567
+ if max_tokens:
568
+ api_params["max_tokens"] = max_tokens
569
+
570
+ # Merge additional parameters
571
+ api_params.update(kwargs)
572
+
573
+ # Call GLM API
574
+ try:
575
+ response = self.client.chat.completions.create(**api_params)
576
+ except Exception as e:
577
+ handle_provider_error(e, "GLM", "generate response")
578
+
579
+ choice = response.choices[0]
580
+ usage = response.usage
581
+
582
+ return LLMResponseBuilder.from_openai_format(
583
+ content=choice.message.content,
584
+ prompt_tokens=usage.prompt_tokens if usage else None,
585
+ completion_tokens=usage.completion_tokens if usage else None,
586
+ total_tokens=usage.total_tokens if usage else None,
587
+ model_name=response.model,
588
+ finish_reason=choice.finish_reason,
589
+ )
590
+
591
+ def supports_json_mode(self) -> bool:
592
+ """GLM-4 models support JSON mode"""
593
+ return "glm-4" in self._model_name.lower()
594
+
595
+ @property
596
+ def model_name(self) -> str:
597
+ return self._model_name
598
+
599
+
600
+ class GeminiProvider(LLMProvider):
601
+ """
602
+ Google Gemini provider implementation (Gemini 2.0, Gemini 1.5 Pro, etc.)
603
+
604
+ Requirements:
605
+ pip install google-generativeai
606
+
607
+ Example:
608
+ >>> from sentience.llm_provider import GeminiProvider
609
+ >>> llm = GeminiProvider(api_key="your-api-key", model="gemini-2.0-flash-exp")
610
+ >>> response = llm.generate("You are a helpful assistant", "Hello!")
611
+ >>> print(response.content)
612
+ """
613
+
614
+ def __init__(self, api_key: str | None = None, model: str = "gemini-2.0-flash-exp"):
615
+ """
616
+ Initialize Gemini provider
617
+
618
+ Args:
619
+ api_key: Google API key (or set GEMINI_API_KEY or GOOGLE_API_KEY env var)
620
+ model: Model name (gemini-2.0-flash-exp, gemini-1.5-pro, gemini-1.5-flash, etc.)
621
+ """
622
+ super().__init__(model) # Initialize base class with model name
623
+
624
+ genai = require_package(
625
+ "google-generativeai",
626
+ "google.generativeai",
627
+ install_command="pip install google-generativeai",
628
+ )
629
+
630
+ # Configure API key (check parameter first, then environment variables)
631
+ api_key = get_api_key_from_env(["GEMINI_API_KEY", "GOOGLE_API_KEY"], api_key)
632
+ if api_key:
633
+ genai.configure(api_key=api_key)
634
+
635
+ self.genai = genai
636
+ self.model = genai.GenerativeModel(model)
637
+
638
+ def generate(
639
+ self,
640
+ system_prompt: str,
641
+ user_prompt: str,
642
+ temperature: float = 0.0,
643
+ max_tokens: int | None = None,
644
+ **kwargs,
645
+ ) -> LLMResponse:
646
+ """
647
+ Generate response using Gemini API
648
+
649
+ Args:
650
+ system_prompt: System instruction
651
+ user_prompt: User query
652
+ temperature: Sampling temperature (0.0 = deterministic, 2.0 = very creative)
653
+ max_tokens: Maximum tokens to generate
654
+ **kwargs: Additional Gemini API parameters
655
+
656
+ Returns:
657
+ LLMResponse object
658
+ """
659
+ # Combine system and user prompts (Gemini doesn't have separate system role in all versions)
660
+ full_prompt = f"{system_prompt}\n\n{user_prompt}" if system_prompt else user_prompt
661
+
662
+ # Build generation config
663
+ generation_config = {
664
+ "temperature": temperature,
665
+ }
666
+
667
+ if max_tokens:
668
+ generation_config["max_output_tokens"] = max_tokens
669
+
670
+ # Merge additional parameters
671
+ generation_config.update(kwargs)
672
+
673
+ # Call Gemini API
674
+ try:
675
+ response = self.model.generate_content(full_prompt, generation_config=generation_config)
676
+ except Exception as e:
677
+ handle_provider_error(e, "Gemini", "generate response")
678
+
679
+ # Extract content
680
+ content = response.text if response.text else ""
681
+
682
+ # Token usage (if available)
683
+ prompt_tokens = None
684
+ completion_tokens = None
685
+ total_tokens = None
686
+
687
+ if hasattr(response, "usage_metadata") and response.usage_metadata:
688
+ prompt_tokens = response.usage_metadata.prompt_token_count
689
+ completion_tokens = response.usage_metadata.candidates_token_count
690
+ total_tokens = response.usage_metadata.total_token_count
691
+
692
+ return LLMResponseBuilder.from_gemini_format(
693
+ content=content,
694
+ prompt_tokens=prompt_tokens,
695
+ completion_tokens=completion_tokens,
696
+ total_tokens=total_tokens,
697
+ model_name=self._model_name,
698
+ )
699
+
700
+ def supports_json_mode(self) -> bool:
701
+ """Gemini 1.5+ models support JSON mode via response_mime_type"""
702
+ model_lower = self._model_name.lower()
703
+ return any(x in model_lower for x in ["gemini-1.5", "gemini-2.0"])
704
+
705
+ @property
706
+ def model_name(self) -> str:
707
+ return self._model_name
708
+
709
+
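# --- Illustrative sketch (editor's note, not part of the packaged file) ---
# supports_json_mode() advertises JSON support via response_mime_type, but
# generate() never sets that field itself. Because extra kwargs are merged into
# generation_config, a caller could request it explicitly; this assumes the
# google-generativeai generation config accepts response_mime_type.
from sentience.llm_provider import GeminiProvider

llm = GeminiProvider(model="gemini-1.5-flash")
resp = llm.generate(
    system_prompt="Answer in JSON.",
    user_prompt='Return {"status": "ok"}.',
    response_mime_type="application/json",  # forwarded through **kwargs
)
print(resp.content)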
+ class LocalLLMProvider(LLMProvider):
+     """
+     Local LLM provider using HuggingFace Transformers
+     Supports Qwen, Llama, Gemma, Phi, and other instruction-tuned models
+
+     Example:
+         >>> from sentience.llm_provider import LocalLLMProvider
+         >>> llm = LocalLLMProvider(model_name="Qwen/Qwen2.5-3B-Instruct")
+         >>> response = llm.generate("You are helpful", "Hello!")
+     """
+
+     def __init__(
+         self,
+         model_name: str = "Qwen/Qwen2.5-3B-Instruct",
+         device: str = "auto",
+         load_in_4bit: bool = False,
+         load_in_8bit: bool = False,
+         torch_dtype: str = "auto",
+     ):
+         """
+         Initialize local LLM using HuggingFace Transformers
+
+         Args:
+             model_name: HuggingFace model identifier
+                 Popular options:
+                 - "Qwen/Qwen2.5-3B-Instruct" (recommended, 3B params)
+                 - "meta-llama/Llama-3.2-3B-Instruct" (3B params)
+                 - "google/gemma-2-2b-it" (2B params)
+                 - "microsoft/Phi-3-mini-4k-instruct" (3.8B params)
+             device: Device to run on ("cpu", "cuda", "mps", "auto")
+             load_in_4bit: Use 4-bit quantization (saves 75% memory)
+             load_in_8bit: Use 8-bit quantization (saves 50% memory)
+             torch_dtype: Data type ("auto", "float16", "bfloat16", "float32")
+         """
+         super().__init__(model_name)  # Initialize base class with model name
+
+         # Import required packages with consistent error handling
+         try:
+             import torch
+             from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+         except ImportError:
+             raise ImportError(
+                 "transformers and torch required for local LLM. "
+                 "Install with: pip install transformers torch"
+             )
+
+         # Load tokenizer
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+         # Set padding token if not present
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         # Configure quantization
+         quantization_config = None
+         if load_in_4bit:
+             quantization_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_compute_dtype=torch.float16,
+                 bnb_4bit_use_double_quant=True,
+                 bnb_4bit_quant_type="nf4",
+             )
+         elif load_in_8bit:
+             quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
+         # Determine torch dtype
+         if torch_dtype == "auto":
+             dtype = torch.float16 if device != "cpu" else torch.float32
+         else:
+             dtype = getattr(torch, torch_dtype)
+
+         # Load model
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             quantization_config=quantization_config,
+             torch_dtype=dtype if quantization_config is None else None,
+             device_map=device,
+             trust_remote_code=True,
+             low_cpu_mem_usage=True,
+         )
+         self.model.eval()
+
+     def generate(
+         self,
+         system_prompt: str,
+         user_prompt: str,
+         max_new_tokens: int = 512,
+         temperature: float = 0.1,
+         top_p: float = 0.9,
+         **kwargs,
+     ) -> LLMResponse:
+         """
+         Generate response using local model
+
+         Args:
+             system_prompt: System instruction
+             user_prompt: User query
+             max_new_tokens: Maximum tokens to generate
+             temperature: Sampling temperature (0 = greedy, higher = more random)
+             top_p: Nucleus sampling parameter
+             **kwargs: Additional generation parameters
+
+         Returns:
+             LLMResponse object
+         """
+         import torch
+
+         # Auto-determine sampling based on temperature
+         do_sample = temperature > 0
+
+         # Format prompt using model's chat template
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         messages.append({"role": "user", "content": user_prompt})
+
+         # Use model's native chat template if available
+         if hasattr(self.tokenizer, "apply_chat_template"):
+             formatted_prompt = self.tokenizer.apply_chat_template(
+                 messages, tokenize=False, add_generation_prompt=True
+             )
+         else:
+             # Fallback formatting
+             formatted_prompt = ""
+             if system_prompt:
+                 formatted_prompt += f"System: {system_prompt}\n\n"
+             formatted_prompt += f"User: {user_prompt}\n\nAssistant:"
+
+         # Tokenize
+         inputs = self.tokenizer(formatted_prompt, return_tensors="pt", truncation=True).to(
+             self.model.device
+         )
+
+         input_length = inputs["input_ids"].shape[1]
+
+         # Generate
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 **inputs,
+                 max_new_tokens=max_new_tokens,
+                 temperature=temperature if do_sample else 1.0,
+                 top_p=top_p,
+                 do_sample=do_sample,
+                 pad_token_id=self.tokenizer.pad_token_id,
+                 eos_token_id=self.tokenizer.eos_token_id,
+                 **kwargs,
+             )
+
+         # Decode only the new tokens
+         generated_tokens = outputs[0][input_length:]
+         response_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
+
+         return LLMResponseBuilder.from_local_format(
+             content=response_text,
+             prompt_tokens=input_length,
+             completion_tokens=len(generated_tokens),
+             model_name=self._model_name,
+         )
+
+     def supports_json_mode(self) -> bool:
+         """Local models typically need prompt engineering for JSON"""
+         return False
+
+     @property
+     def model_name(self) -> str:
+         return self._model_name
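# --- Illustrative sketch (editor's note, not part of the packaged file) ---
# Loading a local model with 4-bit quantization; this path additionally needs
# bitsandbytes and a CUDA device. Model choice and token budget are assumptions.
from sentience.llm_provider import LocalLLMProvider

llm = LocalLLMProvider(
    model_name="Qwen/Qwen2.5-3B-Instruct",
    device="auto",
    load_in_4bit=True,  # takes the BitsAndBytesConfig path in __init__
)
resp = llm.generate(
    system_prompt="You are a terse assistant.",
    user_prompt="Name one HTML form element.",
    max_new_tokens=32,
    temperature=0.0,  # greedy decoding (do_sample=False)
)
print(resp.content, resp.completion_tokens)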