sentienceapi-0.90.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sentience/__init__.py +153 -0
- sentience/actions.py +439 -0
- sentience/agent.py +687 -0
- sentience/agent_config.py +43 -0
- sentience/base_agent.py +101 -0
- sentience/browser.py +409 -0
- sentience/cli.py +130 -0
- sentience/cloud_tracing.py +292 -0
- sentience/conversational_agent.py +509 -0
- sentience/expect.py +92 -0
- sentience/extension/background.js +233 -0
- sentience/extension/content.js +298 -0
- sentience/extension/injected_api.js +1473 -0
- sentience/extension/manifest.json +36 -0
- sentience/extension/pkg/sentience_core.d.ts +51 -0
- sentience/extension/pkg/sentience_core.js +529 -0
- sentience/extension/pkg/sentience_core_bg.wasm +0 -0
- sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
- sentience/extension/release.json +115 -0
- sentience/extension/test-content.js +4 -0
- sentience/formatting.py +59 -0
- sentience/generator.py +202 -0
- sentience/inspector.py +185 -0
- sentience/llm_provider.py +431 -0
- sentience/models.py +406 -0
- sentience/overlay.py +115 -0
- sentience/query.py +303 -0
- sentience/read.py +96 -0
- sentience/recorder.py +369 -0
- sentience/schemas/trace_v1.json +216 -0
- sentience/screenshot.py +54 -0
- sentience/snapshot.py +282 -0
- sentience/text_search.py +150 -0
- sentience/trace_indexing/__init__.py +27 -0
- sentience/trace_indexing/index_schema.py +111 -0
- sentience/trace_indexing/indexer.py +363 -0
- sentience/tracer_factory.py +211 -0
- sentience/tracing.py +285 -0
- sentience/utils.py +296 -0
- sentience/wait.py +73 -0
- sentienceapi-0.90.11.dist-info/METADATA +878 -0
- sentienceapi-0.90.11.dist-info/RECORD +46 -0
- sentienceapi-0.90.11.dist-info/WHEEL +5 -0
- sentienceapi-0.90.11.dist-info/entry_points.txt +2 -0
- sentienceapi-0.90.11.dist-info/licenses/LICENSE.md +43 -0
- sentienceapi-0.90.11.dist-info/top_level.txt +1 -0
sentience/llm_provider.py
@@ -0,0 +1,431 @@
"""
LLM Provider abstraction layer for Sentience SDK

Enables "Bring Your Own Brain" (BYOB) pattern - plug in any LLM provider
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass
class LLMResponse:
    """Standardized LLM response across all providers"""

    content: str
    prompt_tokens: int | None = None
    completion_tokens: int | None = None
    total_tokens: int | None = None
    model_name: str | None = None
    finish_reason: str | None = None
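Every provider in this module returns this same LLMResponse dataclass, so calling code can stay provider-agnostic. A minimal consumption sketch (not part of the package; variable names are illustrative):

# Token fields are Optional, so guard before doing arithmetic or logging.
resp = LLMResponse(content='{"ok": true}', prompt_tokens=42, completion_tokens=7, total_tokens=49)
if resp.total_tokens is not None:
    print(f"{resp.model_name or 'unknown model'} used {resp.total_tokens} tokens")
print(resp.content)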
class LLMProvider(ABC):
    """
    Abstract base class for LLM providers.

    Implement this interface to add support for any LLM:
    - OpenAI (GPT-4, GPT-3.5)
    - Anthropic (Claude)
    - Local models (Ollama, LlamaCpp)
    - Azure OpenAI
    - Any other completion API
    """

    @abstractmethod
    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
        """
        Generate a response from the LLM

        Args:
            system_prompt: System instruction/context
            user_prompt: User query/request
            **kwargs: Provider-specific parameters (temperature, max_tokens, etc.)

        Returns:
            LLMResponse with content and token usage
        """
        pass

    @abstractmethod
    def supports_json_mode(self) -> bool:
        """
        Whether this provider supports structured JSON output

        Returns:
            True if provider has native JSON mode, False otherwise
        """
        pass

    @property
    @abstractmethod
    def model_name(self) -> str:
        """
        Model identifier (e.g., "gpt-4o", "claude-3-sonnet")

        Returns:
            Model name string
        """
        pass
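The wheel only ships the OpenAI, Anthropic, and local HuggingFace implementations that follow; anything else plugs in through this interface. As a sketch of the BYOB pattern (not part of the package), a provider backed by a local Ollama server might look like the code below. It assumes Ollama's documented /api/chat endpoint on localhost:11434 and the third-party requests library; the OllamaProvider name and response-field mapping are illustrative.

from sentience.llm_provider import LLMProvider, LLMResponse
import requests  # third-party HTTP client, not a dependency of this wheel


class OllamaProvider(LLMProvider):
    """Hypothetical provider that forwards generate() calls to a local Ollama server."""

    def __init__(self, model: str = "llama3.2", host: str = "http://localhost:11434"):
        self._model_name = model
        self._host = host

    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_prompt})

        # Non-streaming chat completion against Ollama's REST API
        resp = requests.post(
            f"{self._host}/api/chat",
            json={"model": self._model_name, "messages": messages, "stream": False},
            timeout=120,
        )
        resp.raise_for_status()
        data = resp.json()
        return LLMResponse(
            content=data["message"]["content"],
            prompt_tokens=data.get("prompt_eval_count"),
            completion_tokens=data.get("eval_count"),
            model_name=self._model_name,
        )

    def supports_json_mode(self) -> bool:
        return False  # rely on prompt engineering for structured output

    @property
    def model_name(self) -> str:
        return self._model_name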
class OpenAIProvider(LLMProvider):
    """
    OpenAI provider implementation (GPT-4, GPT-4o, GPT-3.5-turbo, etc.)

    Example:
        >>> from sentience.llm_provider import OpenAIProvider
        >>> llm = OpenAIProvider(api_key="sk-...", model="gpt-4o")
        >>> response = llm.generate("You are a helpful assistant", "Hello!")
        >>> print(response.content)
    """

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "gpt-4o",
        base_url: str | None = None,
        organization: str | None = None,
    ):
        """
        Initialize OpenAI provider

        Args:
            api_key: OpenAI API key (or set OPENAI_API_KEY env var)
            model: Model name (gpt-4o, gpt-4-turbo, gpt-3.5-turbo, etc.)
            base_url: Custom API base URL (for compatible APIs)
            organization: OpenAI organization ID
        """
        try:
            from openai import OpenAI
        except ImportError:
            raise ImportError("OpenAI package not installed. Install with: pip install openai")

        self.client = OpenAI(api_key=api_key, base_url=base_url, organization=organization)
        self._model_name = model

    def generate(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.0,
        max_tokens: int | None = None,
        json_mode: bool = False,
        **kwargs,
    ) -> LLMResponse:
        """
        Generate response using OpenAI API

        Args:
            system_prompt: System instruction
            user_prompt: User query
            temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative)
            max_tokens: Maximum tokens to generate
            json_mode: Enable JSON response format (requires model support)
            **kwargs: Additional OpenAI API parameters

        Returns:
            LLMResponse object
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_prompt})

        # Build API parameters
        api_params = {
            "model": self._model_name,
            "messages": messages,
            "temperature": temperature,
        }

        if max_tokens:
            api_params["max_tokens"] = max_tokens

        if json_mode and self.supports_json_mode():
            api_params["response_format"] = {"type": "json_object"}

        # Merge additional parameters
        api_params.update(kwargs)

        # Call OpenAI API
        response = self.client.chat.completions.create(**api_params)

        choice = response.choices[0]
        usage = response.usage

        return LLMResponse(
            content=choice.message.content,
            prompt_tokens=usage.prompt_tokens if usage else None,
            completion_tokens=usage.completion_tokens if usage else None,
            total_tokens=usage.total_tokens if usage else None,
            model_name=response.model,
            finish_reason=choice.finish_reason,
        )

    def supports_json_mode(self) -> bool:
        """OpenAI models support JSON mode (GPT-4, GPT-3.5-turbo)"""
        model_lower = self._model_name.lower()
        return any(x in model_lower for x in ["gpt-4", "gpt-3.5"])

    @property
    def model_name(self) -> str:
        return self._model_name
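A usage sketch for the class above, extending its own docstring example; the API key, prompts, local URL, and served-model name are placeholders:

llm = OpenAIProvider(api_key="sk-...", model="gpt-4o")

# json_mode=True adds response_format={"type": "json_object"} because
# supports_json_mode() matches "gpt-4" in the model name.
response = llm.generate(
    "You extract structured data and reply only with JSON.",
    'Return {"name": ..., "age": ...} for: "Ada, 36"',
    json_mode=True,
    max_tokens=200,
)
print(response.content, response.total_tokens)

# The base_url argument (see the constructor docstring) points the same class at any
# OpenAI-compatible server; the model string must then match what that server hosts.
local_llm = OpenAIProvider(
    api_key="not-needed",
    model="my-served-model",
    base_url="http://localhost:8000/v1",
)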
class AnthropicProvider(LLMProvider):
    """
    Anthropic provider implementation (Claude 3 Opus, Sonnet, Haiku, etc.)

    Example:
        >>> from sentience.llm_provider import AnthropicProvider
        >>> llm = AnthropicProvider(api_key="sk-ant-...", model="claude-3-sonnet-20240229")
        >>> response = llm.generate("You are a helpful assistant", "Hello!")
        >>> print(response.content)
    """

    def __init__(self, api_key: str | None = None, model: str = "claude-3-5-sonnet-20241022"):
        """
        Initialize Anthropic provider

        Args:
            api_key: Anthropic API key (or set ANTHROPIC_API_KEY env var)
            model: Model name (claude-3-opus, claude-3-sonnet, claude-3-haiku, etc.)
        """
        try:
            from anthropic import Anthropic
        except ImportError:
            raise ImportError(
                "Anthropic package not installed. Install with: pip install anthropic"
            )

        self.client = Anthropic(api_key=api_key)
        self._model_name = model

    def generate(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.0,
        max_tokens: int = 1024,
        **kwargs,
    ) -> LLMResponse:
        """
        Generate response using Anthropic API

        Args:
            system_prompt: System instruction
            user_prompt: User query
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate (required by Anthropic)
            **kwargs: Additional Anthropic API parameters

        Returns:
            LLMResponse object
        """
        # Build API parameters
        api_params = {
            "model": self._model_name,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": [{"role": "user", "content": user_prompt}],
        }

        if system_prompt:
            api_params["system"] = system_prompt

        # Merge additional parameters
        api_params.update(kwargs)

        # Call Anthropic API
        response = self.client.messages.create(**api_params)

        content = response.content[0].text if response.content else ""

        return LLMResponse(
            content=content,
            prompt_tokens=response.usage.input_tokens if hasattr(response, "usage") else None,
            completion_tokens=response.usage.output_tokens if hasattr(response, "usage") else None,
            total_tokens=(
                (response.usage.input_tokens + response.usage.output_tokens)
                if hasattr(response, "usage")
                else None
            ),
            model_name=response.model,
            finish_reason=response.stop_reason,
        )

    def supports_json_mode(self) -> bool:
        """Anthropic doesn't have native JSON mode (requires prompt engineering)"""
        return False

    @property
    def model_name(self) -> str:
        return self._model_name
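Because supports_json_mode() above returns False, structured output from this provider has to be requested in the prompt and parsed by the caller. A sketch (not part of the package; key and prompts are placeholders):

import json

llm = AnthropicProvider(api_key="sk-ant-...")  # defaults to claude-3-5-sonnet-20241022
response = llm.generate(
    "You are a strict JSON generator. Reply with a single JSON object and nothing else.",
    'Extract {"name": ..., "age": ...} from: "Ada, 36"',
    max_tokens=256,
)

try:
    data = json.loads(response.content)
except json.JSONDecodeError:
    data = None  # caller decides whether to retry or fall back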
class LocalLLMProvider(LLMProvider):
    """
    Local LLM provider using HuggingFace Transformers
    Supports Qwen, Llama, Gemma, Phi, and other instruction-tuned models

    Example:
        >>> from sentience.llm_provider import LocalLLMProvider
        >>> llm = LocalLLMProvider(model_name="Qwen/Qwen2.5-3B-Instruct")
        >>> response = llm.generate("You are helpful", "Hello!")
    """

    def __init__(
        self,
        model_name: str = "Qwen/Qwen2.5-3B-Instruct",
        device: str = "auto",
        load_in_4bit: bool = False,
        load_in_8bit: bool = False,
        torch_dtype: str = "auto",
    ):
        """
        Initialize local LLM using HuggingFace Transformers

        Args:
            model_name: HuggingFace model identifier
                Popular options:
                - "Qwen/Qwen2.5-3B-Instruct" (recommended, 3B params)
                - "meta-llama/Llama-3.2-3B-Instruct" (3B params)
                - "google/gemma-2-2b-it" (2B params)
                - "microsoft/Phi-3-mini-4k-instruct" (3.8B params)
            device: Device to run on ("cpu", "cuda", "mps", "auto")
            load_in_4bit: Use 4-bit quantization (saves 75% memory)
            load_in_8bit: Use 8-bit quantization (saves 50% memory)
            torch_dtype: Data type ("auto", "float16", "bfloat16", "float32")
        """
        try:
            import torch
            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
        except ImportError:
            raise ImportError(
                "transformers and torch required for local LLM. "
                "Install with: pip install transformers torch"
            )

        self._model_name = model_name

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

        # Set padding token if not present
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Configure quantization
        quantization_config = None
        if load_in_4bit:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
            )
        elif load_in_8bit:
            quantization_config = BitsAndBytesConfig(load_in_8bit=True)

        # Determine torch dtype
        if torch_dtype == "auto":
            dtype = torch.float16 if device != "cpu" else torch.float32
        else:
            dtype = getattr(torch, torch_dtype)

        # Load model
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            torch_dtype=dtype if quantization_config is None else None,
            device_map=device,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        self.model.eval()

    def generate(
        self,
        system_prompt: str,
        user_prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.1,
        top_p: float = 0.9,
        **kwargs,
    ) -> LLMResponse:
        """
        Generate response using local model

        Args:
            system_prompt: System instruction
            user_prompt: User query
            max_new_tokens: Maximum tokens to generate
            temperature: Sampling temperature (0 = greedy, higher = more random)
            top_p: Nucleus sampling parameter
            **kwargs: Additional generation parameters

        Returns:
            LLMResponse object
        """
        import torch

        # Auto-determine sampling based on temperature
        do_sample = temperature > 0

        # Format prompt using model's chat template
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_prompt})

        # Use model's native chat template if available
        if hasattr(self.tokenizer, "apply_chat_template"):
            formatted_prompt = self.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
        else:
            # Fallback formatting
            formatted_prompt = ""
            if system_prompt:
                formatted_prompt += f"System: {system_prompt}\n\n"
            formatted_prompt += f"User: {user_prompt}\n\nAssistant:"

        # Tokenize
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt", truncation=True).to(
            self.model.device
        )

        input_length = inputs["input_ids"].shape[1]

        # Generate
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature if do_sample else 1.0,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                **kwargs,
            )

        # Decode only the new tokens
        generated_tokens = outputs[0][input_length:]
        response_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        return LLMResponse(
            content=response_text,
            prompt_tokens=input_length,
            completion_tokens=len(generated_tokens),
            total_tokens=input_length + len(generated_tokens),
            model_name=self._model_name,
        )

    def supports_json_mode(self) -> bool:
        """Local models typically need prompt engineering for JSON"""
        return False

    @property
    def model_name(self) -> str:
        return self._model_name
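A usage sketch for LocalLLMProvider on a memory-constrained GPU; the model choice and prompts are illustrative, and 4-bit loading through BitsAndBytesConfig additionally requires the bitsandbytes package to be installed:

llm = LocalLLMProvider(
    model_name="Qwen/Qwen2.5-3B-Instruct",
    device="auto",        # lets transformers/accelerate place the model
    load_in_4bit=True,    # roughly quarters weight memory vs. float16
)
response = llm.generate(
    "You are a helpful assistant",
    "Summarize what an LLM provider abstraction is in one sentence.",
    max_new_tokens=64,
    temperature=0.0,      # temperature 0 switches generate() to greedy decoding
)
print(response.content, response.completion_tokens)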