sentienceapi 0.90.12__py3-none-any.whl → 0.90.16__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

sentience/browser.py CHANGED
@@ -11,7 +11,8 @@ from urllib.parse import urlparse
11
11
 
12
12
  from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
13
13
 
14
- from sentience.models import ProxyConfig, StorageState
14
+ from sentience._extension_loader import find_extension_path
15
+ from sentience.models import ProxyConfig, StorageState, Viewport
15
16
 
16
17
  # Import stealth for bot evasion (optional - graceful fallback if not available)
17
18
  try:
@@ -35,6 +36,7 @@ class SentienceBrowser:
35
36
  storage_state: str | Path | StorageState | dict | None = None,
36
37
  record_video_dir: str | Path | None = None,
37
38
  record_video_size: dict[str, int] | None = None,
39
+ viewport: Viewport | dict[str, int] | None = None,
38
40
  ):
39
41
  """
40
42
  Initialize Sentience browser
@@ -67,6 +69,11 @@ class SentienceBrowser:
67
69
  Examples: {"width": 1280, "height": 800} (default)
68
70
  {"width": 1920, "height": 1080} (1080p)
69
71
  If None, defaults to 1280x800.
72
+ viewport: Optional viewport size as Viewport object or dict with 'width' and 'height' keys.
73
+ Examples: Viewport(width=1280, height=800) (default)
74
+ Viewport(width=1920, height=1080) (Full HD)
75
+ {"width": 1280, "height": 800} (dict also supported)
76
+ If None, defaults to Viewport(width=1280, height=800).
70
77
  """
71
78
  self.api_key = api_key
72
79
  # Only set api_url if api_key is provided, otherwise None (free tier)
@@ -94,6 +101,14 @@ class SentienceBrowser:
94
101
  self.record_video_dir = record_video_dir
95
102
  self.record_video_size = record_video_size or {"width": 1280, "height": 800}
96
103
 
104
+ # Viewport configuration - convert dict to Viewport if needed
105
+ if viewport is None:
106
+ self.viewport = Viewport(width=1280, height=800)
107
+ elif isinstance(viewport, dict):
108
+ self.viewport = Viewport(width=viewport["width"], height=viewport["height"])
109
+ else:
110
+ self.viewport = viewport
111
+
97
112
  self.playwright: Playwright | None = None
98
113
  self.context: BrowserContext | None = None
99
114
  self.page: Page | None = None
@@ -147,28 +162,8 @@ class SentienceBrowser:
147
162
 
148
163
  def start(self) -> None:
149
164
  """Launch browser with extension loaded"""
150
- # Get extension source path (relative to project root/package)
151
- # Handle both development (src/) and installed package cases
152
-
153
- # 1. Try relative to this file (installed package structure)
154
- # sentience/browser.py -> sentience/extension/
155
- package_ext_path = Path(__file__).parent / "extension"
156
-
157
- # 2. Try development root (if running from source repo)
158
- # sentience/browser.py -> ../sentience-chrome
159
- dev_ext_path = Path(__file__).parent.parent.parent / "sentience-chrome"
160
-
161
- if package_ext_path.exists() and (package_ext_path / "manifest.json").exists():
162
- extension_source = package_ext_path
163
- elif dev_ext_path.exists() and (dev_ext_path / "manifest.json").exists():
164
- extension_source = dev_ext_path
165
- else:
166
- raise FileNotFoundError(
167
- f"Extension not found. Checked:\n"
168
- f"1. {package_ext_path}\n"
169
- f"2. {dev_ext_path}\n"
170
- "Make sure the extension is built and 'sentience/extension' directory exists."
171
- )
165
+ # Get extension source path using shared utility
166
+ extension_source = find_extension_path()
172
167
 
173
168
  # Create temporary extension bundle
174
169
  # We copy it to a temp dir to avoid file locking issues and ensure clean state
@@ -211,7 +206,7 @@ class SentienceBrowser:
211
206
  "user_data_dir": user_data_dir,
212
207
  "headless": False, # IMPORTANT: See note above
213
208
  "args": args,
214
- "viewport": {"width": 1280, "height": 800},
209
+ "viewport": {"width": self.viewport.width, "height": self.viewport.height},
215
210
  # Remove "HeadlessChrome" from User Agent automatically
216
211
  "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
217
212
  }
@@ -480,6 +475,97 @@ class SentienceBrowser:
480
475
 
481
476
  return final_path
482
477
 
478
+ @classmethod
479
+ def from_existing(
480
+ cls,
481
+ context: BrowserContext,
482
+ api_key: str | None = None,
483
+ api_url: str | None = None,
484
+ ) -> "SentienceBrowser":
485
+ """
486
+ Create SentienceBrowser from an existing Playwright BrowserContext.
487
+
488
+ This allows you to use Sentience SDK with a browser context you've already created,
489
+ giving you more control over browser initialization.
490
+
491
+ Args:
492
+ context: Existing Playwright BrowserContext
493
+ api_key: Optional API key for server-side processing
494
+ api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)
495
+
496
+ Returns:
497
+ SentienceBrowser instance configured to use the existing context
498
+
499
+ Example:
500
+ from playwright.sync_api import sync_playwright
501
+ from sentience import SentienceBrowser, snapshot
502
+
503
+ with sync_playwright() as p:
504
+ context = p.chromium.launch_persistent_context(...)
505
+ browser = SentienceBrowser.from_existing(context)
506
+ browser.page.goto("https://example.com")
507
+ snap = snapshot(browser)
508
+ """
509
+ instance = cls(api_key=api_key, api_url=api_url)
510
+ instance.context = context
511
+ instance.page = context.pages[0] if context.pages else context.new_page()
512
+
513
+ # Apply stealth if available
514
+ if STEALTH_AVAILABLE:
515
+ stealth_sync(instance.page)
516
+
517
+ # Wait for extension to be ready (if extension is loaded)
518
+ time.sleep(0.5)
519
+
520
+ return instance
521
+
522
+ @classmethod
523
+ def from_page(
524
+ cls,
525
+ page: Page,
526
+ api_key: str | None = None,
527
+ api_url: str | None = None,
528
+ ) -> "SentienceBrowser":
529
+ """
530
+ Create SentienceBrowser from an existing Playwright Page.
531
+
532
+ This allows you to use Sentience SDK with a page you've already created,
533
+ giving you more control over browser initialization.
534
+
535
+ Args:
536
+ page: Existing Playwright Page
537
+ api_key: Optional API key for server-side processing
538
+ api_url: Optional API URL (defaults to https://api.sentienceapi.com if api_key provided)
539
+
540
+ Returns:
541
+ SentienceBrowser instance configured to use the existing page
542
+
543
+ Example:
544
+ from playwright.sync_api import sync_playwright
545
+ from sentience import SentienceBrowser, snapshot
546
+
547
+ with sync_playwright() as p:
548
+ browser_instance = p.chromium.launch()
549
+ context = browser_instance.new_context()
550
+ page = context.new_page()
551
+ page.goto("https://example.com")
552
+
553
+ browser = SentienceBrowser.from_page(page)
554
+ snap = snapshot(browser)
555
+ """
556
+ instance = cls(api_key=api_key, api_url=api_url)
557
+ instance.page = page
558
+ instance.context = page.context
559
+
560
+ # Apply stealth if available
561
+ if STEALTH_AVAILABLE:
562
+ stealth_sync(instance.page)
563
+
564
+ # Wait for extension to be ready (if extension is loaded)
565
+ time.sleep(0.5)
566
+
567
+ return instance
568
+
483
569
  def __enter__(self):
484
570
  """Context manager entry"""
485
571
  self.start()
@@ -325,9 +325,7 @@ class CloudTraceSink(TraceSink):
325
325
  pass # Ignore cleanup errors
326
326
  else:
327
327
  if self.logger:
328
- self.logger.warning(
329
- f"Index upload failed: HTTP {index_response.status_code}"
330
- )
328
+ self.logger.warning(f"Index upload failed: HTTP {index_response.status_code}")
331
329
  print(f"⚠️ [Sentience] Index upload failed: HTTP {index_response.status_code}")
332
330
 
333
331
  except Exception as e:
@@ -10,7 +10,7 @@ from typing import Any
10
10
  from .agent import SentienceAgent
11
11
  from .browser import SentienceBrowser
12
12
  from .llm_provider import LLMProvider
13
- from .models import Snapshot
13
+ from .models import Snapshot, SnapshotOptions
14
14
  from .snapshot import snapshot
15
15
 
16
16
 
@@ -274,7 +274,7 @@ Create a step-by-step execution plan."""
274
274
  elif action == "EXTRACT_INFO":
275
275
  info_type = params["info_type"]
276
276
  # Get current page snapshot and extract info
277
- snap = snapshot(self.browser, limit=50)
277
+ snap = snapshot(self.browser, SnapshotOptions(limit=50))
278
278
 
279
279
  # Use LLM to extract specific information
280
280
  extracted = self._extract_information(snap, info_type)
@@ -361,7 +361,7 @@ Return JSON with extracted information:
361
361
  True if condition is met, False otherwise
362
362
  """
363
363
  try:
364
- snap = snapshot(self.browser, limit=30)
364
+ snap = snapshot(self.browser, SnapshotOptions(limit=30))
365
365
 
366
366
  # Build context
367
367
  elements_text = "\n".join([f"{el.role}: {el.text}" for el in snap.elements[:20]])
@@ -67,7 +67,7 @@
67
67
  "state": "uploaded",
68
68
  "size": 78091,
69
69
  "digest": "sha256:e281f8b755b61da4b8015d6172064aa9a337c14133ceceff4ab29199ee53307e",
70
- "download_count": 2,
70
+ "download_count": 5,
71
71
  "created_at": "2025-12-29T03:57:09Z",
72
72
  "updated_at": "2025-12-29T03:57:09Z",
73
73
  "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.0.7/extension-files.tar.gz"
sentience/llm_provider.py CHANGED
@@ -263,6 +263,212 @@ class AnthropicProvider(LLMProvider):
263
263
  return self._model_name
264
264
 
265
265
 
266
+ class GLMProvider(LLMProvider):
267
+ """
268
+ Zhipu AI GLM provider implementation (GLM-4, GLM-4-Plus, etc.)
269
+
270
+ Requirements:
271
+ pip install zhipuai
272
+
273
+ Example:
274
+ >>> from sentience.llm_provider import GLMProvider
275
+ >>> llm = GLMProvider(api_key="your-api-key", model="glm-4-plus")
276
+ >>> response = llm.generate("You are a helpful assistant", "Hello!")
277
+ >>> print(response.content)
278
+ """
279
+
280
+ def __init__(self, api_key: str | None = None, model: str = "glm-4-plus"):
281
+ """
282
+ Initialize GLM provider
283
+
284
+ Args:
285
+ api_key: Zhipu AI API key (or set GLM_API_KEY env var)
286
+ model: Model name (glm-4-plus, glm-4, glm-4-air, glm-4-flash, etc.)
287
+ """
288
+ try:
289
+ from zhipuai import ZhipuAI
290
+ except ImportError:
291
+ raise ImportError("ZhipuAI package not installed. Install with: pip install zhipuai")
292
+
293
+ self.client = ZhipuAI(api_key=api_key)
294
+ self._model_name = model
295
+
296
+ def generate(
297
+ self,
298
+ system_prompt: str,
299
+ user_prompt: str,
300
+ temperature: float = 0.0,
301
+ max_tokens: int | None = None,
302
+ **kwargs,
303
+ ) -> LLMResponse:
304
+ """
305
+ Generate response using GLM API
306
+
307
+ Args:
308
+ system_prompt: System instruction
309
+ user_prompt: User query
310
+ temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative)
311
+ max_tokens: Maximum tokens to generate
312
+ **kwargs: Additional GLM API parameters
313
+
314
+ Returns:
315
+ LLMResponse object
316
+ """
317
+ messages = []
318
+ if system_prompt:
319
+ messages.append({"role": "system", "content": system_prompt})
320
+ messages.append({"role": "user", "content": user_prompt})
321
+
322
+ # Build API parameters
323
+ api_params = {
324
+ "model": self._model_name,
325
+ "messages": messages,
326
+ "temperature": temperature,
327
+ }
328
+
329
+ if max_tokens:
330
+ api_params["max_tokens"] = max_tokens
331
+
332
+ # Merge additional parameters
333
+ api_params.update(kwargs)
334
+
335
+ # Call GLM API
336
+ response = self.client.chat.completions.create(**api_params)
337
+
338
+ choice = response.choices[0]
339
+ usage = response.usage
340
+
341
+ return LLMResponse(
342
+ content=choice.message.content,
343
+ prompt_tokens=usage.prompt_tokens if usage else None,
344
+ completion_tokens=usage.completion_tokens if usage else None,
345
+ total_tokens=usage.total_tokens if usage else None,
346
+ model_name=response.model,
347
+ finish_reason=choice.finish_reason,
348
+ )
349
+
350
+ def supports_json_mode(self) -> bool:
351
+ """GLM-4 models support JSON mode"""
352
+ return "glm-4" in self._model_name.lower()
353
+
354
+ @property
355
+ def model_name(self) -> str:
356
+ return self._model_name
357
+
358
+
359
+ class GeminiProvider(LLMProvider):
360
+ """
361
+ Google Gemini provider implementation (Gemini 2.0, Gemini 1.5 Pro, etc.)
362
+
363
+ Requirements:
364
+ pip install google-generativeai
365
+
366
+ Example:
367
+ >>> from sentience.llm_provider import GeminiProvider
368
+ >>> llm = GeminiProvider(api_key="your-api-key", model="gemini-2.0-flash-exp")
369
+ >>> response = llm.generate("You are a helpful assistant", "Hello!")
370
+ >>> print(response.content)
371
+ """
372
+
373
+ def __init__(self, api_key: str | None = None, model: str = "gemini-2.0-flash-exp"):
374
+ """
375
+ Initialize Gemini provider
376
+
377
+ Args:
378
+ api_key: Google API key (or set GEMINI_API_KEY or GOOGLE_API_KEY env var)
379
+ model: Model name (gemini-2.0-flash-exp, gemini-1.5-pro, gemini-1.5-flash, etc.)
380
+ """
381
+ try:
382
+ import google.generativeai as genai
383
+ except ImportError:
384
+ raise ImportError(
385
+ "Google Generative AI package not installed. Install with: pip install google-generativeai"
386
+ )
387
+
388
+ # Configure API key
389
+ if api_key:
390
+ genai.configure(api_key=api_key)
391
+ else:
392
+ import os
393
+
394
+ api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
395
+ if api_key:
396
+ genai.configure(api_key=api_key)
397
+
398
+ self.genai = genai
399
+ self._model_name = model
400
+ self.model = genai.GenerativeModel(model)
401
+
402
+ def generate(
403
+ self,
404
+ system_prompt: str,
405
+ user_prompt: str,
406
+ temperature: float = 0.0,
407
+ max_tokens: int | None = None,
408
+ **kwargs,
409
+ ) -> LLMResponse:
410
+ """
411
+ Generate response using Gemini API
412
+
413
+ Args:
414
+ system_prompt: System instruction
415
+ user_prompt: User query
416
+ temperature: Sampling temperature (0.0 = deterministic, 2.0 = very creative)
417
+ max_tokens: Maximum tokens to generate
418
+ **kwargs: Additional Gemini API parameters
419
+
420
+ Returns:
421
+ LLMResponse object
422
+ """
423
+ # Combine system and user prompts (Gemini doesn't have separate system role in all versions)
424
+ full_prompt = f"{system_prompt}\n\n{user_prompt}" if system_prompt else user_prompt
425
+
426
+ # Build generation config
427
+ generation_config = {
428
+ "temperature": temperature,
429
+ }
430
+
431
+ if max_tokens:
432
+ generation_config["max_output_tokens"] = max_tokens
433
+
434
+ # Merge additional parameters
435
+ generation_config.update(kwargs)
436
+
437
+ # Call Gemini API
438
+ response = self.model.generate_content(full_prompt, generation_config=generation_config)
439
+
440
+ # Extract content
441
+ content = response.text if response.text else ""
442
+
443
+ # Token usage (if available)
444
+ prompt_tokens = None
445
+ completion_tokens = None
446
+ total_tokens = None
447
+
448
+ if hasattr(response, "usage_metadata") and response.usage_metadata:
449
+ prompt_tokens = response.usage_metadata.prompt_token_count
450
+ completion_tokens = response.usage_metadata.candidates_token_count
451
+ total_tokens = response.usage_metadata.total_token_count
452
+
453
+ return LLMResponse(
454
+ content=content,
455
+ prompt_tokens=prompt_tokens,
456
+ completion_tokens=completion_tokens,
457
+ total_tokens=total_tokens,
458
+ model_name=self._model_name,
459
+ finish_reason=None, # Gemini uses different finish reason format
460
+ )
461
+
462
+ def supports_json_mode(self) -> bool:
463
+ """Gemini 1.5+ models support JSON mode via response_mime_type"""
464
+ model_lower = self._model_name.lower()
465
+ return any(x in model_lower for x in ["gemini-1.5", "gemini-2.0"])
466
+
467
+ @property
468
+ def model_name(self) -> str:
469
+ return self._model_name
470
+
471
+
266
472
  class LocalLLMProvider(LLMProvider):
267
473
  """
268
474
  Local LLM provider using HuggingFace Transformers
sentience/models.py CHANGED
@@ -2,7 +2,7 @@
2
2
  Pydantic models for Sentience SDK - matches spec/snapshot.schema.json
3
3
  """
4
4
 
5
- from typing import Literal
5
+ from typing import Literal, Optional
6
6
 
7
7
  from pydantic import BaseModel, Field
8
8
 
@@ -44,6 +44,12 @@ class Element(BaseModel):
44
44
  is_occluded: bool = False
45
45
  z_index: int = 0
46
46
 
47
+ # ML reranking metadata (optional - can be absent or null)
48
+ rerank_index: int | None = None # 0-based, The rank after ML reranking
49
+ heuristic_index: int | None = None # 0-based, Where it would have been without ML
50
+ ml_probability: float | None = None # Confidence score from ONNX model (0.0 - 1.0)
51
+ ml_score: float | None = None # Raw logit score (optional, for debugging)
52
+
47
53
 
48
54
  class Snapshot(BaseModel):
49
55
  """Snapshot response from extension"""
sentience/snapshot.py CHANGED
@@ -5,7 +5,7 @@ Snapshot functionality - calls window.sentience.snapshot() or server-side API
5
5
  import json
6
6
  import os
7
7
  import time
8
- from typing import Any
8
+ from typing import Any, Optional
9
9
 
10
10
  import requests
11
11
 
@@ -41,41 +41,33 @@ def _save_trace_to_file(raw_elements: list[dict[str, Any]], trace_path: str | No
41
41
 
42
42
  def snapshot(
43
43
  browser: SentienceBrowser,
44
- screenshot: bool | None = None,
45
- limit: int | None = None,
46
- filter: dict[str, Any] | None = None,
47
- use_api: bool | None = None,
48
- save_trace: bool = False,
49
- trace_path: str | None = None,
50
- show_overlay: bool = False,
44
+ options: SnapshotOptions | None = None,
51
45
  ) -> Snapshot:
52
46
  """
53
47
  Take a snapshot of the current page
54
48
 
55
49
  Args:
56
50
  browser: SentienceBrowser instance
57
- screenshot: Whether to capture screenshot (bool or dict with format/quality)
58
- limit: Limit number of elements returned
59
- filter: Filter options (min_area, allowed_roles, min_z_index)
60
- use_api: Force use of server-side API if True, local extension if False.
61
- If None, uses API if api_key is set, otherwise uses local extension.
62
- save_trace: Whether to save raw_elements to JSON for benchmarking/training
63
- trace_path: Path to save trace file. If None, uses "trace_{timestamp}.json"
64
- show_overlay: Show visual overlay highlighting elements in browser
51
+ options: Snapshot options (screenshot, limit, filter, etc.)
52
+ If None, uses default options.
65
53
 
66
54
  Returns:
67
55
  Snapshot object
56
+
57
+ Example:
58
+ # Basic snapshot with defaults
59
+ snap = snapshot(browser)
60
+
61
+ # With options
62
+ snap = snapshot(browser, SnapshotOptions(
63
+ screenshot=True,
64
+ limit=100,
65
+ show_overlay=True
66
+ ))
68
67
  """
69
- # Build SnapshotOptions from individual parameters
70
- options = SnapshotOptions(
71
- screenshot=screenshot if screenshot is not None else False,
72
- limit=limit if limit is not None else 50,
73
- filter=filter,
74
- use_api=use_api,
75
- save_trace=save_trace,
76
- trace_path=trace_path,
77
- show_overlay=show_overlay,
78
- )
68
+ # Use default options if none provided
69
+ if options is None:
70
+ options = SnapshotOptions()
79
71
 
80
72
  # Determine if we should use server-side API
81
73
  should_use_api = (
sentience/wait.py CHANGED
@@ -5,7 +5,7 @@ Wait functionality - wait_for element matching selector
5
5
  import time
6
6
 
7
7
  from .browser import SentienceBrowser
8
- from .models import WaitResult
8
+ from .models import SnapshotOptions, WaitResult
9
9
  from .query import find
10
10
  from .snapshot import snapshot
11
11
 
@@ -46,7 +46,7 @@ def wait_for(
46
46
 
47
47
  while time.time() - start_time < timeout:
48
48
  # Take snapshot (may be local extension or remote API)
49
- snap = snapshot(browser, use_api=use_api)
49
+ snap = snapshot(browser, SnapshotOptions(use_api=use_api))
50
50
 
51
51
  # Try to find element
52
52
  element = find(snap, selector)