lumera 0.4.22__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lumera
-Version: 0.4.22
+Version: 0.5.0
 Summary: SDK for building on Lumera platform
 Requires-Python: >=3.11
 Requires-Dist: requests
@@ -21,8 +21,8 @@ Requires-Dist: matplotlib==3.10.3; extra == "full"
 Requires-Dist: notion-client==2.4.0; extra == "full"
 Requires-Dist: numpy==2.3.0; extra == "full"
 Requires-Dist: office365-rest-python-client; extra == "full"
-Requires-Dist: openai; extra == "full"
-Requires-Dist: openai-agents; extra == "full"
+Requires-Dist: openai<3.0.0,>=2.15.0; extra == "full"
+Requires-Dist: openai-agents<1.0.0,>=0.6.5; extra == "full"
 Requires-Dist: openpyxl==3.1.5; extra == "full"
 Requires-Dist: pandas==2.3.0; extra == "full"
 Requires-Dist: pdfplumber; extra == "full"
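
The previously unpinned openai and openai-agents dependencies in the "full" extra are now constrained to known-compatible ranges. The install command is unchanged (it is the same one the SDK's own ImportError hint recommends, shown in the llm module below):

    pip install 'lumera[full]'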
@@ -5,6 +5,10 @@ This SDK provides helpers for agents running within the Lumera Notebook environm
 to interact with the Lumera API and define dynamic user interfaces.
 """
 
+__version__ = "0.5.0"
+
+# Import new modules (as modules, not individual functions)
+from . import exceptions, llm, locks, pb, storage
 from ._utils import (
     LumeraAPIError,
     RecordNotUniqueError,
@@ -13,6 +17,15 @@ from ._utils import (
     log_timed,
 )
 
+# Import specific exceptions for convenience
+from .exceptions import (
+    LockHeldError,
+    LumeraError,
+    RecordNotFoundError,
+    UniqueConstraintError,
+    ValidationError,
+)
+
 # Import key SDK helpers to expose them at the package root.
 from .sdk import (
     CollectionField,
@@ -39,29 +52,46 @@ from .sdk import (
 
 # Define what `from lumera import *` imports.
 __all__ = [
+    # Authentication & utilities
     "get_access_token",
-    "save_to_lumera",
     "get_google_access_token",  # Kept for backwards compatibility
     "log_timed",
+    # Collections (low-level API)
     "list_collections",
     "get_collection",
     "create_collection",
     "update_collection",
     "delete_collection",
+    # Records (low-level API)
     "list_records",
     "get_record",
     "get_record_by_external_id",
+    "create_record",
+    "update_record",
+    "upsert_record",
+    "delete_record",
+    # Other operations
     "replay_hook",
     "query_sql",
     "run_agent",
     "get_agent_run",
     "upload_lumera_file",
-    "create_record",
-    "update_record",
-    "upsert_record",
-    "delete_record",
+    "save_to_lumera",
+    # Type definitions
     "CollectionField",
     "HookReplayResult",
+    # Exceptions
     "LumeraAPIError",
     "RecordNotUniqueError",
+    "LumeraError",
+    "ValidationError",
+    "UniqueConstraintError",
+    "RecordNotFoundError",
+    "LockHeldError",
+    # New modules (use as lumera.pb, lumera.storage, etc.)
+    "pb",
+    "storage",
+    "llm",
+    "locks",
+    "exceptions",
 ]
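
With the reorganized __all__, the new modules and exceptions are reachable from the package root. A minimal sketch of the new surface (interactive session; the behavior of pb, storage, and locks is not shown in this diff):

    >>> import lumera
    >>> lumera.__version__
    '0.5.0'
    >>> from lumera import llm, locks, pb, storage
    >>> from lumera import LumeraError, RecordNotFoundError  # re-exported from lumera.exceptions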
@@ -0,0 +1,72 @@
+"""
+Custom exceptions for the Lumera SDK.
+
+Exception hierarchy:
+    LumeraError (base)
+    ├── RecordNotFoundError - Record doesn't exist (404)
+    ├── ValidationError - Data doesn't match schema
+    ├── UniqueConstraintError - Unique field violation
+    └── LockHeldError - Lock already held by another process
+
+Example:
+    >>> from lumera import pb
+    >>> from lumera.exceptions import RecordNotFoundError
+    >>> try:
+    ...     deposit = pb.get("deposits", "invalid_id")
+    ... except RecordNotFoundError as e:
+    ...     print(f"Not found: {e.record_id}")
+"""
+
+__all__ = [
+    "LumeraError",
+    "RecordNotFoundError",
+    "ValidationError",
+    "UniqueConstraintError",
+    "LockHeldError",
+]
+
+
+class LumeraError(Exception):
+    """Base exception for all Lumera SDK errors."""
+
+    pass
+
+
+class RecordNotFoundError(LumeraError):
+    """Record doesn't exist in the collection."""
+
+    def __init__(self, collection: str, record_id: str) -> None:
+        super().__init__(f"Record '{record_id}' not found in collection '{collection}'")
+        self.collection = collection
+        self.record_id = record_id
+
+
+class ValidationError(LumeraError):
+    """Data doesn't match collection schema."""
+
+    def __init__(self, collection: str, errors: dict[str, str]) -> None:
+        super().__init__(f"Validation failed for '{collection}': {errors}")
+        self.collection = collection
+        self.errors = errors
+
+
+class UniqueConstraintError(LumeraError):
+    """Unique field constraint violation."""
+
+    def __init__(self, collection: str, field: str, value: object) -> None:
+        super().__init__(f"Record with {field}='{value}' already exists in '{collection}'")
+        self.collection = collection
+        self.field = field
+        self.value = value
+
+
+class LockHeldError(LumeraError):
+    """Lock is already held by another process."""
+
+    def __init__(self, lock_name: str, held_by: str | None = None) -> None:
+        msg = f"Lock '{lock_name}' is already held"
+        if held_by:
+            msg += f" by {held_by}"
+        super().__init__(msg)
+        self.lock_name = lock_name
+        self.held_by = held_by
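
Because every subclass carries structured attributes alongside its message, handlers can branch on data instead of parsing strings, and the shared base class catches any SDK error. A small sketch (the lock name and holder are hypothetical values):

    >>> from lumera.exceptions import LockHeldError, LumeraError
    >>> try:
    ...     raise LockHeldError("nightly-sync", held_by="agent-42")
    ... except LumeraError as e:  # base class catches any SDK error
    ...     print(e, "|", e.lock_name, "|", e.held_by)
    Lock 'nightly-sync' is already held by agent-42 | nightly-sync | agent-42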
@@ -0,0 +1,481 @@
+"""
+LLM operations for AI completions and embeddings.
+
+This module provides a unified interface for LLM operations with pluggable
+provider support. Currently implements OpenAI, with extensibility for other
+providers (Anthropic, Google, etc.) in the future.
+
+Available functions:
+    complete() - Single-turn LLM completion with prompt
+    chat() - Multi-turn chat completion with message history
+    embed() - Generate embeddings for text (single or batch)
+
+Configuration:
+    OPENAI_API_KEY - Required for OpenAI provider
+    LUMERA_LLM_PROVIDER - Provider to use (default: "openai")
+
+Example:
+    >>> from lumera import llm
+    >>> response = llm.complete("What is 2+2?", model="gpt-5.2-mini")
+    >>> print(response["content"])
+"""
+
+from __future__ import annotations
+
+import os
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Literal, NotRequired, TypedDict, Unpack
+
+if TYPE_CHECKING:
+    import openai
+
+__all__ = [
+    "complete",
+    "chat",
+    "embed",
+    "Message",
+    "LLMResponse",
+    "ProviderConfig",
+    "LLMProvider",
+    "get_provider",
+    "set_provider",
+]
+
+
+# ---------------------------------------------------------------------------
+# Type definitions
+# ---------------------------------------------------------------------------
+
+
+class Message(TypedDict):
+    """Chat message format compatible with OpenAI and other providers."""
+
+    role: Literal["system", "user", "assistant"]
+    content: str
+
+
+class LLMResponse(TypedDict, total=False):
+    """LLM completion response."""
+
+    content: str  # Response text (always present)
+    model: str  # Model used
+    usage: dict[str, int]  # Token usage: prompt_tokens, completion_tokens, total_tokens
+    finish_reason: str  # "stop", "length", "content_filter", etc.
+    provider: str  # Provider name (e.g., "openai", "anthropic")
+
+
+class ProviderConfig(TypedDict, total=False):
+    """Configuration options for LLM providers."""
+
+    api_key: NotRequired[str]  # API key (overrides Lumera/env lookup)
+    provider_name: NotRequired[str]  # Provider name for get_access_token (default: "openai")
+
+
+# ---------------------------------------------------------------------------
+# Provider interface (for future extensibility)
+# ---------------------------------------------------------------------------
+
+
+class LLMProvider(ABC):
+    """Abstract base class for LLM providers.
+
+    Subclass this to add support for new providers (Anthropic, Google, etc.).
+    """
+
+    name: str = "base"
+
+    @abstractmethod
+    def complete(
+        self,
+        prompt: str,
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int | None,
+        system_prompt: str | None,
+        json_mode: bool,
+    ) -> LLMResponse:
+        """Generate a completion for a single prompt."""
+        ...
+
+    @abstractmethod
+    def chat(
+        self,
+        messages: list[Message],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int | None,
+        json_mode: bool,
+    ) -> LLMResponse:
+        """Generate a chat completion from message history."""
+        ...
+
+    @abstractmethod
+    def embed(
+        self,
+        text: str | list[str],
+        *,
+        model: str,
+    ) -> list[float] | list[list[float]]:
+        """Generate embeddings for text."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# OpenAI provider implementation
+# ---------------------------------------------------------------------------
+
+
+class OpenAIProvider(LLMProvider):
+    """OpenAI provider implementation using the openai Python SDK."""
+
+    name = "openai"
+
+    # Model aliases for convenience
+    MODEL_ALIASES: dict[str, str] = {
+        "gpt-5.2": "gpt-5.2",
+        "gpt-5.2-mini": "gpt-5.2-mini",
+        "gpt-5.2-nano": "gpt-5.2-nano",
+        # Embedding models
+        "text-embedding-3-small": "text-embedding-3-small",
+        "text-embedding-3-large": "text-embedding-3-large",
+    }
+
+    DEFAULT_CHAT_MODEL = "gpt-5.2-mini"
+    DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
+    DEFAULT_PROVIDER_NAME = "openai"
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        provider_name: str | None = None,
+    ) -> None:
+        """Initialize OpenAI provider.
+
+        Args:
+            api_key: OpenAI API key. If not provided, fetches from Lumera
+                using get_access_token(provider_name), or falls back
+                to OPENAI_API_KEY env var.
+            provider_name: Provider name for get_access_token lookup.
+                Defaults to "openai".
+        """
+        self._explicit_api_key = api_key
+        self._provider_name = provider_name or self.DEFAULT_PROVIDER_NAME
+        self._client: openai.OpenAI | None = None  # noqa: F821
+
+    def _get_api_key(self) -> str:
+        """Get API key from explicit config, Lumera, or environment."""
+        # 1. Use explicitly provided key
+        if self._explicit_api_key:
+            return self._explicit_api_key
+
+        # 2. Try to fetch from Lumera platform
+        try:
+            from ._utils import get_access_token
+
+            return get_access_token(self._provider_name)
+        except Exception:
+            pass  # Fall through to env var
+
+        # 3. Fall back to environment variable
+        env_key = os.environ.get("OPENAI_API_KEY")
+        if env_key:
+            return env_key
+
+        raise ValueError(
+            "OpenAI API key not configured. Either:\n"
+            f" 1. Configure '{self._provider_name}' provider in Lumera platform\n"
+            " 2. Set OPENAI_API_KEY environment variable\n"
+            " 3. Pass api_key to set_provider()"
+        )
+
+    @property
+    def client(self) -> openai.OpenAI:  # noqa: F821
+        """Lazy-initialize OpenAI client."""
+        if self._client is None:
+            try:
+                import openai
+            except ImportError as e:
+                raise ImportError(
+                    "OpenAI package not installed. Install with: pip install 'lumera[full]'"
+                ) from e
+
+            api_key = self._get_api_key()
+            self._client = openai.OpenAI(api_key=api_key)
+        return self._client
+
+    def _resolve_model(self, model: str) -> str:
+        """Resolve model alias to actual model name."""
+        return self.MODEL_ALIASES.get(model, model)
+
+    def complete(
+        self,
+        prompt: str,
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int | None,
+        system_prompt: str | None,
+        json_mode: bool,
+    ) -> LLMResponse:
+        """Generate a completion using OpenAI."""
+        messages: list[Message] = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
+        return self.chat(
+            messages,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            json_mode=json_mode,
+        )
+
+    def chat(
+        self,
+        messages: list[Message],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int | None,
+        json_mode: bool,
+    ) -> LLMResponse:
+        """Generate a chat completion using OpenAI."""
+        resolved_model = self._resolve_model(model)
+
+        # Build request kwargs
+        kwargs: dict = {
+            "model": resolved_model,
+            "messages": messages,  # type: ignore[arg-type]
+            "temperature": temperature,
+        }
+
+        if max_tokens is not None:
+            kwargs["max_tokens"] = max_tokens
+
+        if json_mode:
+            kwargs["response_format"] = {"type": "json_object"}
+
+        # Make API call
+        response = self.client.chat.completions.create(**kwargs)
+
+        # Extract response
+        choice = response.choices[0]
+        content = choice.message.content or ""
+
+        result: LLMResponse = {
+            "content": content,
+            "model": response.model,
+            "provider": self.name,
+        }
+
+        if choice.finish_reason:
+            result["finish_reason"] = choice.finish_reason
+
+        if response.usage:
+            result["usage"] = {
+                "prompt_tokens": response.usage.prompt_tokens,
+                "completion_tokens": response.usage.completion_tokens,
+                "total_tokens": response.usage.total_tokens,
+            }
+
+        return result
+
+    def embed(
+        self,
+        text: str | list[str],
+        *,
+        model: str,
+    ) -> list[float] | list[list[float]]:
+        """Generate embeddings using OpenAI."""
+        resolved_model = self._resolve_model(model)
+
+        # Normalize input to list
+        input_texts = [text] if isinstance(text, str) else text
+
+        response = self.client.embeddings.create(
+            model=resolved_model,
+            input=input_texts,
+        )
+
+        # Extract embeddings
+        embeddings = [item.embedding for item in response.data]
+
+        # Return single embedding if single input
+        if isinstance(text, str):
+            return embeddings[0]
+        return embeddings
+
+
+# ---------------------------------------------------------------------------
+# Provider registry and module-level state
+# ---------------------------------------------------------------------------
+
+# Registry of available providers
+_PROVIDERS: dict[str, type[LLMProvider]] = {
+    "openai": OpenAIProvider,
+}
+
+# Current active provider instance
+_current_provider: LLMProvider | None = None
+
+
+def get_provider() -> LLMProvider:
+    """Get the current LLM provider instance.
+
+    Returns the configured provider, initializing it if necessary.
+    Provider is determined by LUMERA_LLM_PROVIDER env var (default: "openai").
+    """
+    global _current_provider
+
+    if _current_provider is None:
+        provider_name = os.environ.get("LUMERA_LLM_PROVIDER", "openai").lower()
+
+        if provider_name not in _PROVIDERS:
+            available = ", ".join(_PROVIDERS.keys())
+            raise ValueError(f"Unknown LLM provider: {provider_name}. Available: {available}")
+
+        provider_class = _PROVIDERS[provider_name]
+        _current_provider = provider_class()
+
+    return _current_provider
+
+
+def set_provider(provider: LLMProvider | str, **kwargs: Unpack[ProviderConfig]) -> None:
+    """Set the active LLM provider.
+
+    Args:
+        provider: Either a provider instance or provider name string.
+        **kwargs: If provider is a string, kwargs are passed to provider constructor.
+
+    Example:
+        >>> llm.set_provider("openai", api_key="sk-...")
+        >>> # Or with a custom provider instance
+        >>> llm.set_provider(MyCustomProvider())
+    """
+    global _current_provider
+
+    if isinstance(provider, str):
+        if provider not in _PROVIDERS:
+            available = ", ".join(_PROVIDERS.keys())
+            raise ValueError(f"Unknown provider: {provider}. Available: {available}")
+        _current_provider = _PROVIDERS[provider](**kwargs)
+    else:
+        _current_provider = provider
+
+
+# ---------------------------------------------------------------------------
+# Public API functions
+# ---------------------------------------------------------------------------
+
+
+def complete(
+    prompt: str,
+    *,
+    model: str = "gpt-5.2-mini",
+    temperature: float = 0.7,
+    max_tokens: int | None = None,
+    system_prompt: str | None = None,
+    json_mode: bool = False,
+) -> LLMResponse:
+    """Get LLM completion for a prompt.
+
+    Args:
+        prompt: User prompt/question
+        model: Model to use (default: gpt-5.2-mini)
+        temperature: Sampling temperature 0.0 to 2.0 (default: 0.7)
+        max_tokens: Max tokens in response (None = model default)
+        system_prompt: Optional system message to set behavior
+        json_mode: Force JSON output (default: False)
+
+    Returns:
+        LLM response with content and metadata
+
+    Example:
+        >>> response = llm.complete(
+        ...     prompt="Classify this deposit: ...",
+        ...     system_prompt="You are an expert accountant.",
+        ...     model="gpt-5.2-mini",
+        ...     json_mode=True
+        ... )
+        >>> data = json.loads(response["content"])
+    """
+    provider = get_provider()
+    return provider.complete(
+        prompt,
+        model=model,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        system_prompt=system_prompt,
+        json_mode=json_mode,
+    )
+
+
+def chat(
+    messages: list[Message],
+    *,
+    model: str = "gpt-5.2-mini",
+    temperature: float = 0.7,
+    max_tokens: int | None = None,
+    json_mode: bool = False,
+) -> LLMResponse:
+    """Multi-turn chat completion.
+
+    Args:
+        messages: Conversation history with role and content
+        model: Model to use (default: gpt-5.2-mini)
+        temperature: Sampling temperature 0.0 to 2.0 (default: 0.7)
+        max_tokens: Max tokens in response (None = model default)
+        json_mode: Force JSON output (default: False)
+
+    Returns:
+        LLM response with assistant's message
+
+    Example:
+        >>> response = llm.chat([
+        ...     {"role": "system", "content": "You are a helpful assistant."},
+        ...     {"role": "user", "content": "What is 2+2?"},
+        ...     {"role": "assistant", "content": "4"},
+        ...     {"role": "user", "content": "What about 3+3?"}
+        ... ])
+        >>> print(response["content"])
+    """
+    provider = get_provider()
+    return provider.chat(
+        messages,
+        model=model,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        json_mode=json_mode,
+    )
+
+
+def embed(
+    text: str | list[str],
+    *,
+    model: str = "text-embedding-3-small",
+) -> list[float] | list[list[float]]:
+    """Generate embeddings for text.
+
+    Args:
+        text: Single string or list of strings to embed
+        model: Embedding model (default: text-embedding-3-small)
+
+    Returns:
+        Embedding vector (for single string) or list of vectors (for list)
+
+    Example:
+        >>> embedding = llm.embed("deposit payment notice")
+        >>> # Use for similarity search, semantic matching, etc.
+        >>>
+        >>> # Batch embeddings
+        >>> embeddings = llm.embed([
+        ...     "payment notice",
+        ...     "direct deposit",
+        ...     "apportionment"
+        ... ])
+    """
+    provider = get_provider()
+    return provider.embed(text, model=model)
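
End to end, the public helpers compose as below; a sketch assuming OPENAI_API_KEY is set, with illustrative prompt text:

    >>> import json
    >>> from lumera import llm
    >>> resp = llm.complete(
    ...     "Classify this deposit: wire transfer, $120",
    ...     system_prompt="Reply as JSON with keys 'category' and 'confidence'.",
    ...     json_mode=True,
    ... )
    >>> data = json.loads(resp["content"])  # json_mode requests JSON-formatted output
    >>> resp["provider"], resp.get("usage", {}).get("total_tokens")  # response metadata
    >>> vec = llm.embed("deposit payment notice")  # single string -> single vector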
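
Because providers are pluggable, a test double can be swapped in without touching call sites. A minimal sketch of a custom provider (EchoProvider is hypothetical, not part of the SDK):

    from lumera import llm

    class EchoProvider(llm.LLMProvider):
        """Hypothetical offline provider: echoes input instead of calling an API."""

        name = "echo"

        def complete(self, prompt, *, model, temperature, max_tokens, system_prompt, json_mode):
            return {"content": prompt, "model": model, "provider": self.name}

        def chat(self, messages, *, model, temperature, max_tokens, json_mode):
            # Echo the most recent message back to the caller
            return {"content": messages[-1]["content"], "model": model, "provider": self.name}

        def embed(self, text, *, model):
            texts = [text] if isinstance(text, str) else text
            vectors = [[float(len(t))] for t in texts]  # toy one-dimensional "embedding"
            return vectors[0] if isinstance(text, str) else vectors

    llm.set_provider(EchoProvider())
    assert llm.complete("ping")["content"] == "ping"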