lumera-0.10.0-py3-none-any.whl → lumera-0.10.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lumera/__init__.py CHANGED
@@ -13,7 +13,7 @@ except PackageNotFoundError:
      __version__ = "unknown" # Not installed (e.g., running from source)
 
  # Import new modules (as modules, not individual functions)
- from . import automations, email, exceptions, integrations, llm, locks, pb, storage, webhooks
+ from . import automations, documents, email, exceptions, integrations, llm, locks, pb, storage, webhooks
 
  from ._utils import (
      LumeraAPIError,
      RecordNotUniqueError,
@@ -102,6 +102,7 @@ __all__ = [
      "LockHeldError",
      # New modules (use as lumera.pb, lumera.storage, etc.)
      "automations",
+     "documents",
      "email",
      "pb",
      "storage",
lumera/automations.py CHANGED
@@ -64,6 +64,7 @@ __all__ = [
      # Classes
      "Run",
      "Automation",
+     "LogEntry",
      "LogsResponse",
  ]
 
@@ -71,6 +72,67 @@ from ._utils import LumeraAPIError, _api_request
  from .sdk import get_automation_run as _get_automation_run
  from .sdk import run_automation as _run_automation
 
+ # ============================================================================
+ # LogEntry Class
+ # ============================================================================
+
+
+ class LogEntry:
+     """A single log entry from an automation run.
+
+     Attributes:
+         content: The log message content.
+         type: Log type ("stream_stdout", "stream_stderr", "warning", "image_png", "image_jpeg").
+         timestamp: ISO timestamp when the log was emitted.
+         error: True if this is an error entry.
+     """
+
+     def __init__(self, data: dict[str, Any]) -> None:
+         self._data = data
+
+     @property
+     def content(self) -> str:
+         return self._data.get("content", "")
+
+     @property
+     def type(self) -> str:
+         return self._data.get("type", "")
+
+     @property
+     def timestamp(self) -> str | None:
+         return self._data.get("timestamp")
+
+     @property
+     def error(self) -> bool:
+         return self._data.get("error", False)
+
+     @property
+     def is_image(self) -> bool:
+         """True if this entry contains image data (base64 encoded in content)."""
+         return self.type in ("image_png", "image_jpeg")
+
+     def __repr__(self) -> str:
+         preview = self.content[:50] + "..." if len(self.content) > 50 else self.content
+         return f"LogEntry(type={self.type!r}, content={preview!r})"
+
+
+ def _parse_ndjson_entries(data: str) -> list[LogEntry]:
+     """Parse NDJSON log data into LogEntry objects."""
+     entries = []
+     for line in data.splitlines():
+         line = line.strip()
+         if not line:
+             continue
+         try:
+             parsed = json.loads(line)
+             if isinstance(parsed, dict):
+                 entries.append(LogEntry(parsed))
+         except (json.JSONDecodeError, ValueError):
+             # Skip malformed lines or lines with huge numbers
+             pass
+     return entries
+
+
  # ============================================================================
  # LogsResponse Class
  # ============================================================================
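
To make the parsing contract concrete, here is a hedged sketch of how LogEntry and _parse_ndjson_entries behave; the sample NDJSON lines are invented for illustration:

    raw = (
        '{"content": "step 1 done", "type": "stream_stdout"}\n'
        'not valid json\n'
        '{"content": "<base64 bytes>", "type": "image_png"}\n'
    )
    entries = _parse_ndjson_entries(raw)  # the malformed middle line is skipped
    assert len(entries) == 2
    assert entries[0].content == "step 1 done"
    assert entries[1].is_image  # image_png/image_jpeg carry base64 content
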
@@ -81,6 +143,7 @@ class LogsResponse:
 
      Attributes:
          data: Raw log content as a string (NDJSON format).
+         entries: Parsed log entries as LogEntry objects.
          offset: Byte offset where this chunk starts.
          size: Number of bytes in this chunk.
          total_size: Total size of the log file.
@@ -91,11 +154,20 @@ class LogsResponse:
 
      def __init__(self, data: dict[str, Any]) -> None:
          self._data = data
+         self._entries: list[LogEntry] | None = None
 
      @property
      def data(self) -> str:
+         """Raw NDJSON log content."""
          return self._data.get("data", "")
 
+     @property
+     def entries(self) -> list[LogEntry]:
+         """Parsed log entries. Lazily parsed from NDJSON data."""
+         if self._entries is None:
+             self._entries = _parse_ndjson_entries(self.data)
+         return self._entries
+
      @property
      def offset(self) -> int:
          return self._data.get("offset", 0)
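
Given the lazy entries property above, a fetched log chunk can now be consumed as structured objects rather than raw NDJSON. A sketch against the API visible in this diff ("run_id" is a placeholder):

    from lumera import automations

    logs = automations.get_logs("run_id")
    for entry in logs.entries:  # parsed from NDJSON on first access, then cached
        if entry.error:
            print(entry.timestamp, entry.content)
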
@@ -342,7 +414,7 @@ class Run:
              raise ValueError("Cannot fetch logs without run id")
          return get_logs(self.id, offset=offset, limit=limit, all=all)
 
-     def stream_logs(self, *, timeout: float = 30) -> Iterator[str]:
+     def stream_logs(self, *, timeout: float = 30) -> Iterator[LogEntry]:
          """Stream logs from this run.
 
          Works for both live (running) and archived (completed) runs.
@@ -353,12 +425,12 @@ class Run:
              timeout: HTTP connection timeout in seconds.
 
          Yields:
-             Log lines as strings (raw NDJSON lines).
+             LogEntry objects with content, type, timestamp, and error fields.
 
          Example:
              >>> run = automations.run("automation_id", inputs={})
-             >>> for line in run.stream_logs():
-             ...     print(line)
+             >>> for entry in run.stream_logs():
+             ...     print(f"[{entry.type}] {entry.content}")
          """
          if not self.id:
              raise ValueError("Cannot stream logs without run id")
@@ -912,11 +984,11 @@ def delete(automation_id: str) -> None:
  # ============================================================================
 
 
- def stream_logs(run_id: str, *, timeout: float = 30) -> Iterator[str]:
+ def stream_logs(run_id: str, *, timeout: float = 30) -> Iterator[LogEntry]:
      """Stream logs from an automation run.
 
      Works for both live (running) and archived (completed) runs.
-     Connects to the server-sent events endpoint and yields log lines
+     Connects to the server-sent events endpoint and yields LogEntry objects
      as they arrive. For live runs, streams in real-time. For archived
      runs, streams the entire log from storage.
 
@@ -925,11 +997,11 @@ def stream_logs(run_id: str, *, timeout: float = 30) -> Iterator[str]:
          timeout: HTTP connection timeout in seconds.
 
      Yields:
-         Log lines as strings (raw NDJSON lines).
+         LogEntry objects with content, type, timestamp, and error fields.
 
      Example:
-         >>> for line in automations.stream_logs("run_id"):
-         ...     print(line)
+         >>> for entry in automations.stream_logs("run_id"):
+         ...     print(f"[{entry.type}] {entry.content}")
      """
      import base64
      import os
@@ -971,10 +1043,20 @@ def stream_logs(run_id: str, *, timeout: float = 30) -> Iterator[str]:
              try:
                  data = json.loads(current_data)
                  if "data" in data:
-                     # Data is base64-encoded
+                     # Data is base64-encoded NDJSON
                      raw = base64.b64decode(data["data"])
                      decoded = raw.decode("utf-8", errors="replace")
-                     yield from decoded.splitlines()
+                     for ndjson_line in decoded.splitlines():
+                         ndjson_line = ndjson_line.strip()
+                         if not ndjson_line:
+                             continue
+                         try:
+                             entry_data = json.loads(ndjson_line)
+                             if isinstance(entry_data, dict):
+                                 yield LogEntry(entry_data)
+                         except (json.JSONDecodeError, ValueError):
+                             # Skip malformed lines or lines with huge numbers
+                             pass
              except (json.JSONDecodeError, KeyError):
                  pass
          elif current_event == "complete":
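
Taken together, both stream_logs variants now yield LogEntry objects instead of raw NDJSON strings, so callers can filter on type and error directly. A short sketch based on the docstring examples above ("run_id" is a placeholder):

    from lumera import automations

    for entry in automations.stream_logs("run_id"):
        if entry.is_image:
            continue  # content holds base64-encoded image data
        prefix = "ERR" if entry.error else entry.type
        print(f"[{prefix}] {entry.content}")
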
lumera/documents.py ADDED
@@ -0,0 +1,127 @@
+ """
+ Document text extraction via OpenAI Responses API.
+
+ Supports images and PDFs.
+
+ Example:
+     >>> from lumera import documents
+     >>> text = documents.extract_text("invoice.pdf")
+
+     # Or from bytes (no file needed):
+     >>> text = documents.extract_text_from_bytes(pdf_bytes, "application/pdf")
+ """
+
+ from __future__ import annotations
+
+ import base64
+ import mimetypes
+ from pathlib import Path
+
+ __all__ = ["extract_text", "extract_text_from_bytes"]
+
+ _DEFAULT_MODEL = "gpt-5-mini"
+ _DEFAULT_PROMPT = "Extract all text from this document. Return only the extracted text."
+
+
+ def _get_mime_type(file_path: str) -> str:
+     """Get MIME type for a file."""
+     mime, _ = mimetypes.guess_type(file_path)
+     if mime:
+         return mime
+     ext = Path(file_path).suffix.lower()
+     return {
+         ".png": "image/png",
+         ".jpg": "image/jpeg",
+         ".jpeg": "image/jpeg",
+         ".gif": "image/gif",
+         ".webp": "image/webp",
+         ".pdf": "application/pdf",
+     }.get(ext, "application/octet-stream")
+
+
+ def extract_text_from_bytes(
+     data: bytes,
+     mime_type: str,
+     *,
+     filename: str = "document",
+     prompt: str = _DEFAULT_PROMPT,
+     model: str = _DEFAULT_MODEL,
+ ) -> str:
+     """Extract text from document bytes using OpenAI.
+
+     Args:
+         data: Raw file bytes
+         mime_type: MIME type (e.g., "application/pdf", "image/png")
+         filename: Optional filename for context
+         prompt: What to ask the LLM
+         model: Model to use (default: gpt-5-mini)
+
+     Returns:
+         Extracted text
+
+     Example:
+         >>> import requests
+         >>> resp = requests.get(presigned_url)
+         >>> text = documents.extract_text_from_bytes(
+         ...     resp.content,
+         ...     "application/pdf",
+         ...     filename="invoice.pdf"
+         ... )
+     """
+     from . import llm
+
+     b64 = base64.b64encode(data).decode("utf-8")
+     data_url = f"data:{mime_type};base64,{b64}"
+     client = llm.get_provider().client
+
+     response = client.responses.create(
+         model=model,
+         input=[
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "input_file",
+                         "filename": filename,
+                         "file_data": data_url,
+                     },
+                     {
+                         "type": "input_text",
+                         "text": prompt,
+                     },
+                 ],
+             },
+         ],
+     )
+     return response.output_text or ""
+
+
+ def extract_text(
+     file_path: str,
+     *,
+     prompt: str = _DEFAULT_PROMPT,
+     model: str = _DEFAULT_MODEL,
+ ) -> str:
+     """Extract text from a document file using OpenAI.
+
+     Args:
+         file_path: Path to image or PDF
+         prompt: What to ask the LLM
+         model: Model to use (default: gpt-5-mini)
+
+     Returns:
+         Extracted text
+     """
+     with open(file_path, "rb") as f:
+         data = f.read()
+
+     mime = _get_mime_type(file_path)
+     filename = Path(file_path).name
+
+     return extract_text_from_bytes(
+         data,
+         mime,
+         filename=filename,
+         prompt=prompt,
+         model=model,
+     )
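
The module's two entry points differ only in how the bytes are obtained: extract_text reads a file and delegates to extract_text_from_bytes, which base64-encodes the payload into a data URL for the Responses API. A usage sketch based on the docstrings above (the URL and filename are placeholders):

    import requests

    from lumera import documents

    # Bytes from anywhere, e.g. a presigned download; no temp file needed
    resp = requests.get("https://example.com/invoice.pdf")
    text = documents.extract_text_from_bytes(
        resp.content,
        "application/pdf",
        filename="invoice.pdf",
    )
    print(text)
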
lumera/llm.py CHANGED
@@ -16,7 +16,7 @@ Configuration:
 
  Example:
      >>> from lumera import llm
-     >>> response = llm.complete("What is 2+2?", model="gpt-5.2-mini")
+     >>> response = llm.complete("What is 2+2?", model="gpt-5-mini")
      >>> print(response["content"])
  """
 
@@ -135,14 +135,14 @@ class OpenAIProvider(LLMProvider):
      # Model aliases for convenience
      MODEL_ALIASES: dict[str, str] = {
          "gpt-5.2": "gpt-5.2",
-         "gpt-5.2-mini": "gpt-5.2-mini",
-         "gpt-5.2-nano": "gpt-5.2-nano",
+         "gpt-5-mini": "gpt-5-mini",
+         "gpt-5-nano": "gpt-5-nano",
          # Embedding models
          "text-embedding-3-small": "text-embedding-3-small",
          "text-embedding-3-large": "text-embedding-3-large",
      }
 
-     DEFAULT_CHAT_MODEL = "gpt-5.2-mini"
+     DEFAULT_CHAT_MODEL = "gpt-5-mini"
      DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
      DEFAULT_PROVIDER_NAME = "openai"
 
@@ -374,7 +374,7 @@ def set_provider(provider: LLMProvider | str, **kwargs: Unpack[ProviderConfig])
  def complete(
      prompt: str,
      *,
-     model: str = "gpt-5.2-mini",
+     model: str = "gpt-5-mini",
      temperature: float = 0.7,
      max_tokens: int | None = None,
      system_prompt: str | None = None,
@@ -384,7 +384,7 @@ def complete(
 
      Args:
          prompt: User prompt/question
-         model: Model to use (default: gpt-5.2-mini)
+         model: Model to use (default: gpt-5-mini)
          temperature: Sampling temperature 0.0 to 2.0 (default: 0.7)
          max_tokens: Max tokens in response (None = model default)
          system_prompt: Optional system message to set behavior
@@ -397,7 +397,7 @@ def complete(
          >>> response = llm.complete(
          ...     prompt="Classify this deposit: ...",
          ...     system_prompt="You are an expert accountant.",
-         ...     model="gpt-5.2-mini",
+         ...     model="gpt-5-mini",
          ...     json_mode=True
          ... )
          >>> data = json.loads(response["content"])
@@ -416,7 +416,7 @@ def complete(
  def chat(
      messages: list[Message],
      *,
-     model: str = "gpt-5.2-mini",
+     model: str = "gpt-5-mini",
      temperature: float = 0.7,
      max_tokens: int | None = None,
      json_mode: bool = False,
@@ -425,7 +425,7 @@ def chat(
 
      Args:
          messages: Conversation history with role and content
-         model: Model to use (default: gpt-5.2-mini)
+         model: Model to use (default: gpt-5-mini)
          temperature: Sampling temperature 0.0 to 2.0 (default: 0.7)
          max_tokens: Max tokens in response (None = model default)
          json_mode: Force JSON output (default: False)
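
All of the llm.py edits are one rename: the default chat model moves from gpt-5.2-mini to gpt-5-mini (with the nano alias renamed likewise), so call sites that relied on the default need no change. A sketch of a call against the new default (the prompt text is illustrative):

    from lumera import llm

    # model may be omitted; DEFAULT_CHAT_MODEL is now "gpt-5-mini"
    response = llm.complete(
        "Classify this deposit: ...",
        system_prompt="You are an expert accountant.",
        json_mode=True,
    )
    print(response["content"])
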
lumera-0.10.0.dist-info/METADATA → lumera-0.10.2.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lumera
- Version: 0.10.0
+ Version: 0.10.2
  Summary: SDK for building on Lumera platform
  Requires-Python: >=3.11
  Requires-Dist: requests
lumera-0.10.0.dist-info/RECORD → lumera-0.10.2.dist-info/RECORD RENAMED
@@ -1,11 +1,12 @@
- lumera/__init__.py,sha256=5FlY5dSJ1WNM4ko7wgmcajO8G2voBGn4S19E91_WdqE,2687
+ lumera/__init__.py,sha256=TraEFeZvI92sDE88amgTctLzAapnedtOijoY4XDRwVE,2715
  lumera/_utils.py,sha256=b-l3Ebh4n2pC-9T5mR6h4hPf_Wl48VDlHES0pLo1zKE,25766
- lumera/automations.py,sha256=KPP_rD7WKmBs865jiKoonZJjdTno-FSAU7hajPFyqs0,32851
+ lumera/automations.py,sha256=NhUKizNpw0p_EQVpsrLCfmZ7c0Xmh5XOggKdoWJxam0,35869
+ lumera/documents.py,sha256=n585BOhVntvhh6v9uip4Cug4c9N-wu-rTNFLsNHWN6Y,3156
  lumera/email.py,sha256=lk8KUsRw1ZvxgM0FPQXH-jVKUQA5f0zLv88jlc3IWlA,5056
  lumera/exceptions.py,sha256=bNsx4iYaroAAGsYxErfELC2B5ZJ3w5lVa1kKdIx5s9g,2173
  lumera/files.py,sha256=xMJmLTSaQQDttM3AMmpOWc6soh4lvCCKBreV0fXWHQw,3159
  lumera/google.py,sha256=zpWW1qSlzLZY5Ip7cGAzrv9sJrQf3JBKH2ODc1cCM_E,1130
- lumera/llm.py,sha256=pUTZK7t3GTK0vfxMI1PJgJwNendyuiJc5MB1pUj2vxE,14412
+ lumera/llm.py,sha256=HuZ-IUbszCCE0vAta9k89XvKt8_F9qVgF201CoKyYYI,14390
  lumera/locks.py,sha256=8l_qxb8nrxge7YJ-ApUTJ5MeYpIdxDeEa94Eim9O-YM,6806
  lumera/pb.py,sha256=Q_U1cKeB3YgI7bmTquzLYFWTRWcfUZkFSl7JXMBzV7M,20700
  lumera/sdk.py,sha256=Dw0yxlZ-ncjgPkCpVnAJQIURtIsbUA4RVu9VjXLayDc,34078
@@ -13,7 +14,7 @@ lumera/storage.py,sha256=fWkscTvKDzQ-5tsfA1lREO2qgtjJ4Yvxj3hvYNLKiW0,10527
  lumera/webhooks.py,sha256=L_Q5YHBJKQNpv7G9Nq0QqlGMRch6x9ptlwu1xD2qwUc,8661
  lumera/integrations/__init__.py,sha256=LnJmAnFB_p3YMKyeGVdDP4LYlJ85XFNQFAxGo6zF7CI,937
  lumera/integrations/google.py,sha256=QkbBbbDh3I_OToPDFqcivU6sWy2UieHBxZ_TPv5rqK0,11862
- lumera-0.10.0.dist-info/METADATA,sha256=uWvSDuD868zVICFyVUppHMIrWe6A-JKeyxFRurjxieU,1612
- lumera-0.10.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- lumera-0.10.0.dist-info/top_level.txt,sha256=HgfK4XQkpMTnM2E5iWM4kB711FnYqUY9dglzib3pWlE,7
- lumera-0.10.0.dist-info/RECORD,,
+ lumera-0.10.2.dist-info/METADATA,sha256=zQEAmPgFin8QKsWcsLQN3QpqvPRndihzwh6s8E9J8Y4,1612
+ lumera-0.10.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ lumera-0.10.2.dist-info/top_level.txt,sha256=HgfK4XQkpMTnM2E5iWM4kB711FnYqUY9dglzib3pWlE,7
+ lumera-0.10.2.dist-info/RECORD,,