lumera 0.10.1__tar.gz → 0.10.2__tar.gz
This diff shows the changes between publicly available package versions as they appear in their public registry. It is provided for informational purposes only.
- {lumera-0.10.1 → lumera-0.10.2}/PKG-INFO +1 -1
- {lumera-0.10.1 → lumera-0.10.2}/lumera/__init__.py +2 -1
- lumera-0.10.2/lumera/documents.py +127 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/llm.py +9 -9
- {lumera-0.10.1 → lumera-0.10.2}/lumera.egg-info/PKG-INFO +1 -1
- {lumera-0.10.1 → lumera-0.10.2}/lumera.egg-info/SOURCES.txt +1 -0
- {lumera-0.10.1 → lumera-0.10.2}/pyproject.toml +1 -1
- {lumera-0.10.1 → lumera-0.10.2}/lumera/_utils.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/automations.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/email.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/exceptions.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/files.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/google.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/integrations/__init__.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/integrations/google.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/locks.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/pb.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/sdk.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/storage.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera/webhooks.py +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera.egg-info/dependency_links.txt +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera.egg-info/requires.txt +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/lumera.egg-info/top_level.txt +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/setup.cfg +0 -0
- {lumera-0.10.1 → lumera-0.10.2}/tests/test_sdk.py +0 -0
lumera/__init__.py
@@ -13,7 +13,7 @@ except PackageNotFoundError:
     __version__ = "unknown"  # Not installed (e.g., running from source)
 
 # Import new modules (as modules, not individual functions)
-from . import automations, email, exceptions, integrations, llm, locks, pb, storage, webhooks
+from . import automations, documents, email, exceptions, integrations, llm, locks, pb, storage, webhooks
 from ._utils import (
     LumeraAPIError,
     RecordNotUniqueError,
@@ -102,6 +102,7 @@ __all__ = [
     "LockHeldError",
     # New modules (use as lumera.pb, lumera.storage, etc.)
     "automations",
+    "documents",
     "email",
     "pb",
     "storage",
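Together, the two hunks above register the new documents module on the package: it is imported at package load and re-exported through __all__. A minimal sketch of the resulting import surface (the invoice.pdf path is illustrative, and OpenAI credentials are assumed to be configured for lumera.llm):

```python
# Both access paths resolve to the same module object.
import lumera
from lumera import documents

assert lumera.documents is documents

# Delegates to the new module, shown in full below.
text = documents.extract_text("invoice.pdf")  # illustrative path
print(text)
```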
lumera/documents.py (new file)
@@ -0,0 +1,127 @@
+"""
+Document text extraction via OpenAI Responses API.
+
+Supports images and PDFs.
+
+Example:
+    >>> from lumera import documents
+    >>> text = documents.extract_text("invoice.pdf")
+
+    # Or from bytes (no file needed):
+    >>> text = documents.extract_text_from_bytes(pdf_bytes, "application/pdf")
+"""
+
+from __future__ import annotations
+
+import base64
+import mimetypes
+from pathlib import Path
+
+__all__ = ["extract_text", "extract_text_from_bytes"]
+
+_DEFAULT_MODEL = "gpt-5-mini"
+_DEFAULT_PROMPT = "Extract all text from this document. Return only the extracted text."
+
+
+def _get_mime_type(file_path: str) -> str:
+    """Get MIME type for a file."""
+    mime, _ = mimetypes.guess_type(file_path)
+    if mime:
+        return mime
+    ext = Path(file_path).suffix.lower()
+    return {
+        ".png": "image/png",
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".pdf": "application/pdf",
+    }.get(ext, "application/octet-stream")
+
+
+def extract_text_from_bytes(
+    data: bytes,
+    mime_type: str,
+    *,
+    filename: str = "document",
+    prompt: str = _DEFAULT_PROMPT,
+    model: str = _DEFAULT_MODEL,
+) -> str:
+    """Extract text from document bytes using OpenAI.
+
+    Args:
+        data: Raw file bytes
+        mime_type: MIME type (e.g., "application/pdf", "image/png")
+        filename: Optional filename for context
+        prompt: What to ask the LLM
+        model: Model to use (default: gpt-5-mini)
+
+    Returns:
+        Extracted text
+
+    Example:
+        >>> import requests
+        >>> resp = requests.get(presigned_url)
+        >>> text = documents.extract_text_from_bytes(
+        ...     resp.content,
+        ...     "application/pdf",
+        ...     filename="invoice.pdf"
+        ... )
+    """
+    from . import llm
+
+    b64 = base64.b64encode(data).decode("utf-8")
+    data_url = f"data:{mime_type};base64,{b64}"
+    client = llm.get_provider().client
+
+    response = client.responses.create(
+        model=model,
+        input=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "input_file",
+                        "filename": filename,
+                        "file_data": data_url,
+                    },
+                    {
+                        "type": "input_text",
+                        "text": prompt,
+                    },
+                ],
+            },
+        ],
+    )
+    return response.output_text or ""
+
+
+def extract_text(
+    file_path: str,
+    *,
+    prompt: str = _DEFAULT_PROMPT,
+    model: str = _DEFAULT_MODEL,
+) -> str:
+    """Extract text from a document file using OpenAI.
+
+    Args:
+        file_path: Path to image or PDF
+        prompt: What to ask the LLM
+        model: Model to use (default: gpt-5-mini)
+
+    Returns:
+        Extracted text
+    """
+    with open(file_path, "rb") as f:
+        data = f.read()
+
+    mime = _get_mime_type(file_path)
+    filename = Path(file_path).name
+
+    return extract_text_from_bytes(
+        data,
+        mime,
+        filename=filename,
+        prompt=prompt,
+        model=model,
+    )
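The new module is a thin wrapper over the provider client that lumera.llm already manages: the file bytes are base64-encoded into a data: URL and sent through the Responses API alongside the extraction prompt, and the model's output_text is returned. A usage sketch under the same assumptions as the docstrings above (an OpenAI-backed provider configured via lumera.llm; the file name and the custom prompt are illustrative):

```python
from lumera import documents

# Simplest case: read a local file and let the module guess the MIME type.
text = documents.extract_text("invoice.pdf")

# Bytes already in memory (e.g. downloaded from a presigned URL),
# with a narrower prompt and an explicit model override.
with open("invoice.pdf", "rb") as fh:
    pdf_bytes = fh.read()

fields = documents.extract_text_from_bytes(
    pdf_bytes,
    "application/pdf",
    filename="invoice.pdf",
    prompt="Extract only the invoice number and the total amount.",
    model="gpt-5-mini",
)
print(fields)
```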
lumera/llm.py
@@ -16,7 +16,7 @@ Configuration:
 
 Example:
     >>> from lumera import llm
-    >>> response = llm.complete("What is 2+2?", model="gpt-5
+    >>> response = llm.complete("What is 2+2?", model="gpt-5-mini")
     >>> print(response["content"])
 """
 
@@ -135,14 +135,14 @@ class OpenAIProvider(LLMProvider):
     # Model aliases for convenience
     MODEL_ALIASES: dict[str, str] = {
         "gpt-5.2": "gpt-5.2",
-        "gpt-5
-        "gpt-5
+        "gpt-5-mini": "gpt-5-mini",
+        "gpt-5-nano": "gpt-5-nano",
         # Embedding models
         "text-embedding-3-small": "text-embedding-3-small",
         "text-embedding-3-large": "text-embedding-3-large",
     }
 
-    DEFAULT_CHAT_MODEL = "gpt-5
+    DEFAULT_CHAT_MODEL = "gpt-5-mini"
     DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
     DEFAULT_PROVIDER_NAME = "openai"
 
@@ -374,7 +374,7 @@ def set_provider(provider: LLMProvider | str, **kwargs: Unpack[ProviderConfig])
 def complete(
     prompt: str,
     *,
-    model: str = "gpt-5
+    model: str = "gpt-5-mini",
     temperature: float = 0.7,
     max_tokens: int | None = None,
     system_prompt: str | None = None,
@@ -384,7 +384,7 @@ def complete(
 
     Args:
         prompt: User prompt/question
-        model: Model to use (default: gpt-5
+        model: Model to use (default: gpt-5-mini)
         temperature: Sampling temperature 0.0 to 2.0 (default: 0.7)
         max_tokens: Max tokens in response (None = model default)
         system_prompt: Optional system message to set behavior
@@ -397,7 +397,7 @@ def complete(
         >>> response = llm.complete(
         ...     prompt="Classify this deposit: ...",
         ...     system_prompt="You are an expert accountant.",
-        ...     model="gpt-5
+        ...     model="gpt-5-mini",
         ...     json_mode=True
         ... )
         >>> data = json.loads(response["content"])
@@ -416,7 +416,7 @@ def complete(
 def chat(
     messages: list[Message],
     *,
-    model: str = "gpt-5
+    model: str = "gpt-5-mini",
     temperature: float = 0.7,
     max_tokens: int | None = None,
     json_mode: bool = False,
@@ -425,7 +425,7 @@ def chat(
 
     Args:
         messages: Conversation history with role and content
-        model: Model to use (default: gpt-5
+        model: Model to use (default: gpt-5-mini)
         temperature: Sampling temperature 0.0 to 2.0 (default: 0.7)
         max_tokens: Max tokens in response (None = model default)
         json_mode: Force JSON output (default: False)
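chat() mirrors complete() but takes a full message list, and the only change in this release is its default model. A short sketch relying on the new default (this assumes Message is the plain role/content mapping described in the Args section and that chat() returns the same response dict with a "content" key that complete() does):

```python
from lumera import llm

# Multi-turn call; the model defaults to "gpt-5-mini" after this release.
response = llm.chat(
    [
        {"role": "system", "content": "You are an expert accountant."},
        {"role": "user", "content": "Classify this deposit: wire transfer, $1,200."},
    ],
    temperature=0.2,
)
print(response["content"])  # assumed response shape, as with complete()
```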