llm-gemini 0.3a0__py3-none-any.whl → 0.4__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- {llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/METADATA +53 -11
- llm_gemini-0.4.dist-info/RECORD +7 -0
- {llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/WHEEL +1 -1
- llm_gemini.py +150 -24
- llm_gemini-0.3a0.dist-info/RECORD +0 -7
- {llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/LICENSE +0 -0
- {llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/entry_points.txt +0 -0
- {llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/top_level.txt +0 -0
{llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-gemini
-Version: 0.3a0
+Version: 0.4
 Summary: LLM plugin to access Google's Gemini family of models
 Author: Simon Willison
 License: Apache-2.0
@@ -11,11 +11,12 @@ Project-URL: CI, https://github.com/simonw/llm-gemini/actions
 Classifier: License :: OSI Approved :: Apache Software License
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: llm >=0.
+Requires-Dist: llm >=0.17
 Requires-Dist: httpx
 Requires-Dist: ijson
 Provides-Extra: test
 Requires-Dist: pytest ; extra == 'test'
+Requires-Dist: pytest-recording ; extra == 'test'
 
 # llm-gemini
 
@@ -35,13 +36,13 @@ llm install llm-gemini
 ## Usage
 
 Configure the model by setting a key called "gemini" to your [API key](https://aistudio.google.com/app/apikey):
-
 ```bash
 llm keys set gemini
 ```
 ```
 <paste key here>
 ```
+You can also set the API key by assigning it to the environment variable `LLM_GEMINI_KEY`.
 
 Now run the model using `-m gemini-1.5-pro-latest`, for example:
 
@@ -55,16 +56,13 @@ llm -m gemini-1.5-pro-latest "A joke about a pelican and a walrus"
 >
 > The pelican taps its beak thoughtfully. "I believe," it says, "it's a billfish."
 
-To chat interactively with the model, run `llm chat`:
-
-```bash
-llm chat -m gemini-1.5-pro-latest
-```
-
 Other models are:
 
 - `gemini-1.5-flash-latest`
-- gemini-1.5-flash-8b-latest` - the least expensive
+- `gemini-1.5-flash-8b-latest` - the least expensive
+- `gemini-exp-1114` - recent experimental
+
+### Images, audio and video
 
 Gemini models are multi-modal. You can provide images, audio or video files as input like this:
 
@@ -76,8 +74,52 @@ Or with a URL:
 llm -m gemini-1.5-flash-8b-latest 'describe image' \
   -a https://static.simonwillison.net/static/2024/pelicans.jpg
 ```
+Audio works too:
+
+```bash
+llm -m gemini-1.5-pro-latest 'transcribe audio' -a audio.mp3
+```
+
+And video:
+
+```bash
+llm -m gemini-1.5-pro-latest 'describe what happens' -a video.mp4
+```
+The Gemini prompting guide includes [extensive advice](https://ai.google.dev/gemini-api/docs/file-prompting-strategies) on multi-modal prompting.
+
+### JSON output
+
+Use `-o json_object 1` to force the output to be JSON:
+
+```bash
+llm -m gemini-1.5-flash-latest -o json_object 1 \
+  '3 largest cities in California, list of {"name": "..."}'
+```
+Outputs:
+```json
+{"cities": [{"name": "Los Angeles"}, {"name": "San Diego"}, {"name": "San Jose"}]}
+```
+
+### Code execution
+
+Gemini models can [write and execute code](https://ai.google.dev/gemini-api/docs/code-execution) - they can decide to write Python code, execute it in a secure sandbox and use the result as part of their response.
+
+To enable this feature, use `-o code_execution 1`:
+
+```bash
+llm -m gemini-1.5-pro-latest -o code_execution 1 \
+  'use python to calculate (factorial of 13) * 3'
+```
+
+### Chat
+
+To chat interactively with the model, run `llm chat`:
+
+```bash
+llm chat -m gemini-1.5-pro-latest
+```
 
-
+## Embeddings
 
 The plugin also adds support for the `text-embedding-004` embedding model.
 
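The README additions above are all CLI-focused. The same options are available from Python through llm's programmatic API; a minimal sketch, assuming the plugin is installed, a "gemini" key is configured, and options are passed as keyword arguments to `prompt()`:

```python
import llm

model = llm.get_model("gemini-1.5-flash-latest")

# Equivalent of `-o json_object 1` on the CLI
response = model.prompt(
    '3 largest cities in California, list of {"name": "..."}',
    json_object=True,
)
print(response.text())
```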
llm_gemini-0.4.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+llm_gemini.py,sha256=_7yQ14ffRpyK3ChAOc6M2ufylg2kUKTQK4C3VKOiUgM,10672
+llm_gemini-0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+llm_gemini-0.4.dist-info/METADATA,sha256=rxQq1JCu1dvMwuVSXy8Tn1txFBdJX1NjCjdFJ6VEa1g,4862
+llm_gemini-0.4.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+llm_gemini-0.4.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
+llm_gemini-0.4.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
+llm_gemini-0.4.dist-info/RECORD,,
llm_gemini.py CHANGED
@@ -1,9 +1,9 @@
 import httpx
 import ijson
 import llm
-import urllib.parse
+from pydantic import Field
+from typing import Optional
 
-# We disable all of these to avoid random unexpected errors
 SAFETY_SETTINGS = [
     {
         "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
@@ -26,15 +26,20 @@ SAFETY_SETTINGS = [
 
 @llm.hookimpl
 def register_models(register):
-    register(GeminiPro("gemini-pro"))
-    register(GeminiPro("gemini-1.5-pro-latest"))
-    register(GeminiPro("gemini-1.5-flash-latest"))
-    register(GeminiPro("gemini-1.5-pro-001"))
-    register(GeminiPro("gemini-1.5-flash-001"))
-    register(GeminiPro("gemini-1.5-pro-002"))
-    register(GeminiPro("gemini-1.5-flash-002"))
-    register(GeminiPro("gemini-1.5-flash-8b-latest"))
-    register(GeminiPro("gemini-1.5-flash-8b-001"))
+    # Register both sync and async versions of each model
+    for model_id in [
+        "gemini-pro",
+        "gemini-1.5-pro-latest",
+        "gemini-1.5-flash-latest",
+        "gemini-1.5-pro-001",
+        "gemini-1.5-flash-001",
+        "gemini-1.5-pro-002",
+        "gemini-1.5-flash-002",
+        "gemini-1.5-flash-8b-latest",
+        "gemini-1.5-flash-8b-001",
+        "gemini-exp-1114",
+    ]:
+        register(GeminiPro(model_id), AsyncGeminiPro(model_id))
 
 
 def resolve_type(attachment):
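Each ID in the new loop becomes addressable through llm's model registry, now in both sync and async flavors. A quick sketch using the model added in this release:

```python
import llm

# gemini-exp-1114 is the one new model ID in 0.4
model = llm.get_model("gemini-exp-1114")
print(model.model_id)  # -> gemini-exp-1114
```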
@@ -45,7 +50,9 @@ def resolve_type(attachment):
     return mime_type
 
 
-class GeminiPro(llm.Model):
+class _SharedGemini:
+    needs_key = "gemini"
+    key_env_var = "LLM_GEMINI_KEY"
     can_stream = True
 
     attachment_types = (
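The `needs_key` and `key_env_var` attributes on the shared base class are what back the README's note that the key can come from `llm keys set gemini` or from `LLM_GEMINI_KEY`. A sketch of the environment-variable path (the key value is a placeholder):

```python
import os
import llm

os.environ["LLM_GEMINI_KEY"] = "AIza-placeholder"  # hypothetical key
model = llm.get_model("gemini-1.5-pro-latest")
print(model.needs_key)    # -> gemini
print(model.key_env_var)  # -> LLM_GEMINI_KEY
```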
@@ -75,8 +82,54 @@ class GeminiPro(llm.Model):
         "video/webm",
         "video/wmv",
         "video/3gpp",
+        "video/quicktime",
     )
 
+    class Options(llm.Options):
+        code_execution: Optional[bool] = Field(
+            description="Enables the model to generate and run Python code",
+            default=None,
+        )
+        temperature: Optional[float] = Field(
+            description=(
+                "Controls the randomness of the output. Use higher values for "
+                "more creative responses, and lower values for more "
+                "deterministic responses."
+            ),
+            default=None,
+            ge=0.0,
+            le=2.0,
+        )
+        max_output_tokens: Optional[int] = Field(
+            description="Sets the maximum number of tokens to include in a candidate.",
+            default=None,
+        )
+        top_p: Optional[float] = Field(
+            description=(
+                "Changes how the model selects tokens for output. Tokens are "
+                "selected from the most to least probable until the sum of "
+                "their probabilities equals the topP value."
+            ),
+            default=None,
+            ge=0.0,
+            le=1.0,
+        )
+        top_k: Optional[int] = Field(
+            description=(
+                "Changes how the model selects tokens for output. A topK of 1 "
+                "means the selected token is the most probable among all the "
+                "tokens in the model's vocabulary, while a topK of 3 means "
+                "that the next token is selected from among the 3 most "
+                "probable using the temperature."
+            ),
+            default=None,
+            ge=1,
+        )
+        json_object: Optional[bool] = Field(
+            description="Output a valid JSON object {...}",
+            default=None,
+        )
+
     def __init__(self, model_id):
         self.model_id = model_id
 
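Since `llm.Options` is a pydantic model, the `ge`/`le` bounds declared above are enforced before any HTTP request is made. A sketch of a rejected option, assuming the module is importable as `llm_gemini`:

```python
from pydantic import ValidationError

from llm_gemini import GeminiPro

try:
    GeminiPro.Options(temperature=5.0)  # violates le=2.0
except ValidationError as err:
    print(err)
```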
@@ -95,11 +148,14 @@ class GeminiPro(llm.Model):
                             }
                         }
                     )
-                parts.append({"text": response.prompt.prompt})
+                if response.prompt.prompt:
+                    parts.append({"text": response.prompt.prompt})
                 messages.append({"role": "user", "parts": parts})
                 messages.append({"role": "model", "parts": [{"text": response.text()}]})
 
-        parts = [{"text": prompt.prompt}]
+        parts = []
+        if prompt.prompt:
+            parts.append({"text": prompt.prompt})
         for attachment in prompt.attachments:
             mime_type = resolve_type(attachment)
             parts.append(
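The two guards added here mean an attachment-only prompt no longer produces an empty text part. For reference, a sketch of the `contents` structure this method builds for a prior exchange plus a follow-up prompt (values are illustrative):

```python
contents = [
    {"role": "user", "parts": [{"text": "A joke about a pelican"}]},
    {"role": "model", "parts": [{"text": "...previous reply..."}]},
    {"role": "user", "parts": [{"text": "Make it shorter"}]},
]
```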
@@ -114,24 +170,59 @@ class GeminiPro(llm.Model):
         messages.append({"role": "user", "parts": parts})
         return messages
 
-    def execute(self, prompt, stream, response, conversation):
-        key = llm.get_key("", "gemini", "LLM_GEMINI_KEY")
-        url = "https://generativelanguage.googleapis.com/v1beta/models/{}:streamGenerateContent?".format(
-            self.model_id
-        ) + urllib.parse.urlencode(
-            {"key": key}
-        )
-        gathered = []
+    def build_request_body(self, prompt, conversation):
         body = {
             "contents": self.build_messages(prompt, conversation),
             "safetySettings": SAFETY_SETTINGS,
         }
+        if prompt.options and prompt.options.code_execution:
+            body["tools"] = [{"codeExecution": {}}]
         if prompt.system:
             body["systemInstruction"] = {"parts": [{"text": prompt.system}]}
+
+        config_map = {
+            "temperature": "temperature",
+            "max_output_tokens": "maxOutputTokens",
+            "top_p": "topP",
+            "top_k": "topK",
+        }
+        if prompt.options and prompt.options.json_object:
+            body["generationConfig"] = {"response_mime_type": "application/json"}
+
+        if any(
+            getattr(prompt.options, key, None) is not None for key in config_map.keys()
+        ):
+            generation_config = {}
+            for key, other_key in config_map.items():
+                config_value = getattr(prompt.options, key, None)
+                if config_value is not None:
+                    generation_config[other_key] = config_value
+            body["generationConfig"] = generation_config
+
+        return body
+
+    def process_part(self, part):
+        if "text" in part:
+            return part["text"]
+        elif "executableCode" in part:
+            return f'```{part["executableCode"]["language"].lower()}\n{part["executableCode"]["code"].strip()}\n```\n'
+        elif "codeExecutionResult" in part:
+            return f'```\n{part["codeExecutionResult"]["output"].strip()}\n```\n'
+        return ""
+
+
+class GeminiPro(_SharedGemini, llm.Model):
+    def execute(self, prompt, stream, response, conversation):
+        key = self.get_key()
+        url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model_id}:streamGenerateContent"
+        gathered = []
+        body = self.build_request_body(prompt, conversation)
+
         with httpx.stream(
             "POST",
             url,
             timeout=None,
+            headers={"x-goog-api-key": key},
             json=body,
         ) as http_response:
             events = ijson.sendable_list()
@@ -143,7 +234,8 @@ class GeminiPro(llm.Model):
                     if isinstance(event, dict) and "error" in event:
                         raise llm.ModelError(event["error"]["message"])
                     try:
-                        yield event["candidates"][0]["content"]["parts"][0]["text"]
+                        part = event["candidates"][0]["content"]["parts"][0]
+                        yield self.process_part(part)
                     except KeyError:
                         yield ""
                     gathered.append(event)
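The streaming loop relies on ijson's push parser: the endpoint returns one long JSON array, and each completed element lands in `events` as bytes are fed in. A standalone demonstration of the same pattern:

```python
import ijson

events = ijson.sendable_list()
coro = ijson.items_coro(events, "item")

# Feed the parser arbitrary byte chunks, as httpx would deliver them
for chunk in (b'[{"n": 1}', b', {"n": 2}', b"]"):
    coro.send(chunk)
    while events:
        print(events.pop(0))  # {'n': 1}, then {'n': 2}
coro.close()
```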
@@ -151,6 +243,39 @@ class GeminiPro(llm.Model):
         response.response_json = gathered
 
 
+class AsyncGeminiPro(_SharedGemini, llm.AsyncModel):
+    async def execute(self, prompt, stream, response, conversation):
+        key = self.get_key()
+        url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model_id}:streamGenerateContent"
+        gathered = []
+        body = self.build_request_body(prompt, conversation)
+
+        async with httpx.AsyncClient() as client:
+            async with client.stream(
+                "POST",
+                url,
+                timeout=None,
+                headers={"x-goog-api-key": key},
+                json=body,
+            ) as http_response:
+                events = ijson.sendable_list()
+                coro = ijson.items_coro(events, "item")
+                async for chunk in http_response.aiter_bytes():
+                    coro.send(chunk)
+                    if events:
+                        event = events[0]
+                        if isinstance(event, dict) and "error" in event:
+                            raise llm.ModelError(event["error"]["message"])
+                        try:
+                            part = event["candidates"][0]["content"]["parts"][0]
+                            yield self.process_part(part)
+                        except KeyError:
+                            yield ""
+                        gathered.append(event)
+                        events.clear()
+        response.response_json = gathered
+
+
 @llm.hookimpl
 def register_embedding_models(register):
     register(
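A sketch of driving the new async model from Python, assuming an llm release with async model support:

```python
import asyncio
import llm

async def main():
    model = llm.get_async_model("gemini-1.5-flash-latest")
    response = model.prompt("A two-line poem about a walrus")
    async for chunk in response:
        print(chunk, end="")

asyncio.run(main())
```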
@@ -170,6 +295,7 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
     def embed_batch(self, items):
         headers = {
             "Content-Type": "application/json",
+            "x-goog-api-key": self.get_key(),
         }
         data = {
             "requests": [
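For completeness, a sketch of exercising the embedding model this header change applies to (the model ID comes from the README's Embeddings section):

```python
import llm

embedding_model = llm.get_embedding_model("text-embedding-004")
vector = embedding_model.embed("A sentence to embed")
print(len(vector))
```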
@@ -183,7 +309,7 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
 
         with httpx.Client() as client:
             response = client.post(
-                f"https://generativelanguage.googleapis.com/v1beta/models/{self.gemini_model_id}:batchEmbedContents
+                f"https://generativelanguage.googleapis.com/v1beta/models/{self.gemini_model_id}:batchEmbedContents",
                 headers=headers,
                 json=data,
                 timeout=None,
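This completes the move to header-based auth: both the generation and embedding endpoints now send the key as `x-goog-api-key` instead of carrying it in the URL, keeping the secret out of anything that logs full request URLs. The shape of the call, as an httpx sketch with a placeholder key:

```python
import httpx

response = httpx.post(
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "text-embedding-004:batchEmbedContents",
    headers={"x-goog-api-key": "AIza-placeholder"},
    json={"requests": []},  # batch of embedContents requests
    timeout=None,
)
```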
llm_gemini-0.3a0.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
-llm_gemini.py,sha256=nmatZLQyVUUwoaiUloPycKKDbLMzGo4mcXeJwjzAENA,5881
-llm_gemini-0.3a0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-llm_gemini-0.3a0.dist-info/METADATA,sha256=pIxt4pF2XgDJSVi2RNuaOc4fgU2pRYWnasijmry8d_E,3618
-llm_gemini-0.3a0.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
-llm_gemini-0.3a0.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
-llm_gemini-0.3a0.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
-llm_gemini-0.3a0.dist-info/RECORD,,
{llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/LICENSE: file without changes
{llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/entry_points.txt: file without changes
{llm_gemini-0.3a0.dist-info → llm_gemini-0.4.dist-info}/top_level.txt: file without changes