llm-gemini 0.3-py3-none-any.whl → 0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
{llm_gemini-0.3.dist-info → llm_gemini-0.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-gemini
-Version: 0.3
+Version: 0.4
 Summary: LLM plugin to access Google's Gemini family of models
 Author: Simon Willison
 License: Apache-2.0
@@ -16,6 +16,7 @@ Requires-Dist: httpx
 Requires-Dist: ijson
 Provides-Extra: test
 Requires-Dist: pytest ; extra == 'test'
+Requires-Dist: pytest-recording ; extra == 'test'
 
 # llm-gemini
 
@@ -35,13 +36,13 @@ llm install llm-gemini
 ## Usage
 
 Configure the model by setting a key called "gemini" to your [API key](https://aistudio.google.com/app/apikey):
-
 ```bash
 llm keys set gemini
 ```
 ```
 <paste key here>
 ```
+You can also set the API key by assigning it to the environment variable `LLM_GEMINI_KEY`.
 
 Now run the model using `-m gemini-1.5-pro-latest`, for example:
 
@@ -55,6 +56,12 @@ llm -m gemini-1.5-pro-latest "A joke about a pelican and a walrus"
 >
 > The pelican taps its beak thoughtfully. "I believe," it says, "it's a billfish."
 
+Other models are:
+
+- `gemini-1.5-flash-latest`
+- `gemini-1.5-flash-8b-latest` - the least expensive
+- `gemini-exp-1114` - recent experimental
+
 ### Images, audio and video
 
 Gemini models are multi-modal. You can provide images, audio or video files as input like this:
@@ -78,8 +85,22 @@ And video:
 ```bash
 llm -m gemini-1.5-pro-latest 'describe what happens' -a video.mp4
 ```
+The Gemini prompting guide includes [extensive advice](https://ai.google.dev/gemini-api/docs/file-prompting-strategies) on multi-modal prompting.
+
+### JSON output
+
+Use `-o json_object 1` to force the output to be JSON:
+
+```bash
+llm -m gemini-1.5-flash-latest -o json_object 1 \
+  '3 largest cities in California, list of {"name": "..."}'
+```
+Outputs:
+```json
+{"cities": [{"name": "Los Angeles"}, {"name": "San Diego"}, {"name": "San Jose"}]}
+```
 
-## Code execution
+### Code execution
 
 Gemini models can [write and execute code](https://ai.google.dev/gemini-api/docs/code-execution) - they can decide to write Python code, execute it in a secure sandbox and use the result as part of their response.
 
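The new JSON mode can also be driven from Python. A minimal sketch, assuming llm's documented convention that `-o` options become keyword arguments to `prompt()` (not taken from the package's own docs):

```python
# Sketch: JSON mode via llm's Python API; assumes a "gemini" key is
# already configured with `llm keys set gemini`.
import llm

model = llm.get_model("gemini-1.5-flash-latest")
response = model.prompt(
    '3 largest cities in California, list of {"name": "..."}',
    json_object=True,  # same option the CLI sets with -o json_object 1
)
print(response.text())
```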
@@ -98,13 +119,7 @@ To chat interactively with the model, run `llm chat`:
 llm chat -m gemini-1.5-pro-latest
 ```
 
-Other models are:
-
-- `gemini-1.5-flash-latest`
-- gemini-1.5-flash-8b-latest` - the least expensive
-
-
-### Embeddings
+## Embeddings
 
 The plugin also adds support for the `text-embedding-004` embedding model.
 
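For orientation, that embedding model can be used from Python through llm's standard embedding API; a hedged sketch, assuming the same "gemini" key is configured:

```python
# Sketch: embedding a string with the model this plugin registers.
import llm

model = llm.get_embedding_model("text-embedding-004")
vector = model.embed("hello world")
print(len(vector))  # text-embedding-004 produces 768-dimensional vectors
```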
llm_gemini-0.4.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+llm_gemini.py,sha256=_7yQ14ffRpyK3ChAOc6M2ufylg2kUKTQK4C3VKOiUgM,10672
+llm_gemini-0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+llm_gemini-0.4.dist-info/METADATA,sha256=rxQq1JCu1dvMwuVSXy8Tn1txFBdJX1NjCjdFJ6VEa1g,4862
+llm_gemini-0.4.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+llm_gemini-0.4.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
+llm_gemini-0.4.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
+llm_gemini-0.4.dist-info/RECORD,,
{llm_gemini-0.3.dist-info → llm_gemini-0.4.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.2.0)
+Generator: setuptools (75.5.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
llm_gemini.py CHANGED
@@ -4,9 +4,6 @@ import llm
 from pydantic import Field
 from typing import Optional
 
-import urllib.parse
-
-# We disable all of these to avoid random unexpected errors
 SAFETY_SETTINGS = [
     {
         "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
@@ -29,15 +26,20 @@ SAFETY_SETTINGS = [
 
 @llm.hookimpl
 def register_models(register):
-    register(GeminiPro("gemini-pro"))
-    register(GeminiPro("gemini-1.5-pro-latest"))
-    register(GeminiPro("gemini-1.5-flash-latest"))
-    register(GeminiPro("gemini-1.5-pro-001"))
-    register(GeminiPro("gemini-1.5-flash-001"))
-    register(GeminiPro("gemini-1.5-pro-002"))
-    register(GeminiPro("gemini-1.5-flash-002"))
-    register(GeminiPro("gemini-1.5-flash-8b-latest"))
-    register(GeminiPro("gemini-1.5-flash-8b-001"))
+    # Register both sync and async versions of each model
+    for model_id in [
+        "gemini-pro",
+        "gemini-1.5-pro-latest",
+        "gemini-1.5-flash-latest",
+        "gemini-1.5-pro-001",
+        "gemini-1.5-flash-001",
+        "gemini-1.5-pro-002",
+        "gemini-1.5-flash-002",
+        "gemini-1.5-flash-8b-latest",
+        "gemini-1.5-flash-8b-001",
+        "gemini-exp-1114",
+    ]:
+        register(GeminiPro(model_id), AsyncGeminiPro(model_id))
 
 
 def resolve_type(attachment):
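The `AsyncGeminiPro` registrations pair with llm's async model API (added around llm 0.18, released alongside this plugin version). A hedged usage sketch, assuming that version of llm or later:

```python
# Sketch: consuming one of the newly registered async Gemini models.
import asyncio
import llm

async def main():
    model = llm.get_async_model("gemini-1.5-flash-latest")
    response = await model.prompt("A haiku about a pelican")
    print(await response.text())

asyncio.run(main())
```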
@@ -48,7 +50,7 @@ def resolve_type(attachment):
     return mime_type
 
 
-class GeminiPro(llm.Model):
+class _SharedGemini:
     needs_key = "gemini"
     key_env_var = "LLM_GEMINI_KEY"
     can_stream = True
@@ -80,6 +82,7 @@ class GeminiPro(llm.Model):
         "video/webm",
         "video/wmv",
         "video/3gpp",
+        "video/quicktime",
     )
 
     class Options(llm.Options):
@@ -88,7 +91,11 @@ class GeminiPro(llm.Model):
             default=None,
         )
         temperature: Optional[float] = Field(
-            description="Controls the randomness of the output. Use higher values for more creative responses, and lower values for more deterministic responses.",
+            description=(
+                "Controls the randomness of the output. Use higher values for "
+                "more creative responses, and lower values for more "
+                "deterministic responses."
+            ),
             default=None,
             ge=0.0,
             le=2.0,
@@ -98,16 +105,30 @@ class GeminiPro(llm.Model):
             default=None,
         )
         top_p: Optional[float] = Field(
-            description="Changes how the model selects tokens for output. Tokens are selected from the most to least probable until the sum of their probabilities equals the topP value.",
+            description=(
+                "Changes how the model selects tokens for output. Tokens are "
+                "selected from the most to least probable until the sum of "
+                "their probabilities equals the topP value."
+            ),
             default=None,
             ge=0.0,
             le=1.0,
         )
         top_k: Optional[int] = Field(
-            description="Changes how the model selects tokens for output. A topK of 1 means the selected token is the most probable among all the tokens in the model's vocabulary, while a topK of 3 means that the next token is selected from among the 3 most probable using the temperature.",
+            description=(
+                "Changes how the model selects tokens for output. A topK of 1 "
+                "means the selected token is the most probable among all the "
+                "tokens in the model's vocabulary, while a topK of 3 means "
+                "that the next token is selected from among the 3 most "
+                "probable using the temperature."
+            ),
             default=None,
             ge=1,
         )
+        json_object: Optional[bool] = Field(
+            description="Output a valid JSON object {...}",
+            default=None,
+        )
 
     def __init__(self, model_id):
         self.model_id = model_id
@@ -127,11 +148,14 @@ class GeminiPro(llm.Model):
                     }
                 }
             )
-            parts.append({"text": response.prompt.prompt})
+            if response.prompt.prompt:
+                parts.append({"text": response.prompt.prompt})
             messages.append({"role": "user", "parts": parts})
             messages.append({"role": "model", "parts": [{"text": response.text()}]})
 
-        parts = [{"text": prompt.prompt}]
+        parts = []
+        if prompt.prompt:
+            parts.append({"text": prompt.prompt})
         for attachment in prompt.attachments:
             mime_type = resolve_type(attachment)
             parts.append(
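For orientation, this is roughly the shape of the `contents` payload that `build_messages` assembles for a single-turn prompt with one image attachment. Field values are abbreviated and the exact keys are inferred from the Gemini REST API and the surrounding context lines, so treat it as illustrative:

```python
# Illustrative only: approximate "contents" payload for one user turn.
contents = [
    {
        "role": "user",
        "parts": [
            {"text": "describe this image"},  # omitted when the prompt text is empty
            {"inlineData": {"data": "<base64-encoded bytes>", "mime_type": "image/jpeg"}},
        ],
    }
]
```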
@@ -146,14 +170,7 @@ class GeminiPro(llm.Model):
         messages.append({"role": "user", "parts": parts})
         return messages
 
-    def execute(self, prompt, stream, response, conversation):
-        key = self.get_key()
-        url = "https://generativelanguage.googleapis.com/v1beta/models/{}:streamGenerateContent?".format(
-            self.model_id
-        ) + urllib.parse.urlencode(
-            {"key": key}
-        )
-        gathered = []
+    def build_request_body(self, prompt, conversation):
         body = {
             "contents": self.build_messages(prompt, conversation),
             "safetySettings": SAFETY_SETTINGS,
@@ -169,7 +186,9 @@ class GeminiPro(llm.Model):
             "top_p": "topP",
             "top_k": "topK",
         }
-        # If any of those are set in prompt.options...
+        if prompt.options and prompt.options.json_object:
+            body["generationConfig"] = {"response_mime_type": "application/json"}
+
         if any(
             getattr(prompt.options, key, None) is not None for key in config_map.keys()
         ):
@@ -180,10 +199,30 @@ class GeminiPro(llm.Model):
                 generation_config[other_key] = config_value
             body["generationConfig"] = generation_config
 
+        return body
+
+    def process_part(self, part):
+        if "text" in part:
+            return part["text"]
+        elif "executableCode" in part:
+            return f'```{part["executableCode"]["language"].lower()}\n{part["executableCode"]["code"].strip()}\n```\n'
+        elif "codeExecutionResult" in part:
+            return f'```\n{part["codeExecutionResult"]["output"].strip()}\n```\n'
+        return ""
+
+
+class GeminiPro(_SharedGemini, llm.Model):
+    def execute(self, prompt, stream, response, conversation):
+        key = self.get_key()
+        url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model_id}:streamGenerateContent"
+        gathered = []
+        body = self.build_request_body(prompt, conversation)
+
         with httpx.stream(
             "POST",
             url,
             timeout=None,
+            headers={"x-goog-api-key": key},
             json=body,
         ) as http_response:
             events = ijson.sendable_list()
@@ -196,14 +235,7 @@ class GeminiPro(llm.Model):
                     raise llm.ModelError(event["error"]["message"])
                 try:
                     part = event["candidates"][0]["content"]["parts"][0]
-                    if "text" in part:
-                        yield part["text"]
-                    elif "executableCode" in part:
-                        # For code_execution
-                        yield f'```{part["executableCode"]["language"].lower()}\n{part["executableCode"]["code"].strip()}\n```\n'
-                    elif "codeExecutionResult" in part:
-                        # For code_execution
-                        yield f'```\n{part["codeExecutionResult"]["output"].strip()}\n```\n'
+                    yield self.process_part(part)
                 except KeyError:
                     yield ""
                 gathered.append(event)
@@ -211,6 +243,39 @@ class GeminiPro(llm.Model):
         response.response_json = gathered
 
 
+class AsyncGeminiPro(_SharedGemini, llm.AsyncModel):
+    async def execute(self, prompt, stream, response, conversation):
+        key = self.get_key()
+        url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model_id}:streamGenerateContent"
+        gathered = []
+        body = self.build_request_body(prompt, conversation)
+
+        async with httpx.AsyncClient() as client:
+            async with client.stream(
+                "POST",
+                url,
+                timeout=None,
+                headers={"x-goog-api-key": key},
+                json=body,
+            ) as http_response:
+                events = ijson.sendable_list()
+                coro = ijson.items_coro(events, "item")
+                async for chunk in http_response.aiter_bytes():
+                    coro.send(chunk)
+                    if events:
+                        event = events[0]
+                        if isinstance(event, dict) and "error" in event:
+                            raise llm.ModelError(event["error"]["message"])
+                        try:
+                            part = event["candidates"][0]["content"]["parts"][0]
+                            yield self.process_part(part)
+                        except KeyError:
+                            yield ""
+                        gathered.append(event)
+                        events.clear()
+        response.response_json = gathered
+
+
 @llm.hookimpl
 def register_embedding_models(register):
     register(
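Both `execute()` methods stream via the same ijson coroutine pattern: raw bytes are pushed into `items_coro`, and each completed top-level array item lands in the `sendable_list`. A self-contained sketch of just that pattern (only the ijson package assumed; the sample bytes are illustrative):

```python
# Sketch: incremental JSON-array parsing with ijson coroutines, as used above.
import ijson

events = ijson.sendable_list()
coro = ijson.items_coro(events, "item")  # collect completed top-level array items
for chunk in (b'[{"a": "x"},', b' {"b": "y"}]'):
    coro.send(chunk)          # feed bytes as they arrive off the wire
    while events:
        print(events.pop(0))  # {'a': 'x'}, then {'b': 'y'}
coro.close()
```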
@@ -230,6 +295,7 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
     def embed_batch(self, items):
         headers = {
             "Content-Type": "application/json",
+            "x-goog-api-key": self.get_key(),
         }
         data = {
             "requests": [
@@ -243,7 +309,7 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
 
         with httpx.Client() as client:
             response = client.post(
-                f"https://generativelanguage.googleapis.com/v1beta/models/{self.gemini_model_id}:batchEmbedContents?key={self.get_key()}",
+                f"https://generativelanguage.googleapis.com/v1beta/models/{self.gemini_model_id}:batchEmbedContents",
                 headers=headers,
                 json=data,
                 timeout=None,
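A recurring theme across these hunks: every endpoint now sends the key in an `x-goog-api-key` header rather than a `?key=` query parameter, which keeps credentials out of URLs (and therefore out of most server and proxy logs). A standalone sketch of that call style against the Gemini REST API; the `GEMINI_KEY` environment variable name here is illustrative, not something this plugin reads:

```python
# Sketch: header-based auth for the Gemini API, mirroring the change above.
import os
import httpx

url = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-1.5-flash-latest:generateContent"
)
response = httpx.post(
    url,
    headers={"x-goog-api-key": os.environ["GEMINI_KEY"]},  # key travels in a header
    json={"contents": [{"role": "user", "parts": [{"text": "Say hi"}]}]},
)
print(response.json()["candidates"][0]["content"]["parts"][0]["text"])
```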
llm_gemini-0.3.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
-llm_gemini.py,sha256=DQO3ROfJSajqUYmgeuW-4_FJ1yvMoFVKb44ly20oqGw,8628
-llm_gemini-0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-llm_gemini-0.3.dist-info/METADATA,sha256=ROGQUiOTfQHn1FXN3x6cgFFnNsJ75TtHTOyb_EJvFBA,4234
-llm_gemini-0.3.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
-llm_gemini-0.3.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
-llm_gemini-0.3.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
-llm_gemini-0.3.dist-info/RECORD,,