llm-gemini 0.1a5__py3-none-any.whl → 0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_gemini-0.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: llm-gemini
- Version: 0.1a5
+ Version: 0.3
  Summary: LLM plugin to access Google's Gemini family of models
  Author: Simon Willison
  License: Apache-2.0
@@ -11,7 +11,7 @@ Project-URL: CI, https://github.com/simonw/llm-gemini/actions
  Classifier: License :: OSI Approved :: Apache Software License
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: llm
+ Requires-Dist: llm >=0.17
  Requires-Dist: httpx
  Requires-Dist: ijson
  Provides-Extra: test
@@ -43,23 +43,66 @@ llm keys set gemini
  <paste key here>
  ```

- Now run the model using `-m gemini-pro`, for example:
+ Now run the model using `-m gemini-1.5-pro-latest`, for example:

  ```bash
- llm -m gemini-pro "A joke about a pelican and a walrus"
+ llm -m gemini-1.5-pro-latest "A joke about a pelican and a walrus"
  ```

- > Why did the pelican get mad at the walrus?
+ > A pelican walks into a seafood restaurant with a huge fish hanging out of its beak. The walrus, sitting at the bar, eyes it enviously.
  >
- > Because he called him a hippo-crit.
+ > "Hey," the walrus says, "That looks delicious! What kind of fish is that?"
+ >
+ > The pelican taps its beak thoughtfully. "I believe," it says, "it's a billfish."
+
+ ### Images, audio and video
+
+ Gemini models are multi-modal. You can provide images, audio or video files as input like this:
+
+ ```bash
+ llm -m gemini-1.5-flash-latest 'extract text' -a image.jpg
+ ```
+ Or with a URL:
+ ```bash
+ llm -m gemini-1.5-flash-8b-latest 'describe image' \
+   -a https://static.simonwillison.net/static/2024/pelicans.jpg
+ ```
+ Audio works too:
+
+ ```bash
+ llm -m gemini-1.5-pro-latest 'transcribe audio' -a audio.mp3
+ ```
+
+ And video:
+
+ ```bash
+ llm -m gemini-1.5-pro-latest 'describe what happens' -a video.mp4
+ ```
+
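For reference, the bumped `llm >=0.17` dependency above is what provides attachment support, and the same multi-modal calls can be made from Python. A minimal sketch, assuming llm 0.17's attachment API (`llm.get_model`, `llm.Attachment`, `attachments=`):

```python
import llm

# Minimal sketch, assuming llm >= 0.17's attachment API;
# "image.jpg" is a placeholder path.
model = llm.get_model("gemini-1.5-flash-latest")
response = model.prompt(
    "extract text",
    attachments=[llm.Attachment(path="image.jpg")],
)
print(response.text())
```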
+ ## Code execution
+
+ Gemini models can [write and execute code](https://ai.google.dev/gemini-api/docs/code-execution) - they can decide to write Python code, execute it in a secure sandbox and use the result as part of their response.
+
+ To enable this feature, use `-o code_execution 1`:
+
+ ```bash
+ llm -m gemini-1.5-pro-latest -o code_execution 1 \
+   'use python to calculate (factorial of 13) * 3'
+ ```
+
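From the Python API the same option is passed as a keyword argument to `prompt()`; a sketch, assuming llm's usual keyword-options behavior:

```python
import llm

# Sketch: code_execution corresponds to the Options field defined
# in llm_gemini.py below; passing it as a keyword argument assumes
# llm's standard option handling.
model = llm.get_model("gemini-1.5-pro-latest")
response = model.prompt(
    "use python to calculate (factorial of 13) * 3",
    code_execution=True,
)
print(response.text())
```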
+ ### Chat

  To chat interactively with the model, run `llm chat`:

  ```bash
- llm chat -m gemini-pro
+ llm chat -m gemini-1.5-pro-latest
  ```

- If you have access to the Gemini 1.5 Pro preview you can use `-m gemini-1.5-pro-latest` to work with that model.
+ Other models are:
+
+ - `gemini-1.5-flash-latest`
+ - `gemini-1.5-flash-8b-latest` - the least expensive
+

  ### Embeddings

llm_gemini-0.3.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ llm_gemini.py,sha256=DQO3ROfJSajqUYmgeuW-4_FJ1yvMoFVKb44ly20oqGw,8628
+ llm_gemini-0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ llm_gemini-0.3.dist-info/METADATA,sha256=ROGQUiOTfQHn1FXN3x6cgFFnNsJ75TtHTOyb_EJvFBA,4234
+ llm_gemini-0.3.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+ llm_gemini-0.3.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
+ llm_gemini-0.3.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
+ llm_gemini-0.3.dist-info/RECORD,,
llm_gemini-0.3.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.1.0)
+ Generator: setuptools (75.2.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

llm_gemini.py CHANGED
@@ -1,6 +1,9 @@
  import httpx
  import ijson
  import llm
+ from pydantic import Field
+ from typing import Optional
+
  import urllib.parse

  # We disable all of these to avoid random unexpected errors
@@ -33,28 +36,118 @@ def register_models(register):
      register(GeminiPro("gemini-1.5-flash-001"))
      register(GeminiPro("gemini-1.5-pro-002"))
      register(GeminiPro("gemini-1.5-flash-002"))
+     register(GeminiPro("gemini-1.5-flash-8b-latest"))
+     register(GeminiPro("gemini-1.5-flash-8b-001"))
+
+
+ def resolve_type(attachment):
+     mime_type = attachment.resolve_type()
+     # https://github.com/simonw/llm/issues/587#issuecomment-2439785140
+     if mime_type == "audio/mpeg":
+         mime_type = "audio/mp3"
+     return mime_type


  class GeminiPro(llm.Model):
+     needs_key = "gemini"
+     key_env_var = "LLM_GEMINI_KEY"
      can_stream = True

+     attachment_types = (
+         # PDF
+         "application/pdf",
+         # Images
+         "image/png",
+         "image/jpeg",
+         "image/webp",
+         "image/heic",
+         "image/heif",
+         # Audio
+         "audio/wav",
+         "audio/mp3",
+         "audio/aiff",
+         "audio/aac",
+         "audio/ogg",
+         "audio/flac",
+         "audio/mpeg",  # Treated as audio/mp3
+         # Video
+         "video/mp4",
+         "video/mpeg",
+         "video/mov",
+         "video/avi",
+         "video/x-flv",
+         "video/mpg",
+         "video/webm",
+         "video/wmv",
+         "video/3gpp",
+     )
+
+     class Options(llm.Options):
+         code_execution: Optional[bool] = Field(
+             description="Enables the model to generate and run Python code",
+             default=None,
+         )
+         temperature: Optional[float] = Field(
+             description="Controls the randomness of the output. Use higher values for more creative responses, and lower values for more deterministic responses.",
+             default=None,
+             ge=0.0,
+             le=2.0,
+         )
+         max_output_tokens: Optional[int] = Field(
+             description="Sets the maximum number of tokens to include in a candidate.",
+             default=None,
+         )
+         top_p: Optional[float] = Field(
+             description="Changes how the model selects tokens for output. Tokens are selected from the most to least probable until the sum of their probabilities equals the topP value.",
+             default=None,
+             ge=0.0,
+             le=1.0,
+         )
+         top_k: Optional[int] = Field(
+             description="Changes how the model selects tokens for output. A topK of 1 means the selected token is the most probable among all the tokens in the model's vocabulary, while a topK of 3 means that the next token is selected from among the 3 most probable using the temperature.",
+             default=None,
+             ge=1,
+         )
+
      def __init__(self, model_id):
          self.model_id = model_id

      def build_messages(self, prompt, conversation):
-         if not conversation:
-             return [{"role": "user", "parts": [{"text": prompt.prompt}]}]
          messages = []
-         for response in conversation.responses:
-             messages.append(
-                 {"role": "user", "parts": [{"text": response.prompt.prompt}]}
+         if conversation:
+             for response in conversation.responses:
+                 parts = []
+                 for attachment in response.attachments:
+                     mime_type = resolve_type(attachment)
+                     parts.append(
+                         {
+                             "inlineData": {
+                                 "data": attachment.base64_content(),
+                                 "mimeType": mime_type,
+                             }
+                         }
+                     )
+                 parts.append({"text": response.prompt.prompt})
+                 messages.append({"role": "user", "parts": parts})
+                 messages.append({"role": "model", "parts": [{"text": response.text()}]})
+
+         parts = [{"text": prompt.prompt}]
+         for attachment in prompt.attachments:
+             mime_type = resolve_type(attachment)
+             parts.append(
+                 {
+                     "inlineData": {
+                         "data": attachment.base64_content(),
+                         "mimeType": mime_type,
+                     }
+                 }
              )
-         messages.append({"role": "model", "parts": [{"text": response.text()}]})
-         messages.append({"role": "user", "parts": [{"text": prompt.prompt}]})
+
+         messages.append({"role": "user", "parts": parts})
          return messages

      def execute(self, prompt, stream, response, conversation):
-         key = llm.get_key("", "gemini", "LLM_GEMINI_KEY")
+         key = self.get_key()
          url = "https://generativelanguage.googleapis.com/v1beta/models/{}:streamGenerateContent?".format(
              self.model_id
          ) + urllib.parse.urlencode(
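For orientation, `build_messages` above assembles the Gemini REST API's `contents` structure. A sketch of the shape produced for a single prompt with one image attachment (placeholder values, not a captured request):

```python
# Shape sketch only; the field names ("inlineData", "mimeType")
# mirror what build_messages emits above.
contents = [
    {
        "role": "user",
        "parts": [
            {"text": "extract text"},
            {
                "inlineData": {
                    "data": "<base64-encoded image bytes>",
                    "mimeType": "image/jpeg",
                }
            },
        ],
    }
]
```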
@@ -65,8 +158,28 @@ class GeminiPro(llm.Model):
              "contents": self.build_messages(prompt, conversation),
              "safetySettings": SAFETY_SETTINGS,
          }
+         if prompt.options and prompt.options.code_execution:
+             body["tools"] = [{"codeExecution": {}}]
          if prompt.system:
              body["systemInstruction"] = {"parts": [{"text": prompt.system}]}
+
+         config_map = {
+             "temperature": "temperature",
+             "max_output_tokens": "maxOutputTokens",
+             "top_p": "topP",
+             "top_k": "topK",
+         }
+         # If any of those are set in prompt.options...
+         if any(
+             getattr(prompt.options, key, None) is not None for key in config_map.keys()
+         ):
+             generation_config = {}
+             for key, other_key in config_map.items():
+                 config_value = getattr(prompt.options, key, None)
+                 if config_value is not None:
+                     generation_config[other_key] = config_value
+             body["generationConfig"] = generation_config
+
          with httpx.stream(
              "POST",
              url,
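The `config_map` translation above is mechanical: snake_case option names become the camelCase keys that `generationConfig` expects. A standalone sketch of the same logic, using a stand-in object in place of the real `prompt.options`:

```python
from types import SimpleNamespace

# Stand-in for prompt.options; illustration only.
options = SimpleNamespace(temperature=0.2, max_output_tokens=None, top_p=None, top_k=40)

config_map = {
    "temperature": "temperature",
    "max_output_tokens": "maxOutputTokens",
    "top_p": "topP",
    "top_k": "topK",
}
# Keep only the options that were actually set, renaming as we go.
generation_config = {
    camel: getattr(options, snake)
    for snake, camel in config_map.items()
    if getattr(options, snake, None) is not None
}
print(generation_config)  # {'temperature': 0.2, 'topK': 40}
```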
@@ -82,7 +195,15 @@ class GeminiPro(llm.Model):
                      if isinstance(event, dict) and "error" in event:
                          raise llm.ModelError(event["error"]["message"])
                      try:
-                         yield event["candidates"][0]["content"]["parts"][0]["text"]
+                         part = event["candidates"][0]["content"]["parts"][0]
+                         if "text" in part:
+                             yield part["text"]
+                         elif "executableCode" in part:
+                             # For code_execution
+                             yield f'```{part["executableCode"]["language"].lower()}\n{part["executableCode"]["code"].strip()}\n```\n'
+                         elif "codeExecutionResult" in part:
+                             # For code_execution
+                             yield f'```\n{part["codeExecutionResult"]["output"].strip()}\n```\n'
                      except KeyError:
                          yield ""
                      gathered.append(event)
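The three branches above handle the three part shapes the streaming endpoint can return. Sketches of each (keys as read by the code; example values invented):

```python
# Plain text part: yielded as-is.
text_part = {"text": "Hello!"}
# Emitted when code_execution is enabled: yielded as a fenced code block.
code_part = {"executableCode": {"language": "PYTHON", "code": "print(1 + 1)"}}
# The sandboxed run's output: yielded as a fenced output block.
result_part = {"codeExecutionResult": {"output": "2\n"}}
```

Rendering executed code and its output as fenced blocks keeps the streamed response readable as markdown, both in the terminal and in logged responses.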
llm_gemini-0.1a5.dist-info/RECORD REMOVED
@@ -1,7 +0,0 @@
- llm_gemini.py,sha256=h14JieCdToWEVmDBL65Nf84SXXLQx0db9oUyqFn9ptE,4200
- llm_gemini-0.1a5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- llm_gemini-0.1a5.dist-info/METADATA,sha256=2FQqX1O0O_V7AYKleSSFR7FyoAF8zePFj6OjHzII63g,3061
- llm_gemini-0.1a5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- llm_gemini-0.1a5.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
- llm_gemini-0.1a5.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
- llm_gemini-0.1a5.dist-info/RECORD,,