llm-gemini 0.13a0__tar.gz → 0.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: llm-gemini
3
- Version: 0.13a0
3
+ Version: 0.14
4
4
  Summary: LLM plugin to access Google's Gemini family of models
5
5
  Author: Simon Willison
6
6
  License: Apache-2.0
@@ -11,12 +11,13 @@ Project-URL: CI, https://github.com/simonw/llm-gemini/actions
11
11
  Classifier: License :: OSI Approved :: Apache Software License
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: llm>=0.23a0
14
+ Requires-Dist: llm>=0.23
15
15
  Requires-Dist: httpx
16
16
  Requires-Dist: ijson
17
17
  Provides-Extra: test
18
18
  Requires-Dist: pytest; extra == "test"
19
19
  Requires-Dist: pytest-recording; extra == "test"
20
+ Requires-Dist: pytest-asyncio; extra == "test"
20
21
  Requires-Dist: nest-asyncio; extra == "test"
21
22
 
22
23
  # llm-gemini
@@ -145,7 +146,7 @@ llm chat -m gemini-1.5-pro-latest
145
146
 
146
147
  ## Embeddings
147
148
 
148
- The plugin also adds support for the `text-embedding-004` embedding model.
149
+ The plugin also adds support for the `gemini-embedding-exp-03-07` and `text-embedding-004` embedding models.
149
150
 
150
151
  Run that against a single string like this:
151
152
  ```bash
@@ -153,10 +154,20 @@ llm embed -m text-embedding-004 -c 'hello world'
153
154
  ```
154
155
  This returns a JSON array of 768 numbers.
155
156
 
157
+ The `gemini-embedding-exp-03-07` model is larger, returning 3072 numbers. You can also use variants of it that are truncated down to smaller sizes:
158
+
159
+ - `gemini-embedding-exp-03-07` - 3072 numbers
160
+ - `gemini-embedding-exp-03-07-2048` - 2048 numbers
161
+ - `gemini-embedding-exp-03-07-1024` - 1024 numbers
162
+ - `gemini-embedding-exp-03-07-512` - 512 numbers
163
+ - `gemini-embedding-exp-03-07-256` - 256 numbers
164
+ - `gemini-embedding-exp-03-07-128` - 128 numbers
165
+
156
166
  This command will embed every `README.md` file in child directories of the current directory and store the results in a SQLite database called `embed.db` in a collection called `readmes`:
157
167
 
158
168
  ```bash
159
- llm embed-multi readmes --files . '*/README.md' -d embed.db -m text-embedding-004
169
+ llm embed-multi readmes -d embed.db -m gemini-embedding-exp-03-07-128 \
170
+ --files . '*/README.md'
160
171
  ```
161
172
  You can then run similarity searches against that collection like this:
162
173
  ```bash
@@ -124,7 +124,7 @@ llm chat -m gemini-1.5-pro-latest
124
124
 
125
125
  ## Embeddings
126
126
 
127
- The plugin also adds support for the `text-embedding-004` embedding model.
127
+ The plugin also adds support for the `gemini-embedding-exp-03-07` and `text-embedding-004` embedding models.
128
128
 
129
129
  Run that against a single string like this:
130
130
  ```bash
@@ -132,10 +132,20 @@ llm embed -m text-embedding-004 -c 'hello world'
132
132
  ```
133
133
  This returns a JSON array of 768 numbers.
134
134
 
135
+ The `gemini-embedding-exp-03-07` model is larger, returning 3072 numbers. You can also use variants of it that are truncated down to smaller sizes:
136
+
137
+ - `gemini-embedding-exp-03-07` - 3072 numbers
138
+ - `gemini-embedding-exp-03-07-2048` - 2048 numbers
139
+ - `gemini-embedding-exp-03-07-1024` - 1024 numbers
140
+ - `gemini-embedding-exp-03-07-512` - 512 numbers
141
+ - `gemini-embedding-exp-03-07-256` - 256 numbers
142
+ - `gemini-embedding-exp-03-07-128` - 128 numbers
143
+
135
144
  This command will embed every `README.md` file in child directories of the current directory and store the results in a SQLite database called `embed.db` in a collection called `readmes`:
136
145
 
137
146
  ```bash
138
- llm embed-multi readmes --files . '*/README.md' -d embed.db -m text-embedding-004
147
+ llm embed-multi readmes -d embed.db -m gemini-embedding-exp-03-07-128 \
148
+ --files . '*/README.md'
139
149
  ```
140
150
  You can then run similarity searches against that collection like this:
141
151
  ```bash
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: llm-gemini
3
- Version: 0.13a0
3
+ Version: 0.14
4
4
  Summary: LLM plugin to access Google's Gemini family of models
5
5
  Author: Simon Willison
6
6
  License: Apache-2.0
@@ -11,12 +11,13 @@ Project-URL: CI, https://github.com/simonw/llm-gemini/actions
11
11
  Classifier: License :: OSI Approved :: Apache Software License
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: llm>=0.23a0
14
+ Requires-Dist: llm>=0.23
15
15
  Requires-Dist: httpx
16
16
  Requires-Dist: ijson
17
17
  Provides-Extra: test
18
18
  Requires-Dist: pytest; extra == "test"
19
19
  Requires-Dist: pytest-recording; extra == "test"
20
+ Requires-Dist: pytest-asyncio; extra == "test"
20
21
  Requires-Dist: nest-asyncio; extra == "test"
21
22
 
22
23
  # llm-gemini
@@ -145,7 +146,7 @@ llm chat -m gemini-1.5-pro-latest
145
146
 
146
147
  ## Embeddings
147
148
 
148
- The plugin also adds support for the `text-embedding-004` embedding model.
149
+ The plugin also adds support for the `gemini-embedding-exp-03-07` and `text-embedding-004` embedding models.
149
150
 
150
151
  Run that against a single string like this:
151
152
  ```bash
@@ -153,10 +154,20 @@ llm embed -m text-embedding-004 -c 'hello world'
153
154
  ```
154
155
  This returns a JSON array of 768 numbers.
155
156
 
157
+ The `gemini-embedding-exp-03-07` model is larger, returning 3072 numbers. You can also use variants of it that are truncated down to smaller sizes:
158
+
159
+ - `gemini-embedding-exp-03-07` - 3072 numbers
160
+ - `gemini-embedding-exp-03-07-2048` - 2048 numbers
161
+ - `gemini-embedding-exp-03-07-1024` - 1024 numbers
162
+ - `gemini-embedding-exp-03-07-512` - 512 numbers
163
+ - `gemini-embedding-exp-03-07-256` - 256 numbers
164
+ - `gemini-embedding-exp-03-07-128` - 128 numbers
165
+
156
166
  This command will embed every `README.md` file in child directories of the current directory and store the results in a SQLite database called `embed.db` in a collection called `readmes`:
157
167
 
158
168
  ```bash
159
- llm embed-multi readmes --files . '*/README.md' -d embed.db -m text-embedding-004
169
+ llm embed-multi readmes -d embed.db -m gemini-embedding-exp-03-07-128 \
170
+ --files . '*/README.md'
160
171
  ```
161
172
  You can then run similarity searches against that collection like this:
162
173
  ```bash
@@ -1,8 +1,9 @@
1
- llm>=0.23a0
1
+ llm>=0.23
2
2
  httpx
3
3
  ijson
4
4
 
5
5
  [test]
6
6
  pytest
7
7
  pytest-recording
8
+ pytest-asyncio
8
9
  nest-asyncio
@@ -65,8 +65,16 @@ def register_models(register):
65
65
  ]:
66
66
  can_google_search = model_id in GOOGLE_SEARCH_MODELS
67
67
  register(
68
- GeminiPro(model_id, can_google_search=can_google_search),
69
- AsyncGeminiPro(model_id, can_google_search=can_google_search),
68
+ GeminiPro(
69
+ model_id,
70
+ can_google_search=can_google_search,
71
+ can_schema="flash-thinking" not in model_id,
72
+ ),
73
+ AsyncGeminiPro(
74
+ model_id,
75
+ can_google_search=can_google_search,
76
+ can_schema="flash-thinking" not in model_id,
77
+ ),
70
78
  )
71
79
 
72
80
 
@@ -82,7 +90,7 @@ def resolve_type(attachment):
82
90
 
83
91
  def cleanup_schema(schema):
84
92
  "Gemini supports only a subset of JSON schema"
85
- keys_to_remove = ("$schema", "additionalProperties")
93
+ keys_to_remove = ("$schema", "additionalProperties", "title")
86
94
  # Recursively remove them
87
95
  if isinstance(schema, dict):
88
96
  for key in keys_to_remove:
@@ -186,9 +194,10 @@ class _SharedGemini:
186
194
  default=None,
187
195
  )
188
196
 
189
- def __init__(self, model_id, can_google_search=False):
197
+ def __init__(self, model_id, can_google_search=False, can_schema=False):
190
198
  self.model_id = model_id
191
199
  self.can_google_search = can_google_search
200
+ self.supports_schema = can_schema
192
201
  if can_google_search:
193
202
  self.Options = self.OptionsWithGoogleSearch
194
203
 
@@ -279,9 +288,17 @@ class _SharedGemini:
279
288
  return f'```\n{part["codeExecutionResult"]["output"].strip()}\n```\n'
280
289
  return ""
281
290
 
291
+ def process_candidates(self, candidates):
292
+ # We only use the first candidate
293
+ for part in candidates[0]["content"]["parts"]:
294
+ yield self.process_part(part)
295
+
282
296
  def set_usage(self, response):
283
297
  try:
284
- usage = response.response_json[-1].pop("usageMetadata")
298
+ # Don't record the "content" key from that last candidate
299
+ for candidate in response.response_json["candidates"]:
300
+ candidate.pop("content", None)
301
+ usage = response.response_json.pop("usageMetadata")
285
302
  input_tokens = usage.pop("promptTokenCount", None)
286
303
  output_tokens = usage.pop("candidatesTokenCount", None)
287
304
  usage.pop("totalTokenCount", None)
@@ -311,17 +328,16 @@ class GeminiPro(_SharedGemini, llm.KeyModel):
311
328
  for chunk in http_response.iter_bytes():
312
329
  coro.send(chunk)
313
330
  if events:
314
- event = events[0]
315
- if isinstance(event, dict) and "error" in event:
316
- raise llm.ModelError(event["error"]["message"])
317
- try:
318
- part = event["candidates"][0]["content"]["parts"][0]
319
- yield self.process_part(part)
320
- except KeyError:
321
- yield ""
322
- gathered.append(event)
331
+ for event in events:
332
+ if isinstance(event, dict) and "error" in event:
333
+ raise llm.ModelError(event["error"]["message"])
334
+ try:
335
+ yield from self.process_candidates(event["candidates"])
336
+ except KeyError:
337
+ yield ""
338
+ gathered.append(event)
323
339
  events.clear()
324
- response.response_json = gathered
340
+ response.response_json = gathered[-1]
325
341
  self.set_usage(response)
326
342
 
327
343
 
@@ -344,25 +360,37 @@ class AsyncGeminiPro(_SharedGemini, llm.AsyncKeyModel):
344
360
  async for chunk in http_response.aiter_bytes():
345
361
  coro.send(chunk)
346
362
  if events:
347
- event = events[0]
348
- if isinstance(event, dict) and "error" in event:
349
- raise llm.ModelError(event["error"]["message"])
350
- try:
351
- part = event["candidates"][0]["content"]["parts"][0]
352
- yield self.process_part(part)
353
- except KeyError:
354
- yield ""
355
- gathered.append(event)
363
+ for event in events:
364
+ if isinstance(event, dict) and "error" in event:
365
+ raise llm.ModelError(event["error"]["message"])
366
+ try:
367
+ for chunk in self.process_candidates(
368
+ event["candidates"]
369
+ ):
370
+ yield chunk
371
+ except KeyError:
372
+ yield ""
373
+ gathered.append(event)
356
374
  events.clear()
357
- response.response_json = gathered
375
+ response.response_json = gathered[-1]
358
376
  self.set_usage(response)
359
377
 
360
378
 
361
379
  @llm.hookimpl
362
380
  def register_embedding_models(register):
381
+ register(GeminiEmbeddingModel("text-embedding-004", "text-embedding-004"))
382
+ # gemini-embedding-exp-03-07 in different truncation sizes
363
383
  register(
364
- GeminiEmbeddingModel("text-embedding-004", "text-embedding-004"),
384
+ GeminiEmbeddingModel(
385
+ "gemini-embedding-exp-03-07", "gemini-embedding-exp-03-07"
386
+ ),
365
387
  )
388
+ for i in (128, 256, 512, 1024, 2048):
389
+ register(
390
+ GeminiEmbeddingModel(
391
+ f"gemini-embedding-exp-03-07-{i}", f"gemini-embedding-exp-03-07", i
392
+ ),
393
+ )
366
394
 
367
395
 
368
396
  class GeminiEmbeddingModel(llm.EmbeddingModel):
@@ -370,9 +398,10 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
370
398
  key_env_var = "LLM_GEMINI_KEY"
371
399
  batch_size = 20
372
400
 
373
- def __init__(self, model_id, gemini_model_id):
401
+ def __init__(self, model_id, gemini_model_id, truncate=None):
374
402
  self.model_id = model_id
375
403
  self.gemini_model_id = gemini_model_id
404
+ self.truncate = truncate
376
405
 
377
406
  def embed_batch(self, items):
378
407
  headers = {
@@ -398,4 +427,7 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
398
427
  )
399
428
 
400
429
  response.raise_for_status()
401
- return [item["values"] for item in response.json()["embeddings"]]
430
+ values = [item["values"] for item in response.json()["embeddings"]]
431
+ if self.truncate:
432
+ values = [value[: self.truncate] for value in values]
433
+ return values
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "llm-gemini"
3
- version = "0.13a0"
3
+ version = "0.14"
4
4
  description = "LLM plugin to access Google's Gemini family of models"
5
5
  readme = "README.md"
6
6
  authors = [{name = "Simon Willison"}]
@@ -9,7 +9,7 @@ classifiers = [
9
9
  "License :: OSI Approved :: Apache Software License"
10
10
  ]
11
11
  dependencies = [
12
- "llm>=0.23a0",
12
+ "llm>=0.23",
13
13
  "httpx",
14
14
  "ijson"
15
15
  ]
@@ -24,4 +24,4 @@ CI = "https://github.com/simonw/llm-gemini/actions"
24
24
  gemini = "llm_gemini"
25
25
 
26
26
  [project.optional-dependencies]
27
- test = ["pytest", "pytest-recording", "nest-asyncio"]
27
+ test = ["pytest", "pytest-recording", "pytest-asyncio", "nest-asyncio"]
@@ -0,0 +1,125 @@
1
+ import llm
2
+ import nest_asyncio
3
+ import json
4
+ import os
5
+ import pytest
6
+ import pydantic
7
+
8
+ nest_asyncio.apply()
9
+
10
+ GEMINI_API_KEY = os.environ.get("PYTEST_GEMINI_API_KEY", None) or "gm-..."
11
+
12
+
13
+ @pytest.mark.vcr
14
+ @pytest.mark.asyncio
15
+ async def test_prompt():
16
+ model = llm.get_model("gemini-1.5-flash-latest")
17
+ response = model.prompt("Name for a pet pelican, just the name", key=GEMINI_API_KEY)
18
+ assert str(response) == "Percy\n"
19
+ assert response.response_json == {
20
+ "candidates": [
21
+ {
22
+ "finishReason": "STOP",
23
+ "safetyRatings": [
24
+ {
25
+ "category": "HARM_CATEGORY_HATE_SPEECH",
26
+ "probability": "NEGLIGIBLE",
27
+ },
28
+ {
29
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
30
+ "probability": "NEGLIGIBLE",
31
+ },
32
+ {
33
+ "category": "HARM_CATEGORY_HARASSMENT",
34
+ "probability": "NEGLIGIBLE",
35
+ },
36
+ {
37
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
38
+ "probability": "NEGLIGIBLE",
39
+ },
40
+ ],
41
+ }
42
+ ],
43
+ "modelVersion": "gemini-1.5-flash-latest",
44
+ }
45
+ assert response.token_details == {
46
+ "promptTokensDetails": [{"modality": "TEXT", "tokenCount": 9}],
47
+ "candidatesTokensDetails": [{"modality": "TEXT", "tokenCount": 2}],
48
+ }
49
+ assert response.input_tokens == 9
50
+ assert response.output_tokens == 2
51
+
52
+ # And try it async too
53
+ async_model = llm.get_async_model("gemini-1.5-flash-latest")
54
+ response = await async_model.prompt(
55
+ "Name for a pet pelican, just the name", key=GEMINI_API_KEY
56
+ )
57
+ text = await response.text()
58
+ assert text == "Percy\n"
59
+
60
+
61
+ @pytest.mark.vcr
62
+ @pytest.mark.asyncio
63
+ async def test_prompt_with_pydantic_schema():
64
+ class Dog(pydantic.BaseModel):
65
+ name: str
66
+ age: int
67
+ bio: str
68
+
69
+ model = llm.get_model("gemini-1.5-flash-latest")
70
+ response = model.prompt(
71
+ "Invent a cool dog", key=GEMINI_API_KEY, schema=Dog, stream=False
72
+ )
73
+ assert json.loads(response.text()) == {
74
+ "age": 3,
75
+ "bio": "A fluffy Samoyed with exceptional intelligence and a love for belly rubs. He's mastered several tricks, including fetching the newspaper and opening doors.",
76
+ "name": "Cloud",
77
+ }
78
+ assert response.response_json == {
79
+ "candidates": [
80
+ {
81
+ "finishReason": "STOP",
82
+ "safetyRatings": [
83
+ {
84
+ "category": "HARM_CATEGORY_HATE_SPEECH",
85
+ "probability": "NEGLIGIBLE",
86
+ },
87
+ {
88
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
89
+ "probability": "NEGLIGIBLE",
90
+ },
91
+ {
92
+ "category": "HARM_CATEGORY_HARASSMENT",
93
+ "probability": "NEGLIGIBLE",
94
+ },
95
+ {
96
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
97
+ "probability": "NEGLIGIBLE",
98
+ },
99
+ ],
100
+ }
101
+ ],
102
+ "modelVersion": "gemini-1.5-flash-latest",
103
+ }
104
+ assert response.input_tokens == 10
105
+
106
+
107
+ @pytest.mark.vcr
108
+ @pytest.mark.parametrize(
109
+ "model_id",
110
+ (
111
+ "gemini-embedding-exp-03-07",
112
+ "gemini-embedding-exp-03-07-128",
113
+ "gemini-embedding-exp-03-07-512",
114
+ ),
115
+ )
116
+ def test_embedding(model_id, monkeypatch):
117
+ monkeypatch.setenv("LLM_GEMINI_KEY", GEMINI_API_KEY)
118
+ model = llm.get_embedding_model(model_id)
119
+ response = model.embed("Some text goes here")
120
+ expected_length = 3072
121
+ if model_id.endswith("-128"):
122
+ expected_length = 128
123
+ elif model_id.endswith("-512"):
124
+ expected_length = 512
125
+ assert len(response) == expected_length
@@ -1,37 +0,0 @@
1
- import llm
2
- import nest_asyncio
3
- import os
4
- import pytest
5
-
6
- nest_asyncio.apply()
7
-
8
- GEMINI_API_KEY = os.environ.get("PYTEST_GEMINI_API_KEY", None) or "gm-..."
9
-
10
-
11
- @pytest.mark.vcr
12
- @pytest.mark.asyncio
13
- async def test_prompt():
14
- model = llm.get_model("gemini-1.5-flash-latest")
15
- response = model.prompt("Name for a pet pelican, just the name", key=GEMINI_API_KEY)
16
- assert str(response) == "Percy"
17
- assert response.response_json == [
18
- {
19
- "candidates": [
20
- {"content": {"parts": [{"text": "Percy"}], "role": "model"}}
21
- ],
22
- "modelVersion": "gemini-1.5-flash-latest",
23
- }
24
- ]
25
- assert response.token_details is None
26
- assert response.input_tokens == 10
27
- # Not sure why our pytest-recording setup doesn't report output tokens
28
- # https://github.com/simonw/llm-gemini/issues/25#issuecomment-2487464339
29
- assert response.output_tokens is None
30
-
31
- # And try it async too
32
- async_model = llm.get_async_model("gemini-1.5-flash-latest")
33
- response = await async_model.prompt(
34
- "Name for a pet pelican, just the name", key=GEMINI_API_KEY
35
- )
36
- text = await response.text()
37
- assert text == "Percy"
File without changes
File without changes