llm-gemini 0.13.1__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: llm-gemini
3
- Version: 0.13.1
3
+ Version: 0.14.1
4
4
  Summary: LLM plugin to access Google's Gemini family of models
5
5
  Author: Simon Willison
6
6
  License: Apache-2.0
@@ -17,6 +17,7 @@ Requires-Dist: ijson
17
17
  Provides-Extra: test
18
18
  Requires-Dist: pytest; extra == "test"
19
19
  Requires-Dist: pytest-recording; extra == "test"
20
+ Requires-Dist: pytest-asyncio; extra == "test"
20
21
  Requires-Dist: nest-asyncio; extra == "test"
21
22
 
22
23
  # llm-gemini
@@ -145,7 +146,7 @@ llm chat -m gemini-1.5-pro-latest
145
146
 
146
147
  ## Embeddings
147
148
 
148
- The plugin also adds support for the `text-embedding-004` embedding model.
149
+ The plugin also adds support for the `gemini-embedding-exp-03-07` and `text-embedding-004` embedding models.
149
150
 
150
151
  Run that against a single string like this:
151
152
  ```bash
@@ -153,10 +154,20 @@ llm embed -m text-embedding-004 -c 'hello world'
153
154
  ```
154
155
  This returns a JSON array of 768 numbers.
155
156
 
157
+ The `gemini-embedding-exp-03-07` model is larger, returning 3072 numbers. You can also use variants of it that are truncated down to smaller sizes:
158
+
159
+ - `gemini-embedding-exp-03-07` - 3072 numbers
160
+ - `gemini-embedding-exp-03-07-2048` - 2048 numbers
161
+ - `gemini-embedding-exp-03-07-1024` - 1024 numbers
162
+ - `gemini-embedding-exp-03-07-512` - 512 numbers
163
+ - `gemini-embedding-exp-03-07-256` - 256 numbers
164
+ - `gemini-embedding-exp-03-07-128` - 128 numbers
165
+
156
166
  This command will embed every `README.md` file in child directories of the current directory and store the results in a SQLite database called `embed.db` in a collection called `readmes`:
157
167
 
158
168
  ```bash
159
- llm embed-multi readmes --files . '*/README.md' -d embed.db -m text-embedding-004
169
+ llm embed-multi readmes -d embed.db -m gemini-embedding-exp-03-07-128 \
170
+ --files . '*/README.md'
160
171
  ```
161
172
  You can then run similarity searches against that collection like this:
162
173
  ```bash
@@ -0,0 +1,7 @@
1
+ llm_gemini.py,sha256=eddn8U4kN2g9DOrotcKbyzNxGonR7XLiK7i_m7dgZ84,15296
2
+ llm_gemini-0.14.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
3
+ llm_gemini-0.14.1.dist-info/METADATA,sha256=jH04kG_VQYRVqjyVF2ZWHAWBqyxf-giSyX1NRO8ntss,7558
4
+ llm_gemini-0.14.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
5
+ llm_gemini-0.14.1.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
6
+ llm_gemini-0.14.1.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
7
+ llm_gemini-0.14.1.dist-info/RECORD,,
llm_gemini.py CHANGED
@@ -88,18 +88,24 @@ def resolve_type(attachment):
88
88
  return mime_type
89
89
 
90
90
 
91
- def cleanup_schema(schema):
91
+ def cleanup_schema(schema, in_properties=False):
92
92
  "Gemini supports only a subset of JSON schema"
93
93
  keys_to_remove = ("$schema", "additionalProperties", "title")
94
- # Recursively remove them
94
+
95
95
  if isinstance(schema, dict):
96
- for key in keys_to_remove:
97
- schema.pop(key, None)
98
- for value in schema.values():
99
- cleanup_schema(value)
96
+ # Only remove keys if we're not inside a 'properties' block.
97
+ if not in_properties:
98
+ for key in keys_to_remove:
99
+ schema.pop(key, None)
100
+ for key, value in list(schema.items()):
101
+ # If the key is 'properties', set the flag for its value.
102
+ if key == "properties" and isinstance(value, dict):
103
+ cleanup_schema(value, in_properties=True)
104
+ else:
105
+ cleanup_schema(value, in_properties=in_properties)
100
106
  elif isinstance(schema, list):
101
- for value in schema:
102
- cleanup_schema(value)
107
+ for item in schema:
108
+ cleanup_schema(item, in_properties=in_properties)
103
109
  return schema
104
110
 
105
111
 
@@ -378,9 +384,19 @@ class AsyncGeminiPro(_SharedGemini, llm.AsyncKeyModel):
378
384
 
379
385
  @llm.hookimpl
380
386
  def register_embedding_models(register):
387
+ register(GeminiEmbeddingModel("text-embedding-004", "text-embedding-004"))
388
+ # gemini-embedding-exp-03-07 in different truncation sizes
381
389
  register(
382
- GeminiEmbeddingModel("text-embedding-004", "text-embedding-004"),
390
+ GeminiEmbeddingModel(
391
+ "gemini-embedding-exp-03-07", "gemini-embedding-exp-03-07"
392
+ ),
383
393
  )
394
+ for i in (128, 256, 512, 1024, 2048):
395
+ register(
396
+ GeminiEmbeddingModel(
397
+ f"gemini-embedding-exp-03-07-{i}", f"gemini-embedding-exp-03-07", i
398
+ ),
399
+ )
384
400
 
385
401
 
386
402
  class GeminiEmbeddingModel(llm.EmbeddingModel):
@@ -388,9 +404,10 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
388
404
  key_env_var = "LLM_GEMINI_KEY"
389
405
  batch_size = 20
390
406
 
391
- def __init__(self, model_id, gemini_model_id):
407
+ def __init__(self, model_id, gemini_model_id, truncate=None):
392
408
  self.model_id = model_id
393
409
  self.gemini_model_id = gemini_model_id
410
+ self.truncate = truncate
394
411
 
395
412
  def embed_batch(self, items):
396
413
  headers = {
@@ -416,4 +433,7 @@ class GeminiEmbeddingModel(llm.EmbeddingModel):
416
433
  )
417
434
 
418
435
  response.raise_for_status()
419
- return [item["values"] for item in response.json()["embeddings"]]
436
+ values = [item["values"] for item in response.json()["embeddings"]]
437
+ if self.truncate:
438
+ values = [value[: self.truncate] for value in values]
439
+ return values
@@ -1,7 +0,0 @@
1
- llm_gemini.py,sha256=JWaRtT8vJzdxrRbCAgoNrKqes2df_T4gqqeDmiw0oRI,14370
2
- llm_gemini-0.13.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
3
- llm_gemini-0.13.1.dist-info/METADATA,sha256=S87OFYnm9K0EVvJIRrJoZT-J0gxhPA4BrnIGkVD8q8c,7016
4
- llm_gemini-0.13.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
5
- llm_gemini-0.13.1.dist-info/entry_points.txt,sha256=n544bpgUPIBc5l_cnwsTxPc3gMGJHPtAyqBNp-CkMWk,26
6
- llm_gemini-0.13.1.dist-info/top_level.txt,sha256=WUQmG6_2QKbT_8W4HH93qyKl_0SUteL4Ra6_PhyNGKU,11
7
- llm_gemini-0.13.1.dist-info/RECORD,,