genkit-plugin-dev-local-vectorstore 0.3.0.dev1__tar.gz → 0.3.1__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (12)
  1. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/PKG-INFO +1 -1
  2. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/pyproject.toml +1 -1
  3. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/__init__.py +6 -0
  4. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/constant.py +0 -8
  5. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/indexer.py +11 -9
  6. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/local_vector_store_api.py +8 -5
  7. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/plugin_api.py +22 -29
  8. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/retriever.py +15 -14
  9. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/.gitignore +0 -0
  10. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/LICENSE +0 -0
  11. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/README.md +0 -0
  12. {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/tests/.gitkeep +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: genkit-plugin-dev-local-vectorstore
3
- Version: 0.3.0.dev1
3
+ Version: 0.3.1
4
4
  Summary: Genkit Local Vector Store Plugin
5
5
  Author: Google
6
6
  License: Apache-2.0
@@ -27,7 +27,7 @@ license = { text = "Apache-2.0" }
27
27
  name = "genkit-plugin-dev-local-vectorstore"
28
28
  readme = "README.md"
29
29
  requires-python = ">=3.10"
30
- version = "0.3.0.dev1"
30
+ version = "0.3.1"
31
31
 
32
32
  [build-system]
33
33
  build-backend = "hatchling.build"
@@ -13,3 +13,9 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  # SPDX-License-Identifier: Apache-2.0
16
+
17
+ from .plugin_api import DevLocalVectorStore
18
+
19
+ __all__ = [
20
+ DevLocalVectorStore.__name__,
21
+ ]
@@ -15,19 +15,11 @@
15
15
  # SPDX-License-Identifier: Apache-2.0
16
16
 
17
17
 
18
- from typing import Any
19
-
20
18
  from pydantic import BaseModel
21
19
 
22
20
  from genkit.types import DocumentData, Embedding
23
21
 
24
22
 
25
- class Params(BaseModel):
26
- index_name: str
27
- embedder: str
28
- embedder_options: dict[str, Any] | None = None
29
-
30
-
31
23
  class DbValue(BaseModel):
32
24
  doc: DocumentData
33
25
  embedding: Embedding
@@ -20,19 +20,21 @@ import json
20
20
  from hashlib import md5
21
21
 
22
22
  from genkit.blocks.document import Document
23
- from genkit.plugins.dev_local_vector_store.constant import DbValue
24
- from genkit.plugins.dev_local_vector_store.local_vector_store_api import (
23
+ from genkit.codec import dump_json
24
+ from genkit.types import DocumentData, Embedding
25
+
26
+ from .constant import DbValue
27
+ from .local_vector_store_api import (
25
28
  LocalVectorStoreAPI,
26
29
  )
27
- from genkit.types import Docs, Embedding
28
30
 
29
31
 
30
32
  class DevLocalVectorStoreIndexer(LocalVectorStoreAPI):
31
- async def index(self, docs: Docs) -> None:
33
+ async def index(self, docs: list[DocumentData]) -> None:
32
34
  data = self._load_filestore()
33
35
  tasks = []
34
36
 
35
- for doc_data in docs.root:
37
+ for doc_data in docs:
36
38
  tasks.append(
37
39
  self.process_document(
38
40
  document=Document.from_document_data(document_data=doc_data),
@@ -43,17 +45,17 @@ class DevLocalVectorStoreIndexer(LocalVectorStoreAPI):
43
45
  await asyncio.gather(*tasks)
44
46
 
45
47
  with open(self.index_file_name, 'w', encoding='utf-8') as f:
46
- json.dump(data, f, indent=2)
48
+ f.write(dump_json(self._serialize_data(data=data), indent=2))
47
49
 
48
50
  async def process_document(self, document: Document, data: dict[str, DbValue]) -> None:
49
51
  embeddings = await self.ai.embed(
50
- embedder=self.params.embedder,
52
+ embedder=self.embedder,
51
53
  documents=[document],
52
- options=self.params.embedder_options,
54
+ options=self.embedder_options,
53
55
  )
54
56
  embedding_docs = document.get_embedding_documents(embeddings.embeddings)
55
57
 
56
- for embedding, emb_doc in zip(embeddings, embedding_docs, strict=False):
58
+ for embedding, emb_doc in zip(embeddings.embeddings, embedding_docs, strict=False):
57
59
  self._add_document(data=data, embedding=embedding, doc=emb_doc)
58
60
 
59
61
  def _add_document(
@@ -23,19 +23,22 @@ from functools import cached_property
23
23
  from typing import Any
24
24
 
25
25
  from genkit.ai import Genkit
26
- from genkit.plugins.dev_local_vector_store.constant import DbValue, Params
26
+
27
+ from .constant import DbValue
27
28
 
28
29
 
29
30
  class LocalVectorStoreAPI(ABC):
30
31
  _LOCAL_FILESTORE_TEMPLATE = '__db_{index_name}.json'
31
32
 
32
- def __init__(self, ai: Genkit, params: Params):
33
+ def __init__(self, ai: Genkit, index_name: str, embedder: str, embedder_options: dict[str, Any] | None = None):
33
34
  self.ai = ai
34
- self.params = params
35
+ self.index_name = index_name
36
+ self.embedder = embedder
37
+ self.embedder_options = embedder_options
35
38
 
36
39
  @cached_property
37
40
  def index_file_name(self):
38
- return self._LOCAL_FILESTORE_TEMPLATE.format(index_name=self.params.index_name)
41
+ return self._LOCAL_FILESTORE_TEMPLATE.format(index_name=self.index_name)
39
42
 
40
43
  def _load_filestore(self) -> dict[str, DbValue]:
41
44
  data = {}
@@ -53,7 +56,7 @@ class LocalVectorStoreAPI(ABC):
53
56
  def _serialize_data(data: dict[str, DbValue]) -> dict[str, Any]:
54
57
  data = copy.deepcopy(data)
55
58
  for k in data:
56
- data[k] = DbValue.model_dump(data[k])
59
+ data[k] = DbValue.model_dump(data[k], exclude_none=True)
57
60
  return data
58
61
 
59
62
  @staticmethod
@@ -16,29 +16,19 @@
16
16
 
17
17
  """Local file-based vectorstore plugin that provides retriever and indexer for Genkit."""
18
18
 
19
+ from typing import Any
20
+
19
21
  from genkit.ai import GenkitRegistry, Plugin
20
22
  from genkit.core.action import Action
21
- from genkit.plugins.dev_local_vector_store.constant import Params
22
- from genkit.plugins.dev_local_vector_store.indexer import (
23
+ from genkit.types import Docs
24
+
25
+ from .indexer import (
23
26
  DevLocalVectorStoreIndexer,
24
27
  )
25
- from genkit.plugins.dev_local_vector_store.retriever import (
28
+ from .retriever import (
26
29
  DevLocalVectorStoreRetriever,
27
30
  RetrieverOptionsSchema,
28
31
  )
29
- from genkit.types import Docs
30
-
31
-
32
- def dev_local_vectorstore_name(name: str) -> str:
33
- """Create a Dev Local Vector Store action name.
34
-
35
- Args:
36
- name: Base name for the action.
37
-
38
- Returns:
39
- The fully qualified Dev Local Vector Store action name.
40
- """
41
- return f'devLocalVectorstore/{name}'
42
32
 
43
33
 
44
34
  class DevLocalVectorStore(Plugin):
@@ -50,8 +40,10 @@ class DevLocalVectorStore(Plugin):
50
40
  name = 'devLocalVectorstore'
51
41
  _indexers: dict[str, DevLocalVectorStoreIndexer] = {}
52
42
 
53
- def __init__(self, params: list[Params]):
54
- self.params = params
43
+ def __init__(self, name: str, embedder: str, embedder_options: dict[str, Any] | None = None):
44
+ self.index_name = name
45
+ self.embedder = embedder
46
+ self.embedder_options = embedder_options
55
47
 
56
48
  def initialize(self, ai: GenkitRegistry) -> None:
57
49
  """Initialize the plugin by registering actions with the registry.
@@ -65,12 +57,10 @@ class DevLocalVectorStore(Plugin):
65
57
  Returns:
66
58
  None
67
59
  """
68
- for params in self.params:
69
- self._configure_dev_local_retriever(ai=ai, params=params)
70
- self._configure_dev_local_indexer(ai=ai, params=params)
60
+ self._configure_dev_local_retriever(ai=ai)
61
+ self._configure_dev_local_indexer(ai=ai)
71
62
 
72
- @classmethod
73
- def _configure_dev_local_retriever(cls, ai: GenkitRegistry, params: Params) -> Action:
63
+ def _configure_dev_local_retriever(self, ai: GenkitRegistry) -> Action:
74
64
  """Registers Local Vector Store retriever for provided parameters.
75
65
 
76
66
  Args:
@@ -82,17 +72,18 @@ class DevLocalVectorStore(Plugin):
82
72
  """
83
73
  retriever = DevLocalVectorStoreRetriever(
84
74
  ai=ai,
85
- params=params,
75
+ index_name=self.index_name,
76
+ embedder=self.embedder,
77
+ embedder_options=self.embedder_options,
86
78
  )
87
79
 
88
80
  return ai.define_retriever(
89
- name=dev_local_vectorstore_name(params.index_name),
81
+ name=self.index_name,
90
82
  config_schema=RetrieverOptionsSchema,
91
83
  fn=retriever.retrieve,
92
84
  )
93
85
 
94
- @classmethod
95
- def _configure_dev_local_indexer(cls, ai: GenkitRegistry, params: Params) -> Action:
86
+ def _configure_dev_local_indexer(self, ai: GenkitRegistry) -> Action:
96
87
  """Registers Local Vector Store indexer for provided parameters.
97
88
 
98
89
  Args:
@@ -104,10 +95,12 @@ class DevLocalVectorStore(Plugin):
104
95
  """
105
96
  indexer = DevLocalVectorStoreIndexer(
106
97
  ai=ai,
107
- params=params,
98
+ index_name=self.index_name,
99
+ embedder=self.embedder,
100
+ embedder_options=self.embedder_options,
108
101
  )
109
102
 
110
- cls._indexers[params.index_name] = indexer
103
+ DevLocalVectorStore._indexers[self.index_name] = indexer
111
104
 
112
105
  @classmethod
113
106
  async def index(cls, index_name: str, documents: Docs) -> None:
@@ -18,10 +18,9 @@
18
18
  from pydantic import BaseModel, Field
19
19
 
20
20
  from genkit.ai import ActionRunContext, Document
21
- from genkit.plugins.dev_local_vector_store.local_vector_store_api import (
22
- LocalVectorStoreAPI,
23
- )
24
- from genkit.types import Embedding, RetrieverRequest
21
+ from genkit.types import Embedding, RetrieverRequest, RetrieverResponse
22
+
23
+ from .local_vector_store_api import LocalVectorStoreAPI
25
24
 
26
25
 
27
26
  class ScoredDocument(BaseModel):
@@ -34,29 +33,31 @@ class RetrieverOptionsSchema(BaseModel):
34
33
 
35
34
 
36
35
  class DevLocalVectorStoreRetriever(LocalVectorStoreAPI):
37
- async def retrieve(self, request: RetrieverRequest, _: ActionRunContext):
36
+ async def retrieve(self, request: RetrieverRequest, _: ActionRunContext) -> RetrieverResponse:
38
37
  document = Document.from_document_data(document_data=request.query)
39
38
  embeddings = await self.ai.embed(
40
- embedder=self.params.embedder,
39
+ embedder=self.embedder,
41
40
  documents=[document],
42
- options=self.params.embedder_options,
41
+ options=self.embedder_options,
43
42
  )
44
- if self.params.embedder_options:
45
- k = self.params.embedder_options.get('limit') or 3
43
+ if self.embedder_options:
44
+ k = self.embedder_options.get('limit') or 3
46
45
  else:
47
46
  k = 3
48
- return self._get_closest_documents(
47
+ docs = self._get_closest_documents(
49
48
  k=k,
50
49
  query_embeddings=embeddings.embeddings[0],
51
50
  )
52
51
 
52
+ return RetrieverResponse(documents=[d.document for d in docs])
53
+
53
54
  def _get_closest_documents(self, k: int, query_embeddings: Embedding) -> list[ScoredDocument]:
54
55
  db = self._load_filestore()
55
56
  scored_documents = []
56
57
 
57
58
  for val in db.values():
58
59
  this_embedding = val.embedding.embedding
59
- score = self.cosine_similarity(query_embeddings, this_embedding)
60
+ score = self.cosine_similarity(query_embeddings.embedding, this_embedding)
60
61
  scored_documents.append(
61
62
  ScoredDocument(
62
63
  score=score,
@@ -68,9 +69,9 @@ class DevLocalVectorStoreRetriever(LocalVectorStoreAPI):
68
69
  return scored_documents[:k]
69
70
 
70
71
  @classmethod
71
- def cosine_similarity(cls, a: list[int], b: list[int]) -> float:
72
+ def cosine_similarity(cls, a: list[float], b: list[float]) -> float:
72
73
  return cls.dot(a, b) / ((cls.dot(a, a) ** 0.5) * (cls.dot(b, b) ** 0.5))
73
74
 
74
75
  @staticmethod
75
- def dot(a: list[int], b: list[int]) -> float:
76
- return sum(a * b for a, b in zip(a, b, strict=False))
76
+ def dot(a: list[float], b: list[float]) -> float:
77
+ return sum(av * bv for av, bv in zip(a, b, strict=False))