genkit-plugin-dev-local-vectorstore 0.3.0.dev1__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/PKG-INFO +1 -1
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/pyproject.toml +1 -1
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/__init__.py +6 -0
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/constant.py +0 -8
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/indexer.py +11 -9
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/local_vector_store_api.py +8 -5
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/plugin_api.py +22 -29
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1/src/genkit/plugins/dev_local_vector_store → genkit_plugin_dev_local_vectorstore-0.3.1/src/genkit/plugins/dev_local_vectorstore}/retriever.py +15 -14
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/.gitignore +0 -0
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/LICENSE +0 -0
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/README.md +0 -0
- {genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/tests/.gitkeep +0 -0
|
@@ -15,19 +15,11 @@
|
|
|
15
15
|
# SPDX-License-Identifier: Apache-2.0
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
from typing import Any
|
|
19
|
-
|
|
20
18
|
from pydantic import BaseModel
|
|
21
19
|
|
|
22
20
|
from genkit.types import DocumentData, Embedding
|
|
23
21
|
|
|
24
22
|
|
|
25
|
-
class Params(BaseModel):
|
|
26
|
-
index_name: str
|
|
27
|
-
embedder: str
|
|
28
|
-
embedder_options: dict[str, Any] | None = None
|
|
29
|
-
|
|
30
|
-
|
|
31
23
|
class DbValue(BaseModel):
|
|
32
24
|
doc: DocumentData
|
|
33
25
|
embedding: Embedding
|
|
@@ -20,19 +20,21 @@ import json
|
|
|
20
20
|
from hashlib import md5
|
|
21
21
|
|
|
22
22
|
from genkit.blocks.document import Document
|
|
23
|
-
from genkit.
|
|
24
|
-
from genkit.
|
|
23
|
+
from genkit.codec import dump_json
|
|
24
|
+
from genkit.types import DocumentData, Embedding
|
|
25
|
+
|
|
26
|
+
from .constant import DbValue
|
|
27
|
+
from .local_vector_store_api import (
|
|
25
28
|
LocalVectorStoreAPI,
|
|
26
29
|
)
|
|
27
|
-
from genkit.types import Docs, Embedding
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
class DevLocalVectorStoreIndexer(LocalVectorStoreAPI):
|
|
31
|
-
async def index(self, docs:
|
|
33
|
+
async def index(self, docs: list[DocumentData]) -> None:
|
|
32
34
|
data = self._load_filestore()
|
|
33
35
|
tasks = []
|
|
34
36
|
|
|
35
|
-
for doc_data in docs
|
|
37
|
+
for doc_data in docs:
|
|
36
38
|
tasks.append(
|
|
37
39
|
self.process_document(
|
|
38
40
|
document=Document.from_document_data(document_data=doc_data),
|
|
@@ -43,17 +45,17 @@ class DevLocalVectorStoreIndexer(LocalVectorStoreAPI):
|
|
|
43
45
|
await asyncio.gather(*tasks)
|
|
44
46
|
|
|
45
47
|
with open(self.index_file_name, 'w', encoding='utf-8') as f:
|
|
46
|
-
|
|
48
|
+
f.write(dump_json(self._serialize_data(data=data), indent=2))
|
|
47
49
|
|
|
48
50
|
async def process_document(self, document: Document, data: dict[str, DbValue]) -> None:
|
|
49
51
|
embeddings = await self.ai.embed(
|
|
50
|
-
embedder=self.
|
|
52
|
+
embedder=self.embedder,
|
|
51
53
|
documents=[document],
|
|
52
|
-
options=self.
|
|
54
|
+
options=self.embedder_options,
|
|
53
55
|
)
|
|
54
56
|
embedding_docs = document.get_embedding_documents(embeddings.embeddings)
|
|
55
57
|
|
|
56
|
-
for embedding, emb_doc in zip(embeddings, embedding_docs, strict=False):
|
|
58
|
+
for embedding, emb_doc in zip(embeddings.embeddings, embedding_docs, strict=False):
|
|
57
59
|
self._add_document(data=data, embedding=embedding, doc=emb_doc)
|
|
58
60
|
|
|
59
61
|
def _add_document(
|
|
@@ -23,19 +23,22 @@ from functools import cached_property
|
|
|
23
23
|
from typing import Any
|
|
24
24
|
|
|
25
25
|
from genkit.ai import Genkit
|
|
26
|
-
|
|
26
|
+
|
|
27
|
+
from .constant import DbValue
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class LocalVectorStoreAPI(ABC):
|
|
30
31
|
_LOCAL_FILESTORE_TEMPLATE = '__db_{index_name}.json'
|
|
31
32
|
|
|
32
|
-
def __init__(self, ai: Genkit,
|
|
33
|
+
def __init__(self, ai: Genkit, index_name: str, embedder: str, embedder_options: dict[str, Any] | None = None):
|
|
33
34
|
self.ai = ai
|
|
34
|
-
self.
|
|
35
|
+
self.index_name = index_name
|
|
36
|
+
self.embedder = embedder
|
|
37
|
+
self.embedder_options = embedder_options
|
|
35
38
|
|
|
36
39
|
@cached_property
|
|
37
40
|
def index_file_name(self):
|
|
38
|
-
return self._LOCAL_FILESTORE_TEMPLATE.format(index_name=self.
|
|
41
|
+
return self._LOCAL_FILESTORE_TEMPLATE.format(index_name=self.index_name)
|
|
39
42
|
|
|
40
43
|
def _load_filestore(self) -> dict[str, DbValue]:
|
|
41
44
|
data = {}
|
|
@@ -53,7 +56,7 @@ class LocalVectorStoreAPI(ABC):
|
|
|
53
56
|
def _serialize_data(data: dict[str, DbValue]) -> dict[str, Any]:
|
|
54
57
|
data = copy.deepcopy(data)
|
|
55
58
|
for k in data:
|
|
56
|
-
data[k] = DbValue.model_dump(data[k])
|
|
59
|
+
data[k] = DbValue.model_dump(data[k], exclude_none=True)
|
|
57
60
|
return data
|
|
58
61
|
|
|
59
62
|
@staticmethod
|
|
@@ -16,29 +16,19 @@
|
|
|
16
16
|
|
|
17
17
|
"""Local file-based vectorstore plugin that provides retriever and indexer for Genkit."""
|
|
18
18
|
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
19
21
|
from genkit.ai import GenkitRegistry, Plugin
|
|
20
22
|
from genkit.core.action import Action
|
|
21
|
-
from genkit.
|
|
22
|
-
|
|
23
|
+
from genkit.types import Docs
|
|
24
|
+
|
|
25
|
+
from .indexer import (
|
|
23
26
|
DevLocalVectorStoreIndexer,
|
|
24
27
|
)
|
|
25
|
-
from
|
|
28
|
+
from .retriever import (
|
|
26
29
|
DevLocalVectorStoreRetriever,
|
|
27
30
|
RetrieverOptionsSchema,
|
|
28
31
|
)
|
|
29
|
-
from genkit.types import Docs
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def dev_local_vectorstore_name(name: str) -> str:
|
|
33
|
-
"""Create a Dev Local Vector Store action name.
|
|
34
|
-
|
|
35
|
-
Args:
|
|
36
|
-
name: Base name for the action.
|
|
37
|
-
|
|
38
|
-
Returns:
|
|
39
|
-
The fully qualified Dev Local Vector Store action name.
|
|
40
|
-
"""
|
|
41
|
-
return f'devLocalVectorstore/{name}'
|
|
42
32
|
|
|
43
33
|
|
|
44
34
|
class DevLocalVectorStore(Plugin):
|
|
@@ -50,8 +40,10 @@ class DevLocalVectorStore(Plugin):
|
|
|
50
40
|
name = 'devLocalVectorstore'
|
|
51
41
|
_indexers: dict[str, DevLocalVectorStoreIndexer] = {}
|
|
52
42
|
|
|
53
|
-
def __init__(self,
|
|
54
|
-
self.
|
|
43
|
+
def __init__(self, name: str, embedder: str, embedder_options: dict[str, Any] | None = None):
|
|
44
|
+
self.index_name = name
|
|
45
|
+
self.embedder = embedder
|
|
46
|
+
self.embedder_options = embedder_options
|
|
55
47
|
|
|
56
48
|
def initialize(self, ai: GenkitRegistry) -> None:
|
|
57
49
|
"""Initialize the plugin by registering actions with the registry.
|
|
@@ -65,12 +57,10 @@ class DevLocalVectorStore(Plugin):
|
|
|
65
57
|
Returns:
|
|
66
58
|
None
|
|
67
59
|
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
self._configure_dev_local_indexer(ai=ai, params=params)
|
|
60
|
+
self._configure_dev_local_retriever(ai=ai)
|
|
61
|
+
self._configure_dev_local_indexer(ai=ai)
|
|
71
62
|
|
|
72
|
-
|
|
73
|
-
def _configure_dev_local_retriever(cls, ai: GenkitRegistry, params: Params) -> Action:
|
|
63
|
+
def _configure_dev_local_retriever(self, ai: GenkitRegistry) -> Action:
|
|
74
64
|
"""Registers Local Vector Store retriever for provided parameters.
|
|
75
65
|
|
|
76
66
|
Args:
|
|
@@ -82,17 +72,18 @@ class DevLocalVectorStore(Plugin):
|
|
|
82
72
|
"""
|
|
83
73
|
retriever = DevLocalVectorStoreRetriever(
|
|
84
74
|
ai=ai,
|
|
85
|
-
|
|
75
|
+
index_name=self.index_name,
|
|
76
|
+
embedder=self.embedder,
|
|
77
|
+
embedder_options=self.embedder_options,
|
|
86
78
|
)
|
|
87
79
|
|
|
88
80
|
return ai.define_retriever(
|
|
89
|
-
name=
|
|
81
|
+
name=self.index_name,
|
|
90
82
|
config_schema=RetrieverOptionsSchema,
|
|
91
83
|
fn=retriever.retrieve,
|
|
92
84
|
)
|
|
93
85
|
|
|
94
|
-
|
|
95
|
-
def _configure_dev_local_indexer(cls, ai: GenkitRegistry, params: Params) -> Action:
|
|
86
|
+
def _configure_dev_local_indexer(self, ai: GenkitRegistry) -> Action:
|
|
96
87
|
"""Registers Local Vector Store indexer for provided parameters.
|
|
97
88
|
|
|
98
89
|
Args:
|
|
@@ -104,10 +95,12 @@ class DevLocalVectorStore(Plugin):
|
|
|
104
95
|
"""
|
|
105
96
|
indexer = DevLocalVectorStoreIndexer(
|
|
106
97
|
ai=ai,
|
|
107
|
-
|
|
98
|
+
index_name=self.index_name,
|
|
99
|
+
embedder=self.embedder,
|
|
100
|
+
embedder_options=self.embedder_options,
|
|
108
101
|
)
|
|
109
102
|
|
|
110
|
-
|
|
103
|
+
DevLocalVectorStore._indexers[self.index_name] = indexer
|
|
111
104
|
|
|
112
105
|
@classmethod
|
|
113
106
|
async def index(cls, index_name: str, documents: Docs) -> None:
|
|
@@ -18,10 +18,9 @@
|
|
|
18
18
|
from pydantic import BaseModel, Field
|
|
19
19
|
|
|
20
20
|
from genkit.ai import ActionRunContext, Document
|
|
21
|
-
from genkit.
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
from genkit.types import Embedding, RetrieverRequest
|
|
21
|
+
from genkit.types import Embedding, RetrieverRequest, RetrieverResponse
|
|
22
|
+
|
|
23
|
+
from .local_vector_store_api import LocalVectorStoreAPI
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
class ScoredDocument(BaseModel):
|
|
@@ -34,29 +33,31 @@ class RetrieverOptionsSchema(BaseModel):
|
|
|
34
33
|
|
|
35
34
|
|
|
36
35
|
class DevLocalVectorStoreRetriever(LocalVectorStoreAPI):
|
|
37
|
-
async def retrieve(self, request: RetrieverRequest, _: ActionRunContext):
|
|
36
|
+
async def retrieve(self, request: RetrieverRequest, _: ActionRunContext) -> RetrieverResponse:
|
|
38
37
|
document = Document.from_document_data(document_data=request.query)
|
|
39
38
|
embeddings = await self.ai.embed(
|
|
40
|
-
embedder=self.
|
|
39
|
+
embedder=self.embedder,
|
|
41
40
|
documents=[document],
|
|
42
|
-
options=self.
|
|
41
|
+
options=self.embedder_options,
|
|
43
42
|
)
|
|
44
|
-
if self.
|
|
45
|
-
k = self.
|
|
43
|
+
if self.embedder_options:
|
|
44
|
+
k = self.embedder_options.get('limit') or 3
|
|
46
45
|
else:
|
|
47
46
|
k = 3
|
|
48
|
-
|
|
47
|
+
docs = self._get_closest_documents(
|
|
49
48
|
k=k,
|
|
50
49
|
query_embeddings=embeddings.embeddings[0],
|
|
51
50
|
)
|
|
52
51
|
|
|
52
|
+
return RetrieverResponse(documents=[d.document for d in docs])
|
|
53
|
+
|
|
53
54
|
def _get_closest_documents(self, k: int, query_embeddings: Embedding) -> list[ScoredDocument]:
|
|
54
55
|
db = self._load_filestore()
|
|
55
56
|
scored_documents = []
|
|
56
57
|
|
|
57
58
|
for val in db.values():
|
|
58
59
|
this_embedding = val.embedding.embedding
|
|
59
|
-
score = self.cosine_similarity(query_embeddings, this_embedding)
|
|
60
|
+
score = self.cosine_similarity(query_embeddings.embedding, this_embedding)
|
|
60
61
|
scored_documents.append(
|
|
61
62
|
ScoredDocument(
|
|
62
63
|
score=score,
|
|
@@ -68,9 +69,9 @@ class DevLocalVectorStoreRetriever(LocalVectorStoreAPI):
|
|
|
68
69
|
return scored_documents[:k]
|
|
69
70
|
|
|
70
71
|
@classmethod
|
|
71
|
-
def cosine_similarity(cls, a: list[
|
|
72
|
+
def cosine_similarity(cls, a: list[float], b: list[float]) -> float:
|
|
72
73
|
return cls.dot(a, b) / ((cls.dot(a, a) ** 0.5) * (cls.dot(b, b) ** 0.5))
|
|
73
74
|
|
|
74
75
|
@staticmethod
|
|
75
|
-
def dot(a: list[
|
|
76
|
-
return sum(
|
|
76
|
+
def dot(a: list[float], b: list[float]) -> float:
|
|
77
|
+
return sum(av * bv for av, bv in zip(a, b, strict=False))
|
|
File without changes
|
{genkit_plugin_dev_local_vectorstore-0.3.0.dev1 → genkit_plugin_dev_local_vectorstore-0.3.1}/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|