gllm-datastore-binary 0.0.15__cp312-cp312-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-datastore-binary might be problematic. Click here for more details.

Files changed (38) hide show
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache_data_store/__init__.pyi +5 -0
  3. gllm_datastore/cache_data_store/cache_data_store.pyi +146 -0
  4. gllm_datastore/cache_data_store/cache_data_store_utils.pyi +1 -0
  5. gllm_datastore/cache_data_store/file_system_cache_data_store.pyi +62 -0
  6. gllm_datastore/cache_data_store/in_memory_cache_data_store.pyi +43 -0
  7. gllm_datastore/cache_data_store/redis_cache_data_store.pyi +48 -0
  8. gllm_datastore/cache_data_store/utils.pyi +36 -0
  9. gllm_datastore/constants.pyi +2 -0
  10. gllm_datastore/graph_data_store/__init__.pyi +0 -0
  11. gllm_datastore/graph_data_store/graph_data_store.pyi +80 -0
  12. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +28 -0
  13. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +18 -0
  14. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +26 -0
  15. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +112 -0
  16. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +81 -0
  17. gllm_datastore/sql_data_store/__init__.pyi +5 -0
  18. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  19. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +30 -0
  20. gllm_datastore/sql_data_store/constants.pyi +6 -0
  21. gllm_datastore/sql_data_store/sql_data_store.pyi +87 -0
  22. gllm_datastore/sql_data_store/sqlalchemy_data_store.pyi +9 -0
  23. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +183 -0
  24. gllm_datastore/sql_data_store/types.pyi +30 -0
  25. gllm_datastore/utils/__init__.pyi +4 -0
  26. gllm_datastore/utils/converter.pyi +21 -0
  27. gllm_datastore/utils/ttl.pyi +25 -0
  28. gllm_datastore/vector_data_store/__init__.pyi +4 -0
  29. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +119 -0
  30. gllm_datastore/vector_data_store/elasticsearch_data_store.pyi +9 -0
  31. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +140 -0
  32. gllm_datastore/vector_data_store/vector_data_store.pyi +73 -0
  33. gllm_datastore.build/.gitignore +1 -0
  34. gllm_datastore.cpython-312-darwin.so +0 -0
  35. gllm_datastore.pyi +63 -0
  36. gllm_datastore_binary-0.0.15.dist-info/METADATA +98 -0
  37. gllm_datastore_binary-0.0.15.dist-info/RECORD +38 -0
  38. gllm_datastore_binary-0.0.15.dist-info/WHEEL +4 -0
@@ -0,0 +1,81 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_datastore.graph_data_store.graph_data_store import BaseGraphDataStore as BaseGraphDataStore
3
+ from typing import Any
4
+
5
class Neo4jGraphDataStore(BaseGraphDataStore):
    """Implementation of BaseGraphDataStore for Neo4j.

    Attributes:
        driver (Driver): The Neo4j driver used to communicate with the database.
    """
    # Neo4j Driver instance; typed as Incomplete because this stub does not
    # import the neo4j package.
    driver: Incomplete
    def __init__(self, uri: str, user: str, password: str) -> None:
        """Initialize Neo4jGraphDataStore.

        Args:
            uri (str): The URI of the graph store.
            user (str): The user used to authenticate with the graph store.
            password (str): The password used to authenticate with the graph store.
        """
    def upsert_node(self, label: str, identifier_key: str, identifier_value: str, properties: dict[str, Any] | None = None) -> Any:
        """Upsert a node in the graph.

        Creates the node if it does not exist, otherwise updates it.

        Args:
            label (str): The label of the node.
            identifier_key (str): The key of the identifier.
            identifier_value (str): The value of the identifier.
            properties (dict[str, Any] | None, optional): The properties of the node. Defaults to None.

        Returns:
            Any: The result of the operation.
        """
    def upsert_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str, properties: dict[str, Any] | None = None) -> Any:
        """Upsert a relationship between two nodes in the graph.

        Creates the relationship if it does not exist, otherwise updates it.

        Args:
            node_source_key (str): The identifier key of the source node.
            node_source_value (str): The identifier value of the source node.
            relation (str): The type of the relationship.
            node_target_key (str): The identifier key of the target node.
            node_target_value (str): The identifier value of the target node.
            properties (dict[str, Any] | None, optional): The properties of the relationship. Defaults to None.

        Returns:
            Any: The result of the operation.
        """
    def delete_node(self, label: str, identifier_key: str, identifier_value: str) -> Any:
        """Delete a node from the graph.

        Args:
            label (str): The label of the node.
            identifier_key (str): The key of the identifier.
            identifier_value (str): The value of the identifier.

        Returns:
            Any: The result of the operation.
        """
    def delete_relationship(self, node_source_key: str, node_source_value: str, relation: str, node_target_key: str, node_target_value: str) -> Any:
        """Delete a relationship between two nodes in the graph.

        Args:
            node_source_key (str): The identifier key of the source node.
            node_source_value (str): The identifier value of the source node.
            relation (str): The type of the relationship.
            node_target_key (str): The identifier key of the target node.
            node_target_value (str): The identifier value of the target node.

        Returns:
            Any: The result of the operation.
        """
    def query(self, query: str, parameters: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Query the graph store.

        Args:
            query (str): The query to be executed.
            parameters (dict[str, Any] | None, optional): The parameters of the query. Defaults to None.

        Returns:
            list[dict[str, Any]]: The result of the query, one dict per record.
        """
    def close(self) -> None:
        """Close the graph data store and release the underlying driver connection."""
@@ -0,0 +1,5 @@
1
+ from gllm_datastore.sql_data_store.sqlalchemy_data_store import SQLAlchemyDataStore as SQLAlchemyDataStore
2
+ from gllm_datastore.sql_data_store.sqlalchemy_sql_data_store import SQLAlchemySQLDataStore as SQLAlchemySQLDataStore
3
+ from gllm_datastore.sql_data_store.types import QueryFilter as QueryFilter, QueryOptions as QueryOptions
4
+
5
+ __all__ = ['SQLAlchemyDataStore', 'SQLAlchemySQLDataStore', 'QueryFilter', 'QueryOptions']
File without changes
@@ -0,0 +1,30 @@
1
+ from _typeshed import Incomplete
2
+ from sqlalchemy.engine import Engine as Engine
3
+
4
class SQLAlchemyAdapter:
    """Initializes a database engine and session using SQLAlchemy.

    Provides a scoped session and a base query property for interacting with the database.

    Attributes:
        engine (Engine): The SQLAlchemy engine object.
        db (Session): The SQLAlchemy session object.
        base (DeclarativeMeta): The SQLAlchemy declarative base object.
    """
    # Class-level handles shared by the data-store implementations; typed as
    # Incomplete because the concrete SQLAlchemy types are not imported here.
    engine: Incomplete
    db: Incomplete
    base: Incomplete
    @classmethod
    def initialize(cls, engine_or_url: Engine | str, pool_size: int = 50, max_overflow: int = 0, autocommit: bool = False, autoflush: bool = True):
        """Creates a new database engine and session.

        Must provide either an engine or a database URL.

        NOTE(review): no return annotation in the stub — presumably returns None
        and populates the class attributes; confirm against the implementation.

        Args:
            engine_or_url (Engine | str): Sqlalchemy engine object or database URL.
            pool_size (int, optional): The size of the database connections to be maintained. Defaults to 50.
            max_overflow (int, optional): The maximum overflow size of the pool. Defaults to 0.
            autocommit (bool, optional): If True, all changes to the database are committed immediately.
                Defaults to False.
            autoflush (bool, optional): If True, all changes to the database are flushed immediately. Defaults to True.
        """
@@ -0,0 +1,6 @@
1
# Error-message templates used by the SQL data-store implementations when
# wrapping lower-level database exceptions. Values are defined in the
# implementation module; only names and types are declared in this stub.
QUERY_ERROR_MSG: str
CREATE_ERROR_MSG: str
READ_ERROR_MSG: str
DELETE_ERROR_MSG: str
UPDATE_ERROR_MSG: str
UNEXPECTED_ERROR_MSG: str
@@ -0,0 +1,87 @@
1
+ import abc
2
+ import pandas as pd
3
+ from abc import ABC, abstractmethod
4
+ from gllm_datastore.sql_data_store.types import QueryFilter as QueryFilter, QueryOptions as QueryOptions
5
+ from typing import Any
6
+
7
class BaseSQLDataStore(ABC, metaclass=abc.ABCMeta):
    """Abstract base class for SQL data stores.

    This class defines the interface for all SQL data store implementations.
    Subclasses must implement the abstract methods.

    NOTE(review): `query` is declared async while the CRUD methods are sync —
    confirm this asymmetry is intentional in the implementations.
    """
    @abstractmethod
    async def query(self, query: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
        """Executes raw SQL query.

        This method must be implemented by subclasses to execute a raw SQL query.
        Use this method for raw queries, complex queries, or for executing a query generated by LLM.

        Args:
            query (str): The query string to execute.
            params (dict[str, Any] | None, optional): Parameters to bind to the query. Defaults to None.

        Returns:
            pd.DataFrame: A DataFrame of query results.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def create(self, **kwargs: Any) -> None:
        """Create data using available information in kwargs.

        This method must be implemented by subclasses to create data in the data store.

        Args:
            **kwargs (Any): A dictionary of information to create data.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def read(self, filters: QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> pd.DataFrame:
        """Read data from the data store using optional filters and options.

        This method must be implemented by subclasses to read data from the data store.
        Use this method for simple queries with filters and options.

        Args:
            filters (QueryFilter | None, optional): Filters to apply to the query. Defaults to None.
            options (QueryOptions | None, optional): Options to apply to the query. Defaults to None.
            **kwargs (Any): A dictionary of additional information to support the read method.

        Returns:
            pd.DataFrame: A DataFrame of query results.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def update(self, update_values: dict[str, Any], filters: QueryFilter | None = None, **kwargs: Any) -> None:
        """Update data in the data store using optional filters and update values.

        This method must be implemented by subclasses to update data in the data store.

        Args:
            update_values (dict[str, Any]): Values to update in the data store.
            filters (QueryFilter | None, optional): Filters to apply to the query. Defaults to None.
            **kwargs (Any): A dictionary of additional information to support the update method.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
    @abstractmethod
    def delete(self, filters: QueryFilter | None = None, allow_delete_all: bool = False, **kwargs: Any) -> None:
        """Delete data in the data store using filters.

        This method must be implemented by subclasses to delete data in the data store.

        Args:
            filters (QueryFilter | None, optional): Filters to apply to the query. Defaults to None.
            allow_delete_all (bool, optional): A flag to allow deleting all data. Defaults to False.
            **kwargs (Any): A dictionary of additional information to support the delete method.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
@@ -0,0 +1,9 @@
1
+ from gllm_datastore.sql_data_store.sqlalchemy_sql_data_store import SQLAlchemySQLDataStore as SQLAlchemySQLDataStore
2
+
3
class SQLAlchemyDataStore(SQLAlchemySQLDataStore):
    """A data store for interacting with SQLAlchemy.

    This class is a subclass of SQLAlchemySQLDataStore and adds no behavior of
    its own — it exists only as a backward-compatible alias.
    It is deprecated and will be removed in a future release.
    Use SQLAlchemySQLDataStore instead.
    """
@@ -0,0 +1,183 @@
1
+ import pandas as pd
2
+ from _typeshed import Incomplete
3
+ from gllm_datastore.sql_data_store.adapter.sqlalchemy_adapter import SQLAlchemyAdapter as SQLAlchemyAdapter
4
+ from gllm_datastore.sql_data_store.constants import CREATE_ERROR_MSG as CREATE_ERROR_MSG, DELETE_ERROR_MSG as DELETE_ERROR_MSG, QUERY_ERROR_MSG as QUERY_ERROR_MSG, READ_ERROR_MSG as READ_ERROR_MSG, UNEXPECTED_ERROR_MSG as UNEXPECTED_ERROR_MSG, UPDATE_ERROR_MSG as UPDATE_ERROR_MSG
5
+ from gllm_datastore.sql_data_store.sql_data_store import BaseSQLDataStore as BaseSQLDataStore
6
+ from gllm_datastore.sql_data_store.types import QueryFilter as QueryFilter, QueryOptions as QueryOptions
7
+ from sqlalchemy import Engine as Engine
8
+ from sqlalchemy.orm import DeclarativeBase as DeclarativeBase
9
+ from typing import Any
10
+
11
class SQLAlchemySQLDataStore(BaseSQLDataStore):
    """Data store for interacting with SQLAlchemy.

    This class provides methods to interact with a SQL database using SQLAlchemy.

    Attributes:
        db (Engine): The SQLAlchemy engine object.
        logger (Logger): The logger object.
    """
    db: Incomplete
    logger: Incomplete
    def __init__(self, engine_or_url: Engine | str, pool_size: int = 50, max_overflow: int = 50, autoflush: bool = True) -> None:
        """Initialize SQLAlchemySQLDataStore class.

        Args:
            engine_or_url (Engine | str): SQLAlchemy engine object or database URL.
            pool_size (int, optional): The size of the database connections to be maintained. Defaults to 50.
            max_overflow (int, optional): The maximum overflow size of the pool. Defaults to 50.
            autoflush (bool, optional): If True, all changes to the database are flushed immediately. Defaults to True.

        Raises:
            ValueError: If the database adapter is not initialized.
        """
    async def query(self, query: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
        '''Executes raw SQL queries.

        Preferred for complex queries, when working with legacy schemas without ORM models,
        or when using an LLM to generate your SQL queries.
        Use this method when you need advanced SQL operations not supported by read().

        Args:
            query (str): The query string with optional :param style parameters.
            params (dict[str, Any] | None, optional): Parameters to bind to the query. Defaults to None.

        Returns:
            pd.DataFrame: The result of the query.

        Note:
            Using string parameters directly in queries is unsafe and vulnerable to SQL injection.
            Therefore, please avoid doing as follows as they\'re unsafe:
            ```
            name = "O\'Connor"
            query = f"SELECT * FROM users WHERE last_name = \'{name}\'"
            ```
            or
            ```
            query = "SELECT * FROM users WHERE last_name = \'" + name + "\'"
            ```
            Instead, please use parameterized queries with :param style notation as follows:
            ```
            query = "SELECT * FROM users WHERE last_name = :last_name"
            params = {"last_name": "O\'Connor"}
            ```

        Raises:
            RuntimeError: If the query fails.
            RuntimeError: If an unexpected error occurs.
        '''
    def create(self, model: DeclarativeBase | list[DeclarativeBase]) -> None:
        '''Inserts data into the database using SQLAlchemy ORM.

        This method provides a structured way to insert data using ORM models.

        Args:
            model (DeclarativeBase | list[DeclarativeBase]): An instance or list of instances of SQLAlchemy
                model to be inserted.

        Example:
            To insert a row into a table:
            ```
            data_store.create(MyModel(column1="value1", column2="value2"))
            ```

            To insert multiple rows:
            ```
            data_store.create([
                MyModel(column1="value1", column2="value2"),
                MyModel(column1="value3", column2="value4")
            ])
            ```

        Raises:
            RuntimeError: If the insertion fails.
            RuntimeError: If an unexpected error occurs.
        '''
    def read(self, model_class: type[DeclarativeBase], filters: QueryFilter | None = None, options: QueryOptions | None = None) -> pd.DataFrame:
        '''Reads data from the database using SQLAlchemy ORM with a structured, type-safe interface.

        This method provides a high-level interface for querying data using ORM models. It supports
        filtering, column selection, ordering, and limiting results through a type-safe interface.

        Args:
            model_class (Type[DeclarativeBase]): The SQLAlchemy model class to query.
            filters (QueryFilter | None, optional): Optional query filters containing column-value pairs
                to filter the results. Defaults to None.
            options (QueryOptions | None, optional): Optional query configuration including:
                - columns: Specific columns to select
                - order_by: Column to sort by
                - order_desc: Sort order (ascending/descending)
                - limit: Maximum number of results
                Defaults to None.

        Returns:
            pd.DataFrame: A DataFrame containing the query results.

        Example:
            ```python
            data_store.read(
                Message,
                filters=QueryFilter(conditions={"conversation_id": "123"}),
                options=QueryOptions(
                    columns=["role", "content"],
                    order_by="created_at",
                    order_desc=True,
                    limit=10
                )
            )
            ```

        Raises:
            RuntimeError: If the read operation fails.
            RuntimeError: If an unexpected error occurs.
        '''
    def update(self, model_class: type[DeclarativeBase], update_values: dict[str, Any], filters: QueryFilter | None = None, **kwargs: Any) -> None:
        '''Updates data in the database using SQLAlchemy ORM.

        This method provides a structured way to update data using ORM models.

        Args:
            model_class (Type[DeclarativeBase]): The SQLAlchemy model class to update.
            update_values (dict[str, Any]): Values to update.
            filters (QueryFilter | None, optional): Filters to apply to the query. Defaults to None.
            **kwargs (Any): Additional keyword arguments to support the update method.

        Example:
            To update a row in a table:
            ```
            data_store.update(
                MyModel,
                update_values={"column1": "new_value"},
                filters=QueryFilter(conditions={"id": 1}),
            )
            ```

        Raises:
            RuntimeError: If the update operation fails.
            RuntimeError: If an unexpected error occurs.
        '''
    def delete(self, model_class: type[DeclarativeBase], filters: QueryFilter | None = None, allow_delete_all: bool = False, **kwargs: Any) -> None:
        '''Deletes data from the database using SQLAlchemy ORM.

        This method provides a structured way to delete data using ORM models.

        Args:
            model_class (Type[DeclarativeBase]): The SQLAlchemy model class to delete.
            filters (QueryFilter | None, optional): Filters to apply to the query. Defaults to None.
            allow_delete_all (bool, optional): If True, allows deletion of all records. Defaults to False.
            **kwargs (Any): Additional keyword arguments to support the delete method.

        Example:
            To delete a row from a table:
            ```
            data_store.delete(
                MyModel,
                filters=QueryFilter(conditions={"id": 1})
            )
            ```

        Raises:
            ValueError: If no filters are provided (to prevent accidental deletion of all records).
            RuntimeError: If the delete operation fails.
            RuntimeError: If an unexpected error occurs.
        '''
@@ -0,0 +1,30 @@
1
+ from pydantic import BaseModel
2
+ from typing import Any, Sequence
3
+
4
class QueryFilter(BaseModel):
    '''Model for query filters.

    Attributes:
        conditions (dict[str, Any]): Column-value pairs; rows must match every pair.

    Example:
        QueryFilter(conditions={"column1": "value1", "column2": "value2"})
    '''
    conditions: dict[str, Any]
14
+
15
class QueryOptions(BaseModel):
    '''Model for query options.

    Attributes:
        columns (Sequence[str] | None): The columns to include in the query result. Defaults to None.
        order_by (str | None): The column to order the query result by. Defaults to None.
        order_desc (bool): Whether to order the query result in descending order. Defaults to False.
        limit (int | None): The maximum number of rows to return. Defaults to None.

    Example:
        QueryOptions(columns=["column1", "column2"], order_by="column1", order_desc=True, limit=10)
    '''
    # Defaults added to match the documented behavior: without them the stub
    # declared every field required, so type checkers rejected valid calls
    # such as QueryOptions(limit=10).
    columns: Sequence[str] | None = None
    order_by: str | None = None
    order_desc: bool = False
    limit: int | None = None
@@ -0,0 +1,4 @@
1
+ from gllm_datastore.utils.converter import from_langchain as from_langchain
2
+ from gllm_datastore.utils.ttl import convert_ttl_to_seconds as convert_ttl_to_seconds
3
+
4
+ __all__ = ['from_langchain', 'convert_ttl_to_seconds']
@@ -0,0 +1,21 @@
1
+ from gllm_core.schema import Chunk
2
+ from langchain_core.documents import Document
3
+
4
def from_langchain(doc: Document) -> Chunk:
    """Create a standardized Chunk from a LangChain Document.

    Args:
        doc (Document): The LangChain document to convert.

    Returns:
        Chunk: The standardized Chunk object.
    """
13
def to_langchain(chunk: Chunk) -> Document:
    """Create a LangChain Document from a standardized Chunk.

    Inverse of `from_langchain`.

    Args:
        chunk (Chunk): The standardized Chunk to convert.

    Returns:
        Document: The LangChain Document object.
    """
@@ -0,0 +1,25 @@
1
+ from _typeshed import Incomplete
2
+
3
# Maps a time-unit suffix (e.g. "m", "h") to its length in seconds; defined in
# the implementation module.
TIME_UNIT_TO_SECOND_MAPPING: Incomplete

def convert_ttl_to_seconds(ttl: str | int) -> int:
    '''Convert TTL (time-to-live) string with time units to seconds.

    Supported units: s (seconds), m (minutes), h (hours), d (days), w (weeks), y (years).

    Examples:
        "2m" -> 120 (2 minutes in seconds)
        "1h" -> 3600 (1 hour in seconds)
        "1y" -> 31536000 (1 year in seconds)
        300 -> 300 (numeric input returned as is)

    Args:
        ttl (str | int): Time to live value with optional unit suffix (e.g., "2m", "1h", "1y")
            or numeric value in seconds.

    Returns:
        int: TTL converted to seconds.

    Raises:
        ValueError: If the input format is invalid.
    '''
@@ -0,0 +1,4 @@
1
+ from gllm_datastore.vector_data_store.elasticsearch_data_store import ElasticsearchDataStore as ElasticsearchDataStore
2
+ from gllm_datastore.vector_data_store.elasticsearch_vector_data_store import ElasticsearchVectorDataStore as ElasticsearchVectorDataStore
3
+
4
+ __all__ = ['ElasticsearchDataStore', 'ElasticsearchVectorDataStore']
@@ -0,0 +1,119 @@
1
+ from _typeshed import Incomplete
2
+ from chromadb.types import Where as Where, WhereDocument as WhereDocument
3
+ from enum import Enum
4
+ from gllm_core.schema.chunk import Chunk
5
+ from gllm_datastore.constants import DEFAULT_TOP_K as DEFAULT_TOP_K
6
+ from gllm_datastore.utils.converter import from_langchain as from_langchain, to_langchain as to_langchain
7
+ from gllm_datastore.vector_data_store.vector_data_store import BaseVectorDataStore as BaseVectorDataStore
8
+ from langchain_core.documents import Document as Document
9
+ from langchain_core.embeddings import Embeddings as Embeddings
10
+ from typing import Any
11
+
12
# Default cap on candidates examined during similarity search; value defined in
# the implementation module.
DEFAULT_NUM_CANDIDATES: int

class ChromaClientType(str, Enum):
    """Enum for different types of ChromaDB clients.

    Attributes:
        MEMORY (str): Client type for an in-memory data store.
        PERSISTENT (str): Client type for a persistent data store.
        HTTP (str): Client type for a client-server architecture.
    """
    MEMORY = 'memory'
    PERSISTENT = 'persistent'
    HTTP = 'http'
25
+
26
class ChromaVectorDataStore(BaseVectorDataStore):
    """Datastore for interacting with ChromaDB.

    This class provides methods to interact with ChromaDB for vector storage and retrieval
    using the langchain-chroma integration.

    Attributes:
        collection_name (str): The name of the ChromaDB collection to use.
        embedding: The model used to generate embeddings.
        client: The ChromaDB client instance, one of:
            1. MEMORY: In-memory client (chromadb.Client)
            2. PERSISTENT: Persistent client (chromadb.PersistentClient)
            3. HTTP: HTTP client for client-server architecture (chromadb.HttpClient)
        vector_store (Chroma): The langchain Chroma vector store instance.
        num_candidates (int): The maximum number of candidates to consider during search.
    """
    collection_name: Incomplete
    embedding: Incomplete
    vector_store: Incomplete
    num_candidates: Incomplete
    def __init__(self, collection_name: str, embedding: Embeddings | None = None, client_type: ChromaClientType = ..., persist_directory: str | None = None, host: str | None = None, port: int | None = None, num_candidates: int = ..., **kwargs: Any) -> None:
        """Initialize the ChromaDB vector data store with langchain-chroma.

        Args:
            collection_name (str): Name of the collection to use in ChromaDB.
            embedding (Embeddings | None): Function to generate embeddings.
            client_type (ChromaClientType, optional): Type of ChromaDB client to use.
                Defaults to ChromaClientType.MEMORY.
            persist_directory (str | None, optional): Directory to persist vector store data.
                Required for PERSISTENT client type. Defaults to None.
            host (str | None, optional): Host address for ChromaDB server.
                Required for HTTP client type. Defaults to None.
            port (int | None, optional): Port for ChromaDB server.
                Required for HTTP client type. Defaults to None.
            num_candidates (int, optional): Maximum number of candidates to consider during search.
                Defaults to DEFAULT_NUM_CANDIDATES.
            **kwargs: Additional parameters for Chroma initialization.

        Note:
            num_candidates (int, optional): This constant affects the maximum number of results to consider
                during the search. Index with more documents would need a higher value for the whole documents
                to be considered during search. This happens due to a bug with Chroma's search algorithm as discussed
                in this issue: https://github.com/langchain-ai/langchain/issues/1946
        """
    # retrieval_params widened from dict[str, dict[str, str]] to dict[str, Any]:
    # the narrower type contradicted this method's own docstring and rejects
    # valid Where filters with non-string values (e.g. {"$gte": 4.2}).
    async def query(self, query: str, top_k: int = ..., retrieval_params: dict[str, Any] | None = None) -> list[Chunk]:
        '''Query the vector data store for similar chunks.

        Args:
            query (str): The query string to find similar chunks for.
            top_k (int, optional): Maximum number of results to return. Defaults to DEFAULT_TOP_K.
            retrieval_params (dict[str, Any] | None, optional): Additional parameters for retrieval.
                - filter (Where, optional): A Where type dict used to filter the retrieval by the metadata keys.
                  E.g. `{"$and": [{"color": "red"}, {"price": {"$gte": 4.2}}]}`.
                - where_document (WhereDocument, optional): A WhereDocument type dict used to filter the retrieval by
                  the document content. E.g. `{"$contains": "hello"}`.
                Defaults to None.

        Returns:
            list[Chunk]: A list of Chunk objects matching the query.
        '''
    async def query_by_id(self, id: str | list[str]) -> list[Chunk]:
        """Retrieve chunks by their IDs.

        Args:
            id (str | list[str]): A single ID or a list of IDs to retrieve.

        Returns:
            list[Chunk]: A list of retrieved Chunk objects.
        """
    async def add_chunks(self, chunks: Chunk | list[Chunk], **kwargs) -> list[str]:
        """Add chunks to the vector data store.

        Args:
            chunks (Chunk | list[Chunk]): A single chunk or list of chunks to add.
            **kwargs: Additional keyword arguments for the add operation.

        Returns:
            list[str]: List of IDs of the added chunks.
        """
    # ids annotation fixed: the original declared `ids: list[str] = None`,
    # which assigns None to a non-optional type.
    async def delete_chunks(self, ids: list[str] | None = None, where: Where | None = None, where_document: WhereDocument | None = None) -> None:
        '''Delete chunks from the vector data store.

        Args:
            ids (list[str] | None, optional): List of IDs of chunks to delete. Defaults to None. If not
                provided, deletion is based on the `where` and `where_document` filters; if those are
                also None, all chunks will be deleted.
            where (Where | None, optional): A Where type dict used to filter the deletion by the metadata keys.
                E.g. `{"$and": [{"color": "red"}, {"price": {"$gte": 4.2}}]}`. Defaults to None.
            where_document (WhereDocument | None, optional): A WhereDocument type dict used to filter the deletion by
                the document content. E.g. `{"$contains": "hello"}`. Defaults to None.

        Note:
            If no parameters are provided, all chunks in the collection will be deleted. Please use with caution.
        '''
@@ -0,0 +1,9 @@
1
+ from gllm_datastore.vector_data_store.elasticsearch_vector_data_store import ElasticsearchVectorDataStore as ElasticsearchVectorDataStore
2
+
3
class ElasticsearchDataStore(ElasticsearchVectorDataStore):
    """A vector data store for interacting with Elasticsearch.

    This class is a subclass of ElasticsearchVectorDataStore and adds no
    behavior of its own — it exists only as a backward-compatible alias.
    It is deprecated and will be removed in a future release.
    Use ElasticsearchVectorDataStore instead.
    """