langchain-kinetica 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ ##
2
+ # Copyright (c) 2024, Chad Juliano, Kinetica DB Inc.
3
+ ##
4
+
5
+ __version__ = "1.0.0"
6
+
7
+ from .llm_chat import KineticaChatLLM
8
+ from .sql_output import KineticaSqlOutputParser, SqlResponse
@@ -0,0 +1,183 @@
1
+ ##
2
+ # Copyright (c) 2024, Chad Juliano, Kinetica DB Inc.
3
+ ##
4
+
5
+ from typing import Any, List, Dict, Mapping, Optional, cast
6
+ from pathlib import Path
7
+ from importlib.metadata import version
8
+ import json
9
+ import re
10
+
11
+ from langchain_core.pydantic_v1 import Field, root_validator
12
+ from langchain_core.language_models.chat_models import BaseChatModel
13
+ from langchain_core.callbacks import CallbackManagerForLLMRun
14
+ from langchain_core.outputs import ChatGeneration, ChatResult
15
+ from langchain_core.messages import (
16
+ AIMessage,
17
+ BaseMessage,
18
+ HumanMessage,
19
+ SystemMessage,
20
+ )
21
+
22
+ from gpudb import GPUdb
23
+ from .sa_dto import SuggestRequest, CompletionResponse, SqlResponse
24
+ from .sa_datafile import SaDatafile
25
+
26
+
27
class KineticaChatLLM(BaseChatModel):
    """LangChain chat model backed by the Kinetica SqlAssist LLM.

    Conversations are converted to ``role``/``content`` dicts, posted to the
    ``/chat/completions`` endpoint through the supplied ``GPUdb`` connection,
    and the first returned choice is converted back into a LangChain message.
    """

    kdbc: GPUdb
    """ Kinetica DB connection. """

    @classmethod
    def _create_kdbc(cls, host: str, login: str, password: str) -> GPUdb:
        """Create a ``GPUdb`` connection for the given host and credentials.

        NOTE(review): SSL certificate verification is skipped and failover is
        disabled for every connection created here; confirm this is intended
        outside of development environments.
        """
        options = GPUdb.Options()
        options.username = login
        options.password = password
        options.skip_ssl_cert_verification = True
        options.disable_failover = True
        options.logging_level = 'INFO'
        return GPUdb(host=host, options=options)

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Print the connection URL plus the client API and server versions."""
        kdbc = values['kdbc']
        print(f"Connected to Kinetica: {kdbc.get_url()}. (api={version('gpudb')}, server={kdbc.server_version})")
        return values

    @property
    def _llm_type(self) -> str:
        """Identifier of this chat model type."""
        return "kinetica-sqlassist"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return dict(kinetica_version=str(self.kdbc.server_version),
                    api_version=version('gpudb'))

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Submit the conversation and wrap the first choice in a ``ChatResult``.

        Args:
            messages: Conversation to send; human, AI and system messages only.
            stop: Must be ``None``; stop sequences are rejected.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A ``ChatResult`` with a single generation and an ``llm_output``
            dict holding ``input_tokens``, ``output_tokens`` and ``model_name``.

        Raises:
            ValueError: If ``stop`` is given, the request fails, or the
                response contains no choices.
        """
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")

        dict_messages = [self._convert_message_to_dict(m) for m in messages]
        sql_response = self._submit_completion(dict_messages)
        if not sql_response.choices:
            # Fail with a clear message rather than an IndexError below.
            raise ValueError("SQL Generation returned no choices.")

        generated_dict = sql_response.choices[0].message.model_dump()
        generated_message = self._convert_message_from_dict(generated_dict)

        llm_output = dict(
            input_tokens=sql_response.usage.prompt_tokens,
            output_tokens=sql_response.usage.completion_tokens,
            model_name=sql_response.model)
        return ChatResult(generations=[ChatGeneration(message=generated_message)], llm_output=llm_output)

    def load_messages_from_context(self, context_name: str) -> List[BaseMessage]:
        """Build the prompt messages for a context stored in Kinetica.

        Runs ``GENERATE PROMPT`` for ``context_name``, parses the returned
        JSON as a ``SuggestRequest`` and converts its payload into a system
        message followed by the sample user/assistant messages.

        Raises:
            ValueError: If the SQL statement fails or does not return exactly
                one record.
        """
        # Double any single quote so the name cannot terminate the SQL string
        # literal it is embedded in.
        safe_name = context_name.replace("'", "''")

        # query kinetica for the prompt
        sql = f"GENERATE PROMPT WITH OPTIONS (CONTEXT_NAMES = '{safe_name}')"
        result = self._execute_sql(sql)
        prompt_json = json.loads(result['Prompt'])

        # convert the prompt to messages
        payload = SuggestRequest.model_validate(prompt_json).payload

        dict_messages = [dict(role="system", content=payload.get_system_str())]
        dict_messages.extend(payload.get_messages())
        return [self._convert_message_from_dict(m) for m in dict_messages]

    def _submit_completion(self, messages: List[Dict]) -> SqlResponse:
        """Post ``messages`` to ``/chat/completions`` and validate the reply.

        Args:
            messages: Role/content dicts as produced by
                ``_convert_message_to_dict``.

        Returns:
            The ``data`` member of the validated ``CompletionResponse``.

        Raises:
            ValueError: If the endpoint or the completion reports a status
                other than ``"OK"``.
        """
        request_json = json.dumps(dict(messages=messages))
        # NOTE: this calls a name-mangled private method of GPUdb.
        response_raw = self.kdbc._GPUdb__submit_request_json('/chat/completions', request_json)
        response_json = json.loads(response_raw)

        if response_json['status'] != "OK":
            message = response_json['message']
            # The message may embed a JSON payload after "response:"; prefer
            # its inner message when it can be decoded, otherwise keep the
            # outer message so the real error is never masked.
            embedded = re.compile(r'response:({.*})').search(message)
            if embedded is not None:
                try:
                    message = json.loads(embedded.group(1))['message']
                except (ValueError, KeyError, TypeError):
                    pass
            raise ValueError(message)

        response = CompletionResponse.model_validate(response_json['data'])
        if response.status != "OK":
            raise ValueError("SQL Generation failed")
        return response.data

    def _execute_sql(self, sql: str) -> Dict:
        """Execute ``sql`` and return its single result record as a dict.

        Raises:
            ValueError: If the statement status is not ``'OK'`` or the result
                does not contain exactly one record.
        """
        response = self.kdbc.execute_sql_and_decode(sql, limit=1, get_column_major=False)

        status_info = response['status_info']
        if status_info['status'] != 'OK':
            raise ValueError(status_info['message'])

        records = response['records']
        if len(records) != 1:
            raise ValueError("No records returned.")

        # Copy into a plain dict keyed by column name.
        return dict(records[0].items())

    @classmethod
    def load_messages_from_datafile(cls, sa_datafile: Path) -> List[BaseMessage]:
        """Parse a dialogue file and convert it into a list of messages."""
        datafile_dict = SaDatafile.parse_dialogue_file(sa_datafile)
        return cls._convert_dict_to_messages(datafile_dict)

    @classmethod
    def _convert_message_to_dict(cls, message: BaseMessage) -> Dict:
        """Convert a LangChain message into a ``role``/``content`` dict.

        Raises:
            ValueError: If the message is not a human, AI or system message.
        """
        content = cast(str, message.content)
        if isinstance(message, HumanMessage):
            role = "user"
        elif isinstance(message, AIMessage):
            role = "assistant"
        elif isinstance(message, SystemMessage):
            role = "system"
        else:
            raise ValueError(f"Got unsupported message type: {message}")

        return dict(role=role, content=content)

    @classmethod
    def _convert_message_from_dict(cls, message: Dict) -> BaseMessage:
        """Convert a ``role``/``content`` dict into a LangChain message.

        Raises:
            ValueError: If the role is not ``user``, ``assistant`` or ``system``.
        """
        role = message['role']
        content = message['content']
        if role == 'user':
            return HumanMessage(content=content)
        elif role == 'assistant':
            return AIMessage(content=content)
        elif role == 'system':
            return SystemMessage(content=content)
        else:
            raise ValueError(f"Got unsupported role: {role}")

    @classmethod
    def _convert_dict_to_messages(cls, sa_data: Dict) -> List[BaseMessage]:
        """Convert a parsed dialogue dict into messages, system message first.

        ``sa_data`` must provide the ``schema``, ``system`` and ``messages``
        keys.
        """
        schema = sa_data['schema']
        system = sa_data['system']
        messages = sa_data['messages']
        print(f"Importing prompt for schema: {schema}")

        result_list = [SystemMessage(content=system)]
        result_list.extend(cls._convert_message_from_dict(m) for m in messages)
        return result_list
183
+
@@ -0,0 +1,60 @@
1
+ ##
2
+ # Copyright (c) 2023, Chad Juliano, Kinetica DB Inc.
3
+ ##
4
+
5
+ from pathlib import Path
6
+ import re
7
+ import os
8
+
9
class SaDatafile:
    """Parser for dialogue files made of ``<|role|> content <|end|>`` segments.

    Recognised roles are ``system``, ``user`` and ``assistant``.
    """

    # parse line into a dict containing role and content
    PARSER = re.compile(r"^<\|(?P<role>\w+)\|>\W*(?P<content>.*)$", re.DOTALL)

    @classmethod
    def parse_dialogue_file(cls, input_file: os.PathLike) -> dict:
        """Read ``input_file`` and parse its contents with ``parse_dialogue``.

        The schema name is the file name with a trailing ``.txt`` removed.
        """
        path = Path(input_file)
        schema = path.name.removesuffix('.txt')
        # Use a context manager so the handle is closed even if reading fails.
        with open(input_file) as dialogue_file:
            text = dialogue_file.read()
        return cls.parse_dialogue(text, schema)

    @classmethod
    def parse_dialogue(cls, text: str, schema: str) -> dict:
        """Split ``text`` on ``<|end|>`` and collect system/user/assistant turns.

        Args:
            text: Dialogue text made of ``<|role|> content`` segments, each
                terminated by ``<|end|>``.
            schema: Name stored under the ``schema`` key of the result.

        Returns:
            A dict with ``schema``, ``system`` (the system content, or ``None``
            if there was none) and ``messages`` (a list of
            ``{"role", "content"}`` dicts in user/assistant pairs).

        Raises:
            ValueError: If a segment has no starting token, a role is unknown,
                more than one system segment exists, or user and assistant
                segments do not alternate.
        """
        messages = []
        system = None
        user_message = None

        for segment in text.split('<|end|>'):
            line = segment.strip()
            if not line:
                continue

            match = cls.PARSER.match(line)
            if match is None:
                raise ValueError(f"Could not find starting token in: {line}")

            groupdict = match.groupdict()
            role = groupdict["role"]

            if role == "system":
                if system is not None:
                    raise ValueError(f"Only one system token allowed in: {line}")
                system = groupdict['content']
            elif role == "user":
                if user_message is not None:
                    raise ValueError(f"Found user token without assistant token: {line}")
                user_message = groupdict
            elif role == "assistant":
                if user_message is None:
                    raise ValueError(f"Found assistant token without user token: {line}")
                # A user turn is only emitted once its assistant reply is seen.
                messages.append(user_message)
                messages.append(groupdict)
                user_message = None
            else:
                raise ValueError(f"Unknown token: {role}")

        # NOTE(review): a trailing user segment with no assistant reply is
        # dropped here without an error; confirm that is intended.
        return {"schema": schema, "system": system, "messages": messages}
@@ -0,0 +1,111 @@
1
+ ##
2
+ # Copyright (c) 2023, Chad Juliano, Kinetica DB Inc.
3
+ ##
4
+
5
+ from __future__ import annotations
6
+ from pydantic import BaseModel, Field
7
+
8
class SuggestContext(BaseModel):
    """Description of one table (or a set of samples) within a prompt context."""

    table: str | None = Field(default=None, title="Name of table")
    description: str | None = Field(default=None, title="Table description")
    columns: list[str] | None = Field(default=None, title="Table columns list")
    rules: list[str] | None = Field(default=None, title="Rules that apply to the table.")
    samples: dict | None = Field(default=None, title="Samples that apply to the entire context.")

    def to_system_str(self) -> str:
        """Render the table as ``CREATE TABLE`` text for the system prompt.

        The output holds the column list, an optional ``COMMENT ON TABLE``
        line for the description and one ``--`` comment line per rule.

        Raises:
            ValueError: If ``columns`` is ``None`` or empty.
        """
        # The original built this exception with an invalid keyword and never
        # raised it, so an empty column list was not reported as intended.
        if not self.columns:
            raise ValueError("columns list can't be null.")

        # Strip double quotes and surrounding whitespace from each definition.
        columns = [" " + column.replace("\"", "").strip() for column in self.columns]

        lines = [
            f"CREATE TABLE {self.table} AS",
            "(",
            ",\n".join(columns),
            ");",
        ]

        if self.description:
            lines.append(f"COMMENT ON TABLE {self.table} IS '{self.description}';")

        if self.rules:
            lines.append(f"-- When querying table {self.table} the following rules apply:")
            lines.extend(f"-- * {rule}" for rule in self.rules)

        return "\n".join(lines)
40
+
41
+
42
class SuggestPayload(BaseModel):
    """A question together with the table/sample contexts used to prompt the LLM."""

    # Optional: the default is None, so the annotation must allow it.
    question: str | None = None
    context: list[SuggestContext]

    def get_system_str(self) -> str:
        """Return the system prompt text.

        Contexts without a table name are skipped; the rest are rendered with
        ``to_system_str`` and joined by blank lines.
        """
        return "\n\n".join(
            table_context.to_system_str()
            for table_context in self.context
            if table_context.table is not None
        )

    def get_messages(self) -> list[dict]:
        """Return the sample question/answer pairs as role/content dicts.

        Each sample yields a ``user`` message (the question) followed by an
        ``assistant`` message (the answer).
        """
        messages = []
        for context in self.context:
            if context.samples is None:
                continue
            for question, answer in context.samples.items():
                # unescape doubled single quotes ('' -> ')
                answer = answer.replace("''", "'")

                messages.append(dict(role="user", content=question))
                messages.append(dict(role="assistant", content=answer))
        return messages

    def to_completion(self) -> dict:
        """Return a ``{"messages": [...]}`` dict for this payload.

        The list holds the system message, the sample messages and a final
        ``user`` message carrying ``question``.
        """
        messages = [dict(role="system", content=self.get_system_str())]
        messages.extend(self.get_messages())
        messages.append(dict(role="user", content=self.question))
        return dict(messages=messages)
76
+
77
+
78
class SuggestRequest(BaseModel):
    # Envelope whose single field carries the prompt payload.
    payload: SuggestPayload
80
+
81
class CompletionRequest(BaseModel):
    # Request body: the conversation as a list of message dicts.
    messages: list[dict]
83
+
84
+ # Output Types
85
+
86
class Message(BaseModel):
    """A single chat message with a role and its text content."""

    # The default is None, so the annotation has to admit None as well.
    role: str | None = Field(default=None, title="One of [user|assistant|system]")
    content: str
89
+
90
class Choice(BaseModel):
    """One generated alternative within a completion response."""

    index: int
    # The default is None, so the annotation has to admit None as well.
    message: Message | None = Field(default=None, title="The generated SQL")
    finish_reason: str
94
+
95
class Usage(BaseModel):
    # Token counts reported with a completion.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
99
+
100
class SqlResponse(BaseModel):
    """Completion result: generated choices, token usage and model metadata."""

    id: str
    object: str
    created: int
    model: str
    choices: list[Choice]
    usage: Usage
    # The default is None, so the annotation has to admit None as well.
    prompt: str | None = Field(default=None, title="The input question")
108
+
109
class CompletionResponse(BaseModel):
    # Envelope: a status string plus the completion result itself.
    status: str
    data: SqlResponse
@@ -0,0 +1,45 @@
1
+ ##
2
+ # Copyright (c) 2024, Chad Juliano, Kinetica DB Inc.
3
+ ##
4
+
5
+ from typing import Any, List
6
+ from pandas import DataFrame
7
+
8
+ from langchain_core.output_parsers.transform import BaseOutputParser
9
+ from langchain_core.outputs import Generation
10
+ from langchain_core.pydantic_v1 import Field, BaseModel
11
+
12
+ from gpudb import GPUdb
13
+
14
class SqlResponse(BaseModel):
    """ Response containing SQL and the fetched data """

    # The SQL text the data was fetched with.
    sql: str = Field(description="Result SQL")
    # The fetched rows as a pandas DataFrame.
    dataframe: DataFrame = Field(description="Result Data")

    class Config:
        """Configuration for this pydantic object."""
        # DataFrame is not a pydantic-native type, so it must be allowed explicitly.
        arbitrary_types_allowed = True
23
+
24
+
25
class KineticaSqlOutputParser(BaseOutputParser[SqlResponse]):
    """ Fetch and return data from the Kinetica LLM """

    kdbc: GPUdb = Field(exclude=True)
    """ Kinetica DB connection. """

    class Config:
        """Configuration for this pydantic object."""
        # GPUdb is not a pydantic-native type, so it must be allowed explicitly.
        arbitrary_types_allowed = True

    def parse(self, text: str) -> SqlResponse:
        # Run the SQL through the connection and pair the text with its rows.
        return SqlResponse(sql=text, dataframe=self.kdbc.to_df(text))

    def parse_result(self, result: List[Generation], *, partial: bool = False) -> SqlResponse:
        # Only the first generation is used; `partial` is not consulted.
        first_generation = result[0]
        return self.parse(first_generation.text)

    @property
    def _type(self) -> str:
        # Identifier string for this parser type.
        return "kinetica_sql_output_parser"
45
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Kinetica
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.1
2
+ Name: langchain-kinetica
3
+ Version: 1.0.0
4
+ Summary: Kinetica interface for Langchain.
5
+ Author-email: Chad Juliano <cjuliano@kinetica.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2024 Kinetica
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://kinetica.com
29
+ Project-URL: Documentation, https://docs.kinetica.com/7.1/sql-gpt/
30
+ Project-URL: Repository, https://github.com/kineticadb/langchain-kinetica
31
+ Classifier: Programming Language :: Python :: 3
32
+ Requires-Python: >=3.10
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Requires-Dist: gpudb >=7.1.9.10
36
+ Requires-Dist: langchain-core
37
+ Requires-Dist: pandas
38
+
39
+ # langchain-kinetica
40
+
41
+ Kinetica interface for Langchain. See the [LLM documentation][LLM_DOCS] for an overview of the Kinetica LLM.
42
+
43
+ [LLM_DOCS]: <https://docs.kinetica.com/7.1/sql-gpt/>
44
+
45
+ - [1. Prerequisites](#1-prerequisites)
46
+ - [2. Package Contents](#2-package-contents)
47
+ - [3. Installation](#3-installation)
48
+ - [4. Usage](#4-usage)
49
+ - [5. Building](#5-building)
50
+ - [6. See Also](#6-see-also)
51
+
52
+ ## 1. Prerequisites
53
+
54
+ To use langchain with Kinetica you will need:
55
+
56
+ * Python runtime >=3.10
57
+ * Kinetica SqlAssist LLM
58
+ * Kinetica instance >7.2.0 configured to use SqlAssist.
59
+
60
+ ## 2. Package Contents
61
+
62
+ * `KineticaChatLLM`: ChatModel for converting natural language to SQL.
63
+ * `KineticaSqlOutputParser`: OutputParser that will execute SQL from the `KineticaChatLLM`.
64
+ * `SqlResponse`: If the Kinetica chain ends with `KineticaSqlOutputParser` then this response will contain the generated SQL and results from its execution.
65
+
66
+ ## 3. Installation
67
+
68
+ This project is not yet available on pypi. You can install it directly from the repository.
69
+
70
+ ```sh
71
+ $ pip install "langchain-kinetica @ git+ssh://git@github.com/kineticadb/langchain-kinetica.git"
72
+ ```
73
+
74
+ ## 4. Usage
75
+
76
+ See the [Kinetica LLM Demo notebook](./notebooks/kinetica_llm_demo.ipynb) for examples.
77
+
78
+ ## 5. Building
79
+
80
+ Install the project locally.
81
+
82
+ ```sh
83
+ $ pip install --editable .
84
+ ```
85
+
86
+ You will need to install the build utility.
87
+
88
+ ```sh
89
+ $ pip install --upgrade build
90
+ ```
91
+
92
+ Build the project
93
+
94
+ ```sh
95
+ $ python3 -m build
96
+ ```
97
+
98
+ The build will generate a `.whl` file that can be distributed.
99
+
100
+ ```sh
101
+ $ ls -1 ./dist
102
+ langchain-kinetica-1.0.tar.gz
103
+ langchain_kinetica-1.0-py3-none-any.whl
104
+ ```
105
+
106
+ ## 6. See Also
107
+
108
+ - [Kinetica LLM Documentation](https://docs.kinetica.com/7.1/sql-gpt/)
109
+ - [LangChain Prompts](https://python.langchain.com/docs/modules/model_io/prompts/)
110
+ - [LangChain Chat Models](https://python.langchain.com/docs/modules/model_io/chat/)
@@ -0,0 +1,10 @@
1
+ langchain_kinetica/__init__.py,sha256=4HXjx2qFQpbaVpkkJ4FPHzkTQSFittpEwf5E-Z1AUho,182
2
+ langchain_kinetica/llm_chat.py,sha256=X39-RT74seaecY6J_4GTxZoipttqgC3CP7K0Yko5x5I,6684
3
+ langchain_kinetica/sa_datafile.py,sha256=48u7vQQyC4nXcTkh3Wp7YEf2vhdmHnx_t1ZRQokrSew,1948
4
+ langchain_kinetica/sa_dto.py,sha256=N2aIRjJWDhGzQovbgyIZAv1LNCOwGksQoBdC6AXWKZI,3440
5
+ langchain_kinetica/sql_output.py,sha256=49tVYqa2OncGU2LIXk1wpbAFkaipQve8rn1w4u2Tgso,1306
6
+ langchain_kinetica-1.0.0.dist-info/LICENSE,sha256=VYHwkc_3acBxI-AvhEwSp5ve7kIZuvkcl8pQA93UunA,1065
7
+ langchain_kinetica-1.0.0.dist-info/METADATA,sha256=KyBd01fWwlRcxJ5DTPNqJw_pilnWnKXRatswciitnVA,3665
8
+ langchain_kinetica-1.0.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
9
+ langchain_kinetica-1.0.0.dist-info/top_level.txt,sha256=JXgMmx9S2IcZYAbH0sFz2asosN_NztlOS88TzHK5GV4,19
10
+ langchain_kinetica-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.42.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ langchain_kinetica