PyPI - langchain-kinetica - Versions diffs - 1.0.0__py3-none-any.whl - Mend

langchain-kinetica 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

langchain_kinetica/__init__.py +8 -0
langchain_kinetica/llm_chat.py +183 -0
langchain_kinetica/sa_datafile.py +60 -0
langchain_kinetica/sa_dto.py +111 -0
langchain_kinetica/sql_output.py +45 -0
langchain_kinetica-1.0.0.dist-info/LICENSE +21 -0
langchain_kinetica-1.0.0.dist-info/METADATA +110 -0
langchain_kinetica-1.0.0.dist-info/RECORD +10 -0
langchain_kinetica-1.0.0.dist-info/WHEEL +5 -0
langchain_kinetica-1.0.0.dist-info/top_level.txt +1 -0

langchain_kinetica/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+##
+# Copyright (c) 2024, Chad Juliano, Kinetica DB Inc.
+##
+__version__ = "1.0.0"
+from .llm_chat import KineticaChatLLM
+from .sql_output import KineticaSqlOutputParser, SqlResponse

langchain_kinetica/llm_chat.py ADDED Viewed

@@ -0,0 +1,183 @@
+##
+# Copyright (c) 2024, Chad Juliano, Kinetica DB Inc.
+##
+from typing import Any, List, Dict, Mapping, Optional, cast
+from pathlib import Path
+from importlib.metadata import version
+import json
+import re
+from langchain_core.pydantic_v1 import Field, root_validator
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.outputs import ChatGeneration, ChatResult
+from langchain_core.messages import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    SystemMessage,
+)
+from gpudb import GPUdb
+from .sa_dto import SuggestRequest, CompletionResponse, SqlResponse
+from .sa_datafile import SaDatafile
+class KineticaChatLLM(BaseChatModel):
+    kdbc: GPUdb
+    """ Kinetica DB connection. """
+    @classmethod
+    def _create_kdbc(cls, host: str, login: str, password: str) -> GPUdb:
+        options = GPUdb.Options()
+        options.username = login
+        options.password = password
+        options.skip_ssl_cert_verification = True
+        options.disable_failover = True
+        options.logging_level = 'INFO'
+        kdbc = GPUdb(host=host, options = options)
+        return kdbc
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        kdbc = values['kdbc']
+        print(f"Connected to Kinetica: {kdbc.get_url()}. (api={version('gpudb')}, server={kdbc.server_version})")
+        return values
+    @property
+    def _llm_type(self) -> str:
+        return "kinetica-sqlassist"
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        return dict(kinetica_version=str(self.kdbc.server_version),
+                    api_version=version('gpudb'))
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        if stop is not None:
+            raise ValueError("stop kwargs are not permitted.")
+        dict_messages = [self._convert_message_to_dict(m) for m in messages]
+        sql_response = self._submit_completion(dict_messages)
+        generated_dict = sql_response.choices[0].message.model_dump()
+        generated_message = self._convert_message_from_dict(generated_dict)
+        llm_output = dict(
+            input_tokens = sql_response.usage.prompt_tokens,
+            output_tokens = sql_response.usage.completion_tokens,
+            model_name = sql_response.model)
+        return ChatResult(generations=[ChatGeneration(message=generated_message)], llm_output=llm_output)
+    def load_messages_from_context(self, context_name: str) -> List[BaseMessage]:
+        # query kinetica for the prompt
+        sql = f"GENERATE PROMPT WITH OPTIONS (CONTEXT_NAMES = '{context_name}')"
+        result = self._execute_sql(sql)
+        prompt = result['Prompt']
+        prompt_json = json.loads(prompt)
+        # convert the prompt to messages
+        request = SuggestRequest.model_validate(prompt_json)
+        payload = request.payload
+        dict_messages=[]
+        dict_messages.append(dict(role="system", content=payload.get_system_str()))
+        dict_messages.extend(payload.get_messages())
+        messages = [self._convert_message_from_dict(m) for m in dict_messages]
+        return messages
+    def _submit_completion(self, messages: Dict) -> SqlResponse:
+        request = dict(messages=messages)
+        request_json = json.dumps(request)
+        response_raw = self.kdbc._GPUdb__submit_request_json( '/chat/completions', request_json)
+        response_json = json.loads(response_raw)
+        status = response_json['status']
+        if(status != "OK"):
+            message = response_json['message']
+            match_resp = re.compile(r'response:({.*})')
+            result = match_resp.search(message)
+            if(result is not None):
+                response = result.group(1)
+                response_json = json.loads(response)
+                message = response_json['message']
+            raise ValueError(message)
+        data = response_json['data']
+        response = CompletionResponse.model_validate(data)
+        if(response.status != "OK"):
+            raise ValueError("SQL Generation failed")
+        return response.data
+    def _execute_sql(self, sql: str) -> Dict:
+        response = self.kdbc.execute_sql_and_decode(sql, limit=1, get_column_major=False)
+        status_info = response['status_info']
+        if(status_info['status'] != 'OK'):
+            message = status_info['message']
+            raise ValueError(message)
+        records = response['records']
+        if(len(records) != 1):
+            raise ValueError("No records returned.")
+        record = records[0]
+        response_dict = {}
+        for col, val in record.items():
+            response_dict[col] = val
+        return response_dict
+    @classmethod
+    def load_messages_from_datafile(cls, sa_datafile: Path) -> List[BaseMessage]:
+        datafile_dict = SaDatafile.parse_dialogue_file(sa_datafile)
+        messages = cls._convert_dict_to_messages(datafile_dict)
+        return messages
+    @classmethod
+    def _convert_message_to_dict(cls, message: BaseMessage) -> Dict:
+        content = cast(str, message.content)
+        if isinstance(message, HumanMessage):
+            role = "user"
+        elif isinstance(message, AIMessage):
+            role = "assistant"
+        elif isinstance(message, SystemMessage):
+            role = "system"
+        else:
+            raise ValueError(f"Got unsupported message type: {message}")
+        message = dict(role=role, content=content)
+        return message
+    @classmethod
+    def _convert_message_from_dict(cls, message: Dict) -> BaseMessage:
+        role = message['role']
+        content = message['content']
+        if(role == 'user'):
+            return HumanMessage(content=content)
+        elif(role == 'assistant'):
+            return AIMessage(content=content)
+        elif(role == 'system'):
+            return SystemMessage(content=content)
+        else:
+            raise ValueError(f"Got unsupported role: {role}")
+    @classmethod
+    def _convert_dict_to_messages(cls, sa_data: Dict) -> List[BaseMessage]:
+        schema = sa_data['schema']
+        system = sa_data['system']
+        messages = sa_data['messages']
+        print(f"Importing prompt for schema: {schema}")
+        result_list = []
+        result_list.append(SystemMessage(content=system))
+        result_list.extend([cls._convert_message_from_dict(m) for m in messages])
+        return result_list

langchain_kinetica/sa_datafile.py ADDED Viewed

@@ -0,0 +1,60 @@
+##
+# Copyright (c) 2023, Chad Juliano, Kinetica DB Inc.
+##
+from pathlib import Path
+import re
+import os
+class SaDatafile:
+    # parse line into a dict containing role and content
+    PARSER = re.compile(r"^<\|(?P<role>\w+)\|>\W*(?P<content>.*)$", re.DOTALL)
+    @classmethod
+    def parse_dialogue_file(cls, input_file: os.PathLike) -> dict:
+        path = Path(input_file)
+        schema = path.name.removesuffix('.txt')
+        lines = open(input_file).read()
+        return cls.parse_dialogue(lines,schema)
+    @classmethod
+    def parse_dialogue(cls, text: str, schema: str) -> dict:
+        messages = []
+        system = None
+        lines = text.split('<|end|>')
+        user_message = None
+        for idx, line in enumerate(lines):
+            line = line.strip()
+            if(len(line) == 0):
+                continue
+            match = cls.PARSER.match(line)
+            if(match is None):
+                raise ValueError(f"Could not find starting token in: {line}")
+            groupdict = match.groupdict()
+            role = groupdict["role"]
+            if(role == "system"):
+                if(system is not None):
+                    raise ValueError(f"Only one system token allowed in: {line}")
+                system = groupdict['content']
+            elif(role == "user"):
+                if(user_message is not None):
+                    raise ValueError(f"Found user token without assistant token: {line}")
+                user_message = groupdict
+            elif(role == "assistant"):
+                if(user_message is None):
+                    raise Exception(f"Found assistant token without user token: {line}")
+                messages.append(user_message)
+                messages.append(groupdict)
+                user_message = None
+            else:
+                raise ValueError(f"Unknown token: {role}")
+        return { "schema": schema, "system": system, "messages": messages  }

langchain_kinetica/sa_dto.py ADDED Viewed

@@ -0,0 +1,111 @@
+##
+# Copyright (c) 2023, Chad Juliano, Kinetica DB Inc.
+##
+from __future__ import annotations
+from pydantic import BaseModel, Field
+class SuggestContext(BaseModel):
+    table: str | None = Field(default=None, title="Name of table")
+    description: str | None = Field(default=None, title="Table description")
+    columns: list[str] | None = Field(default=None, title="Table columns list")
+    rules: list[str] | None = Field(default=None, title="Rules that apply to the table.")
+    samples: dict | None = Field(default=None, title="Samples that apply to the entire context.")
+    def to_system_str(self) -> str:
+        lines = []
+        lines.append(f"CREATE TABLE {self.table} AS")
+        lines.append("(")
+        if(not self.columns or len(self.columns) == 0):
+            ValueError(detail="columns list can't be null.")
+        columns = []
+        for column in self.columns:
+            column = column.replace("\"", "").strip()
+            columns.append(f"   {column}")
+        lines.append(",\n".join(columns))
+        lines.append(");")
+        if(self.description):
+            lines.append(f"COMMENT ON TABLE {self.table} IS '{self.description}';")
+        if(self.rules and len(self.rules) > 0):
+            lines.append(f"-- When querying table {self.table} the following rules apply:")
+            for rule in self.rules:
+                lines.append(f"-- * {rule}")
+        result = "\n".join(lines)
+        return result
+class SuggestPayload(BaseModel):
+    question: str = None
+    context: list[SuggestContext]
+    def get_system_str(self) -> str:
+        lines = []
+        for table_context in self.context:
+            if(table_context.table is None):
+                continue
+            context_str = table_context.to_system_str()
+            lines.append(context_str)
+        return "\n\n".join(lines)
+    def get_messages(self) -> str | None:
+        messages = []
+        for context in self.context:
+            if(context.samples is None):
+                continue
+            for question, answer in context.samples.items():
+                # unescape double quotes
+                answer = answer.replace("''", "'")
+                messages.append(dict(role="user", content=question))
+                messages.append(dict(role="assistant", content=answer))
+        return messages
+    def to_completion(self) -> str:
+        messages = []
+        messages.append(dict(role="system", content=self.get_system_str()))
+        messages.extend(self.get_messages())
+        messages.append(dict(role="user", content=self.question))
+        response = dict(messages=messages)
+        return response
+class SuggestRequest(BaseModel):
+    # Envelope holding a single SuggestPayload under the key "payload".
+    payload: SuggestPayload
+class CompletionRequest(BaseModel):
+    # Request body consisting of a list of message dicts.
+    messages: list[dict]
+# Output Types
+class Message(BaseModel):
+    # A single chat message made of a role and its text content.
+    # NOTE(review): `role` is annotated `str` but defaults to None — confirm
+    # whether it should be declared optional.
+    role: str = Field(default=None, title="One of [user|assistant|system]")
+    content: str
+class Choice(BaseModel):
+    # One entry of a response's `choices` list.
+    # NOTE(review): `message` is annotated `Message` but defaults to None —
+    # confirm whether it should be declared optional.
+    index: int
+    message: Message = Field(default=None, title="The generated SQL")
+    finish_reason: str
+class Usage(BaseModel):
+    # Token counts reported with a response.
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+class SqlResponse(BaseModel):
+    # Response data: identifiers, model name, the list of choices and token usage.
+    # NOTE(review): `prompt` is annotated `str` but defaults to None — confirm
+    # whether it should be declared optional.
+    id: str
+    object: str
+    created: int
+    model: str
+    choices: list[Choice]
+    usage: Usage
+    prompt: str = Field(default=None, title="The input question")
+class CompletionResponse(BaseModel):
+    # Wrapper pairing a status string with the SqlResponse payload.
+    status: str
+    data: SqlResponse

langchain_kinetica/sql_output.py ADDED Viewed

@@ -0,0 +1,45 @@
+##
+# Copyright (c) 2024, Chad Juliano, Kinetica DB Inc.
+##
+from typing import Any, List
+from pandas import DataFrame
+from langchain_core.output_parsers.transform import BaseOutputParser
+from langchain_core.outputs import Generation
+from langchain_core.pydantic_v1 import Field, BaseModel
+from gpudb import GPUdb
+class SqlResponse(BaseModel):
+    """ Response containing SQL and the fetched data """
+    # The SQL text and the DataFrame holding its result.
+    sql: str = Field(description="Result SQL")
+    dataframe: DataFrame = Field(description="Result Data")
+    class Config:
+        """Configuration for this pydantic object."""
+        # presumably required because DataFrame is not a pydantic-native type — TODO confirm
+        arbitrary_types_allowed = True
+class KineticaSqlOutputParser(BaseOutputParser[SqlResponse]):
+    """ Fetch and return data from the Kinetica LLM """
+    kdbc: GPUdb = Field(exclude=True)
+    """ Kinetica DB connection. """
+    class Config:
+        """Configuration for this pydantic object."""
+        arbitrary_types_allowed = True
+    def parse(self, text: str) -> SqlResponse:
+        df = self.kdbc.to_df(text)
+        return SqlResponse(sql=text, dataframe=df)
+    def parse_result(self, result: List[Generation], *, partial: bool = False) -> SqlResponse:
+        return self.parse(result[0].text)
+    @property
+    def _type(self) -> str:
+        return "kinetica_sql_output_parser"

langchain_kinetica-1.0.0.dist-info/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 Kinetica
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

langchain_kinetica-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,110 @@
+Metadata-Version: 2.1
+Name: langchain-kinetica
+Version: 1.0.0
+Summary: Kinetica interface for Langchain.
+Author-email: Chad Juliano <cjuliano@kinetica.com>
+License: MIT License
+        Copyright (c) 2024 Kinetica
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Project-URL: Homepage, https://kinetica.com
+Project-URL: Documentation, https://docs.kinetica.com/7.1/sql-gpt/
+Project-URL: Repository, https://github.com/kineticadb/langchain-kinetica
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: gpudb >=7.1.9.10
+Requires-Dist: langchain-core
+Requires-Dist: pandas
+# langchain-kinetica
+Kinetica interface for Langchain. See the [LLM documentation][LLM_DOCS] for an overview of the Kinetica LLM.
+[LLM_DOCS]: <https://docs.kinetica.com/7.1/sql-gpt/>
+- [1. Prerequisites](#1-prerequisites)
+- [2. Package Contents](#2-package-contents)
+- [3. Installation](#3-installation)
+- [4. Usage](#4-usage)
+- [5. Building](#5-building)
+- [6. See Also](#6-see-also)
+## 1. Prerequisites
+To use langchain with Kinetica you will need:
+* Python runtime >=3.10
+* Kinetica SqlAssist LLM
+* Kinetica instance >7.2.0 configured to use SqlAssist.
+## 2. Package Contents
+* `KineticaChatLLM`: ChatModel for converting natural language to SQL.
+* `KineticaSqlOutputParser`: OutputParser that will execute SQL from the `KineticaChatLLM`.
+* `SqlResponse`: If the Kinetica chain ends with `KineticaSqlOutputParser` then this response will contain the generated SQL and results from its execution.
+## 3. Installation
+This project is not yet available on pypi. You can install it directly from the repository.
+```sh
+$ pip install "langchain-kinetica @ git+ssh://git@github.com/kineticadb/langchain-kinetica.git"
+```
+## 4. Usage
+See the [Kinetica LLM Demo notebook](./notebooks/kinetica_llm_demo.ipynb) for examples.
+## 5. Building
+Install the project locally.
+```sh
+$ pip install --editable .
+```
+You will need to install the build utility.
+```sh
+$ pip install --upgrade build
+```
+Build the project
+```sh
+$ python3 -m build
+```
+The build will generate a `.whl` file that can be distributed.
+```sh
+$ ls -1 ./dist
+langchain-kinetica-1.0.tar.gz
+langchain_kinetica-1.0-py3-none-any.whl
+```
+## 6. See Also
+- [Kinetica LLM Documentation](https://docs.kinetica.com/7.1/sql-gpt/)
+- [LangChain Prompts](https://python.langchain.com/docs/modules/model_io/prompts/)
+- [LangChain Chat Models](https://python.langchain.com/docs/modules/model_io/chat/)

langchain_kinetica-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+langchain_kinetica/__init__.py,sha256=4HXjx2qFQpbaVpkkJ4FPHzkTQSFittpEwf5E-Z1AUho,182
+langchain_kinetica/llm_chat.py,sha256=X39-RT74seaecY6J_4GTxZoipttqgC3CP7K0Yko5x5I,6684
+langchain_kinetica/sa_datafile.py,sha256=48u7vQQyC4nXcTkh3Wp7YEf2vhdmHnx_t1ZRQokrSew,1948
+langchain_kinetica/sa_dto.py,sha256=N2aIRjJWDhGzQovbgyIZAv1LNCOwGksQoBdC6AXWKZI,3440
+langchain_kinetica/sql_output.py,sha256=49tVYqa2OncGU2LIXk1wpbAFkaipQve8rn1w4u2Tgso,1306
+langchain_kinetica-1.0.0.dist-info/LICENSE,sha256=VYHwkc_3acBxI-AvhEwSp5ve7kIZuvkcl8pQA93UunA,1065
+langchain_kinetica-1.0.0.dist-info/METADATA,sha256=KyBd01fWwlRcxJ5DTPNqJw_pilnWnKXRatswciitnVA,3665
+langchain_kinetica-1.0.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+langchain_kinetica-1.0.0.dist-info/top_level.txt,sha256=JXgMmx9S2IcZYAbH0sFz2asosN_NztlOS88TzHK5GV4,19
+langchain_kinetica-1.0.0.dist-info/RECORD,,

langchain_kinetica-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

langchain_kinetica-1.0.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ langchain_kinetica