dbgpt-ext 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. dbgpt_ext-0.7.0/.gitignore +10 -0
  2. dbgpt_ext-0.7.0/PKG-INFO +64 -0
  3. dbgpt_ext-0.7.0/README.md +3 -0
  4. dbgpt_ext-0.7.0/pyproject.toml +100 -0
  5. dbgpt_ext-0.7.0/src/dbgpt_ext/__init__.py +3 -0
  6. dbgpt_ext-0.7.0/src/dbgpt_ext/_version.py +1 -0
  7. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/__init__.py +17 -0
  8. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/conn_spark.py +175 -0
  9. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/conn_tugraph.py +268 -0
  10. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/nosql/__init__.py +1 -0
  11. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/__init__.py +1 -0
  12. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_clickhouse.py +474 -0
  13. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_doris.py +262 -0
  14. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_duckdb.py +133 -0
  15. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_hive.py +182 -0
  16. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_mssql.py +79 -0
  17. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_mysql.py +52 -0
  18. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_oceanbase.py +72 -0
  19. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_postgresql.py +338 -0
  20. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_sqlite.py +359 -0
  21. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_starrocks.py +212 -0
  22. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_vertica.py +305 -0
  23. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/__init__.py +1 -0
  24. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/oceanbase/__init__.py +1 -0
  25. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/oceanbase/ob_dialect.py +138 -0
  26. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/__init__.py +15 -0
  27. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/sqlalchemy/__init__.py +23 -0
  28. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/sqlalchemy/datatype.py +128 -0
  29. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/sqlalchemy/dialect.py +223 -0
  30. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/vertica/__init__.py +0 -0
  31. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/vertica/base.py +179 -0
  32. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/vertica/dialect_vertica_python.py +23 -0
  33. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/tests/__init__.py +0 -0
  34. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/tests/test_conn_duckdb.py +41 -0
  35. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/tests/test_conn_sqlite.py +141 -0
  36. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/redis.py +10 -0
  37. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/schema.py +65 -0
  38. dbgpt_ext-0.7.0/src/dbgpt_ext/llms/__init__.py +0 -0
  39. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/__init__.py +5 -0
  40. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/__init__.py +16 -0
  41. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/base.py +86 -0
  42. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/bm25.py +244 -0
  43. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/db_schema.py +156 -0
  44. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/embedding.py +170 -0
  45. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/summary.py +132 -0
  46. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/chunk_manager.py +219 -0
  47. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/__init__.py +13 -0
  48. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/jina.py +156 -0
  49. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/ollama.py +163 -0
  50. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/qianfan.py +173 -0
  51. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/tongyi.py +164 -0
  52. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/__init__.py +54 -0
  53. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/csv.py +104 -0
  54. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/datasource.py +99 -0
  55. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/docx.py +105 -0
  56. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/excel.py +114 -0
  57. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/factory.py +184 -0
  58. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/html.py +98 -0
  59. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/json.py +1 -0
  60. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/markdown.py +81 -0
  61. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/pdf.py +513 -0
  62. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/pptx.py +101 -0
  63. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/string.py +62 -0
  64. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/__init__.py +0 -0
  65. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_csv.py +32 -0
  66. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_docx.py +29 -0
  67. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_html.py +46 -0
  68. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_markdown.py +29 -0
  69. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_pdf.py +37 -0
  70. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_txt.py +38 -0
  71. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/txt.py +84 -0
  72. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/url.py +67 -0
  73. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/__init__.py +35 -0
  74. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/assembler.py +25 -0
  75. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/db_schema.py +94 -0
  76. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/embedding.py +194 -0
  77. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/full_text.py +75 -0
  78. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/knowledge.py +146 -0
  79. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/knowledge_graph.py +75 -0
  80. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/process_branch.py +193 -0
  81. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/schema_linking.py +52 -0
  82. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/summary.py +119 -0
  83. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/vector_store.py +75 -0
  84. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/__init__.py +7 -0
  85. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/bm25.py +185 -0
  86. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/db_schema.py +258 -0
  87. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/__init__.py +1 -0
  88. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/base.py +17 -0
  89. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/document_graph_retriever.py +58 -0
  90. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/graph_retriever.py +217 -0
  91. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/keyword_based_graph_retriever.py +27 -0
  92. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/text_based_graph_retriever.py +52 -0
  93. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/vector_based_graph_retriever.py +37 -0
  94. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/tests/__init__.py +0 -0
  95. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/tests/test_db_struct.py +66 -0
  96. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/tests/test_embedding.py +39 -0
  97. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/schemalinker/__init__.py +1 -0
  98. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/schemalinker/base_linker.py +68 -0
  99. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/schemalinker/schema_linking.py +92 -0
  100. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/__init__.py +7 -0
  101. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/gdbms_db_summary.py +134 -0
  102. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/rdbms_db_summary.py +307 -0
  103. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/tests/__init__.py +0 -0
  104. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/tests/test_rdbms_summary.py +67 -0
  105. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/tests/__init__.py +0 -0
  106. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/tests/test_db_struct_assembler.py +112 -0
  107. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/tests/test_embedding_assembler.py +83 -0
  108. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/text_splitter/__init__.py +0 -0
  109. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/text_splitter/pre_text_splitter.py +44 -0
  110. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/text_splitter/token_splitter.py +186 -0
  111. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/__init__.py +1 -0
  112. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/community_summarizer.py +208 -0
  113. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/graph_embedder.py +67 -0
  114. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/graph_extractor.py +389 -0
  115. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/local_text2gql.py +70 -0
  116. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/text2gql.py +107 -0
  117. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/text_embedder.py +50 -0
  118. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/triplet_extractor.py +72 -0
  119. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/__init__.py +158 -0
  120. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/__init__.py +0 -0
  121. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/oss/__init__.py +0 -0
  122. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/oss/config.py +102 -0
  123. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/oss/oss_storage.py +484 -0
  124. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/s3/__init__.py +0 -0
  125. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/s3/config.py +118 -0
  126. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/s3/s3_storage.py +589 -0
  127. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/full_text/__init__.py +1 -0
  128. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/full_text/elasticsearch.py +231 -0
  129. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/full_text/opensearch.py +64 -0
  130. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/__init__.py +41 -0
  131. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/factory.py +50 -0
  132. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/neo4j_store.py +21 -0
  133. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/tugraph_store.py +230 -0
  134. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/__init__.py +1 -0
  135. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/__init__.py +1 -0
  136. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/base.py +304 -0
  137. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/community_metastore.py +73 -0
  138. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/community_store.py +104 -0
  139. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/factory.py +31 -0
  140. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/memgraph_store_adapter.py +224 -0
  141. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/tugraph_store_adapter.py +1210 -0
  142. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community_summary.py +633 -0
  143. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/knowledge_graph.py +322 -0
  144. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/open_spg.py +21 -0
  145. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/__init__.py +0 -0
  146. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/chroma_store.py +455 -0
  147. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/elastic_store.py +441 -0
  148. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/milvus_store.py +687 -0
  149. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/oceanbase_store.py +526 -0
  150. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/pgvector_store.py +156 -0
  151. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/weaviate_store.py +238 -0
  152. dbgpt_ext-0.7.0/tests/__init__.py +0 -0
@@ -0,0 +1,10 @@
1
+ # python generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # venv
10
+ .venv
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbgpt-ext
3
+ Version: 0.7.0
4
+ Summary: Add your description here
5
+ Project-URL: Homepage, https://github.com/eosphoros-ai/DB-GPT
6
+ Project-URL: Documentation, http://docs.dbgpt.cn/docs/overview
7
+ Project-URL: Repository, https://github.com/eosphoros-ai/DB-GPT.git
8
+ Project-URL: Issues, https://github.com/eosphoros-ai/DB-GPT/issues
9
+ Author-email: csunny <cfqcsunny@gmail.com>
10
+ License-Expression: MIT
11
+ Requires-Python: >=3.10
12
+ Requires-Dist: dbgpt>=0.7.0
13
+ Requires-Dist: pymysql
14
+ Provides-Extra: datasource-clickhouse
15
+ Requires-Dist: clickhouse-connect; extra == 'datasource-clickhouse'
16
+ Provides-Extra: datasource-duckdb
17
+ Requires-Dist: duckdb; extra == 'datasource-duckdb'
18
+ Requires-Dist: duckdb-engine==0.9.1; extra == 'datasource-duckdb'
19
+ Provides-Extra: datasource-hive
20
+ Requires-Dist: pyhive; extra == 'datasource-hive'
21
+ Requires-Dist: thrift; extra == 'datasource-hive'
22
+ Requires-Dist: thrift-sasl; extra == 'datasource-hive'
23
+ Provides-Extra: datasource-mssql
24
+ Requires-Dist: pymssql; extra == 'datasource-mssql'
25
+ Provides-Extra: datasource-mysql
26
+ Requires-Dist: mysqlclient==2.1.0; extra == 'datasource-mysql'
27
+ Provides-Extra: datasource-postgres
28
+ Requires-Dist: psycopg2-binary; extra == 'datasource-postgres'
29
+ Provides-Extra: datasource-spark
30
+ Requires-Dist: pyspark; extra == 'datasource-spark'
31
+ Provides-Extra: datasource-vertica
32
+ Requires-Dist: vertica-python; extra == 'datasource-vertica'
33
+ Provides-Extra: file-oss
34
+ Requires-Dist: oss2; extra == 'file-oss'
35
+ Provides-Extra: file-s3
36
+ Requires-Dist: boto3; extra == 'file-s3'
37
+ Provides-Extra: graph-rag
38
+ Requires-Dist: dbgpt-tugraph-plugins>=0.1.1; extra == 'graph-rag'
39
+ Requires-Dist: neo4j; extra == 'graph-rag'
40
+ Requires-Dist: networkx; extra == 'graph-rag'
41
+ Provides-Extra: rag
42
+ Requires-Dist: bs4; extra == 'rag'
43
+ Requires-Dist: markdown; extra == 'rag'
44
+ Requires-Dist: pdfplumber; extra == 'rag'
45
+ Requires-Dist: pypdf; extra == 'rag'
46
+ Requires-Dist: python-docx; extra == 'rag'
47
+ Requires-Dist: python-pptx; extra == 'rag'
48
+ Requires-Dist: spacy==3.7; extra == 'rag'
49
+ Provides-Extra: storage-chromadb
50
+ Requires-Dist: chromadb>=0.4.22; extra == 'storage-chromadb'
51
+ Requires-Dist: onnxruntime<=1.18.1,>=1.14.1; extra == 'storage-chromadb'
52
+ Provides-Extra: storage-elasticsearch
53
+ Requires-Dist: elasticsearch; extra == 'storage-elasticsearch'
54
+ Provides-Extra: storage-milvus
55
+ Requires-Dist: pymilvus; extra == 'storage-milvus'
56
+ Provides-Extra: storage-obvector
57
+ Requires-Dist: pyobvector; extra == 'storage-obvector'
58
+ Provides-Extra: storage-weaviate
59
+ Requires-Dist: weaviate-client; extra == 'storage-weaviate'
60
+ Description-Content-Type: text/markdown
61
+
62
+ # dbgpt-integration
63
+
64
+ Package that contains modules and utilities that can be used across packages and services.
@@ -0,0 +1,3 @@
1
+ # dbgpt-integration
2
+
3
+ Package that contains modules and utilities that can be used across packages and services.
@@ -0,0 +1,100 @@
1
+ [project]
2
+ name = "dbgpt-ext"
3
+ version = "0.7.0"
4
+ description = "Add your description here"
5
+ authors = [
6
+ { name = "csunny", email = "cfqcsunny@gmail.com" }
7
+ ]
8
+ license = "MIT"
9
+ readme = "README.md"
10
+ requires-python = ">= 3.10"
11
+
12
+ dependencies = [
13
+ "dbgpt>=0.7.0",
14
+ "pymysql",
15
+ ]
16
+
17
+ [project.urls]
18
+ Homepage = "https://github.com/eosphoros-ai/DB-GPT"
19
+ Documentation = "http://docs.dbgpt.cn/docs/overview"
20
+ Repository = "https://github.com/eosphoros-ai/DB-GPT.git"
21
+ Issues = "https://github.com/eosphoros-ai/DB-GPT/issues"
22
+
23
+ [build-system]
24
+ requires = ["hatchling"]
25
+ build-backend = "hatchling.build"
26
+
27
+ [project.optional-dependencies]
28
+ rag = [
29
+ "spacy==3.7",
30
+ "markdown",
31
+ "bs4",
32
+ "python-pptx",
33
+ "python-docx",
34
+ "pypdf",
35
+ "pdfplumber",
36
+ ]
37
+ graph_rag = [
38
+ # For visualization in code
39
+ "networkx",
40
+ "dbgpt-tugraph-plugins>=0.1.1",
41
+ "neo4j"
42
+ ]
43
+ datasource_mysql = [
44
+ # mysqlclient 2.2.x have pkg-config issue on 3.10+
45
+ # If you want to install psycopg2 and mysqlclient in ubuntu, you should install
46
+ # libpq-dev and libmysqlclient-dev first.
47
+ "mysqlclient==2.1.0",
48
+ ]
49
+ datasource_postgres = [
50
+ # "psycopg2", # In production, you can install psycopg2 instead of psycopg2-binary
51
+ "psycopg2-binary",
52
+ ]
53
+ datasource_clickhouse = [
54
+ "clickhouse-connect",
55
+ ]
56
+ datasource_spark = ["pyspark"]
57
+ datasource_mssql = ["pymssql"]
58
+ datasource_hive = [
59
+ "pyhive",
60
+ "thrift",
61
+ "thrift_sasl",
62
+ ]
63
+ datasource_vertica = ["vertica-python"]
64
+ datasource_duckdb = [
65
+ "duckdb",
66
+ "duckdb-engine==0.9.1",
67
+ ]
68
+ # datasource_doris = ["pydoris>=1.0.2,<2.0.0"]
69
+ storage_milvus = ["pymilvus"]
70
+ storage_weaviate = ["weaviate-client"]
71
+ storage_chromadb = [
72
+ "onnxruntime>=1.14.1,<=1.18.1",
73
+ "chromadb>=0.4.22"
74
+ ]
75
+ storage_elasticsearch = ["elasticsearch"]
76
+ storage_obvector = ["pyobvector"]
77
+
78
+ file_oss = [
79
+ "oss2" # Aliyun OSS
80
+ ]
81
+ file_s3 = [
82
+ "boto3"
83
+ ]
84
+
85
+ [tool.uv]
86
+ managed = true
87
+ dev-dependencies = [
88
+ "pytest>=8.3.4",
89
+ ]
90
+
91
+ [tool.hatch.build.targets.wheel]
92
+ packages = ["src/dbgpt_ext"]
93
+ exclude = [
94
+ "src/dbgpt_ext/**/tests",
95
+ "src/dbgpt_ext/**/tests/*",
96
+ "src/dbgpt_ext/tests",
97
+ "src/dbgpt_ext/tests/*",
98
+ "src/dbgpt_ext/**/examples",
99
+ "src/dbgpt_ext/**/examples/*"
100
+ ]
@@ -0,0 +1,3 @@
1
"""dbgpt-ext package root: re-export the package version."""

from ._version import version as __version__  # noqa: F401

# Use the canonical lowercase ``__all__``: the original ``__ALL__`` spelling
# is ignored by Python, so ``from dbgpt_ext import *`` exported nothing.
__all__ = ["__version__"]
@@ -0,0 +1 @@
1
# Single source of truth for the package version string; matches the
# ``version`` field declared in pyproject.toml.
version = "0.7.0"
@@ -0,0 +1,17 @@
1
"""Module to define the data source connectors."""

from typing import Any

from dbgpt.datasource.base import BaseConnector  # noqa: F401


def __getattr__(name: str) -> Any:
    """Lazily resolve connector classes on first attribute access (PEP 562).

    Defers the import of ``RDBMSConnector`` (and its SQLAlchemy machinery)
    until it is actually requested, keeping package import cheap.

    Raises:
        AttributeError: if ``name`` is not a known lazy attribute.
    """
    if name == "RDBMSConnector":
        from dbgpt.datasource.rdbms.base import RDBMSConnector  # noqa: F401

        return RDBMSConnector
    raise AttributeError(f"Could not find: {name} in datasource")


# Canonical lowercase ``__all__``: the original ``__ALL__`` spelling is
# ignored by Python, so the intended star-import export list had no effect.
__all__ = ["BaseConnector", "RDBMSConnector"]
@@ -0,0 +1,175 @@
1
+ """Spark Connector."""
2
+
3
+ import logging
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING, Any, Optional, Type
6
+
7
+ from dbgpt.core.awel.flow import (
8
+ TAGS_ORDER_HIGH,
9
+ ResourceCategory,
10
+ auto_register_resource,
11
+ )
12
+ from dbgpt.datasource.base import BaseConnector
13
+ from dbgpt.datasource.parameter import BaseDatasourceParameters
14
+ from dbgpt.util.i18n_utils import _
15
+
16
+ if TYPE_CHECKING:
17
+ from pyspark.sql import SparkSession
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@auto_register_resource(
    label=_("Apache Spark datasource"),
    category=ResourceCategory.DATABASE,
    tags={"order": TAGS_ORDER_HIGH},
    description=_("Unified engine for large-scale data analytics."),
)
@dataclass
class SparkParameters(BaseDatasourceParameters):
    """Spark connection parameters.

    Only a file path is required: :class:`SparkConnector` loads the file
    into a DataFrame and runs SQL over it via a temporary view.
    """

    __type__ = "spark"
    # Path of the data file to load (parquet/jdbc/orc/libsvm/csv/text/json —
    # see SparkConnector.create_df for format resolution).
    path: str = field(
        metadata={
            "help": _("The file path of the data source."),
        },
    )

    def create_connector(self) -> "SparkConnector":
        """Create Spark connector."""
        return SparkConnector.from_parameters(self)

    def db_url(self, ssl=False, charset=None):
        # Spark is not addressed by a SQLAlchemy-style URL; always raises.
        raise NotImplementedError("Spark does not support db_url")
45
+
46
class SparkConnector(BaseConnector):
    """Spark Connector.

    Spark Connect supports operating on a variety of data sources through the
    DataFrame interface.
    A DataFrame can be operated on using relational transformations and can
    also be used to create a temporary view. Registering a DataFrame as a
    temporary view allows you to run SQL queries over its data.

    Datasource now supports parquet, jdbc, orc, libsvm, csv, text, json.
    """

    # db type
    db_type: str = "spark"
    # db driver
    driver: str = "spark"
    # db dialect
    dialect: str = "sparksql"

    @classmethod
    def param_class(cls) -> Type[SparkParameters]:
        """Return the parameter class."""
        return SparkParameters

    @classmethod
    def from_parameters(cls, parameters: SparkParameters) -> "SparkConnector":
        """Create a new SparkConnector from parameters."""
        return cls(file_path=parameters.path)

    def __init__(
        self,
        file_path: str,
        spark_session: Optional["SparkSession"] = None,
        **kwargs: Any,
    ) -> None:
        """Create a Spark Connector.

        Args:
            file_path: path of the data file to load into a DataFrame
            spark_session: existing SparkSession to reuse; when omitted a
                session named ``dbgpt_spark`` is created (or reused)
            kwargs: other args, accepted for interface compatibility
        """
        from pyspark.sql import SparkSession

        self.spark_session = (
            spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
        )
        self.path = file_path
        # All SQL queries run against this temporary-view name.
        self.table_name = "temp"
        self.df = self.create_df(self.path)

    @classmethod
    def from_file_path(
        cls, file_path: str, engine_args: Optional[dict] = None, **kwargs: Any
    ) -> "SparkConnector":
        """Create a new SparkConnector from file path."""
        try:
            return cls(file_path=file_path, engine_args=engine_args, **kwargs)
        except Exception as e:
            # Lazy %-args instead of string concatenation (logging idiom).
            logger.error("load spark datasource error: %s", e)
            raise

    def create_df(self, path):
        """Create a Spark DataFrame from the datasource path.

        Supported formats: parquet, jdbc, orc, libsvm, csv, text, json.
        A ``.txt`` extension is mapped to Spark's ``text`` format.

        Returns:
            Spark DataFrame
        reference: https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html
        """
        extension = path.rsplit(".", 1)[-1]
        if extension == "txt":
            extension = "text"
        return self.spark_session.read.load(
            path, format=extension, inferSchema="true", header="true"
        )

    def run(self, sql: str, fetch: str = "all"):
        """Execute a SQL command against the temporary view.

        Returns:
            A list whose first element is the column-name header, followed
            by the result rows.
        """
        logger.info("spark sql to run is %s", sql)
        self.df.createOrReplaceTempView(self.table_name)
        df = self.spark_session.sql(sql)
        # Use df.columns for the header row: the previous df.first() call
        # returned None on an empty result set and crashed with
        # AttributeError when .asDict() was taken.
        rows = [df.columns]
        rows.extend(df.collect())
        return rows

    def query_ex(self, sql: str, timeout: Optional[float] = None):
        """Execute a SQL command and return ``(field_names, rows)``."""
        rows = self.run(sql)
        field_names = rows[0]
        return field_names, rows

    def get_indexes(self, table_name):
        """Get table indexes about specified table (not supported by Spark)."""
        return ""

    def get_show_create_table(self, table_name):
        """Get table show create table about specified table (not supported)."""
        return "ans"

    def get_fields(self, table_name: Optional[str] = None):
        """Get column meta about the loaded DataFrame.

        Args:
            table_name: accepted for interface compatibility; the single
                loaded DataFrame is always described. TODO: honor it once
                multiple tables are supported.
        """
        return ",".join(f"({name}: {dtype})" for name, dtype in self.df.dtypes)

    def get_collation(self):
        """Get collation."""
        return "UTF-8"

    def get_db_names(self):
        """Get database names."""
        return ["default"]

    def get_database_names(self):
        """Get database names."""
        return []

    def table_simple_info(self):
        """Get table simple info."""
        # Pass the view name explicitly: the original call passed no
        # argument while get_fields required one, raising TypeError.
        return f"{self.table_name}{self.get_fields(self.table_name)}"

    def get_table_comments(self, db_name):
        """Get table comments."""
        return ""
@@ -0,0 +1,268 @@
1
+ """TuGraph Connector."""
2
+
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, Generator, Iterator, List, Type, cast
6
+
7
+ from dbgpt.core.awel.flow import (
8
+ TAGS_ORDER_HIGH,
9
+ ResourceCategory,
10
+ auto_register_resource,
11
+ )
12
+ from dbgpt.datasource.base import BaseConnector
13
+ from dbgpt.datasource.parameter import BaseDatasourceParameters
14
+ from dbgpt.util.i18n_utils import _
15
+
16
+
17
@auto_register_resource(
    label=_("TuGraph datasource"),
    category=ResourceCategory.DATABASE,
    tags={"order": TAGS_ORDER_HIGH},
    description=_(
        "TuGraph is a high-performance graph database jointly developed by Ant Group "
        "and Tsinghua University."
    ),
)
@dataclass
class TuGraphParameters(BaseDatasourceParameters):
    """TuGraph connection parameters.

    Consumed by :class:`TuGraphConnector`, which connects over the Bolt
    protocol via the ``neo4j`` driver.
    """

    __type__ = "tugraph"

    host: str = field(metadata={"help": _("TuGraph server host")})
    user: str = field(metadata={"help": _("TuGraph server user")})
    # Supports ${env:...} interpolation; tagged "privacy" so UIs can mask it.
    password: str = field(
        default="${env:DBGPT_DB_PASSWORD}",
        metadata={
            "help": _(
                "Database password, you can write your password directly, of course, "
                "you can also use environment variables, such as "
                "${env:DBGPT_DB_PASSWORD}"
            ),
            "tags": "privacy",
        },
    )
    # 7687 is the standard Bolt port.
    port: int = field(
        default=7687, metadata={"help": _("TuGraph server port, default 7687")}
    )
    database: str = field(
        default="default", metadata={"help": _("Database name, default 'default'")}
    )

    def create_connector(self) -> "BaseConnector":
        """Create TuGraph connector."""
        return TuGraphConnector.from_parameters(self)

    def db_url(self, ssl=False, charset=None):
        """Get the database URL (not supported for TuGraph; always raises)."""
        raise NotImplementedError("TuGraph does not support db_url")
59
+
60
+
61
class TuGraphConnector(BaseConnector):
    """TuGraph connector.

    Wraps a ``neo4j`` Bolt driver: graph management (create/check/delete)
    runs against the built-in ``default`` database, while queries and schema
    inspection run against the graph passed at construction time.
    """

    db_type: str = "tugraph"
    driver: str = "bolt"
    dialect: str = "tugraph"

    def __init__(self, driver, graph):
        """Initialize the connector with a Neo4j driver.

        Args:
            driver: connected ``neo4j`` driver instance
            graph: name of the graph (database) queries run against
        """
        self._driver = driver
        self._schema = None
        self._graph = graph
        self._session = None
        self._is_closed = False

    def create_graph(self, graph_name: str) -> bool:
        """Create a new graph in the database if it doesn't already exist.

        Returns:
            bool: True if this call created the graph, False if it existed.
        """
        # NOTE(review): graph_name is interpolated into the Cypher text
        # below; do not pass untrusted input here.
        try:
            with self._driver.session(database="default") as session:
                graph_list = session.run("CALL dbms.graph.listGraphs()").data()
                exists = any(item["graph_name"] == graph_name for item in graph_list)
                if not exists:
                    session.run(
                        f"CALL dbms.graph.createGraph('{graph_name}', '', 2048)"
                    )
        except Exception as e:
            raise Exception(f"Failed to create graph '{graph_name}': {str(e)}") from e

        return not exists

    def is_exist(self, graph_name: str) -> bool:
        """Check whether the named graph exists in the database."""
        try:
            with self._driver.session(database="default") as session:
                graph_list = session.run("CALL dbms.graph.listGraphs()").data()
                exists = any(item["graph_name"] == graph_name for item in graph_list)
        except Exception as e:
            raise Exception(
                f"Failed to check graph exist'{graph_name}': {str(e)}"
            ) from e

        return exists

    def delete_graph(self, graph_name: str) -> None:
        """Delete a graph in the database if it exists."""
        with self._driver.session(database="default") as session:
            graph_list = session.run("CALL dbms.graph.listGraphs()").data()
            exists = any(item["graph_name"] == graph_name for item in graph_list)
            if exists:
                session.run(f"Call dbms.graph.deleteGraph('{graph_name}')")

    @classmethod
    def param_class(cls) -> Type[TuGraphParameters]:
        """Return the parameter class."""
        return TuGraphParameters

    @classmethod
    def from_parameters(cls, parameters: TuGraphParameters) -> "TuGraphConnector":
        """Create a new TuGraphConnector from parameters."""
        return cls.from_uri_db(
            parameters.host,
            parameters.port,
            parameters.user,
            parameters.password,
            parameters.database,
        )

    @classmethod
    def from_uri_db(
        cls, host: str, port: int, user: str, pwd: str, db_name: str
    ) -> "TuGraphConnector":
        """Create a new TuGraphConnector from host, port, user, pwd, db_name.

        Raises:
            ImportError: if the ``neo4j`` driver is not installed.
        """
        try:
            from neo4j import GraphDatabase

            # cls.driver is "bolt", so the URL is bolt://host:port.
            db_url = f"{cls.driver}://{host}:{str(port)}"
            driver = GraphDatabase.driver(db_url, auth=(user, pwd))
            # Fail fast on bad credentials / unreachable server.
            driver.verify_connectivity()
            return cast(TuGraphConnector, cls(driver=driver, graph=db_name))

        except ImportError as err:
            raise ImportError(
                "neo4j package is not installed, please install it with "
                "`pip install neo4j`"
            ) from err

    def get_system_info(self) -> Dict:
        """Get system info from the TuGraph.

        Returns:
            Dict: mapping of system-info name -> value as reported by
            ``dbms.system.info()``.
        """
        with self._driver.session(database="default") as session:
            system_info_list = session.run("CALL dbms.system.info()")
            system_info = {}
            for info in system_info_list:
                system_info[info["name"]] = info["value"]
            return system_info

    def get_table_names(self) -> Iterator[str]:
        """Get all table names from the TuGraph by Neo4j driver.

        Vertex labels come first, then edge labels.
        """
        with self._driver.session(database=self._graph) as session:
            # Run the query to get vertex labels
            raw_vertex_labels = session.run("CALL db.vertexLabels()").data()
            vertex_labels = [table_name["label"] for table_name in raw_vertex_labels]

            # Run the query to get edge labels
            raw_edge_labels = session.run("CALL db.edgeLabels()").data()
            edge_labels = [table_name["label"] for table_name in raw_edge_labels]

            return iter(vertex_labels + edge_labels)

    def get_grants(self):
        """Get grants (not applicable; always empty)."""
        return []

    def get_collation(self):
        """Get collation."""
        return "UTF-8"

    def get_charset(self):
        """Get character_set of current database."""
        return "UTF-8"

    def table_simple_info(self):
        """Get table simple info (not implemented; always empty)."""
        return []

    def close(self):
        """Close the Neo4j driver. Safe to call more than once."""
        if self._is_closed:
            return
        self._driver.close()
        self._is_closed = True

    def run(self, query: str, fetch: str = "all") -> List:
        """Run query and return the fully materialized result records."""
        with self._driver.session(database=self._graph) as session:
            try:
                result = session.run(query)
                return list(result)
            except Exception as e:
                raise Exception(f"Query execution failed: {e}\nQuery: {query}") from e

    def run_stream(self, query: str) -> Generator:
        """Run GQL, yielding records lazily.

        NOTE(review): the session closes when the generator is exhausted or
        garbage-collected; consume the stream promptly.
        """
        with self._driver.session(database=self._graph) as session:
            result = session.run(query)
            yield from result

    def get_columns(self, table_name: str, table_type: str = "vertex") -> List[Dict]:
        """Retrieve the column for a specified vertex or edge table in the graph db.

        This function queries the schema of a given table (vertex or edge) and returns
        detailed information about its columns (properties).

        Args:
            table_name (str): table name (graph name)
            table_type (str): table type (vertex or edge)

        Returns:
            columns: List[Dict], which contains name: str, type: str,
            default_expression: str, is_in_primary_key: bool, comment: str
            eg:[{'name': 'id', 'type': 'int', 'default_expression': '',
            'is_in_primary_key': True, 'comment': 'id'}, ...]
        """
        with self._driver.session(database=self._graph) as session:
            data = []
            result = None
            # NOTE(review): table_name is interpolated into the call text;
            # do not pass untrusted input here.
            if table_type == "vertex":
                result = session.run(f"CALL db.getVertexSchema('{table_name}')").data()
            else:
                result = session.run(f"CALL db.getEdgeSchema('{table_name}')").data()
            # The procedure returns the schema as a JSON string.
            schema_info = json.loads(result[0]["schema"])
            for prop in schema_info.get("properties", []):
                prop_dict = {
                    "name": prop["name"],
                    "type": prop["type"],
                    "default_expression": "",
                    # Only the schema's declared primary property is primary.
                    "is_in_primary_key": bool(
                        "primary" in schema_info
                        and prop["name"] == schema_info["primary"]
                    ),
                    # No comment metadata is available; reuse the name.
                    "comment": prop["name"],
                }
                data.append(prop_dict)
            return data

    def get_indexes(self, table_name: str, table_type: str = "vertex") -> List[Dict]:
        """Get table indexes about specified table.

        Args:
            table_name (str): table name
            table_type (str): 'vertex' | 'edge'
        Returns:
            List[Dict]:eg:[{'name': 'idx_key', 'column_names': ['id']}]
        """
        # [{'name':'id','column_names':['id']}]
        with self._driver.session(database=self._graph) as session:
            result = session.run(
                f"CALL db.listLabelIndexes('{table_name}','{table_type}')"
            ).data()
            transformed_data = []
            for item in result:
                # Each index record exposes the indexed field name.
                new_dict = {"name": item["field"], "column_names": [item["field"]]}
                transformed_data.append(new_dict)
            return transformed_data

    @classmethod
    def is_graph_type(cls) -> bool:
        """Return whether the connector is a graph database connector."""
        return True
@@ -0,0 +1 @@
1
+ """NoSQL data source package."""
@@ -0,0 +1 @@
1
+ """RDBMS Connector Module."""