dbgpt-ext 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. dbgpt_ext-0.7.0/.gitignore +10 -0
  2. dbgpt_ext-0.7.0/PKG-INFO +64 -0
  3. dbgpt_ext-0.7.0/README.md +3 -0
  4. dbgpt_ext-0.7.0/pyproject.toml +100 -0
  5. dbgpt_ext-0.7.0/src/dbgpt_ext/__init__.py +3 -0
  6. dbgpt_ext-0.7.0/src/dbgpt_ext/_version.py +1 -0
  7. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/__init__.py +17 -0
  8. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/conn_spark.py +175 -0
  9. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/conn_tugraph.py +268 -0
  10. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/nosql/__init__.py +1 -0
  11. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/__init__.py +1 -0
  12. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_clickhouse.py +474 -0
  13. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_doris.py +262 -0
  14. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_duckdb.py +133 -0
  15. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_hive.py +182 -0
  16. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_mssql.py +79 -0
  17. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_mysql.py +52 -0
  18. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_oceanbase.py +72 -0
  19. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_postgresql.py +338 -0
  20. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_sqlite.py +359 -0
  21. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_starrocks.py +212 -0
  22. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/conn_vertica.py +305 -0
  23. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/__init__.py +1 -0
  24. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/oceanbase/__init__.py +1 -0
  25. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/oceanbase/ob_dialect.py +138 -0
  26. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/__init__.py +15 -0
  27. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/sqlalchemy/__init__.py +23 -0
  28. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/sqlalchemy/datatype.py +128 -0
  29. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/starrocks/sqlalchemy/dialect.py +223 -0
  30. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/vertica/__init__.py +0 -0
  31. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/vertica/base.py +179 -0
  32. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/dialect/vertica/dialect_vertica_python.py +23 -0
  33. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/tests/__init__.py +0 -0
  34. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/tests/test_conn_duckdb.py +41 -0
  35. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/rdbms/tests/test_conn_sqlite.py +141 -0
  36. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/redis.py +10 -0
  37. dbgpt_ext-0.7.0/src/dbgpt_ext/datasource/schema.py +65 -0
  38. dbgpt_ext-0.7.0/src/dbgpt_ext/llms/__init__.py +0 -0
  39. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/__init__.py +5 -0
  40. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/__init__.py +16 -0
  41. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/base.py +86 -0
  42. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/bm25.py +244 -0
  43. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/db_schema.py +156 -0
  44. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/embedding.py +170 -0
  45. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/assembler/summary.py +132 -0
  46. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/chunk_manager.py +219 -0
  47. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/__init__.py +13 -0
  48. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/jina.py +156 -0
  49. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/ollama.py +163 -0
  50. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/qianfan.py +173 -0
  51. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/embeddings/tongyi.py +164 -0
  52. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/__init__.py +54 -0
  53. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/csv.py +104 -0
  54. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/datasource.py +99 -0
  55. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/docx.py +105 -0
  56. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/excel.py +114 -0
  57. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/factory.py +184 -0
  58. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/html.py +98 -0
  59. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/json.py +1 -0
  60. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/markdown.py +81 -0
  61. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/pdf.py +513 -0
  62. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/pptx.py +101 -0
  63. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/string.py +62 -0
  64. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/__init__.py +0 -0
  65. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_csv.py +32 -0
  66. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_docx.py +29 -0
  67. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_html.py +46 -0
  68. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_markdown.py +29 -0
  69. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_pdf.py +37 -0
  70. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/tests/test_txt.py +38 -0
  71. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/txt.py +84 -0
  72. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/knowledge/url.py +67 -0
  73. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/__init__.py +35 -0
  74. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/assembler.py +25 -0
  75. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/db_schema.py +94 -0
  76. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/embedding.py +194 -0
  77. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/full_text.py +75 -0
  78. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/knowledge.py +146 -0
  79. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/knowledge_graph.py +75 -0
  80. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/process_branch.py +193 -0
  81. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/schema_linking.py +52 -0
  82. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/summary.py +119 -0
  83. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/operators/vector_store.py +75 -0
  84. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/__init__.py +7 -0
  85. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/bm25.py +185 -0
  86. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/db_schema.py +258 -0
  87. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/__init__.py +1 -0
  88. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/base.py +17 -0
  89. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/document_graph_retriever.py +58 -0
  90. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/graph_retriever.py +217 -0
  91. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/keyword_based_graph_retriever.py +27 -0
  92. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/text_based_graph_retriever.py +52 -0
  93. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/graph_retriever/vector_based_graph_retriever.py +37 -0
  94. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/tests/__init__.py +0 -0
  95. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/tests/test_db_struct.py +66 -0
  96. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/retriever/tests/test_embedding.py +39 -0
  97. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/schemalinker/__init__.py +1 -0
  98. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/schemalinker/base_linker.py +68 -0
  99. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/schemalinker/schema_linking.py +92 -0
  100. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/__init__.py +7 -0
  101. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/gdbms_db_summary.py +134 -0
  102. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/rdbms_db_summary.py +307 -0
  103. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/tests/__init__.py +0 -0
  104. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/summary/tests/test_rdbms_summary.py +67 -0
  105. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/tests/__init__.py +0 -0
  106. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/tests/test_db_struct_assembler.py +112 -0
  107. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/tests/test_embedding_assembler.py +83 -0
  108. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/text_splitter/__init__.py +0 -0
  109. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/text_splitter/pre_text_splitter.py +44 -0
  110. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/text_splitter/token_splitter.py +186 -0
  111. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/__init__.py +1 -0
  112. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/community_summarizer.py +208 -0
  113. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/graph_embedder.py +67 -0
  114. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/graph_extractor.py +389 -0
  115. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/local_text2gql.py +70 -0
  116. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/text2gql.py +107 -0
  117. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/text_embedder.py +50 -0
  118. dbgpt_ext-0.7.0/src/dbgpt_ext/rag/transformer/triplet_extractor.py +72 -0
  119. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/__init__.py +158 -0
  120. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/__init__.py +0 -0
  121. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/oss/__init__.py +0 -0
  122. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/oss/config.py +102 -0
  123. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/oss/oss_storage.py +484 -0
  124. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/s3/__init__.py +0 -0
  125. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/s3/config.py +118 -0
  126. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/file/s3/s3_storage.py +589 -0
  127. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/full_text/__init__.py +1 -0
  128. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/full_text/elasticsearch.py +231 -0
  129. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/full_text/opensearch.py +64 -0
  130. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/__init__.py +41 -0
  131. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/factory.py +50 -0
  132. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/neo4j_store.py +21 -0
  133. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/graph_store/tugraph_store.py +230 -0
  134. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/__init__.py +1 -0
  135. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/__init__.py +1 -0
  136. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/base.py +304 -0
  137. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/community_metastore.py +73 -0
  138. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/community_store.py +104 -0
  139. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/factory.py +31 -0
  140. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/memgraph_store_adapter.py +224 -0
  141. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community/tugraph_store_adapter.py +1210 -0
  142. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/community_summary.py +633 -0
  143. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/knowledge_graph.py +322 -0
  144. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/knowledge_graph/open_spg.py +21 -0
  145. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/__init__.py +0 -0
  146. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/chroma_store.py +455 -0
  147. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/elastic_store.py +441 -0
  148. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/milvus_store.py +687 -0
  149. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/oceanbase_store.py +526 -0
  150. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/pgvector_store.py +156 -0
  151. dbgpt_ext-0.7.0/src/dbgpt_ext/storage/vector_store/weaviate_store.py +238 -0
  152. dbgpt_ext-0.7.0/tests/__init__.py +0 -0
@@ -0,0 +1,10 @@
1
+ # python generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # venv
10
+ .venv
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbgpt-ext
3
+ Version: 0.7.0
4
+ Summary: Add your description here
5
+ Project-URL: Homepage, https://github.com/eosphoros-ai/DB-GPT
6
+ Project-URL: Documentation, http://docs.dbgpt.cn/docs/overview
7
+ Project-URL: Repository, https://github.com/eosphoros-ai/DB-GPT.git
8
+ Project-URL: Issues, https://github.com/eosphoros-ai/DB-GPT/issues
9
+ Author-email: csunny <cfqcsunny@gmail.com>
10
+ License-Expression: MIT
11
+ Requires-Python: >=3.10
12
+ Requires-Dist: dbgpt>=0.7.0
13
+ Requires-Dist: pymysql
14
+ Provides-Extra: datasource-clickhouse
15
+ Requires-Dist: clickhouse-connect; extra == 'datasource-clickhouse'
16
+ Provides-Extra: datasource-duckdb
17
+ Requires-Dist: duckdb; extra == 'datasource-duckdb'
18
+ Requires-Dist: duckdb-engine==0.9.1; extra == 'datasource-duckdb'
19
+ Provides-Extra: datasource-hive
20
+ Requires-Dist: pyhive; extra == 'datasource-hive'
21
+ Requires-Dist: thrift; extra == 'datasource-hive'
22
+ Requires-Dist: thrift-sasl; extra == 'datasource-hive'
23
+ Provides-Extra: datasource-mssql
24
+ Requires-Dist: pymssql; extra == 'datasource-mssql'
25
+ Provides-Extra: datasource-mysql
26
+ Requires-Dist: mysqlclient==2.1.0; extra == 'datasource-mysql'
27
+ Provides-Extra: datasource-postgres
28
+ Requires-Dist: psycopg2-binary; extra == 'datasource-postgres'
29
+ Provides-Extra: datasource-spark
30
+ Requires-Dist: pyspark; extra == 'datasource-spark'
31
+ Provides-Extra: datasource-vertica
32
+ Requires-Dist: vertica-python; extra == 'datasource-vertica'
33
+ Provides-Extra: file-oss
34
+ Requires-Dist: oss2; extra == 'file-oss'
35
+ Provides-Extra: file-s3
36
+ Requires-Dist: boto3; extra == 'file-s3'
37
+ Provides-Extra: graph-rag
38
+ Requires-Dist: dbgpt-tugraph-plugins>=0.1.1; extra == 'graph-rag'
39
+ Requires-Dist: neo4j; extra == 'graph-rag'
40
+ Requires-Dist: networkx; extra == 'graph-rag'
41
+ Provides-Extra: rag
42
+ Requires-Dist: bs4; extra == 'rag'
43
+ Requires-Dist: markdown; extra == 'rag'
44
+ Requires-Dist: pdfplumber; extra == 'rag'
45
+ Requires-Dist: pypdf; extra == 'rag'
46
+ Requires-Dist: python-docx; extra == 'rag'
47
+ Requires-Dist: python-pptx; extra == 'rag'
48
+ Requires-Dist: spacy==3.7; extra == 'rag'
49
+ Provides-Extra: storage-chromadb
50
+ Requires-Dist: chromadb>=0.4.22; extra == 'storage-chromadb'
51
+ Requires-Dist: onnxruntime<=1.18.1,>=1.14.1; extra == 'storage-chromadb'
52
+ Provides-Extra: storage-elasticsearch
53
+ Requires-Dist: elasticsearch; extra == 'storage-elasticsearch'
54
+ Provides-Extra: storage-milvus
55
+ Requires-Dist: pymilvus; extra == 'storage-milvus'
56
+ Provides-Extra: storage-obvector
57
+ Requires-Dist: pyobvector; extra == 'storage-obvector'
58
+ Provides-Extra: storage-weaviate
59
+ Requires-Dist: weaviate-client; extra == 'storage-weaviate'
60
+ Description-Content-Type: text/markdown
61
+
62
+ # dbgpt-integration
63
+
64
+ Package that contains modules and utilities that can be used across packages and services.
@@ -0,0 +1,3 @@
1
+ # dbgpt-integration
2
+
3
+ Package that contains modules and utilities that can be used across packages and services.
@@ -0,0 +1,100 @@
1
+ [project]
2
+ name = "dbgpt-ext"
3
+ version = "0.7.0"
4
+ description = "Add your description here"
5
+ authors = [
6
+ { name = "csunny", email = "cfqcsunny@gmail.com" }
7
+ ]
8
+ license = "MIT"
9
+ readme = "README.md"
10
+ requires-python = ">= 3.10"
11
+
12
+ dependencies = [
13
+ "dbgpt>=0.7.0",
14
+ "pymysql",
15
+ ]
16
+
17
+ [project.urls]
18
+ Homepage = "https://github.com/eosphoros-ai/DB-GPT"
19
+ Documentation = "http://docs.dbgpt.cn/docs/overview"
20
+ Repository = "https://github.com/eosphoros-ai/DB-GPT.git"
21
+ Issues = "https://github.com/eosphoros-ai/DB-GPT/issues"
22
+
23
+ [build-system]
24
+ requires = ["hatchling"]
25
+ build-backend = "hatchling.build"
26
+
27
+ [project.optional-dependencies]
28
+ rag = [
29
+ "spacy==3.7",
30
+ "markdown",
31
+ "bs4",
32
+ "python-pptx",
33
+ "python-docx",
34
+ "pypdf",
35
+ "pdfplumber",
36
+ ]
37
+ graph_rag = [
38
+ # For visualization in code
39
+ "networkx",
40
+ "dbgpt-tugraph-plugins>=0.1.1",
41
+ "neo4j"
42
+ ]
43
+ datasource_mysql = [
44
+ # mysqlclient 2.2.x have pkg-config issue on 3.10+
45
+ # If you want to install psycopg2 and mysqlclient in ubuntu, you should install
46
+ # libpq-dev and libmysqlclient-dev first.
47
+ "mysqlclient==2.1.0",
48
+ ]
49
+ datasource_postgres = [
50
+ # "psycopg2", # In production, you can install psycopg2 instead of psycopg2-binary
51
+ "psycopg2-binary",
52
+ ]
53
+ datasource_clickhouse = [
54
+ "clickhouse-connect",
55
+ ]
56
+ datasource_spark = ["pyspark"]
57
+ datasource_mssql = ["pymssql"]
58
+ datasource_hive = [
59
+ "pyhive",
60
+ "thrift",
61
+ "thrift_sasl",
62
+ ]
63
+ datasource_vertica = ["vertica-python"]
64
+ datasource_duckdb = [
65
+ "duckdb",
66
+ "duckdb-engine==0.9.1",
67
+ ]
68
+ # datasource_doris = ["pydoris>=1.0.2,<2.0.0"]
69
+ storage_milvus = ["pymilvus"]
70
+ storage_weaviate = ["weaviate-client"]
71
+ storage_chromadb = [
72
+ "onnxruntime>=1.14.1,<=1.18.1",
73
+ "chromadb>=0.4.22"
74
+ ]
75
+ storage_elasticsearch = ["elasticsearch"]
76
+ storage_obvector = ["pyobvector"]
77
+
78
+ file_oss = [
79
+ "oss2" # Aliyun OSS
80
+ ]
81
+ file_s3 = [
82
+ "boto3"
83
+ ]
84
+
85
+ [tool.uv]
86
+ managed = true
87
+ dev-dependencies = [
88
+ "pytest>=8.3.4",
89
+ ]
90
+
91
+ [tool.hatch.build.targets.wheel]
92
+ packages = ["src/dbgpt_ext"]
93
+ exclude = [
94
+ "src/dbgpt_ext/**/tests",
95
+ "src/dbgpt_ext/**/tests/*",
96
+ "src/dbgpt_ext/tests",
97
+ "src/dbgpt_ext/tests/*",
98
+ "src/dbgpt_ext/**/examples",
99
+ "src/dbgpt_ext/**/examples/*"
100
+ ]
@@ -0,0 +1,3 @@
1
"""dbgpt-ext package root: re-export the package version."""

from ._version import version as __version__  # noqa: F401

# Use the canonical lowercase ``__all__``: the original ``__ALL__`` spelling
# is ignored by Python, so ``from dbgpt_ext import *`` exported nothing.
__all__ = ["__version__"]
@@ -0,0 +1 @@
1
# Single source of truth for the package version string; matches the
# ``version`` field declared in pyproject.toml.
version = "0.7.0"
@@ -0,0 +1,17 @@
1
"""Module to define the data source connectors."""

from typing import Any

from dbgpt.datasource.base import BaseConnector  # noqa: F401


def __getattr__(name: str) -> Any:
    """Lazily resolve connector classes on first attribute access (PEP 562).

    Defers the import of ``RDBMSConnector`` (and its SQLAlchemy machinery)
    until it is actually requested, keeping package import cheap.

    Raises:
        AttributeError: if ``name`` is not a known lazy attribute.
    """
    if name == "RDBMSConnector":
        from dbgpt.datasource.rdbms.base import RDBMSConnector  # noqa: F401

        return RDBMSConnector
    raise AttributeError(f"Could not find: {name} in datasource")


# Canonical lowercase ``__all__``: the original ``__ALL__`` spelling is
# ignored by Python, so the intended star-import export list had no effect.
__all__ = ["BaseConnector", "RDBMSConnector"]
@@ -0,0 +1,175 @@
1
+ """Spark Connector."""
2
+
3
+ import logging
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING, Any, Optional, Type
6
+
7
+ from dbgpt.core.awel.flow import (
8
+ TAGS_ORDER_HIGH,
9
+ ResourceCategory,
10
+ auto_register_resource,
11
+ )
12
+ from dbgpt.datasource.base import BaseConnector
13
+ from dbgpt.datasource.parameter import BaseDatasourceParameters
14
+ from dbgpt.util.i18n_utils import _
15
+
16
+ if TYPE_CHECKING:
17
+ from pyspark.sql import SparkSession
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@auto_register_resource(
    label=_("Apache Spark datasource"),
    category=ResourceCategory.DATABASE,
    tags={"order": TAGS_ORDER_HIGH},
    description=_("Unified engine for large-scale data analytics."),
)
@dataclass
class SparkParameters(BaseDatasourceParameters):
    """Spark connection parameters.

    Only a file path is required: :class:`SparkConnector` loads the file
    into a DataFrame and runs SQL over it via a temporary view.
    """

    __type__ = "spark"
    # Path of the data file to load (parquet/jdbc/orc/libsvm/csv/text/json —
    # see SparkConnector.create_df for format resolution).
    path: str = field(
        metadata={
            "help": _("The file path of the data source."),
        },
    )

    def create_connector(self) -> "SparkConnector":
        """Create Spark connector."""
        return SparkConnector.from_parameters(self)

    def db_url(self, ssl=False, charset=None):
        # Spark is not addressed by a SQLAlchemy-style URL; always raises.
        raise NotImplementedError("Spark does not support db_url")
45
+
46
class SparkConnector(BaseConnector):
    """Spark Connector.

    Spark Connect supports operating on a variety of data sources through the
    DataFrame interface.
    A DataFrame can be operated on using relational transformations and can
    also be used to create a temporary view. Registering a DataFrame as a
    temporary view allows you to run SQL queries over its data.

    Datasource now supports parquet, jdbc, orc, libsvm, csv, text, json.
    """

    # db type
    db_type: str = "spark"
    # db driver
    driver: str = "spark"
    # db dialect
    dialect: str = "sparksql"

    @classmethod
    def param_class(cls) -> Type[SparkParameters]:
        """Return the parameter class."""
        return SparkParameters

    @classmethod
    def from_parameters(cls, parameters: SparkParameters) -> "SparkConnector":
        """Create a new SparkConnector from parameters."""
        return cls(file_path=parameters.path)

    def __init__(
        self,
        file_path: str,
        spark_session: Optional["SparkSession"] = None,
        **kwargs: Any,
    ) -> None:
        """Create a Spark Connector.

        Args:
            file_path: path of the data file to load into a DataFrame
            spark_session: existing SparkSession to reuse; when omitted a
                session named ``dbgpt_spark`` is created (or reused)
            kwargs: other args, accepted for interface compatibility
        """
        from pyspark.sql import SparkSession

        self.spark_session = (
            spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
        )
        self.path = file_path
        # All SQL queries run against this temporary-view name.
        self.table_name = "temp"
        self.df = self.create_df(self.path)

    @classmethod
    def from_file_path(
        cls, file_path: str, engine_args: Optional[dict] = None, **kwargs: Any
    ) -> "SparkConnector":
        """Create a new SparkConnector from file path."""
        try:
            return cls(file_path=file_path, engine_args=engine_args, **kwargs)
        except Exception as e:
            # Lazy %-args instead of string concatenation (logging idiom).
            logger.error("load spark datasource error: %s", e)
            raise

    def create_df(self, path):
        """Create a Spark DataFrame from the datasource path.

        Supported formats: parquet, jdbc, orc, libsvm, csv, text, json.
        A ``.txt`` extension is mapped to Spark's ``text`` format.

        Returns:
            Spark DataFrame
        reference: https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html
        """
        extension = path.rsplit(".", 1)[-1]
        if extension == "txt":
            extension = "text"
        return self.spark_session.read.load(
            path, format=extension, inferSchema="true", header="true"
        )

    def run(self, sql: str, fetch: str = "all"):
        """Execute a SQL command against the temporary view.

        Returns:
            A list whose first element is the column-name header, followed
            by the result rows.
        """
        logger.info("spark sql to run is %s", sql)
        self.df.createOrReplaceTempView(self.table_name)
        df = self.spark_session.sql(sql)
        # Use df.columns for the header row: the previous df.first() call
        # returned None on an empty result set and crashed with
        # AttributeError when .asDict() was taken.
        rows = [df.columns]
        rows.extend(df.collect())
        return rows

    def query_ex(self, sql: str, timeout: Optional[float] = None):
        """Execute a SQL command and return ``(field_names, rows)``."""
        rows = self.run(sql)
        field_names = rows[0]
        return field_names, rows

    def get_indexes(self, table_name):
        """Get table indexes about specified table (not supported by Spark)."""
        return ""

    def get_show_create_table(self, table_name):
        """Get table show create table about specified table (not supported)."""
        return "ans"

    def get_fields(self, table_name: Optional[str] = None):
        """Get column meta about the loaded DataFrame.

        Args:
            table_name: accepted for interface compatibility; the single
                loaded DataFrame is always described. TODO: honor it once
                multiple tables are supported.
        """
        return ",".join(f"({name}: {dtype})" for name, dtype in self.df.dtypes)

    def get_collation(self):
        """Get collation."""
        return "UTF-8"

    def get_db_names(self):
        """Get database names."""
        return ["default"]

    def get_database_names(self):
        """Get database names."""
        return []

    def table_simple_info(self):
        """Get table simple info."""
        # Pass the view name explicitly: the original call passed no
        # argument while get_fields required one, raising TypeError.
        return f"{self.table_name}{self.get_fields(self.table_name)}"

    def get_table_comments(self, db_name):
        """Get table comments."""
        return ""
@@ -0,0 +1,268 @@
1
+ """TuGraph Connector."""
2
+
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, Generator, Iterator, List, Type, cast
6
+
7
+ from dbgpt.core.awel.flow import (
8
+ TAGS_ORDER_HIGH,
9
+ ResourceCategory,
10
+ auto_register_resource,
11
+ )
12
+ from dbgpt.datasource.base import BaseConnector
13
+ from dbgpt.datasource.parameter import BaseDatasourceParameters
14
+ from dbgpt.util.i18n_utils import _
15
+
16
+
17
@auto_register_resource(
    label=_("TuGraph datasource"),
    category=ResourceCategory.DATABASE,
    tags={"order": TAGS_ORDER_HIGH},
    description=_(
        "TuGraph is a high-performance graph database jointly developed by Ant Group "
        "and Tsinghua University."
    ),
)
@dataclass
class TuGraphParameters(BaseDatasourceParameters):
    """TuGraph connection parameters.

    Consumed by :class:`TuGraphConnector`, which connects over the Bolt
    protocol via the ``neo4j`` driver.
    """

    __type__ = "tugraph"

    host: str = field(metadata={"help": _("TuGraph server host")})
    user: str = field(metadata={"help": _("TuGraph server user")})
    # Supports ${env:...} interpolation; tagged "privacy" so UIs can mask it.
    password: str = field(
        default="${env:DBGPT_DB_PASSWORD}",
        metadata={
            "help": _(
                "Database password, you can write your password directly, of course, "
                "you can also use environment variables, such as "
                "${env:DBGPT_DB_PASSWORD}"
            ),
            "tags": "privacy",
        },
    )
    # 7687 is the standard Bolt port.
    port: int = field(
        default=7687, metadata={"help": _("TuGraph server port, default 7687")}
    )
    database: str = field(
        default="default", metadata={"help": _("Database name, default 'default'")}
    )

    def create_connector(self) -> "BaseConnector":
        """Create TuGraph connector."""
        return TuGraphConnector.from_parameters(self)

    def db_url(self, ssl=False, charset=None):
        """Get the database URL (not supported for TuGraph; always raises)."""
        raise NotImplementedError("TuGraph does not support db_url")
59
+
60
+
61
class TuGraphConnector(BaseConnector):
    """TuGraph connector.

    Wraps a ``neo4j`` Bolt driver: graph management (create/check/delete)
    runs against the built-in ``default`` database, while queries and schema
    inspection run against the graph passed at construction time.
    """

    db_type: str = "tugraph"
    driver: str = "bolt"
    dialect: str = "tugraph"

    def __init__(self, driver, graph):
        """Initialize the connector with a Neo4j driver.

        Args:
            driver: connected ``neo4j`` driver instance
            graph: name of the graph (database) queries run against
        """
        self._driver = driver
        self._schema = None
        self._graph = graph
        self._session = None
        self._is_closed = False

    def create_graph(self, graph_name: str) -> bool:
        """Create a new graph in the database if it doesn't already exist.

        Returns:
            bool: True if this call created the graph, False if it existed.
        """
        # NOTE(review): graph_name is interpolated into the Cypher text
        # below; do not pass untrusted input here.
        try:
            with self._driver.session(database="default") as session:
                graph_list = session.run("CALL dbms.graph.listGraphs()").data()
                exists = any(item["graph_name"] == graph_name for item in graph_list)
                if not exists:
                    session.run(
                        f"CALL dbms.graph.createGraph('{graph_name}', '', 2048)"
                    )
        except Exception as e:
            raise Exception(f"Failed to create graph '{graph_name}': {str(e)}") from e

        return not exists

    def is_exist(self, graph_name: str) -> bool:
        """Check whether the named graph exists in the database."""
        try:
            with self._driver.session(database="default") as session:
                graph_list = session.run("CALL dbms.graph.listGraphs()").data()
                exists = any(item["graph_name"] == graph_name for item in graph_list)
        except Exception as e:
            raise Exception(
                f"Failed to check graph exist'{graph_name}': {str(e)}"
            ) from e

        return exists

    def delete_graph(self, graph_name: str) -> None:
        """Delete a graph in the database if it exists."""
        with self._driver.session(database="default") as session:
            graph_list = session.run("CALL dbms.graph.listGraphs()").data()
            exists = any(item["graph_name"] == graph_name for item in graph_list)
            if exists:
                session.run(f"Call dbms.graph.deleteGraph('{graph_name}')")

    @classmethod
    def param_class(cls) -> Type[TuGraphParameters]:
        """Return the parameter class."""
        return TuGraphParameters

    @classmethod
    def from_parameters(cls, parameters: TuGraphParameters) -> "TuGraphConnector":
        """Create a new TuGraphConnector from parameters."""
        return cls.from_uri_db(
            parameters.host,
            parameters.port,
            parameters.user,
            parameters.password,
            parameters.database,
        )

    @classmethod
    def from_uri_db(
        cls, host: str, port: int, user: str, pwd: str, db_name: str
    ) -> "TuGraphConnector":
        """Create a new TuGraphConnector from host, port, user, pwd, db_name.

        Raises:
            ImportError: if the ``neo4j`` driver is not installed.
        """
        try:
            from neo4j import GraphDatabase

            # cls.driver is "bolt", so the URL is bolt://host:port.
            db_url = f"{cls.driver}://{host}:{str(port)}"
            driver = GraphDatabase.driver(db_url, auth=(user, pwd))
            # Fail fast on bad credentials / unreachable server.
            driver.verify_connectivity()
            return cast(TuGraphConnector, cls(driver=driver, graph=db_name))

        except ImportError as err:
            raise ImportError(
                "neo4j package is not installed, please install it with "
                "`pip install neo4j`"
            ) from err

    def get_system_info(self) -> Dict:
        """Get system info from the TuGraph.

        Returns:
            Dict: mapping of system-info name -> value as reported by
            ``dbms.system.info()``.
        """
        with self._driver.session(database="default") as session:
            system_info_list = session.run("CALL dbms.system.info()")
            system_info = {}
            for info in system_info_list:
                system_info[info["name"]] = info["value"]
            return system_info

    def get_table_names(self) -> Iterator[str]:
        """Get all table names from the TuGraph by Neo4j driver.

        Vertex labels come first, then edge labels.
        """
        with self._driver.session(database=self._graph) as session:
            # Run the query to get vertex labels
            raw_vertex_labels = session.run("CALL db.vertexLabels()").data()
            vertex_labels = [table_name["label"] for table_name in raw_vertex_labels]

            # Run the query to get edge labels
            raw_edge_labels = session.run("CALL db.edgeLabels()").data()
            edge_labels = [table_name["label"] for table_name in raw_edge_labels]

            return iter(vertex_labels + edge_labels)

    def get_grants(self):
        """Get grants (not applicable; always empty)."""
        return []

    def get_collation(self):
        """Get collation."""
        return "UTF-8"

    def get_charset(self):
        """Get character_set of current database."""
        return "UTF-8"

    def table_simple_info(self):
        """Get table simple info (not implemented; always empty)."""
        return []

    def close(self):
        """Close the Neo4j driver. Safe to call more than once."""
        if self._is_closed:
            return
        self._driver.close()
        self._is_closed = True

    def run(self, query: str, fetch: str = "all") -> List:
        """Run query and return the fully materialized result records."""
        with self._driver.session(database=self._graph) as session:
            try:
                result = session.run(query)
                return list(result)
            except Exception as e:
                raise Exception(f"Query execution failed: {e}\nQuery: {query}") from e

    def run_stream(self, query: str) -> Generator:
        """Run GQL, yielding records lazily.

        NOTE(review): the session closes when the generator is exhausted or
        garbage-collected; consume the stream promptly.
        """
        with self._driver.session(database=self._graph) as session:
            result = session.run(query)
            yield from result

    def get_columns(self, table_name: str, table_type: str = "vertex") -> List[Dict]:
        """Retrieve the column for a specified vertex or edge table in the graph db.

        This function queries the schema of a given table (vertex or edge) and returns
        detailed information about its columns (properties).

        Args:
            table_name (str): table name (graph name)
            table_type (str): table type (vertex or edge)

        Returns:
            columns: List[Dict], which contains name: str, type: str,
            default_expression: str, is_in_primary_key: bool, comment: str
            eg:[{'name': 'id', 'type': 'int', 'default_expression': '',
            'is_in_primary_key': True, 'comment': 'id'}, ...]
        """
        with self._driver.session(database=self._graph) as session:
            data = []
            result = None
            # NOTE(review): table_name is interpolated into the call text;
            # do not pass untrusted input here.
            if table_type == "vertex":
                result = session.run(f"CALL db.getVertexSchema('{table_name}')").data()
            else:
                result = session.run(f"CALL db.getEdgeSchema('{table_name}')").data()
            # The procedure returns the schema as a JSON string.
            schema_info = json.loads(result[0]["schema"])
            for prop in schema_info.get("properties", []):
                prop_dict = {
                    "name": prop["name"],
                    "type": prop["type"],
                    "default_expression": "",
                    # Only the schema's declared primary property is primary.
                    "is_in_primary_key": bool(
                        "primary" in schema_info
                        and prop["name"] == schema_info["primary"]
                    ),
                    # No comment metadata is available; reuse the name.
                    "comment": prop["name"],
                }
                data.append(prop_dict)
            return data

    def get_indexes(self, table_name: str, table_type: str = "vertex") -> List[Dict]:
        """Get table indexes about specified table.

        Args:
            table_name (str): table name
            table_type (str): 'vertex' | 'edge'
        Returns:
            List[Dict]:eg:[{'name': 'idx_key', 'column_names': ['id']}]
        """
        # [{'name':'id','column_names':['id']}]
        with self._driver.session(database=self._graph) as session:
            result = session.run(
                f"CALL db.listLabelIndexes('{table_name}','{table_type}')"
            ).data()
            transformed_data = []
            for item in result:
                # Each index record exposes the indexed field name.
                new_dict = {"name": item["field"], "column_names": [item["field"]]}
                transformed_data.append(new_dict)
            return transformed_data

    @classmethod
    def is_graph_type(cls) -> bool:
        """Return whether the connector is a graph database connector."""
        return True
@@ -0,0 +1 @@
1
+ """NoSQL data source package."""
@@ -0,0 +1 @@
1
+ """RDBMS Connector Module."""