hotdata-langchain 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""LangChain tools for Hotdata runtime."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
# Look up the installed distribution's version once at import time. When the
# distribution metadata cannot be found, fall back to a placeholder string so
# that ``__version__`` is always defined.
try:
    _resolved_version = version("hotdata-langchain")
except PackageNotFoundError:
    _resolved_version = "0.0.0+unknown"
__version__ = _resolved_version
|
|
9
|
+
|
|
10
|
+
from hotdata_runtime import HotdataClient, QueryResult, from_env
|
|
11
|
+
|
|
12
|
+
from hotdata_langchain.databases import (
|
|
13
|
+
create_managed_database,
|
|
14
|
+
list_managed_databases_json,
|
|
15
|
+
load_managed_table,
|
|
16
|
+
load_result_summary,
|
|
17
|
+
managed_database_summary,
|
|
18
|
+
)
|
|
19
|
+
from hotdata_langchain.tools import (
|
|
20
|
+
execute_sql_json,
|
|
21
|
+
make_hotdata_tools,
|
|
22
|
+
result_rows_for_llm,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Names exported by ``from hotdata_langchain import *``: the objects re-exported
# from ``hotdata_runtime``, the package version string, and the helper functions
# imported from the ``databases`` and ``tools`` submodules.
__all__ = [
    "HotdataClient",
    "QueryResult",
    "__version__",
    "create_managed_database",
    "execute_sql_json",
    "from_env",
    "list_managed_databases_json",
    "load_managed_table",
    "load_result_summary",
    "make_hotdata_tools",
    "managed_database_summary",
    "result_rows_for_llm",
]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Managed database helpers for LangChain agents."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from hotdata_runtime import (
|
|
9
|
+
DEFAULT_SCHEMA,
|
|
10
|
+
HotdataClient,
|
|
11
|
+
LoadManagedTableResult,
|
|
12
|
+
ManagedDatabase,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def list_managed_databases_json(client: HotdataClient) -> str:
    """Serialize the client's managed databases as a JSON array.

    Each array element carries the database ``description`` and ``id`` plus a
    ``sql_prefix`` template of the form ``<id>.{schema}.{table}``; the
    ``{schema}`` and ``{table}`` placeholders are emitted literally.

    Args:
        client: Object whose ``list_managed_databases()`` yields items exposing
            ``id`` and ``description`` attributes.

    Returns:
        The entries as a JSON string indented by two spaces.
    """
    entries = []
    for managed in client.list_managed_databases():
        entry = {
            "description": managed.description,
            "id": managed.id,
            "sql_prefix": f"{managed.id}.{{schema}}.{{table}}",
        }
        entries.append(entry)
    return json.dumps(entries, indent=2)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def create_managed_database(
    client: HotdataClient,
    *,
    name: str,
    schema: str = DEFAULT_SCHEMA,
    tables: list[str] | None = None,
) -> ManagedDatabase:
    """Create a managed database through ``client``.

    Args:
        client: Hotdata client that performs the creation.
        name: Forwarded to the client as the database ``description``.
        schema: Schema name forwarded to the client.
        tables: Optional table names, forwarded to the client unchanged.

    Returns:
        Whatever ``client.create_managed_database`` returns.
    """
    # The client API calls this field ``description``; this helper exposes it
    # to callers as ``name``.
    request = {"description": name, "schema": schema, "tables": tables}
    return client.create_managed_database(**request)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_managed_table(
    client: HotdataClient,
    *,
    database: str,
    table: str,
    file: str,
    schema: str = DEFAULT_SCHEMA,
) -> LoadManagedTableResult:
    """Load ``file`` into a managed table through ``client``.

    Args:
        client: Hotdata client that performs the load.
        database: Passed to the client as the first positional argument.
        table: Passed to the client as the second positional argument.
        file: Forwarded to the client as the ``file`` keyword.
        schema: Forwarded to the client as the ``schema`` keyword.

    Returns:
        Whatever ``client.load_managed_table`` returns.
    """
    options = {"schema": schema, "file": file}
    return client.load_managed_table(database, table, **options)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def managed_database_summary(db: ManagedDatabase) -> dict[str, str]:
    """Return a small ``{"id", "description"}`` mapping describing ``db``.

    When ``db.description`` is falsy (for example ``None`` or an empty
    string), the database id is used as the description instead.
    """
    identifier = db.id
    label = db.description or identifier
    return {"id": identifier, "description": label}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def load_result_summary(result: LoadManagedTableResult) -> dict[str, Any]:
    """Copy the reporting attributes of a load result into a plain dict.

    The returned mapping has the keys ``connection_id``, ``schema_name``,
    ``table_name``, ``row_count`` and ``full_name`` (in that order), each
    holding the attribute of the same name read from ``result``.
    """
    field_names = (
        "connection_id",
        "schema_name",
        "table_name",
        "row_count",
        "full_name",
    )
    return {field: getattr(result, field) for field in field_names}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""LangChain tools built on hotdata-runtime."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from hotdata_runtime import DEFAULT_SCHEMA, HotdataClient, QueryResult
|
|
9
|
+
from langchain_core.tools import StructuredTool
|
|
10
|
+
|
|
11
|
+
from hotdata_langchain.databases import (
|
|
12
|
+
create_managed_database,
|
|
13
|
+
list_managed_databases_json,
|
|
14
|
+
load_managed_table,
|
|
15
|
+
load_result_summary,
|
|
16
|
+
managed_database_summary,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def result_rows_for_llm(result: QueryResult, *, max_rows: int = 20) -> list[dict[str, Any]]:
    """Return the records of ``result`` for handing to an LLM.

    Args:
        result: Query result exposing ``to_records(max_rows=...)``.
        max_rows: Forwarded to ``to_records`` as its ``max_rows`` argument.

    Returns:
        Whatever ``result.to_records`` returns for the given ``max_rows``.
    """
    records = result.to_records(max_rows=max_rows)
    return records
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def execute_sql_json(
    client: HotdataClient,
    sql: str,
    *,
    max_rows: int = 100,
    database: str | None = None,
) -> str:
    """Execute ``sql`` through ``client`` and return the outcome as JSON text.

    Args:
        client: Hotdata client used to run the statement.
        sql: SQL text passed to ``client.execute_sql``.
        max_rows: Forwarded to the result's ``to_records`` call.
        database: Forwarded to ``client.execute_sql`` as ``database``.

    Returns:
        A JSON object (two-space indent) with a ``metadata`` key holding
        ``result.metadata_dict()`` and a ``rows`` key holding
        ``result.to_records(max_rows=max_rows)``.
    """
    outcome = client.execute_sql(sql, database=database)
    metadata = outcome.metadata_dict()
    rows = outcome.to_records(max_rows=max_rows)
    return json.dumps({"metadata": metadata, "rows": rows}, indent=2)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def make_hotdata_tools(
    client: HotdataClient,
    *,
    max_rows: int = 100,
    database: str | None = None,
) -> list[StructuredTool]:
    """Return LangChain tools for SQL and managed database workflows.

    Every tool is a closure over ``client``; the SQL tool additionally closes
    over ``max_rows`` and ``database``.

    Args:
        client: Hotdata client shared by all returned tools.
        max_rows: Row cap handed to ``execute_sql_json`` by the SQL tool.
        database: Database handed to ``execute_sql_json`` by the SQL tool.

    Returns:
        Four ``StructuredTool`` objects, in this order:
        ``hotdata_execute_sql``, ``hotdata_list_managed_databases``,
        ``hotdata_create_managed_database``, ``hotdata_load_managed_table``.
    """

    # NOTE: the inner functions' names, signatures and docstrings are handed to
    # ``StructuredTool.from_function`` as-is, so they are part of the tools'
    # observable interface and are kept verbatim.

    def hotdata_execute_sql(sql: str) -> str:
        """Run SQL against the Hotdata workspace and return JSON rows."""
        return execute_sql_json(client, sql, max_rows=max_rows, database=database)

    def hotdata_list_managed_databases() -> str:
        """List Hotdata-managed databases in the workspace."""
        return list_managed_databases_json(client)

    def hotdata_create_managed_database(
        name: str,
        schema_name: str = DEFAULT_SCHEMA,
        tables: str = "",
    ) -> str:
        """Create a managed database and optionally declare tables (comma/newline separated)."""
        # Treat commas as line breaks so one split handles both separators,
        # then drop entries that are empty after trimming.
        trimmed = (chunk.strip() for chunk in tables.replace(",", "\n").splitlines())
        requested = [entry for entry in trimmed if entry]
        created = create_managed_database(
            client,
            name=name,
            schema=schema_name or DEFAULT_SCHEMA,
            tables=requested or None,
        )
        return json.dumps(managed_database_summary(created), indent=2)

    def hotdata_load_managed_table(
        database: str,
        table: str,
        file: str,
        schema_name: str = DEFAULT_SCHEMA,
    ) -> str:
        """Load a local parquet file into a declared managed table."""
        outcome = load_managed_table(
            client,
            database=database,
            table=table,
            file=file,
            schema=schema_name or DEFAULT_SCHEMA,
        )
        return json.dumps(load_result_summary(outcome), indent=2)

    # Each tool is registered under the name of the function implementing it.
    tool_functions = (
        hotdata_execute_sql,
        hotdata_list_managed_databases,
        hotdata_create_managed_database,
        hotdata_load_managed_table,
    )
    return [
        StructuredTool.from_function(func=fn, name=fn.__name__)
        for fn in tool_functions
    ]
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hotdata-langchain
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: LangChain tools for Hotdata runtime
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: hotdata-runtime>=0.3.0
|
|
8
|
+
Requires-Dist: hotdata>=0.4.1
|
|
9
|
+
Requires-Dist: langchain-core>=1.0
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# hotdata-langchain
|
|
13
|
+
|
|
14
|
+
Give your [LangChain](https://python.langchain.com/) agents access to [Hotdata](https://hotdata.dev) — run SQL against your workspace connections and work with managed databases.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install hotdata-langchain
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Authentication
|
|
23
|
+
|
|
24
|
+
Set `HOTDATA_API_KEY` in your environment. Optionally set `HOTDATA_WORKSPACE` to pin a specific workspace (the first available workspace is used if unset).
|
|
25
|
+
|
|
26
|
+
## Quickstart
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from langchain.agents import AgentExecutor, create_tool_calling_agent
|
|
30
|
+
import hotdata_langchain as hl
|
|
31
|
+
|
|
32
|
+
client = hl.from_env()
|
|
33
|
+
tools = hl.make_hotdata_tools(client)
|
|
34
|
+
|
|
35
|
+
agent = create_tool_calling_agent(llm=your_llm, tools=tools, prompt=your_prompt)
|
|
36
|
+
executor = AgentExecutor(agent=agent, tools=tools)
|
|
37
|
+
result = executor.invoke({"input": "How many rows are in the orders table?"})
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Tools
|
|
41
|
+
|
|
42
|
+
`make_hotdata_tools(client)` returns a list of LangChain `StructuredTool` objects ready to pass to any agent:
|
|
43
|
+
|
|
44
|
+
| Tool | What it does |
|
|
45
|
+
|------|-------------|
|
|
46
|
+
| `hotdata_execute_sql` | Run a SQL query and return rows as JSON |
|
|
47
|
+
| `hotdata_list_managed_databases` | List available managed databases |
|
|
48
|
+
| `hotdata_create_managed_database` | Create a new managed database |
|
|
49
|
+
| `hotdata_load_managed_table` | Load a parquet file into a managed table |
|
|
50
|
+
|
|
51
|
+
## Calling tools directly
|
|
52
|
+
|
|
53
|
+
You can also invoke tools outside of an agent loop:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
tools = {t.name: t for t in hl.make_hotdata_tools(client)}
|
|
57
|
+
|
|
58
|
+
result = tools["hotdata_execute_sql"].invoke({"sql": "SELECT * FROM orders LIMIT 10"})
|
|
59
|
+
print(result) # JSON rows
|
|
60
|
+
|
|
61
|
+
tools["hotdata_create_managed_database"].invoke({
|
|
62
|
+
"name": "sales",
|
|
63
|
+
"schema_name": "public",
|
|
64
|
+
"tables": "orders,customers",
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
tools["hotdata_load_managed_table"].invoke({
|
|
68
|
+
"database": "sales",
|
|
69
|
+
"table": "orders",
|
|
70
|
+
"file": "/path/to/orders.parquet",
|
|
71
|
+
})
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Scoping queries to a managed database
|
|
75
|
+
|
|
76
|
+
Pass `database=` to `make_hotdata_tools` so that every SQL statement the agent runs through `hotdata_execute_sql` resolves against a specific managed database:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
tools = hl.make_hotdata_tools(client, database="sales")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Controlling result size
|
|
83
|
+
|
|
84
|
+
Use `max_rows` to cap how many rows each query returns to the LLM (default: 100). This helps keep tool responses within the model's context limits:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
tools = hl.make_hotdata_tools(client, max_rows=50)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Run the examples
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
uv run python examples/langchain_basic.py
|
|
94
|
+
uv run python examples/langchain_managed_db.py
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Development
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
uv sync --locked
|
|
101
|
+
uv run pytest
|
|
102
|
+
```
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
hotdata_langchain/__init__.py,sha256=6mmsOi0XrsbZa0AbCvx0wJPe0rsjuYQ8tAhAJHwkGjs,902
|
|
2
|
+
hotdata_langchain/databases.py,sha256=XFfK3b6_xDY3utuTge1RX0r5bFaxlkut1tiMKObnbjc,1522
|
|
3
|
+
hotdata_langchain/tools.py,sha256=opkUACZNVcv1M5ggOqmssjjsBMiALD18Dfczin7twUU,3171
|
|
4
|
+
hotdata_langchain-0.2.1.dist-info/METADATA,sha256=4mOVq-u-flOfdq6cxPOQq_e7RA-ZeMiRxpnu_PqSa24,2715
|
|
5
|
+
hotdata_langchain-0.2.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
6
|
+
hotdata_langchain-0.2.1.dist-info/RECORD,,
|