datahub-agent-context 1.3.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datahub_agent_context/__init__.py +25 -0
- datahub_agent_context/_version.py +16 -0
- datahub_agent_context/context.py +97 -0
- datahub_agent_context/langchain_tools/__init__.py +8 -0
- datahub_agent_context/langchain_tools/builder.py +127 -0
- datahub_agent_context/mcp_tools/__init__.py +46 -0
- datahub_agent_context/mcp_tools/_token_estimator.py +71 -0
- datahub_agent_context/mcp_tools/base.py +325 -0
- datahub_agent_context/mcp_tools/descriptions.py +299 -0
- datahub_agent_context/mcp_tools/documents.py +473 -0
- datahub_agent_context/mcp_tools/domains.py +246 -0
- datahub_agent_context/mcp_tools/entities.py +349 -0
- datahub_agent_context/mcp_tools/get_me.py +99 -0
- datahub_agent_context/mcp_tools/gql/__init__.py +13 -0
- datahub_agent_context/mcp_tools/gql/document_search.gql +114 -0
- datahub_agent_context/mcp_tools/gql/document_semantic_search.gql +111 -0
- datahub_agent_context/mcp_tools/gql/entity_details.gql +1682 -0
- datahub_agent_context/mcp_tools/gql/queries.gql +51 -0
- datahub_agent_context/mcp_tools/gql/query_entity.gql +37 -0
- datahub_agent_context/mcp_tools/gql/read_documents.gql +16 -0
- datahub_agent_context/mcp_tools/gql/search.gql +242 -0
- datahub_agent_context/mcp_tools/helpers.py +448 -0
- datahub_agent_context/mcp_tools/lineage.py +698 -0
- datahub_agent_context/mcp_tools/owners.py +318 -0
- datahub_agent_context/mcp_tools/queries.py +191 -0
- datahub_agent_context/mcp_tools/search.py +239 -0
- datahub_agent_context/mcp_tools/structured_properties.py +447 -0
- datahub_agent_context/mcp_tools/tags.py +296 -0
- datahub_agent_context/mcp_tools/terms.py +295 -0
- datahub_agent_context/py.typed +2 -0
- datahub_agent_context-1.3.1.8.dist-info/METADATA +233 -0
- datahub_agent_context-1.3.1.8.dist-info/RECORD +34 -0
- datahub_agent_context-1.3.1.8.dist-info/WHEEL +5 -0
- datahub_agent_context-1.3.1.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datahub-agent-context
|
|
3
|
+
Version: 1.3.1.8
|
|
4
|
+
Summary: DataHub Agent Context - MCP Tools for AI Agents
|
|
5
|
+
Home-page: https://datahub.io/
|
|
6
|
+
License: Apache License 2.0
|
|
7
|
+
Project-URL: Documentation, https://datahubproject.io/docs/
|
|
8
|
+
Project-URL: Source, https://github.com/datahub-project/datahub
|
|
9
|
+
Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Programming Language :: Python
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Intended Audience :: Developers
|
|
20
|
+
Classifier: Intended Audience :: Information Technology
|
|
21
|
+
Classifier: Intended Audience :: System Administrators
|
|
22
|
+
Classifier: License :: OSI Approved
|
|
23
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
24
|
+
Classifier: Operating System :: Unix
|
|
25
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
26
|
+
Classifier: Environment :: Console
|
|
27
|
+
Classifier: Environment :: MacOS X
|
|
28
|
+
Classifier: Topic :: Software Development
|
|
29
|
+
Requires-Python: >=3.9
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
Requires-Dist: cachetools<7.0.0,>=5.0.0
|
|
32
|
+
Requires-Dist: acryl-datahub==1.3.1.8
|
|
33
|
+
Requires-Dist: google-re2<2.0,>=1.0
|
|
34
|
+
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
35
|
+
Requires-Dist: h11<1.0,>=0.16
|
|
36
|
+
Requires-Dist: json-repair<1.0.0,>=0.25.0
|
|
37
|
+
Requires-Dist: jmespath<2.0.0,>=1.0.0
|
|
38
|
+
Requires-Dist: httpcore<2.0,>=1.0.9
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: ruff==0.11.7; extra == "dev"
|
|
41
|
+
Requires-Dist: mypy==1.17.1; extra == "dev"
|
|
42
|
+
Requires-Dist: types-toml<1.0.0,>=0.10.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
|
|
44
|
+
Requires-Dist: types-jmespath<2.0.0,>=1.0.0; extra == "dev"
|
|
45
|
+
Requires-Dist: types-PyYAML<7.0.0,>=6.0.0; extra == "dev"
|
|
46
|
+
Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
|
|
47
|
+
Requires-Dist: tox<5.0.0,>=4.0.0; extra == "dev"
|
|
48
|
+
Requires-Dist: types-requests<3.0.0,>=2.0.0; extra == "dev"
|
|
49
|
+
Requires-Dist: types-cachetools<7.0.0,>=5.0.0; extra == "dev"
|
|
50
|
+
Provides-Extra: langchain
|
|
51
|
+
Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "langchain"
|
|
52
|
+
Dynamic: classifier
|
|
53
|
+
Dynamic: description
|
|
54
|
+
Dynamic: description-content-type
|
|
55
|
+
Dynamic: home-page
|
|
56
|
+
Dynamic: license
|
|
57
|
+
Dynamic: project-url
|
|
58
|
+
Dynamic: provides-extra
|
|
59
|
+
Dynamic: requires-dist
|
|
60
|
+
Dynamic: requires-python
|
|
61
|
+
Dynamic: summary
|
|
62
|
+
|
|
63
|
+
# DataHub Agent Context
|
|
64
|
+
|
|
65
|
+
**DataHub Agent Context** provides a collection of tools and utilities for building AI agents that interact with DataHub metadata. This package contains MCP (Model Context Protocol) tools that enable AI agents to search, retrieve, and manipulate metadata in DataHub. These can be used directly to create an agent, or be included in an MCP server such as DataHub's open-source MCP server.
|
|
66
|
+
|
|
67
|
+
## Features
|
|
68
|
+
|
|
69
|
+
## Installation
|
|
70
|
+
|
|
71
|
+
### Base Installation
|
|
72
|
+
|
|
73
|
+
```shell
|
|
74
|
+
python3 -m pip install --upgrade pip wheel setuptools
|
|
75
|
+
python3 -m pip install --upgrade datahub-agent-context
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### With LangChain Support
|
|
79
|
+
|
|
80
|
+
For building LangChain agents with pre-built tools:
|
|
81
|
+
|
|
82
|
+
```shell
|
|
83
|
+
python3 -m pip install --upgrade "datahub-agent-context[langchain]"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Prerequisites
|
|
87
|
+
|
|
88
|
+
This package requires:
|
|
89
|
+
|
|
90
|
+
- Python 3.9 or higher
|
|
91
|
+
- `acryl-datahub` package
|
|
92
|
+
|
|
93
|
+
## Quick Start
|
|
94
|
+
|
|
95
|
+
### Basic Example
|
|
96
|
+
|
|
97
|
+
These tools are designed to be called by an AI agent, with their responses passed directly to an LLM, so each tool returns a simple dict. They can also be used independently if desired.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from datahub.sdk.main_client import DataHubClient
|
|
101
|
+
from datahub_agent_context.mcp_tools.search import search
|
|
102
|
+
from datahub_agent_context.mcp_tools.entities import get_entities
|
|
103
|
+
|
|
104
|
+
# Initialize DataHub graph client
|
|
105
|
+
client = DataHubClient.from_env()
|
|
106
|
+
|
|
107
|
+
# Search for datasets
|
|
108
|
+
with client.graph as graph:
|
|
109
|
+
results = search(
|
|
110
|
+
query="user_data",
|
|
111
|
+
filters={"entity_type": ["dataset"]},
|
|
112
|
+
num_results=10
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Get detailed entity information
|
|
116
|
+
with client.graph as graph:
|
|
117
|
+
entities = get_entities(
|
|
118
|
+
urns=[result["entity"]["urn"] for result in results["searchResults"]]
|
|
119
|
+
)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### LangChain Integration
|
|
123
|
+
|
|
124
|
+
Build AI agents with pre-built LangChain tools:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from datahub.sdk.main_client import DataHubClient
|
|
128
|
+
from datahub_agent_context.langchain_tools import build_langchain_tools
|
|
129
|
+
from langchain.agents import create_agent
|
|
130
|
+
|
|
131
|
+
# Initialize DataHub client
|
|
132
|
+
client = DataHubClient.from_env()
|
|
133
|
+
|
|
134
|
+
# Build all tools (read-only by default)
|
|
135
|
+
tools = build_langchain_tools(client, include_mutations=False)
|
|
136
|
+
|
|
137
|
+
# Or include mutation tools for tagging, descriptions, etc.
|
|
138
|
+
tools = build_langchain_tools(client, include_mutations=True)
|
|
139
|
+
|
|
140
|
+
# Create agent
|
|
141
|
+
agent = create_agent(model, tools=tools, system_prompt="...")
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
**See [examples/langchain/](examples/langchain/)** for complete LangChain agent examples including:
|
|
145
|
+
|
|
146
|
+
- [simple_search.py](examples/langchain/simple_search.py) - Minimal example with AWS Bedrock
|
|
147
|
+
|
|
148
|
+
### Available Tools
|
|
149
|
+
|
|
150
|
+
#### Search Tools
|
|
151
|
+
|
|
152
|
+
- `search()` - Search across all entity types with filters and sorting
|
|
153
|
+
- `search_documents()` - Search specifically for Document entities
|
|
154
|
+
- `grep_documents()` - Grep for patterns in document content
|
|
155
|
+
|
|
156
|
+
#### Entity Tools
|
|
157
|
+
|
|
158
|
+
- `get_entities()` - Get detailed information about entities by URN
|
|
159
|
+
- `list_schema_fields()` - List and filter schema fields for datasets
|
|
160
|
+
|
|
161
|
+
#### Lineage Tools
|
|
162
|
+
|
|
163
|
+
- `get_lineage()` - Get upstream or downstream lineage
|
|
164
|
+
- `get_lineage_paths_between()` - Get detailed paths between two entities
|
|
165
|
+
|
|
166
|
+
#### Query Tools
|
|
167
|
+
|
|
168
|
+
- `get_dataset_queries()` - Get SQL queries for datasets or columns
|
|
169
|
+
|
|
170
|
+
#### Mutation Tools
|
|
171
|
+
|
|
172
|
+
- `add_tags()`, `remove_tags()` - Manage tags
|
|
173
|
+
- `update_description()` - Update entity descriptions
|
|
174
|
+
- `set_domains()`, `remove_domains()` - Manage domains
|
|
175
|
+
- `add_owners()`, `remove_owners()` - Manage owners
|
|
176
|
+
- `add_glossary_terms()`, `remove_glossary_terms()` - Manage glossary terms
|
|
177
|
+
- `add_structured_properties()`, `remove_structured_properties()` - Manage structured properties
|
|
178
|
+
|
|
179
|
+
#### User Tools
|
|
180
|
+
|
|
181
|
+
- `get_me()` - Get information about the authenticated user
|
|
182
|
+
|
|
183
|
+
## Architecture
|
|
184
|
+
|
|
185
|
+
The package is organized into the following modules:
|
|
186
|
+
|
|
187
|
+
- `mcp_tools/` - Core MCP tool implementations
|
|
188
|
+
- `base.py` - Base GraphQL execution and response cleaning
|
|
189
|
+
- `search.py` - Search functionality
|
|
190
|
+
- `documents.py` - Document search and grep
|
|
191
|
+
- `entities.py` - Entity retrieval
|
|
192
|
+
- `lineage.py` - Lineage querying
|
|
193
|
+
- `queries.py` - Query retrieval
|
|
194
|
+
- `tags.py`, `descriptions.py`, `domains.py`, etc. - Mutation tools
|
|
195
|
+
- `helpers.py` - Shared utility functions
|
|
196
|
+
- `gql/` - GraphQL query definitions
|
|
197
|
+
|
|
198
|
+
## Development
|
|
199
|
+
|
|
200
|
+
### Setup
|
|
201
|
+
|
|
202
|
+
```shell
|
|
203
|
+
# Clone the repository
|
|
204
|
+
git clone https://github.com/datahub-project/datahub.git
|
|
205
|
+
cd datahub/datahub-agent-context
|
|
206
|
+
|
|
207
|
+
# Set up development environment
|
|
208
|
+
./gradlew :datahub-agent-context:installDev
|
|
209
|
+
|
|
210
|
+
# Run tests
|
|
211
|
+
./gradlew :datahub-agent-context:testQuick
|
|
212
|
+
|
|
213
|
+
# Run linting
|
|
214
|
+
./gradlew :datahub-agent-context:lintFix
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Testing
|
|
218
|
+
|
|
219
|
+
The package includes comprehensive unit tests for all tools:
|
|
220
|
+
|
|
221
|
+
```shell
|
|
222
|
+
# Run quick tests
|
|
223
|
+
./gradlew :datahub-agent-context:testQuick
|
|
224
|
+
|
|
225
|
+
# Run full test suite
|
|
226
|
+
./gradlew :datahub-agent-context:testFull
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Support
|
|
230
|
+
|
|
231
|
+
- [Documentation](https://datahubproject.io/docs/)
|
|
232
|
+
- [Slack Community](https://datahub.com/slack)
|
|
233
|
+
- [GitHub Issues](https://github.com/datahub-project/datahub/issues)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
datahub_agent_context/__init__.py,sha256=VGBOuNztxuwUi5Ofnrpe7tw8EmUrQD-i-eMbSKwvMtU,890
|
|
2
|
+
datahub_agent_context/_version.py,sha256=nmlf5FAQnUzuLFA3M_c7d5GiMxh4Cj93Tn3Nyw9V7BA,647
|
|
3
|
+
datahub_agent_context/context.py,sha256=qRc44o38Y-LoDQH1oFm38hIatOWRKnRxxytQNIR93kU,2771
|
|
4
|
+
datahub_agent_context/py.typed,sha256=kO13kg6OXApIRwKRcPpEOL09GZHx2Pk8Rp2KZpxv0lw,63
|
|
5
|
+
datahub_agent_context/langchain_tools/__init__.py,sha256=M0tn6fD9qY5Wc1XdptQuIf_7MSKLX8OSBaBxcPo5wmw,259
|
|
6
|
+
datahub_agent_context/langchain_tools/builder.py,sha256=X59zdmdUqltKiTo3HZrE4-JOd7CztGprW2O32jIYt2o,5145
|
|
7
|
+
datahub_agent_context/mcp_tools/__init__.py,sha256=7iUoWuT-KvszOqnmL3_co2LVQdhZtkQKRLRE98Hn8WM,1544
|
|
8
|
+
datahub_agent_context/mcp_tools/_token_estimator.py,sha256=U0kTqPZKBkKwxe7JZaLxIIFEobNSrEEHoM4NQbrmmAE,2782
|
|
9
|
+
datahub_agent_context/mcp_tools/base.py,sha256=r0vHc6uivWjTyGdpAkcmyvg4XuBTKWDds1Uv5BC13y4,10989
|
|
10
|
+
datahub_agent_context/mcp_tools/descriptions.py,sha256=xjfQJ1g-Vrj5L4EZ_Zd2_ZivI_utWnFsKkzV8Ad9aPA,10504
|
|
11
|
+
datahub_agent_context/mcp_tools/documents.py,sha256=en-W7pWs6C-h2u6cTMn6g-SY62BT6Hp8tz0GXcj7fDQ,16468
|
|
12
|
+
datahub_agent_context/mcp_tools/domains.py,sha256=mgSPJVlGhN_mdnvQsSJGqcluVKnUI72uMT067GzwU_w,7880
|
|
13
|
+
datahub_agent_context/mcp_tools/entities.py,sha256=H39aJn5BIUFEW9jJLnimdDmfpO1Ei0sx8FSeb4y3hFc,13408
|
|
14
|
+
datahub_agent_context/mcp_tools/get_me.py,sha256=1XUoov7slzAYfycq-NR8w8GxEKvgmBUOKoK2I2_0nNM,3111
|
|
15
|
+
datahub_agent_context/mcp_tools/helpers.py,sha256=NRIoVEB62vDWDg26UOFv-IhM8mEQd4hf8eL4iCJI18Y,17253
|
|
16
|
+
datahub_agent_context/mcp_tools/lineage.py,sha256=sJVR2jJkbGU_KjjtqZ8IJVOKDaIjDdtQKtAIxYWq71Q,26753
|
|
17
|
+
datahub_agent_context/mcp_tools/owners.py,sha256=LGZ5n5a3xRKSttay2NLf_rq97_Dl9pGIcVFi-l7uJK8,11798
|
|
18
|
+
datahub_agent_context/mcp_tools/queries.py,sha256=V4-yFcCi3c8r4Xy7XVKfQ7s3SsIWXMAHRrI8Sqf2g20,6864
|
|
19
|
+
datahub_agent_context/mcp_tools/search.py,sha256=z5Hy1jLV4uDO26nb_oFuP5w6GX0DYcYWRIWn3kDp7dY,9880
|
|
20
|
+
datahub_agent_context/mcp_tools/structured_properties.py,sha256=amj7C-sbeAyctrXY_rpc2vCNTaJy2aTRx21TioeKEJk,15745
|
|
21
|
+
datahub_agent_context/mcp_tools/tags.py,sha256=5_Wg1Jqf_FgPgYuUV5bDwQ6J8t_sECcSM5yVtwQruPs,10814
|
|
22
|
+
datahub_agent_context/mcp_tools/terms.py,sha256=NHJ_PITAp0vUMij0o3-7Hd0a_tfM8TF59f4kayFcd0Q,11558
|
|
23
|
+
datahub_agent_context/mcp_tools/gql/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
|
|
24
|
+
datahub_agent_context/mcp_tools/gql/document_search.gql,sha256=45oP14UsoMHDJSDpUlNNYpjdqGPJC4k1ZIEpN_PY8jQ,1635
|
|
25
|
+
datahub_agent_context/mcp_tools/gql/document_semantic_search.gql,sha256=lbckeTLY5idPgAhTBGZA-DIaR3lrSWAw_-imlAOgGIM,1606
|
|
26
|
+
datahub_agent_context/mcp_tools/gql/entity_details.gql,sha256=LEca6jvIQCib6c3ex0AZ8Wmxf1VHaPIVwvYkMLD0y1w,41169
|
|
27
|
+
datahub_agent_context/mcp_tools/gql/queries.gql,sha256=TIk_LHNSqJAFbwI9V354N-5Rx_pJyDP6JswcX63UMMc,624
|
|
28
|
+
datahub_agent_context/mcp_tools/gql/query_entity.gql,sha256=Nxm4lGz-hy95XoMPIyeWtK2QBRIaMEicZ2yAWgJAV2s,527
|
|
29
|
+
datahub_agent_context/mcp_tools/gql/read_documents.gql,sha256=QxUX-R-qL4VElZxIKzuILFnoyI1ZPAmvGIDKNYIe36Y,302
|
|
30
|
+
datahub_agent_context/mcp_tools/gql/search.gql,sha256=vIKrUmPbRY1mMsv7nNbEP337z6DbgZMQ1zXw_gd66zo,3457
|
|
31
|
+
datahub_agent_context-1.3.1.8.dist-info/METADATA,sha256=CB_xtqXWMC-3IRKS2aFQcTj-nRMsPv9fEcks8x6eWsw,7410
|
|
32
|
+
datahub_agent_context-1.3.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
33
|
+
datahub_agent_context-1.3.1.8.dist-info/top_level.txt,sha256=Tv1bg7ZwDOKM9u9RHj5m1Zbx2LDf4lVBBRNHi_gBBTI,22
|
|
34
|
+
datahub_agent_context-1.3.1.8.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
datahub_agent_context
|