datahub-agent-context 1.3.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. datahub_agent_context/__init__.py +25 -0
  2. datahub_agent_context/_version.py +16 -0
  3. datahub_agent_context/context.py +97 -0
  4. datahub_agent_context/langchain_tools/__init__.py +8 -0
  5. datahub_agent_context/langchain_tools/builder.py +127 -0
  6. datahub_agent_context/mcp_tools/__init__.py +46 -0
  7. datahub_agent_context/mcp_tools/_token_estimator.py +71 -0
  8. datahub_agent_context/mcp_tools/base.py +325 -0
  9. datahub_agent_context/mcp_tools/descriptions.py +299 -0
  10. datahub_agent_context/mcp_tools/documents.py +473 -0
  11. datahub_agent_context/mcp_tools/domains.py +246 -0
  12. datahub_agent_context/mcp_tools/entities.py +349 -0
  13. datahub_agent_context/mcp_tools/get_me.py +99 -0
  14. datahub_agent_context/mcp_tools/gql/__init__.py +13 -0
  15. datahub_agent_context/mcp_tools/gql/document_search.gql +114 -0
  16. datahub_agent_context/mcp_tools/gql/document_semantic_search.gql +111 -0
  17. datahub_agent_context/mcp_tools/gql/entity_details.gql +1682 -0
  18. datahub_agent_context/mcp_tools/gql/queries.gql +51 -0
  19. datahub_agent_context/mcp_tools/gql/query_entity.gql +37 -0
  20. datahub_agent_context/mcp_tools/gql/read_documents.gql +16 -0
  21. datahub_agent_context/mcp_tools/gql/search.gql +242 -0
  22. datahub_agent_context/mcp_tools/helpers.py +448 -0
  23. datahub_agent_context/mcp_tools/lineage.py +698 -0
  24. datahub_agent_context/mcp_tools/owners.py +318 -0
  25. datahub_agent_context/mcp_tools/queries.py +191 -0
  26. datahub_agent_context/mcp_tools/search.py +239 -0
  27. datahub_agent_context/mcp_tools/structured_properties.py +447 -0
  28. datahub_agent_context/mcp_tools/tags.py +296 -0
  29. datahub_agent_context/mcp_tools/terms.py +295 -0
  30. datahub_agent_context/py.typed +2 -0
  31. datahub_agent_context-1.3.1.8.dist-info/METADATA +233 -0
  32. datahub_agent_context-1.3.1.8.dist-info/RECORD +34 -0
  33. datahub_agent_context-1.3.1.8.dist-info/WHEEL +5 -0
  34. datahub_agent_context-1.3.1.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,233 @@
1
+ Metadata-Version: 2.4
2
+ Name: datahub-agent-context
3
+ Version: 1.3.1.8
4
+ Summary: DataHub Agent Context - MCP Tools for AI Agents
5
+ Home-page: https://datahub.io/
6
+ License: Apache License 2.0
7
+ Project-URL: Documentation, https://datahubproject.io/docs/
8
+ Project-URL: Source, https://github.com/datahub-project/datahub
9
+ Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Intended Audience :: Developers
20
+ Classifier: Intended Audience :: Information Technology
21
+ Classifier: Intended Audience :: System Administrators
22
+ Classifier: License :: OSI Approved
23
+ Classifier: License :: OSI Approved :: Apache Software License
24
+ Classifier: Operating System :: Unix
25
+ Classifier: Operating System :: POSIX :: Linux
26
+ Classifier: Environment :: Console
27
+ Classifier: Environment :: MacOS X
28
+ Classifier: Topic :: Software Development
29
+ Requires-Python: >=3.9
30
+ Description-Content-Type: text/markdown
31
+ Requires-Dist: cachetools<7.0.0,>=5.0.0
32
+ Requires-Dist: acryl-datahub==1.3.1.8
33
+ Requires-Dist: google-re2<2.0,>=1.0
34
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
35
+ Requires-Dist: h11<1.0,>=0.16
36
+ Requires-Dist: json-repair<1.0.0,>=0.25.0
37
+ Requires-Dist: jmespath<2.0.0,>=1.0.0
38
+ Requires-Dist: httpcore<2.0,>=1.0.9
39
+ Provides-Extra: dev
40
+ Requires-Dist: ruff==0.11.7; extra == "dev"
41
+ Requires-Dist: mypy==1.17.1; extra == "dev"
42
+ Requires-Dist: types-toml<1.0.0,>=0.10.0; extra == "dev"
43
+ Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
44
+ Requires-Dist: types-jmespath<2.0.0,>=1.0.0; extra == "dev"
45
+ Requires-Dist: types-PyYAML<7.0.0,>=6.0.0; extra == "dev"
46
+ Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
47
+ Requires-Dist: tox<5.0.0,>=4.0.0; extra == "dev"
48
+ Requires-Dist: types-requests<3.0.0,>=2.0.0; extra == "dev"
49
+ Requires-Dist: types-cachetools<7.0.0,>=5.0.0; extra == "dev"
50
+ Provides-Extra: langchain
51
+ Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "langchain"
52
+ Dynamic: classifier
53
+ Dynamic: description
54
+ Dynamic: description-content-type
55
+ Dynamic: home-page
56
+ Dynamic: license
57
+ Dynamic: project-url
58
+ Dynamic: provides-extra
59
+ Dynamic: requires-dist
60
+ Dynamic: requires-python
61
+ Dynamic: summary
62
+
63
+ # DataHub Agent Context
64
+
65
+ **DataHub Agent Context** provides a collection of tools and utilities for building AI agents that interact with DataHub metadata. This package contains MCP (Model Context Protocol) tools that enable AI agents to search, retrieve, and manipulate metadata in DataHub. These tools can be used directly to create an agent, or be included in an MCP server such as DataHub's open-source MCP server.
66
+
67
+ ## Features
68
+
69
+ ## Installation
70
+
71
+ ### Base Installation
72
+
73
+ ```shell
74
+ python3 -m pip install --upgrade pip wheel setuptools
75
+ python3 -m pip install --upgrade datahub-agent-context
76
+ ```
77
+
78
+ ### With LangChain Support
79
+
80
+ For building LangChain agents with pre-built tools:
81
+
82
+ ```shell
83
+ python3 -m pip install --upgrade "datahub-agent-context[langchain]"
84
+ ```
85
+
86
+ ## Prerequisites
87
+
88
+ This package requires:
89
+
90
+ - Python 3.9 or higher
91
+ - `acryl-datahub` package
92
+
93
+ ## Quick Start
94
+
95
+ ### Basic Example
96
+
97
+ These tools are designed for use by an AI agent, with their responses passed directly to an LLM, so each tool returns a simple dict. They can also be called independently if desired.
98
+
99
+ ```python
100
+ from datahub.sdk.main_client import DataHubClient
101
+ from datahub_agent_context.mcp_tools.search import search
102
+ from datahub_agent_context.mcp_tools.entities import get_entities
103
+
104
+ # Initialize DataHub client
105
+ client = DataHubClient.from_env()
106
+
107
+ # Search for datasets
108
+ with client.graph as graph:
109
+ results = search(
110
+ query="user_data",
111
+ filters={"entity_type": ["dataset"]},
112
+ num_results=10
113
+ )
114
+
115
+ # Get detailed entity information
116
+ with client.graph as graph:
117
+ entities = get_entities(
118
+ urns=[result["entity"]["urn"] for result in results["searchResults"]]
119
+ )
120
+ ```
121
+
122
+ ### LangChain Integration
123
+
124
+ Build AI agents with pre-built LangChain tools:
125
+
126
+ ```python
127
+ from datahub.sdk.main_client import DataHubClient
128
+ from datahub_agent_context.langchain_tools import build_langchain_tools
129
+ from langchain.agents import create_agent
130
+
131
+ # Initialize DataHub client
132
+ client = DataHubClient.from_env()
133
+
134
+ # Build all tools (read-only by default)
135
+ tools = build_langchain_tools(client, include_mutations=False)
136
+
137
+ # Or include mutation tools for tagging, descriptions, etc.
138
+ tools = build_langchain_tools(client, include_mutations=True)
139
+
140
+ # Create agent
141
+ agent = create_agent(model, tools=tools, system_prompt="...")
142
+ ```
143
+
144
+ **See [examples/langchain/](examples/langchain/)** for complete LangChain agent examples including:
145
+
146
+ - [simple_search.py](examples/langchain/simple_search.py) - Minimal example with AWS Bedrock
147
+
148
+ ### Available Tools
149
+
150
+ #### Search Tools
151
+
152
+ - `search()` - Search across all entity types with filters and sorting
153
+ - `search_documents()` - Search specifically for Document entities
154
+ - `grep_documents()` - Grep for patterns in document content
155
+
156
+ #### Entity Tools
157
+
158
+ - `get_entities()` - Get detailed information about entities by URN
159
+ - `list_schema_fields()` - List and filter schema fields for datasets
160
+
161
+ #### Lineage Tools
162
+
163
+ - `get_lineage()` - Get upstream or downstream lineage
164
+ - `get_lineage_paths_between()` - Get detailed paths between two entities
165
+
166
+ #### Query Tools
167
+
168
+ - `get_dataset_queries()` - Get SQL queries for datasets or columns
169
+
170
+ #### Mutation Tools
171
+
172
+ - `add_tags()`, `remove_tags()` - Manage tags
173
+ - `update_description()` - Update entity descriptions
174
+ - `set_domains()`, `remove_domains()` - Manage domains
175
+ - `add_owners()`, `remove_owners()` - Manage owners
176
+ - `add_glossary_terms()`, `remove_glossary_terms()` - Manage glossary terms
177
+ - `add_structured_properties()`, `remove_structured_properties()` - Manage structured properties
178
+
179
+ #### User Tools
180
+
181
+ - `get_me()` - Get information about the authenticated user
182
+
183
+ ## Architecture
184
+
185
+ The package is organized into the following modules:
186
+
187
+ - `mcp_tools/` - Core MCP tool implementations
188
+ - `base.py` - Base GraphQL execution and response cleaning
189
+ - `search.py` - Search functionality
190
+ - `documents.py` - Document search and grep
191
+ - `entities.py` - Entity retrieval
192
+ - `lineage.py` - Lineage querying
193
+ - `queries.py` - Query retrieval
194
+ - `tags.py`, `descriptions.py`, `domains.py`, etc. - Mutation tools
195
+ - `helpers.py` - Shared utility functions
196
+ - `gql/` - GraphQL query definitions
197
+
198
+ ## Development
199
+
200
+ ### Setup
201
+
202
+ ```shell
203
+ # Clone the repository
204
+ git clone https://github.com/datahub-project/datahub.git
205
+ cd datahub/datahub-agent-context
206
+
207
+ # Set up development environment
208
+ ./gradlew :datahub-agent-context:installDev
209
+
210
+ # Run tests
211
+ ./gradlew :datahub-agent-context:testQuick
212
+
213
+ # Run linting
214
+ ./gradlew :datahub-agent-context:lintFix
215
+ ```
216
+
217
+ ### Testing
218
+
219
+ The package includes comprehensive unit tests for all tools:
220
+
221
+ ```shell
222
+ # Run quick tests
223
+ ./gradlew :datahub-agent-context:testQuick
224
+
225
+ # Run full test suite
226
+ ./gradlew :datahub-agent-context:testFull
227
+ ```
228
+
229
+ ## Support
230
+
231
+ - [Documentation](https://datahubproject.io/docs/)
232
+ - [Slack Community](https://datahub.com/slack)
233
+ - [GitHub Issues](https://github.com/datahub-project/datahub/issues)
@@ -0,0 +1,34 @@
1
+ datahub_agent_context/__init__.py,sha256=VGBOuNztxuwUi5Ofnrpe7tw8EmUrQD-i-eMbSKwvMtU,890
2
+ datahub_agent_context/_version.py,sha256=nmlf5FAQnUzuLFA3M_c7d5GiMxh4Cj93Tn3Nyw9V7BA,647
3
+ datahub_agent_context/context.py,sha256=qRc44o38Y-LoDQH1oFm38hIatOWRKnRxxytQNIR93kU,2771
4
+ datahub_agent_context/py.typed,sha256=kO13kg6OXApIRwKRcPpEOL09GZHx2Pk8Rp2KZpxv0lw,63
5
+ datahub_agent_context/langchain_tools/__init__.py,sha256=M0tn6fD9qY5Wc1XdptQuIf_7MSKLX8OSBaBxcPo5wmw,259
6
+ datahub_agent_context/langchain_tools/builder.py,sha256=X59zdmdUqltKiTo3HZrE4-JOd7CztGprW2O32jIYt2o,5145
7
+ datahub_agent_context/mcp_tools/__init__.py,sha256=7iUoWuT-KvszOqnmL3_co2LVQdhZtkQKRLRE98Hn8WM,1544
8
+ datahub_agent_context/mcp_tools/_token_estimator.py,sha256=U0kTqPZKBkKwxe7JZaLxIIFEobNSrEEHoM4NQbrmmAE,2782
9
+ datahub_agent_context/mcp_tools/base.py,sha256=r0vHc6uivWjTyGdpAkcmyvg4XuBTKWDds1Uv5BC13y4,10989
10
+ datahub_agent_context/mcp_tools/descriptions.py,sha256=xjfQJ1g-Vrj5L4EZ_Zd2_ZivI_utWnFsKkzV8Ad9aPA,10504
11
+ datahub_agent_context/mcp_tools/documents.py,sha256=en-W7pWs6C-h2u6cTMn6g-SY62BT6Hp8tz0GXcj7fDQ,16468
12
+ datahub_agent_context/mcp_tools/domains.py,sha256=mgSPJVlGhN_mdnvQsSJGqcluVKnUI72uMT067GzwU_w,7880
13
+ datahub_agent_context/mcp_tools/entities.py,sha256=H39aJn5BIUFEW9jJLnimdDmfpO1Ei0sx8FSeb4y3hFc,13408
14
+ datahub_agent_context/mcp_tools/get_me.py,sha256=1XUoov7slzAYfycq-NR8w8GxEKvgmBUOKoK2I2_0nNM,3111
15
+ datahub_agent_context/mcp_tools/helpers.py,sha256=NRIoVEB62vDWDg26UOFv-IhM8mEQd4hf8eL4iCJI18Y,17253
16
+ datahub_agent_context/mcp_tools/lineage.py,sha256=sJVR2jJkbGU_KjjtqZ8IJVOKDaIjDdtQKtAIxYWq71Q,26753
17
+ datahub_agent_context/mcp_tools/owners.py,sha256=LGZ5n5a3xRKSttay2NLf_rq97_Dl9pGIcVFi-l7uJK8,11798
18
+ datahub_agent_context/mcp_tools/queries.py,sha256=V4-yFcCi3c8r4Xy7XVKfQ7s3SsIWXMAHRrI8Sqf2g20,6864
19
+ datahub_agent_context/mcp_tools/search.py,sha256=z5Hy1jLV4uDO26nb_oFuP5w6GX0DYcYWRIWn3kDp7dY,9880
20
+ datahub_agent_context/mcp_tools/structured_properties.py,sha256=amj7C-sbeAyctrXY_rpc2vCNTaJy2aTRx21TioeKEJk,15745
21
+ datahub_agent_context/mcp_tools/tags.py,sha256=5_Wg1Jqf_FgPgYuUV5bDwQ6J8t_sECcSM5yVtwQruPs,10814
22
+ datahub_agent_context/mcp_tools/terms.py,sha256=NHJ_PITAp0vUMij0o3-7Hd0a_tfM8TF59f4kayFcd0Q,11558
23
+ datahub_agent_context/mcp_tools/gql/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
24
+ datahub_agent_context/mcp_tools/gql/document_search.gql,sha256=45oP14UsoMHDJSDpUlNNYpjdqGPJC4k1ZIEpN_PY8jQ,1635
25
+ datahub_agent_context/mcp_tools/gql/document_semantic_search.gql,sha256=lbckeTLY5idPgAhTBGZA-DIaR3lrSWAw_-imlAOgGIM,1606
26
+ datahub_agent_context/mcp_tools/gql/entity_details.gql,sha256=LEca6jvIQCib6c3ex0AZ8Wmxf1VHaPIVwvYkMLD0y1w,41169
27
+ datahub_agent_context/mcp_tools/gql/queries.gql,sha256=TIk_LHNSqJAFbwI9V354N-5Rx_pJyDP6JswcX63UMMc,624
28
+ datahub_agent_context/mcp_tools/gql/query_entity.gql,sha256=Nxm4lGz-hy95XoMPIyeWtK2QBRIaMEicZ2yAWgJAV2s,527
29
+ datahub_agent_context/mcp_tools/gql/read_documents.gql,sha256=QxUX-R-qL4VElZxIKzuILFnoyI1ZPAmvGIDKNYIe36Y,302
30
+ datahub_agent_context/mcp_tools/gql/search.gql,sha256=vIKrUmPbRY1mMsv7nNbEP337z6DbgZMQ1zXw_gd66zo,3457
31
+ datahub_agent_context-1.3.1.8.dist-info/METADATA,sha256=CB_xtqXWMC-3IRKS2aFQcTj-nRMsPv9fEcks8x6eWsw,7410
32
+ datahub_agent_context-1.3.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
33
+ datahub_agent_context-1.3.1.8.dist-info/top_level.txt,sha256=Tv1bg7ZwDOKM9u9RHj5m1Zbx2LDf4lVBBRNHi_gBBTI,22
34
+ datahub_agent_context-1.3.1.8.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ datahub_agent_context