datus-hive 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py,cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+
52
+ # Translations
53
+ *.mo
54
+ *.pot
55
+
56
+ # Django stuff:
57
+ *.log
58
+ local_settings.py
59
+ db.sqlite3
60
+ db.sqlite3-journal
61
+
62
+ # Flask stuff:
63
+ instance/
64
+ .webassets-cache
65
+
66
+ # Scrapy stuff:
67
+ .scrapy
68
+
69
+ # Sphinx documentation
70
+ docs/_build/
71
+
72
+ # PyBuilder
73
+ target/
74
+
75
+ # Jupyter Notebook
76
+ .ipynb_checkpoints
77
+
78
+ # IPython
79
+ profile_default/
80
+ ipython_config.py
81
+
82
+ # pyenv
83
+ .python-version
84
+
85
+ # pipenv
86
+ Pipfile.lock
87
+
88
+ # uv
89
+ uv.lock
90
+
91
+ # PEP 582
92
+ __pypackages__/
93
+
94
+ # Celery stuff
95
+ celerybeat-schedule
96
+ celerybeat.pid
97
+
98
+ # SageMath parsed files
99
+ *.sage.py
100
+
101
+ # Environments
102
+ .env
103
+ .venv
104
+ env/
105
+ venv/
106
+ ENV/
107
+ env.bak/
108
+ venv.bak/
109
+
110
+ # Spyder project settings
111
+ .spyderproject
112
+ .spyproject
113
+
114
+ # Rope project settings
115
+ .ropeproject
116
+
117
+ # mkdocs documentation
118
+ /site
119
+
120
+ # mypy
121
+ .mypy_cache/
122
+ .dmypy.json
123
+ dmypy.json
124
+
125
+ # Pyre type checker
126
+ .pyre/
127
+
128
+ # IDEs
129
+ .vscode/
130
+ .idea/
131
+ *.swp
132
+ *.swo
133
+ *~
134
+
135
+ # OS
136
+ .DS_Store
137
+ Thumbs.db
138
+
139
+
140
+ .omc
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: datus-hive
3
+ Version: 0.1.0
4
+ Summary: Hive database adapter for Datus
5
+ Project-URL: Homepage, https://github.com/Datus-ai/datus-db-adapters
6
+ Project-URL: Repository, https://github.com/Datus-ai/datus-db-adapters
7
+ Project-URL: Issues, https://github.com/Datus-ai/datus-db-adapters/issues
8
+ Author-email: DatusAI <support@datus.ai>
9
+ License: Apache-2.0
10
+ Keywords: adapter,database,datus,hive
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: datus-agent>0.2.1
18
+ Requires-Dist: datus-sqlalchemy>=0.1.0
19
+ Requires-Dist: pure-sasl>=0.6.2
20
+ Requires-Dist: pydantic>=2.0.0
21
+ Requires-Dist: pyhive>=0.7.0
22
+ Requires-Dist: thrift-sasl>=0.4.3
23
+ Requires-Dist: thrift>=0.16.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # datus-hive
27
+
28
+ Hive database adapter for Datus.
29
+
30
+ ## Installation
31
+
32
+ ```bash
33
+ pip install datus-hive
34
+ ```
35
+
36
+ This will automatically install the required dependencies:
37
+ - `datus-agent`
38
+ - `datus-sqlalchemy`
39
+ - `pyhive`
40
+ - `thrift`
41
+ - `thrift-sasl`
42
+ - `pure-sasl`
43
+
44
+ ## Usage
45
+
46
+ The adapter is automatically registered with Datus when installed. Configure your Hive connection in your Datus configuration:
47
+
48
+ ```yaml
49
+ namespace:
50
+ hive:
51
+ type: hive
52
+ host: 127.0.0.1
53
+ port: 10000
54
+ username: hive
55
+ database: default
56
+ ```
57
+
58
+ With authentication and session configuration:
59
+
60
+ ```yaml
61
+ namespace:
62
+ hive_production:
63
+ type: hive
64
+ host: 127.0.0.1
65
+ port: 10000
66
+ database: mydb
67
+ username: hive_user
68
+ password: your_password
69
+ auth: CUSTOM
70
+ configuration:
71
+ hive.execution.engine: spark
72
+ spark.app.name: my_app
73
+ spark.executor.memory: 1G
74
+ spark.executor.instances: 2
75
+ ```
76
+
77
+ Or use programmatically:
78
+
79
+ ```python
80
+ from datus_hive import HiveConnector, HiveConfig
81
+
82
+ # Create connector
83
+ config = HiveConfig(
84
+ host="127.0.0.1",
85
+ port=10000,
86
+ database="default",
87
+ username="hive",
88
+ )
89
+
90
+ connector = HiveConnector(config)
91
+
92
+ # Test connection
93
+ connector.test_connection()
94
+
95
+ # Execute query
96
+ result = connector.execute(
97
+ {"sql_query": "SELECT * FROM my_table LIMIT 10"},
98
+ result_format="list",
99
+ )
100
+ print(result.sql_return)
101
+
102
+ # Get table list
103
+ tables = connector.get_tables()
104
+ print(f"Tables: {tables}")
105
+
106
+ # Get table schema
107
+ schema = connector.get_schema(table_name="my_table")
108
+ for column in schema:
109
+ print(f"{column['name']}: {column['type']}")
110
+ ```
111
+
112
+ ## Configuration Parameters
113
+
114
+ | Parameter | Type | Default | Description |
115
+ |-----------|------|---------|-------------|
116
+ | `host` | str | `127.0.0.1` | HiveServer2 host |
117
+ | `port` | int | `10000` | HiveServer2 Thrift port |
118
+ | `database` | str | `None` | Default database (falls back to `default`) |
119
+ | `username` | str | **required** | Hive username |
120
+ | `password` | str | `""` | Password (for LDAP/CUSTOM auth) |
121
+ | `auth` | str | `None` | Auth mechanism: `NONE`, `LDAP`, `CUSTOM`, `KERBEROS` |
122
+ | `configuration` | dict | `{}` | Hive session configuration key-value pairs |
123
+ | `timeout_seconds` | int | `30` | Connection timeout in seconds |
124
+
125
+ ## Features
126
+
127
+ - Query execution with multiple result formats (list, csv, pandas, arrow)
128
+ - DDL execution (CREATE, ALTER, DROP)
129
+ - Metadata retrieval (databases, tables, views, schemas)
130
+ - DDL retrieval (SHOW CREATE TABLE)
131
+ - Sample data extraction
132
+ - Database context switching (USE statement)
133
+ - Connection pooling and management
134
+ - Hive session configuration support
135
+
136
+ ## Testing
137
+
138
+ ### Unit Tests
139
+
140
+ ```bash
141
+ uv run pytest datus-hive/tests/unit -v
142
+ ```
143
+
144
+ ### Integration Tests
145
+
146
+ Start Hive using Docker:
147
+
148
+ ```bash
149
+ cd datus-hive
150
+ docker compose up -d
151
+
152
+ # Wait for Hive to be healthy (about 1-2 minutes)
153
+ docker inspect --format='{{.State.Health.Status}}' datus-hive-server
154
+ ```
155
+
156
+ Run integration tests:
157
+
158
+ ```bash
159
+ uv run pytest datus-hive/tests/integration -v
160
+ ```
161
+
162
+ Stop Hive:
163
+
164
+ ```bash
165
+ cd datus-hive
166
+ docker compose down
167
+ ```
168
+
169
+ ### TPC-H Test Data
170
+
171
+ Initialize TPC-H sample data for manual testing:
172
+
173
+ ```bash
174
+ uv run python datus-hive/scripts/init_tpch_data.py
175
+
176
+ # With custom connection:
177
+ uv run python datus-hive/scripts/init_tpch_data.py --host localhost --port 10000 --username hive
178
+
179
+ # Clean re-init (drop existing tables first):
180
+ uv run python datus-hive/scripts/init_tpch_data.py --drop
181
+ ```
182
+
183
+ This creates 5 TPC-H tables with sample data:
184
+
185
+ | Table | Rows |
186
+ |-------|------|
187
+ | `tpch_region` | 5 |
188
+ | `tpch_nation` | 25 |
189
+ | `tpch_customer` | 10 |
190
+ | `tpch_orders` | 15 |
191
+ | `tpch_supplier` | 5 |
192
+
193
+ ## Requirements
194
+
195
+ - Python >= 3.12
196
+ - Apache Hive >= 2.x (tested with 4.0.1)
197
+ - datus-agent >= 0.3.0
198
+ - datus-sqlalchemy >= 0.1.0
199
+ - pyhive >= 0.7.0
200
+
201
+ ## License
202
+
203
+ Apache License 2.0
@@ -0,0 +1,178 @@
1
+ # datus-hive
2
+
3
+ Hive database adapter for Datus.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install datus-hive
9
+ ```
10
+
11
+ This will automatically install the required dependencies:
12
+ - `datus-agent`
13
+ - `datus-sqlalchemy`
14
+ - `pyhive`
15
+ - `thrift`
16
+ - `thrift-sasl`
17
+ - `pure-sasl`
18
+
19
+ ## Usage
20
+
21
+ The adapter is automatically registered with Datus when installed. Configure your Hive connection in your Datus configuration:
22
+
23
+ ```yaml
24
+ namespace:
25
+ hive:
26
+ type: hive
27
+ host: 127.0.0.1
28
+ port: 10000
29
+ username: hive
30
+ database: default
31
+ ```
32
+
33
+ With authentication and session configuration:
34
+
35
+ ```yaml
36
+ namespace:
37
+ hive_production:
38
+ type: hive
39
+ host: 127.0.0.1
40
+ port: 10000
41
+ database: mydb
42
+ username: hive_user
43
+ password: your_password
44
+ auth: CUSTOM
45
+ configuration:
46
+ hive.execution.engine: spark
47
+ spark.app.name: my_app
48
+ spark.executor.memory: 1G
49
+ spark.executor.instances: 2
50
+ ```
51
+
52
+ Or use programmatically:
53
+
54
+ ```python
55
+ from datus_hive import HiveConnector, HiveConfig
56
+
57
+ # Create connector
58
+ config = HiveConfig(
59
+ host="127.0.0.1",
60
+ port=10000,
61
+ database="default",
62
+ username="hive",
63
+ )
64
+
65
+ connector = HiveConnector(config)
66
+
67
+ # Test connection
68
+ connector.test_connection()
69
+
70
+ # Execute query
71
+ result = connector.execute(
72
+ {"sql_query": "SELECT * FROM my_table LIMIT 10"},
73
+ result_format="list",
74
+ )
75
+ print(result.sql_return)
76
+
77
+ # Get table list
78
+ tables = connector.get_tables()
79
+ print(f"Tables: {tables}")
80
+
81
+ # Get table schema
82
+ schema = connector.get_schema(table_name="my_table")
83
+ for column in schema:
84
+ print(f"{column['name']}: {column['type']}")
85
+ ```
86
+
87
+ ## Configuration Parameters
88
+
89
+ | Parameter | Type | Default | Description |
90
+ |-----------|------|---------|-------------|
91
+ | `host` | str | `127.0.0.1` | HiveServer2 host |
92
+ | `port` | int | `10000` | HiveServer2 Thrift port |
93
+ | `database` | str | `None` | Default database (falls back to `default`) |
94
+ | `username` | str | **required** | Hive username |
95
+ | `password` | str | `""` | Password (for LDAP/CUSTOM auth) |
96
+ | `auth` | str | `None` | Auth mechanism: `NONE`, `LDAP`, `CUSTOM`, `KERBEROS` |
97
+ | `configuration` | dict | `{}` | Hive session configuration key-value pairs |
98
+ | `timeout_seconds` | int | `30` | Connection timeout in seconds |
99
+
100
+ ## Features
101
+
102
+ - Query execution with multiple result formats (list, csv, pandas, arrow)
103
+ - DDL execution (CREATE, ALTER, DROP)
104
+ - Metadata retrieval (databases, tables, views, schemas)
105
+ - DDL retrieval (SHOW CREATE TABLE)
106
+ - Sample data extraction
107
+ - Database context switching (USE statement)
108
+ - Connection pooling and management
109
+ - Hive session configuration support
110
+
111
+ ## Testing
112
+
113
+ ### Unit Tests
114
+
115
+ ```bash
116
+ uv run pytest datus-hive/tests/unit -v
117
+ ```
118
+
119
+ ### Integration Tests
120
+
121
+ Start Hive using Docker:
122
+
123
+ ```bash
124
+ cd datus-hive
125
+ docker compose up -d
126
+
127
+ # Wait for Hive to be healthy (about 1-2 minutes)
128
+ docker inspect --format='{{.State.Health.Status}}' datus-hive-server
129
+ ```
130
+
131
+ Run integration tests:
132
+
133
+ ```bash
134
+ uv run pytest datus-hive/tests/integration -v
135
+ ```
136
+
137
+ Stop Hive:
138
+
139
+ ```bash
140
+ cd datus-hive
141
+ docker compose down
142
+ ```
143
+
144
+ ### TPC-H Test Data
145
+
146
+ Initialize TPC-H sample data for manual testing:
147
+
148
+ ```bash
149
+ uv run python datus-hive/scripts/init_tpch_data.py
150
+
151
+ # With custom connection:
152
+ uv run python datus-hive/scripts/init_tpch_data.py --host localhost --port 10000 --username hive
153
+
154
+ # Clean re-init (drop existing tables first):
155
+ uv run python datus-hive/scripts/init_tpch_data.py --drop
156
+ ```
157
+
158
+ This creates 5 TPC-H tables with sample data:
159
+
160
+ | Table | Rows |
161
+ |-------|------|
162
+ | `tpch_region` | 5 |
163
+ | `tpch_nation` | 25 |
164
+ | `tpch_customer` | 10 |
165
+ | `tpch_orders` | 15 |
166
+ | `tpch_supplier` | 5 |
167
+
168
+ ## Requirements
169
+
170
+ - Python >= 3.12
171
+ - Apache Hive >= 2.x (tested with 4.0.1)
172
+ - datus-agent >= 0.3.0
173
+ - datus-sqlalchemy >= 0.1.0
174
+ - pyhive >= 0.7.0
175
+
176
+ ## License
177
+
178
+ Apache License 2.0
@@ -0,0 +1,16 @@
1
+ # Copyright 2025-present DatusAI, Inc.
2
+ # Licensed under the Apache License, Version 2.0.
3
+ # See http://www.apache.org/licenses/LICENSE-2.0 for details.
4
+
5
+ from .config import HiveConfig
6
+ from .connector import HiveConnector
7
+
8
# Version string of the datus_hive package.
__version__ = "0.1.0"
# Names exported by ``from datus_hive import *``: the connector class, its
# configuration model, and the registration hook defined in this module.
__all__ = ["HiveConnector", "HiveConfig", "register"]
10
+
11
+
12
def register() -> None:
    """Register Hive connector with Datus registry.

    Registers ``HiveConnector`` under the name ``"hive"`` and passes
    ``HiveConfig`` as its ``config_class``.
    """
    # Imported inside the function, so the registry is resolved when
    # ``register()`` is called rather than when this module is imported.
    from datus.tools.db_tools import connector_registry

    connector_registry.register("hive", HiveConnector, config_class=HiveConfig)
@@ -0,0 +1,56 @@
1
+ # Copyright 2025-present DatusAI, Inc.
2
+ # Licensed under the Apache License, Version 2.0.
3
+ # See http://www.apache.org/licenses/LICENSE-2.0 for details.
4
+
5
+ from typing import Any, Dict, Mapping, Optional
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field
8
+
9
+
10
+ def _extract_prefixed_config(carrier_map: Mapping[str, Any], prefix: str) -> Dict[str, Any]:
11
+ """Extract Hive configuration from a prefixed carrier map."""
12
+ hive_config: Dict[str, Any] = {}
13
+ prefix_len = len(prefix)
14
+
15
+ for key, value in carrier_map.items():
16
+ if key.startswith(prefix):
17
+ hive_key = key[prefix_len:]
18
+ hive_config[hive_key] = value
19
+
20
+ configuration_params: Dict[str, Any] = {}
21
+ base_params: Dict[str, Any] = {}
22
+
23
+ for key, value in hive_config.items():
24
+ if key.startswith("configuration."):
25
+ config_key = key[14:]
26
+ configuration_params[config_key] = value
27
+ else:
28
+ if key == "port" and isinstance(value, str) and value.isdigit():
29
+ base_params[key] = int(value)
30
+ else:
31
+ base_params[key] = value
32
+
33
+ result = base_params.copy()
34
+ result["configuration"] = configuration_params
35
+ return result
36
+
37
+
38
class HiveConfig(BaseModel):
    """Connection settings for a Hive data source.

    Fields not declared here are rejected (``extra="forbid"``). ``username``
    is the only field without a default value.
    """

    model_config = ConfigDict(extra="forbid")

    # Server endpoint and default database.
    host: str = Field(default="127.0.0.1", description="Hive server host")
    port: int = Field(default=10000, description="Hive server port")
    database: Optional[str] = Field(default=None, description="Default database name")

    # Credentials and authentication mechanism.
    username: str = Field(..., description="Hive username")
    password: str = Field(
        default="",
        description="Hive password",
        json_schema_extra={"input_type": "password"},
    )
    auth: Optional[str] = Field(
        default=None,
        description="Authentication mechanism (NONE, LDAP, CUSTOM, KERBEROS)",
    )

    # Session-level settings and connection timeout.
    configuration: Dict[str, Any] = Field(
        default_factory=dict,
        description="Hive session configuration",
    )
    timeout_seconds: int = Field(default=30, description="Connection timeout in seconds")

    @classmethod
    def from_config_map(cls, config_map: Mapping[str, Any], prefix: str) -> "HiveConfig":
        """Create a ``HiveConfig`` from the entries of ``config_map`` under ``prefix``.

        The prefixed entries are extracted with ``_extract_prefixed_config``
        and passed to the model constructor as keyword arguments.
        """
        return cls(**_extract_prefixed_config(config_map, prefix))