PyPI - datus-hive - Versions diffs - 0.1.0__tar.gz - Mend

datus-hive 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

datus_hive-0.1.0/.gitignore +140 -0
datus_hive-0.1.0/PKG-INFO +203 -0
datus_hive-0.1.0/README.md +178 -0
datus_hive-0.1.0/datus_hive/__init__.py +16 -0
datus_hive-0.1.0/datus_hive/config.py +56 -0
datus_hive-0.1.0/datus_hive/connector.py +347 -0
datus_hive-0.1.0/docker-compose.yml +20 -0
datus_hive-0.1.0/pyproject.toml +72 -0
datus_hive-0.1.0/scripts/init_tpch_data.py +233 -0
datus_hive-0.1.0/tests/integration/README.md +61 -0
datus_hive-0.1.0/tests/integration/__init__.py +3 -0
datus_hive-0.1.0/tests/integration/conftest.py +235 -0
datus_hive-0.1.0/tests/integration/test_integration.py +184 -0
datus_hive-0.1.0/tests/integration/test_tpch.py +145 -0
datus_hive-0.1.0/tests/unit/__init__.py +3 -0
datus_hive-0.1.0/tests/unit/test_config.py +78 -0
datus_hive-0.1.0/tests/unit/test_connector_unit.py +469 -0

datus_hive-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,140 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+Pipfile.lock
+# uv
+uv.lock
+# PEP 582
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db
+.omc

datus_hive-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,203 @@
+Metadata-Version: 2.4
+Name: datus-hive
+Version: 0.1.0
+Summary: Hive database adapter for Datus
+Project-URL: Homepage, https://github.com/Datus-ai/datus-db-adapters
+Project-URL: Repository, https://github.com/Datus-ai/datus-db-adapters
+Project-URL: Issues, https://github.com/Datus-ai/datus-db-adapters/issues
+Author-email: DatusAI <support@datus.ai>
+License: Apache-2.0
+Keywords: adapter,database,datus,hive
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.12
+Requires-Dist: datus-agent>0.2.1
+Requires-Dist: datus-sqlalchemy>=0.1.0
+Requires-Dist: pure-sasl>=0.6.2
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: pyhive>=0.7.0
+Requires-Dist: thrift-sasl>=0.4.3
+Requires-Dist: thrift>=0.16.0
+Description-Content-Type: text/markdown
+# datus-hive
+Hive database adapter for Datus.
+## Installation
+```bash
+pip install datus-hive
+```
+This will automatically install the required dependencies:
+- `datus-agent`
+- `datus-sqlalchemy`
+- `pyhive`
+- `thrift`
+- `thrift-sasl`
+- `pure-sasl`
+## Usage
+The adapter is automatically registered with Datus when installed. Configure your Hive connection in your Datus configuration:
+```yaml
+namespace:
+  hive:
+    type: hive
+    host: 127.0.0.1
+    port: 10000
+    username: hive
+    database: default
+```
+With authentication and session configuration:
+```yaml
+namespace:
+  hive_production:
+    type: hive
+    host: 127.0.0.1
+    port: 10000
+    database: mydb
+    username: hive_user
+    password: your_password
+    auth: CUSTOM
+    configuration:
+      hive.execution.engine: spark
+      spark.app.name: my_app
+      spark.executor.memory: 1G
+      spark.executor.instances: 2
+```
+Or use programmatically:
+```python
+from datus_hive import HiveConnector, HiveConfig
+# Create connector
+config = HiveConfig(
+    host="127.0.0.1",
+    port=10000,
+    database="default",
+    username="hive",
+)
+connector = HiveConnector(config)
+# Test connection
+connector.test_connection()
+# Execute query
+result = connector.execute(
+    {"sql_query": "SELECT * FROM my_table LIMIT 10"},
+    result_format="list",
+)
+print(result.sql_return)
+# Get table list
+tables = connector.get_tables()
+print(f"Tables: {tables}")
+# Get table schema
+schema = connector.get_schema(table_name="my_table")
+for column in schema:
+    print(f"{column['name']}: {column['type']}")
+```
+## Configuration Parameters
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `host` | str | `127.0.0.1` | HiveServer2 host |
+| `port` | int | `10000` | HiveServer2 Thrift port |
+| `database` | str | `None` | Default database (falls back to `default`) |
+| `username` | str | **required** | Hive username |
+| `password` | str | `""` | Password (for LDAP/CUSTOM auth) |
+| `auth` | str | `None` | Auth mechanism: `NONE`, `LDAP`, `CUSTOM`, `KERBEROS` |
+| `configuration` | dict | `{}` | Hive session configuration key-value pairs |
+| `timeout_seconds` | int | `30` | Connection timeout in seconds |
+## Features
+- Query execution with multiple result formats (list, csv, pandas, arrow)
+- DDL execution (CREATE, ALTER, DROP)
+- Metadata retrieval (databases, tables, views, schemas)
+- DDL retrieval (SHOW CREATE TABLE)
+- Sample data extraction
+- Database context switching (USE statement)
+- Connection pooling and management
+- Hive session configuration support
+## Testing
+### Unit Tests
+```bash
+uv run pytest datus-hive/tests/unit -v
+```
+### Integration Tests
+Start Hive using Docker:
+```bash
+cd datus-hive
+docker compose up -d
+# Wait for Hive to be healthy (about 1-2 minutes)
+docker inspect --format='{{.State.Health.Status}}' datus-hive-server
+```
+Run integration tests:
+```bash
+uv run pytest datus-hive/tests/integration -v
+```
+Stop Hive:
+```bash
+cd datus-hive
+docker compose down
+```
+### TPC-H Test Data
+Initialize TPC-H sample data for manual testing:
+```bash
+uv run python datus-hive/scripts/init_tpch_data.py
+# With custom connection:
+uv run python datus-hive/scripts/init_tpch_data.py --host localhost --port 10000 --username hive
+# Clean re-init (drop existing tables first):
+uv run python datus-hive/scripts/init_tpch_data.py --drop
+```
+This creates 5 TPC-H tables with sample data:
+| Table | Rows |
+|-------|------|
+| `tpch_region` | 5 |
+| `tpch_nation` | 25 |
+| `tpch_customer` | 10 |
+| `tpch_orders` | 15 |
+| `tpch_supplier` | 5 |
+## Requirements
+- Python >= 3.12
+- Apache Hive >= 2.x (tested with 4.0.1)
+- datus-agent > 0.2.1
+- datus-sqlalchemy >= 0.1.0
+- pyhive >= 0.7.0
+## License
+Apache License 2.0

datus_hive-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,178 @@
+# datus-hive
+Hive database adapter for Datus.
+## Installation
+```bash
+pip install datus-hive
+```
+This will automatically install the required dependencies:
+- `datus-agent`
+- `datus-sqlalchemy`
+- `pyhive`
+- `thrift`
+- `thrift-sasl`
+- `pure-sasl`
+## Usage
+The adapter is automatically registered with Datus when installed. Configure your Hive connection in your Datus configuration:
+```yaml
+namespace:
+  hive:
+    type: hive
+    host: 127.0.0.1
+    port: 10000
+    username: hive
+    database: default
+```
+With authentication and session configuration:
+```yaml
+namespace:
+  hive_production:
+    type: hive
+    host: 127.0.0.1
+    port: 10000
+    database: mydb
+    username: hive_user
+    password: your_password
+    auth: CUSTOM
+    configuration:
+      hive.execution.engine: spark
+      spark.app.name: my_app
+      spark.executor.memory: 1G
+      spark.executor.instances: 2
+```
+Or use programmatically:
+```python
+from datus_hive import HiveConnector, HiveConfig
+# Create connector
+config = HiveConfig(
+    host="127.0.0.1",
+    port=10000,
+    database="default",
+    username="hive",
+)
+connector = HiveConnector(config)
+# Test connection
+connector.test_connection()
+# Execute query
+result = connector.execute(
+    {"sql_query": "SELECT * FROM my_table LIMIT 10"},
+    result_format="list",
+)
+print(result.sql_return)
+# Get table list
+tables = connector.get_tables()
+print(f"Tables: {tables}")
+# Get table schema
+schema = connector.get_schema(table_name="my_table")
+for column in schema:
+    print(f"{column['name']}: {column['type']}")
+```
+## Configuration Parameters
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `host` | str | `127.0.0.1` | HiveServer2 host |
+| `port` | int | `10000` | HiveServer2 Thrift port |
+| `database` | str | `None` | Default database (falls back to `default`) |
+| `username` | str | **required** | Hive username |
+| `password` | str | `""` | Password (for LDAP/CUSTOM auth) |
+| `auth` | str | `None` | Auth mechanism: `NONE`, `LDAP`, `CUSTOM`, `KERBEROS` |
+| `configuration` | dict | `{}` | Hive session configuration key-value pairs |
+| `timeout_seconds` | int | `30` | Connection timeout in seconds |
+## Features
+- Query execution with multiple result formats (list, csv, pandas, arrow)
+- DDL execution (CREATE, ALTER, DROP)
+- Metadata retrieval (databases, tables, views, schemas)
+- DDL retrieval (SHOW CREATE TABLE)
+- Sample data extraction
+- Database context switching (USE statement)
+- Connection pooling and management
+- Hive session configuration support
+## Testing
+### Unit Tests
+```bash
+uv run pytest datus-hive/tests/unit -v
+```
+### Integration Tests
+Start Hive using Docker:
+```bash
+cd datus-hive
+docker compose up -d
+# Wait for Hive to be healthy (about 1-2 minutes)
+docker inspect --format='{{.State.Health.Status}}' datus-hive-server
+```
+Run integration tests:
+```bash
+uv run pytest datus-hive/tests/integration -v
+```
+Stop Hive:
+```bash
+cd datus-hive
+docker compose down
+```
+### TPC-H Test Data
+Initialize TPC-H sample data for manual testing:
+```bash
+uv run python datus-hive/scripts/init_tpch_data.py
+# With custom connection:
+uv run python datus-hive/scripts/init_tpch_data.py --host localhost --port 10000 --username hive
+# Clean re-init (drop existing tables first):
+uv run python datus-hive/scripts/init_tpch_data.py --drop
+```
+This creates 5 TPC-H tables with sample data:
+| Table | Rows |
+|-------|------|
+| `tpch_region` | 5 |
+| `tpch_nation` | 25 |
+| `tpch_customer` | 10 |
+| `tpch_orders` | 15 |
+| `tpch_supplier` | 5 |
+## Requirements
+- Python >= 3.12
+- Apache Hive >= 2.x (tested with 4.0.1)
+- datus-agent > 0.2.1
+- datus-sqlalchemy >= 0.1.0
+- pyhive >= 0.7.0
+## License
+Apache License 2.0

datus_hive-0.1.0/datus_hive/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Copyright 2025-present DatusAI, Inc.
+# Licensed under the Apache License, Version 2.0.
+# See http://www.apache.org/licenses/LICENSE-2.0 for details.
+from .config import HiveConfig
+from .connector import HiveConnector
+# Version string of the datus_hive package.
+__version__ = "0.1.0"
+# Names exported by `from datus_hive import *`: the connector, its config model,
+# and the `register` hook defined below.
+__all__ = ["HiveConnector", "HiveConfig", "register"]
+def register():
+    """Make the Hive adapter available to Datus under the type name ``"hive"``.
+
+    Registers ``HiveConnector`` with ``HiveConfig`` as its configuration class
+    in the Datus connector registry.
+    """
+    # Imported inside the function so that importing this package does not
+    # itself import the Datus registry module.
+    from datus.tools.db_tools import connector_registry as registry
+
+    registry.register("hive", HiveConnector, config_class=HiveConfig)

datus_hive-0.1.0/datus_hive/config.py ADDED Viewed

@@ -0,0 +1,56 @@
+# Copyright 2025-present DatusAI, Inc.
+# Licensed under the Apache License, Version 2.0.
+# See http://www.apache.org/licenses/LICENSE-2.0 for details.
+from typing import Any, Dict, Mapping, Optional
+from pydantic import BaseModel, ConfigDict, Field
+def _extract_prefixed_config(carrier_map: Mapping[str, Any], prefix: str) -> Dict[str, Any]:
+    """Extract Hive connection parameters from a prefixed carrier map.
+
+    Keys of ``carrier_map`` that start with ``prefix`` are kept with the prefix
+    stripped; all other keys are ignored. Session settings may be supplied in
+    two ways and are combined into the ``"configuration"`` entry of the result:
+
+    * flattened, as ``<prefix>configuration.<name>`` keys, and
+    * as a single ``<prefix>configuration`` key holding a mapping.
+
+    When both forms define the same setting, the flattened key wins. A
+    ``<prefix>configuration`` value that is not a mapping is ignored.
+
+    A string ``port`` consisting only of decimal digits is converted to
+    ``int``; every other value is passed through unchanged.
+
+    Args:
+        carrier_map: Flat mapping that may mix Hive settings with unrelated keys.
+        prefix: Prefix that marks the Hive settings (for example ``"hive."``).
+
+    Returns:
+        A new dict of the un-prefixed parameters. It always contains a
+        ``"configuration"`` dict, which is empty when no session settings
+        were found.
+    """
+    config_prefix = "configuration."
+    base_params: Dict[str, Any] = {}
+    dotted_settings: Dict[str, Any] = {}
+    for key, value in carrier_map.items():
+        if not key.startswith(prefix):
+            continue
+        hive_key = key[len(prefix):]
+        if hive_key.startswith(config_prefix):
+            dotted_settings[hive_key[len(config_prefix):]] = value
+        elif hive_key == "port" and isinstance(value, str) and value.isdecimal():
+            # isdecimal() rather than isdigit(): isdigit() also accepts
+            # characters such as superscripts that int() rejects.
+            base_params[hive_key] = int(value)
+        else:
+            base_params[hive_key] = value
+    # Keep a mapping passed under the plain "configuration" key instead of
+    # discarding it; flattened keys are applied last so they take precedence.
+    whole_settings = base_params.get("configuration")
+    merged: Dict[str, Any] = dict(whole_settings) if isinstance(whole_settings, Mapping) else {}
+    merged.update(dotted_settings)
+    base_params["configuration"] = merged
+    return base_params
+class HiveConfig(BaseModel):
+    """Connection settings for a Hive server.
+
+    Only ``username`` is required; every other field has a default. Fields
+    that are not declared here are rejected (``extra="forbid"``).
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    # Server location.
+    host: str = Field(
+        default="127.0.0.1",
+        description="Hive server host",
+    )
+    port: int = Field(
+        default=10000,
+        description="Hive server port",
+    )
+    database: Optional[str] = Field(
+        default=None,
+        description="Default database name",
+    )
+
+    # Credentials and authentication.
+    username: str = Field(
+        ...,
+        description="Hive username",
+    )
+    password: str = Field(
+        default="",
+        description="Hive password",
+        json_schema_extra={"input_type": "password"},
+    )
+    auth: Optional[str] = Field(
+        default=None,
+        description="Authentication mechanism (NONE, LDAP, CUSTOM, KERBEROS)",
+    )
+
+    # Session behaviour.
+    configuration: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Hive session configuration",
+    )
+    timeout_seconds: int = Field(
+        default=30,
+        description="Connection timeout in seconds",
+    )
+
+    @classmethod
+    def from_config_map(cls, config_map: Mapping[str, Any], prefix: str) -> "HiveConfig":
+        """Create a ``HiveConfig`` from the ``prefix``-marked entries of ``config_map``.
+
+        The entries are selected and un-prefixed by
+        ``_extract_prefixed_config`` and then passed to the model constructor
+        as keyword arguments, so the usual model validation applies.
+        """
+        return cls(**_extract_prefixed_config(config_map, prefix))