apache-airflow-providers-apache-hive 6.1.0rc1__tar.gz → 6.1.1rc1__tar.gz
This diff compares the content of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of apache-airflow-providers-apache-hive is a release candidate and might be problematic.
- {apache-airflow-providers-apache-hive-6.1.0rc1/apache_airflow_providers_apache_hive.egg-info → apache-airflow-providers-apache-hive-6.1.1rc1}/PKG-INFO +35 -7
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/README.rst +32 -3
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/__init__.py +8 -5
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/get_provider_info.py +2 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/hooks/hive.py +19 -19
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/operators/hive.py +1 -1
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/operators/hive_stats.py +8 -5
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/transfers/mssql_to_hive.py +3 -3
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/transfers/mysql_to_hive.py +19 -22
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/transfers/vertica_to_hive.py +8 -7
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1/apache_airflow_providers_apache_hive.egg-info}/PKG-INFO +35 -7
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/pyproject.toml +6 -4
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/setup.cfg +2 -3
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/setup.py +1 -1
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/LICENSE +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/MANIFEST.in +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/NOTICE +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/hooks/__init__.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/macros/__init__.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/macros/hive.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/operators/__init__.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/plugins/__init__.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/plugins/hive.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/sensors/__init__.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/sensors/hive_partition.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/sensors/metastore_partition.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/sensors/named_hive_partition.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/transfers/__init__.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/transfers/hive_to_mysql.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/transfers/hive_to_samba.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/airflow/providers/apache/hive/transfers/s3_to_hive.py +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/apache_airflow_providers_apache_hive.egg-info/SOURCES.txt +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/apache_airflow_providers_apache_hive.egg-info/dependency_links.txt +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/apache_airflow_providers_apache_hive.egg-info/entry_points.txt +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/apache_airflow_providers_apache_hive.egg-info/not-zip-safe +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/apache_airflow_providers_apache_hive.egg-info/requires.txt +0 -0
- {apache-airflow-providers-apache-hive-6.1.0rc1 → apache-airflow-providers-apache-hive-6.1.1rc1}/apache_airflow_providers_apache_hive.egg-info/top_level.txt +0 -0

--- apache-airflow-providers-apache-hive-6.1.0rc1/apache_airflow_providers_apache_hive.egg-info/PKG-INFO
+++ apache-airflow-providers-apache-hive-6.1.1rc1/PKG-INFO
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-apache-hive
-Version: 6.1.0rc1
+Version: 6.1.1rc1
 Summary: Provider for Apache Airflow. Implements apache-airflow-providers-apache-hive package
 Home-page: https://airflow.apache.org/
 Download-URL: https://archive.apache.org/dist/airflow/providers
 Author: Apache Software Foundation
 Author-email: dev@airflow.apache.org
 License: Apache License 2.0
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.0/
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.1/
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -21,12 +21,11 @@ Classifier: Intended Audience :: System Administrators
 Classifier: Framework :: Apache Airflow
 Classifier: Framework :: Apache Airflow :: Provider
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Topic :: System :: Monitoring
-Requires-Python: ~=3.7
+Requires-Python: ~=3.8
 Description-Content-Type: text/x-rst
 Provides-Extra: amazon
 Provides-Extra: common.sql
@@ -59,7 +58,7 @@ License-File: NOTICE
 
 Package ``apache-airflow-providers-apache-hive``
 
-Release: ``6.1.0rc1``
+Release: ``6.1.1rc1``
 
 
 `Apache Hive <https://hive.apache.org/>`__
@@ -72,7 +71,7 @@ This is a provider package for ``apache.hive`` provider. All classes for this pr
 are in ``airflow.providers.apache.hive`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.1/>`_.
 
 
 Installation
@@ -82,7 +81,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-apache-hive``
 
-The package supports the following python versions: 3.7,3.8,3.9,3.10
+The package supports the following python versions: 3.8,3.9,3.10
 
 Requirements
 ------------
@@ -150,6 +149,32 @@ Dependent package
 Changelog
 ---------
 
+
+6.1.1
+.....
+
+.. note::
+  This release dropped support for Python 3.7
+
+Bug Fixes
+~~~~~~~~~
+
+* ``Sanitize beeline principal parameter (#31983)``
+
+Misc
+~~~~
+
+* ``Replace unicodecsv with standard csv library (#31693)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+
+* ``Apache provider docstring improvements (#31730)``
+* ``Improve docstrings in providers (#31681)``
+* ``Add D400 pydocstyle check - Apache providers only (#31424)``
+* ``Add Python 3.11 support (#27264)``
+* ``Add note about dropping Python 3.7 for providers (#32015)``
+
 6.1.0
 .....
 
@@ -169,6 +194,9 @@ Misc
 * ``Add mechanism to suspend providers (#30422)``
 * ``Use 'AirflowProviderDeprecationWarning' in providers (#30975)``
 * ``Decouple "job runner" from BaseJob ORM model (#30255)``
+* ``Use '__version__' in providers not 'version' (#31393)``
+* ``Fixing circular import error in providers caused by airflow version check (#31379)``
+* ``Prepare docs for May 2023 wave of Providers (#31252)``
 
 6.0.0
 .....
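The only installer-visible change in the metadata above is the Requires-Python bump from ``~=3.7`` to ``~=3.8``, a compatible-release pin equivalent to ``>=3.8, <4.0``; it is what makes pip refuse to install 6.1.1 on Python 3.7. A minimal sketch of how that specifier evaluates, using the ``packaging`` library (the interpreter versions are illustrative):

    from packaging.specifiers import SpecifierSet

    requires_python = SpecifierSet("~=3.8")  # compatible release: >=3.8, <4.0

    for interpreter in ("3.7.17", "3.8.0", "3.10.12", "3.11.4"):
        print(interpreter, requires_python.contains(interpreter))
    # 3.7.17 False, 3.8.0 True, 3.10.12 True, 3.11.4 True

Note that 3.11 passes the Requires-Python gate but is still excluded through the provider metadata (see the ``excluded-python-versions`` entry in get_provider_info.py below), which is why the README lists only 3.8, 3.9 and 3.10.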
--- apache-airflow-providers-apache-hive-6.1.0rc1/README.rst
+++ apache-airflow-providers-apache-hive-6.1.1rc1/README.rst
@@ -19,7 +19,7 @@
 
 Package ``apache-airflow-providers-apache-hive``
 
-Release: ``6.1.0rc1``
+Release: ``6.1.1rc1``
 
 
 `Apache Hive <https://hive.apache.org/>`__
@@ -32,7 +32,7 @@ This is a provider package for ``apache.hive`` provider. All classes for this pr
 are in ``airflow.providers.apache.hive`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.1/>`_.
 
 
 Installation
@@ -42,7 +42,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-apache-hive``
 
-The package supports the following python versions: 3.7,3.8,3.9,3.10
+The package supports the following python versions: 3.8,3.9,3.10
 
 Requirements
 ------------
@@ -110,6 +110,32 @@ Dependent package
 Changelog
 ---------
 
+
+6.1.1
+.....
+
+.. note::
+  This release dropped support for Python 3.7
+
+Bug Fixes
+~~~~~~~~~
+
+* ``Sanitize beeline principal parameter (#31983)``
+
+Misc
+~~~~
+
+* ``Replace unicodecsv with standard csv library (#31693)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+
+* ``Apache provider docstring improvements (#31730)``
+* ``Improve docstrings in providers (#31681)``
+* ``Add D400 pydocstyle check - Apache providers only (#31424)``
+* ``Add Python 3.11 support (#27264)``
+* ``Add note about dropping Python 3.7 for providers (#32015)``
+
 6.1.0
 .....
 
@@ -129,6 +155,9 @@ Misc
 * ``Add mechanism to suspend providers (#30422)``
 * ``Use 'AirflowProviderDeprecationWarning' in providers (#30975)``
 * ``Decouple "job runner" from BaseJob ORM model (#30255)``
+* ``Use '__version__' in providers not 'version' (#31393)``
+* ``Fixing circular import error in providers caused by airflow version check (#31379)``
+* ``Prepare docs for May 2023 wave of Providers (#31252)``
 
 6.0.0
 .....
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/__init__.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/__init__.py
@@ -26,13 +26,16 @@ from __future__ import annotations
 
 import packaging.version
 
-__all__ = ["version"]
+__all__ = ["__version__"]
 
-version = "6.1.0"
+__version__ = "6.1.1"
 
-from airflow import __version__ as airflow_version
+try:
+    from airflow import __version__ as airflow_version
+except ImportError:
+    from airflow.version import version as airflow_version
 
-if packaging.version.parse(
+if packaging.version.parse(airflow_version) < packaging.version.parse("2.4.0"):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-apache-hive:{
+        f"The package `apache-airflow-providers-apache-hive:{__version__}` requires Apache Airflow 2.4.0+"  # NOQA: E501
     )
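Two of the excluded-changelog commits land here: ``version`` becomes ``__version__`` (#31393), and the ``airflow`` import gains an ImportError fallback to break a circular import during provider loading (#31379). A standalone sketch of how the ``packaging`` comparison in the guard behaves; note that pre-releases sort below the final release, so even an Airflow 2.4.0 release candidate would trip the RuntimeError:

    import packaging.version

    parse = packaging.version.parse
    print(parse("2.3.4") < parse("2.4.0"))     # True  -> guard raises
    print(parse("2.4.0rc1") < parse("2.4.0"))  # True  -> guard raises
    print(parse("2.4.2") < parse("2.4.0"))     # False -> import proceeds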
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/get_provider_info.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/get_provider_info.py
@@ -29,6 +29,7 @@ def get_provider_info():
         "description": "`Apache Hive <https://hive.apache.org/>`__\n",
         "suspended": False,
         "versions": [
+            "6.1.1",
             "6.1.0",
             "6.0.0",
             "5.1.3",
@@ -66,6 +67,7 @@ def get_provider_info():
             'sasl>=0.3.1; python_version>="3.9"',
             "thrift>=0.9.2",
         ],
+        "excluded-python-versions": ["3.11"],
         "integrations": [
             {
                 "integration-name": "Apache Hive",
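get_provider_info() is the metadata hook Airflow calls to discover the provider, so the new "6.1.1" entry and the "excluded-python-versions" key are plain dictionary fields that can be inspected directly. A small sketch, using exactly the field names shown in the hunk above:

    from airflow.providers.apache.hive.get_provider_info import get_provider_info

    info = get_provider_info()
    assert "6.1.1" in info["versions"]
    # 3.11 is excluded here even though core Airflow added 3.11 support (#27264)
    assert "3.11" in info["excluded-python-versions"]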
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/hooks/hive.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/hooks/hive.py
@@ -37,7 +37,7 @@ except ImportError as e:
 
     raise AirflowOptionalProviderFeatureException(e)
 
-import unicodecsv as csv
+import csv
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
@@ -137,7 +137,7 @@ class HiveCliHook(BaseHook):
         return proxy_user_value  # The default proxy user (undefined)
 
     def _prepare_cli_cmd(self) -> list[Any]:
-        """This function creates the command list from available information"""
+        """This function creates the command list from available information."""
         conn = self.conn
         hive_bin = "hive"
         cmd_extra = []
@@ -150,9 +150,9 @@ class HiveCliHook(BaseHook):
                 template = conn.extra_dejson.get("principal", "hive/_HOST@EXAMPLE.COM")
                 if "_HOST" in template:
                     template = utils.replace_hostname_pattern(utils.get_components(template))
-
                 proxy_user = self._get_proxy_user()
-
+                if ";" in template:
+                    raise RuntimeError("The principal should not contain the ';' character")
                 jdbc_url += f";principal={template};{proxy_user}"
             elif self.auth:
                 jdbc_url += ";auth=" + self.auth
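This is the fix behind ``Sanitize beeline principal parameter (#31983)``: the Kerberos principal taken from the connection extras is interpolated into a Beeline JDBC URL where ";" separates session parameters, so a principal containing ";" could smuggle extra parameters into the URL. A minimal standalone sketch of the guard (the helper name and URL are illustrative, not part of the provider):

    def sanitize_principal(template: str) -> str:
        # mirror of the check added above
        if ";" in template:
            raise RuntimeError("The principal should not contain the ';' character")
        return template

    jdbc_url = "jdbc:hive2://metastore-host:10000/default"
    jdbc_url += f";principal={sanitize_principal('hive/_HOST@EXAMPLE.COM')}"
    # sanitize_principal("hive/_HOST@EXAMPLE.COM;hive.server2.proxy.user=admin")
    # would raise RuntimeError instead of extending the URL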
@@ -296,7 +296,7 @@ class HiveCliHook(BaseHook):
         return stdout
 
     def test_hql(self, hql: str) -> None:
-        """Test an hql statement using the hive cli and EXPLAIN"""
+        """Test an hql statement using the hive cli and EXPLAIN."""
         create, insert, other = [], [], []
         for query in hql.split(";"):  # naive
             query_original = query
@@ -415,7 +415,7 @@ class HiveCliHook(BaseHook):
         tblproperties: dict[str, Any] | None = None,
     ) -> None:
         """
-        Loads a local file into Hive
+        Loads a local file into Hive.
 
         Note that the table generated in Hive uses ``STORED AS textfile``
         which isn't the most efficient serialization format. If a
@@ -475,7 +475,7 @@ class HiveCliHook(BaseHook):
         self.run_cli(hql)
 
     def kill(self) -> None:
-        """Kill Hive cli command"""
+        """Kill Hive cli command."""
        if hasattr(self, "sub_process"):
             if self.sub_process.poll() is None:
                 print("Killing the Hive job")
@@ -486,7 +486,7 @@ class HiveCliHook(BaseHook):
 
 class HiveMetastoreHook(BaseHook):
     """
-    Wrapper to interact with the Hive Metastore
+    Wrapper to interact with the Hive Metastore.
 
     :param metastore_conn_id: reference to the
         :ref: `metastore thrift service connection id <howto/connection:hive_metastore>`.
@@ -587,7 +587,7 @@ class HiveMetastoreHook(BaseHook):
 
     def check_for_partition(self, schema: str, table: str, partition: str) -> bool:
         """
-        Checks whether a partition exists
+        Checks whether a partition exists.
 
         :param schema: Name of hive schema (database) @table belongs to
         :param table: Name of hive table @partition belongs to
@@ -608,7 +608,7 @@ class HiveMetastoreHook(BaseHook):
 
     def check_for_named_partition(self, schema: str, table: str, partition_name: str) -> Any:
         """
-        Checks whether a partition with a given name exists
+        Checks whether a partition with a given name exists.
 
         :param schema: Name of hive schema (database) @table belongs to
         :param table: Name of hive table @partition belongs to
@@ -625,7 +625,7 @@ class HiveMetastoreHook(BaseHook):
         return client.check_for_named_partition(schema, table, partition_name)
 
     def get_table(self, table_name: str, db: str = "default") -> Any:
-        """Get a metastore table object
+        """Get a metastore table object.
 
         >>> hh = HiveMetastoreHook()
         >>> t = hh.get_table(db='airflow', table_name='static_babynames')
@@ -640,13 +640,13 @@ class HiveMetastoreHook(BaseHook):
         return client.get_table(dbname=db, tbl_name=table_name)
 
     def get_tables(self, db: str, pattern: str = "*") -> Any:
-        """Get a metastore table object"""
+        """Get a metastore table object."""
         with self.metastore as client:
             tables = client.get_tables(db_name=db, pattern=pattern)
             return client.get_table_objects_by_name(db, tables)
 
     def get_databases(self, pattern: str = "*") -> Any:
-        """Get a metastore table object"""
+        """Get a metastore table object."""
         with self.metastore as client:
             return client.get_databases(pattern)
 
@@ -774,7 +774,7 @@ class HiveMetastoreHook(BaseHook):
 
     def table_exists(self, table_name: str, db: str = "default") -> bool:
         """
-        Check if table exists
+        Check if table exists.
 
         >>> hh = HiveMetastoreHook()
         >>> hh.table_exists(db='airflow', table_name='static_babynames')
@@ -790,7 +790,7 @@ class HiveMetastoreHook(BaseHook):
 
     def drop_partitions(self, table_name, part_vals, delete_data=False, db="default"):
         """
-        Drop partitions from the given table matching the part_vals input
+        Drop partitions from the given table matching the part_vals input.
 
         :param table_name: table name.
         :param part_vals: list of partition specs.
@@ -816,7 +816,7 @@ class HiveMetastoreHook(BaseHook):
 
 class HiveServer2Hook(DbApiHook):
     """
-    Wrapper around the pyhive library
+    Wrapper around the pyhive library.
 
     Notes:
     * the default auth_mechanism is PLAIN, to override it you
@@ -989,8 +989,8 @@ class HiveServer2Hook(DbApiHook):
         message = None
 
         i = 0
-        with open(csv_filepath, "wb") as file:
-            writer = csv.writer(file, delimiter=delimiter, lineterminator=lineterminator, encoding="utf-8")
+        with open(csv_filepath, "w", encoding="utf-8") as file:
+            writer = csv.writer(file, delimiter=delimiter, lineterminator=lineterminator)
             try:
                 if output_header:
                     self.log.debug("Cursor description is %s", header)
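With unicodecsv gone, to_csv opens the target in text mode and moves the encoding from the writer (a unicodecsv extension) to open(); the stdlib writer then receives str rows. A sketch of the new pattern with illustrative data (the stdlib docs also recommend newline="" when the writer manages its own line endings):

    import csv

    rows = [("id", "name"), (1, "alpha"), (2, "beta")]
    with open("/tmp/hive_results.csv", "w", encoding="utf-8", newline="") as file:
        writer = csv.writer(file, delimiter=",", lineterminator="\n")
        writer.writerows(rows)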
@@ -1037,7 +1037,7 @@ class HiveServer2Hook(DbApiHook):
         **kwargs,
     ) -> pandas.DataFrame:
         """
-        Get a pandas dataframe from a Hive query
+        Get a pandas dataframe from a Hive query.
 
         :param sql: hql to be executed.
         :param schema: target schema, default to 'default'.
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/operators/hive.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/operators/hive.py
@@ -123,7 +123,7 @@ class HiveOperator(BaseOperator):
         self.hook: HiveCliHook | None = None
 
     def get_hook(self) -> HiveCliHook:
-        """Get Hive cli hook"""
+        """Get Hive cli hook."""
         return HiveCliHook(
             hive_cli_conn_id=self.hive_cli_conn_id,
             run_as=self.run_as,
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/operators/hive_stats.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/operators/hive_stats.py
@@ -33,10 +33,13 @@ if TYPE_CHECKING:
 
 
 class HiveStatsCollectionOperator(BaseOperator):
-    """
-    Gathers partition statistics using a dynamically generated Presto
-    query, inserts the stats into a MySql table with this format. Stats
-    overwrite themselves if you rerun the same date/partition.
+    """Gather partition statistics and insert them into MySQL.
+
+    Statistics are gathered with a dynamically generated Presto query and
+    inserted with this format. Stats overwrite themselves if you rerun the
+    same date/partition.
+
+    .. code-block:: sql
 
         CREATE TABLE hive_stats (
             ds VARCHAR(16),
@@ -98,7 +101,7 @@ class HiveStatsCollectionOperator(BaseOperator):
         self.dttm = "{{ execution_date.isoformat() }}"
 
     def get_default_exprs(self, col: str, col_type: str) -> dict[Any, Any]:
-        """Get default expressions"""
+        """Get default expressions."""
         if col in self.excluded_columns:
             return {}
         exp = {(col, "non_null"): f"COUNT({col})"}
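Beyond the docstring rewrite, the visible tail of get_default_exprs shows the pattern the operator uses: a dict keyed by (column, metric) whose values are Presto aggregate expressions, later inserted into the hive_stats table from the class docstring. A sketch of that shape; only the non_null entry appears in this diff, the extra metric is hypothetical:

    col = "user_id"
    exp = {(col, "non_null"): f"COUNT({col})"}
    # type-specific metrics are merged in the same shape, e.g. a numeric
    # column might also add: exp[(col, "max")] = f"MAX({col})"
    print(exp)  # {('user_id', 'non_null'): 'COUNT(user_id)'}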
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/transfers/mssql_to_hive.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/transfers/mssql_to_hive.py
@@ -18,12 +18,12 @@
 """This module contains an operator to move data from MSSQL to Hive."""
 from __future__ import annotations
 
+import csv
 from collections import OrderedDict
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Sequence
 
 import pymssql
-import unicodecsv as csv
 
 from airflow.models import BaseOperator
 from airflow.providers.apache.hive.hooks.hive import HiveCliHook
@@ -113,8 +113,8 @@ class MsSqlToHiveOperator(BaseOperator):
         with mssql.get_conn() as conn:
             with conn.cursor() as cursor:
                 cursor.execute(self.sql)
-                with NamedTemporaryFile("w") as tmp_file:
-                    csv_writer = csv.writer(tmp_file, delimiter=self.delimiter, encoding="utf-8")
+                with NamedTemporaryFile(mode="w", encoding="utf-8") as tmp_file:
+                    csv_writer = csv.writer(tmp_file, delimiter=self.delimiter)
                     field_dict = OrderedDict()
                     for col_count, field in enumerate(cursor.description, start=1):
                         col_position = f"Column{col_count}"
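The same unicodecsv-to-csv move recurs in each transfer operator: the temporary file switches to text mode with an explicit encoding=, and the encoding argument disappears from csv.writer (the stdlib writer does not accept one). A condensed sketch with illustrative rows:

    import csv
    from tempfile import NamedTemporaryFile

    rows = [("a", 1), ("b", 2)]

    # 6.1.0: import unicodecsv as csv; encoding passed to the writer
    # 6.1.1: the encoding belongs to the file object instead
    with NamedTemporaryFile(mode="w", encoding="utf-8") as tmp_file:
        csv_writer = csv.writer(tmp_file, delimiter=",")
        csv_writer.writerows(rows)
        tmp_file.flush()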
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/transfers/mysql_to_hive.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/transfers/mysql_to_hive.py
@@ -18,12 +18,13 @@
 """This module contains an operator to move data from MySQL to Hive."""
 from __future__ import annotations
 
+import csv
 from collections import OrderedDict
+from contextlib import closing
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Sequence
 
 import MySQLdb
-import unicodecsv as csv
 
 from airflow.models import BaseOperator
 from airflow.providers.apache.hive.hooks.hive import HiveCliHook
@@ -83,7 +84,7 @@ class MySqlToHiveOperator(BaseOperator):
         recreate: bool = False,
         partition: dict | None = None,
         delimiter: str = chr(1),
-        quoting: str | None = None,
+        quoting: int | None = None,
         quotechar: str = '"',
         escapechar: str | None = None,
         mysql_conn_id: str = "mysql_default",
@@ -131,28 +132,24 @@ class MySqlToHiveOperator(BaseOperator):
     def execute(self, context: Context):
         hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id, auth=self.hive_auth)
         mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
-
         self.log.info("Dumping MySQL query results to local file")
-        conn = mysql.get_conn()
-        cursor = conn.cursor()
-        cursor.execute(self.sql)
-        with NamedTemporaryFile("w") as f:
-            csv_writer = csv.writer(
-                f,
-                delimiter=self.delimiter,
-                quoting=self.quoting,
-                quotechar=self.quotechar,
-                escapechar=self.escapechar,
-                encoding="utf-8",
-            )
-            field_dict = OrderedDict()
-            if cursor.description is not None:
-                for field in cursor.description:
-                    field_dict[field[0]] = self.type_map(field[1])
-            csv_writer.writerows(cursor)
+        with NamedTemporaryFile(mode="w", encoding="utf-8") as f:
+            with closing(mysql.get_conn()) as conn:
+                with closing(conn.cursor()) as cursor:
+                    cursor.execute(self.sql)
+                    csv_writer = csv.writer(
+                        f,
+                        delimiter=self.delimiter,
+                        quoting=self.quoting,
+                        quotechar=self.quotechar if self.quoting != csv.QUOTE_NONE else None,
+                        escapechar=self.escapechar,
+                    )
+                    field_dict = OrderedDict()
+                    if cursor.description is not None:
+                        for field in cursor.description:
+                            field_dict[field[0]] = self.type_map(field[1])
+                    csv_writer.writerows(cursor)
             f.flush()
-            cursor.close()
-            conn.close()  # type: ignore[misc]
             self.log.info("Loading file into Hive")
             hive.load_file(
                 f.name,
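The rewritten execute also swaps the manual cursor.close() / conn.close() calls for contextlib.closing, which guarantees close() even when the dump fails midway (MySQLdb's own context manager handles transactions rather than connection lifetime). The pattern in isolation, with hook standing in for the MySqlHook used above:

    from contextlib import closing

    def dump(hook, sql):
        with closing(hook.get_conn()) as conn:      # conn.close() on exit
            with closing(conn.cursor()) as cursor:  # cursor.close() on exit
                cursor.execute(sql)
                return cursor.fetchall()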
--- apache-airflow-providers-apache-hive-6.1.0rc1/airflow/providers/apache/hive/transfers/vertica_to_hive.py
+++ apache-airflow-providers-apache-hive-6.1.1rc1/airflow/providers/apache/hive/transfers/vertica_to_hive.py
@@ -18,12 +18,11 @@
 """This module contains an operator to move data from Vertica to Hive."""
 from __future__ import annotations
 
+import csv
 from collections import OrderedDict
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any, Sequence
 
-import unicodecsv as csv
-
 from airflow.models import BaseOperator
 from airflow.providers.apache.hive.hooks.hive import HiveCliHook
 from airflow.providers.vertica.hooks.vertica import VerticaHook
@@ -94,9 +93,11 @@ class VerticaToHiveOperator(BaseOperator):
 
     @classmethod
     def type_map(cls, vertica_type):
-        """
-        Manually hack the type mapping between Vertica and Hive; the stock
-        datatype.py does not provide the full type mapping access.
+        """Manually hack Vertica-Python type mapping.
+
+        The stock datatype.py does not provide the full type mapping access.
+
+        Reference:
         https://github.com/uber/vertica-python/blob/master/vertica_python/vertica/column.py
         """
         type_map = {
@@ -117,8 +118,8 @@ class VerticaToHiveOperator(BaseOperator):
         conn = vertica.get_conn()
         cursor = conn.cursor()
         cursor.execute(self.sql)
-        with NamedTemporaryFile("w") as f:
-            csv_writer = csv.writer(f, delimiter=self.delimiter, encoding="utf-8")
+        with NamedTemporaryFile(mode="w", encoding="utf-8") as f:
+            csv_writer = csv.writer(f, delimiter=self.delimiter)
             field_dict = OrderedDict()
             for col_count, field in enumerate(cursor.description, start=1):
                 col_position = f"Column{col_count}"
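type_map translates vertica-python's integer type codes (defined in the column.py referenced by the docstring) into Hive column types, falling back to a default for unknown codes. A hypothetical sketch of the shape; the codes shown are illustrative and the full table lives in the provider source:

    def type_map(vertica_type: int) -> str:
        # illustrative subset; real codes come from vertica-python's column.py
        mapping = {
            5: "BOOLEAN",
            6: "INT",
            7: "FLOAT",
        }
        return mapping.get(vertica_type, "STRING")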
--- apache-airflow-providers-apache-hive-6.1.0rc1/PKG-INFO
+++ apache-airflow-providers-apache-hive-6.1.1rc1/apache_airflow_providers_apache_hive.egg-info/PKG-INFO
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-apache-hive
-Version: 6.1.0rc1
+Version: 6.1.1rc1
 Summary: Provider for Apache Airflow. Implements apache-airflow-providers-apache-hive package
 Home-page: https://airflow.apache.org/
 Download-URL: https://archive.apache.org/dist/airflow/providers
 Author: Apache Software Foundation
 Author-email: dev@airflow.apache.org
 License: Apache License 2.0
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.0/
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.1/
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -21,12 +21,11 @@ Classifier: Intended Audience :: System Administrators
 Classifier: Framework :: Apache Airflow
 Classifier: Framework :: Apache Airflow :: Provider
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Topic :: System :: Monitoring
-Requires-Python: ~=3.7
+Requires-Python: ~=3.8
 Description-Content-Type: text/x-rst
 Provides-Extra: amazon
 Provides-Extra: common.sql
@@ -59,7 +58,7 @@ License-File: NOTICE
 
 Package ``apache-airflow-providers-apache-hive``
 
-Release: ``6.1.0rc1``
+Release: ``6.1.1rc1``
 
 
 `Apache Hive <https://hive.apache.org/>`__
@@ -72,7 +71,7 @@ This is a provider package for ``apache.hive`` provider. All classes for this pr
 are in ``airflow.providers.apache.hive`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.1/>`_.
 
 
 Installation
@@ -82,7 +81,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-apache-hive``
 
-The package supports the following python versions: 3.7,3.8,3.9,3.10
+The package supports the following python versions: 3.8,3.9,3.10
 
 Requirements
 ------------
@@ -150,6 +149,32 @@ Dependent package
 Changelog
 ---------
 
+
+6.1.1
+.....
+
+.. note::
+  This release dropped support for Python 3.7
+
+Bug Fixes
+~~~~~~~~~
+
+* ``Sanitize beeline principal parameter (#31983)``
+
+Misc
+~~~~
+
+* ``Replace unicodecsv with standard csv library (#31693)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+
+* ``Apache provider docstring improvements (#31730)``
+* ``Improve docstrings in providers (#31681)``
+* ``Add D400 pydocstyle check - Apache providers only (#31424)``
+* ``Add Python 3.11 support (#27264)``
+* ``Add note about dropping Python 3.7 for providers (#32015)``
+
 6.1.0
 .....
 
@@ -169,6 +194,9 @@ Misc
 * ``Add mechanism to suspend providers (#30422)``
 * ``Use 'AirflowProviderDeprecationWarning' in providers (#30975)``
 * ``Decouple "job runner" from BaseJob ORM model (#30255)``
+* ``Use '__version__' in providers not 'version' (#31393)``
+* ``Fixing circular import error in providers caused by airflow version check (#31379)``
+* ``Prepare docs for May 2023 wave of Providers (#31252)``
 
 6.0.0
 .....
--- apache-airflow-providers-apache-hive-6.1.0rc1/pyproject.toml
+++ apache-airflow-providers-apache-hive-6.1.1rc1/pyproject.toml
@@ -26,6 +26,9 @@ target-version = ['py37', 'py38', 'py39', 'py310']
 requires = ['setuptools==67.2.0']
 build-backend = "setuptools.build_meta"
 
+[project]
+requires-python = ">=3.8"
+
 [tool.ruff]
 typing-modules = ["airflow.typing_compat"]
 line-length = 110
@@ -35,12 +38,10 @@ extend-exclude = [
     "airflow/providers/google/ads/_vendor/*",
     # The files generated by stubgen aren't 100% valid syntax it turns out, and we don't ship them, so we can
     # ignore them in ruff
-    "airflow/providers/common/sql/*/*.pyi"
+    "airflow/providers/common/sql/*/*.pyi",
+    "airflow/migrations/versions/*.py"
 ]
 
-# TODO: Bump to Python 3.8 when support for Python 3.7 is dropped in Airflow.
-target-version = "py37"
-
 extend-select = [
     "I", # Missing required import (auto-fixable)
     "UP", # Pyupgrade
@@ -53,6 +54,7 @@ extend-select = [
     "D106",
     "D2",
     "D3",
+    "D400",
     # "D401", # Not enabled by ruff, but we don't want it
     "D402",
     "D403",
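The newly selected D400 rule ("first docstring line should end with a period") is what drives the long run of one-character docstring hunks in this release. In miniature, reusing a docstring from the hook changes above:

    def test_hql_before(hql: str) -> None:
        """Test an hql statement using the hive cli and EXPLAIN"""   # flagged by D400

    def test_hql_after(hql: str) -> None:
        """Test an hql statement using the hive cli and EXPLAIN."""  # compliant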
--- apache-airflow-providers-apache-hive-6.1.0rc1/setup.cfg
+++ apache-airflow-providers-apache-hive-6.1.1rc1/setup.cfg
@@ -21,13 +21,12 @@ classifiers =
     Framework :: Apache Airflow
     Framework :: Apache Airflow :: Provider
     License :: OSI Approved :: Apache Software License
-    Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
     Topic :: System :: Monitoring
 project_urls =
-    Documentation=https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.0/
+    Documentation=https://airflow.apache.org/docs/apache-airflow-providers-apache-hive/6.1.1/
     Bug Tracker=https://github.com/apache/airflow/issues
     Source Code=https://github.com/apache/airflow
     Slack Chat=https://s.apache.org/airflow-slack
@@ -40,7 +39,7 @@ python_tag = py3
 [options]
 zip_safe = False
 include_package_data = True
-python_requires = ~=3.7
+python_requires = ~=3.8
 packages = find:
 setup_requires =
     setuptools
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|