dbhose-airflow 0.0.1.0__py3-none-any.whl → 0.0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbhose_airflow/__init__.py +29 -5
- dbhose_airflow/chunk_query.py +14 -0
- dbhose_airflow/move_method.py +1 -0
- {dbhose_airflow-0.0.1.0.dist-info → dbhose_airflow-0.0.2.1.dist-info}/METADATA +10 -5
- dbhose_airflow-0.0.2.1.dist-info/RECORD +12 -0
- dbhose_airflow-0.0.2.1.dist-info/licenses/CHANGELOG.md +32 -0
- {dbhose_airflow-0.0.1.0.dist-info → dbhose_airflow-0.0.2.1.dist-info}/licenses/README.md +7 -2
- dbhose_airflow-0.0.1.0.dist-info/RECORD +0 -11
- dbhose_airflow-0.0.1.0.dist-info/licenses/CHANGELOG.md +0 -14
- {dbhose_airflow-0.0.1.0.dist-info → dbhose_airflow-0.0.2.1.dist-info}/WHEEL +0 -0
- {dbhose_airflow-0.0.1.0.dist-info → dbhose_airflow-0.0.2.1.dist-info}/top_level.txt +0 -0
dbhose_airflow/__init__.py
CHANGED
|
@@ -14,6 +14,7 @@ from pandas import DataFrame as PDFrame
|
|
|
14
14
|
from polars import DataFrame as PLFrame
|
|
15
15
|
|
|
16
16
|
from .airflow_connect import dbhose_dumper
|
|
17
|
+
from .chunk_query import query_part
|
|
17
18
|
from .dq_check import DQCheck
|
|
18
19
|
from .move_method import MoveMethod
|
|
19
20
|
|
|
@@ -31,7 +32,7 @@ __all__ = (
|
|
|
31
32
|
"dbhose_dumper",
|
|
32
33
|
)
|
|
33
34
|
__author__ = "0xMihalich"
|
|
34
|
-
__version__ = "0.0.1.0"
|
|
35
|
+
__version__ = "0.0.2.1"
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
root_path = dirname(__file__)
|
|
@@ -104,7 +105,7 @@ class DBHose:
|
|
|
104
105
|
self.connection_dest = connection_dest
|
|
105
106
|
self.connection_src = connection_src
|
|
106
107
|
self.dq_skip_check = dq_skip_check
|
|
107
|
-
self.filter_by = filter_by
|
|
108
|
+
self.filter_by = ", ".join(filter_by)
|
|
108
109
|
self.drop_temp_table = drop_temp_table
|
|
109
110
|
self.move_method = move_method
|
|
110
111
|
self.custom_move = custom_move
|
|
@@ -333,12 +334,23 @@ class DBHose:
|
|
|
333
334
|
self.logger.error(wrap_frame(error_msg))
|
|
334
335
|
raise ValueError(error_msg)
|
|
335
336
|
|
|
336
|
-
|
|
337
|
+
for query in query_part(self.custom_move):
|
|
338
|
+
self.dumper_dest.cursor.execute(query)
|
|
337
339
|
|
|
338
340
|
if self.dumper_dest.__class__ is not NativeDumper:
|
|
339
341
|
self.dumper_dest.connect.commit()
|
|
340
342
|
|
|
341
343
|
elif self.move_method.have_sql:
|
|
344
|
+
|
|
345
|
+
if (
|
|
346
|
+
self.move_method is MoveMethod.delete
|
|
347
|
+
and self.dumper_dest.__class__ is NativeDumper
|
|
348
|
+
and len(self.filter_by.split(", ")) > 4
|
|
349
|
+
):
|
|
350
|
+
error_msg = "Too many columns in filter_by (> 4)"
|
|
351
|
+
self.logger.error(wrap_frame(error_msg))
|
|
352
|
+
raise ValueError(error_msg)
|
|
353
|
+
|
|
342
354
|
move_query = read_text(
|
|
343
355
|
mv_path.format(self.dumper_dest.dbname, self.move_method.name)
|
|
344
356
|
)
|
|
@@ -349,7 +361,7 @@ class DBHose:
|
|
|
349
361
|
))
|
|
350
362
|
is_avaliable, move_query = tuple(*reader.to_rows())
|
|
351
363
|
|
|
352
|
-
if not is_avaliable:
|
|
364
|
+
if not is_avaliable or not move_query:
|
|
353
365
|
error_msg = (
|
|
354
366
|
f"Method {self.move_method.name} is not available for "
|
|
355
367
|
f"{self.table_dest}. Use another method."
|
|
@@ -357,12 +369,24 @@ class DBHose:
|
|
|
357
369
|
self.logger.error(wrap_frame(error_msg))
|
|
358
370
|
raise ValueError(error_msg)
|
|
359
371
|
|
|
360
|
-
|
|
372
|
+
for query in query_part(move_query):
|
|
373
|
+
self.dumper_dest.cursor.execute(query)
|
|
361
374
|
|
|
362
375
|
if self.dumper_dest.__class__ is not NativeDumper:
|
|
363
376
|
self.dumper_dest.connect.commit()
|
|
364
377
|
|
|
365
378
|
else:
|
|
379
|
+
if self.move_method is MoveMethod.rewrite:
|
|
380
|
+
self.logger.info("Clear table operation start")
|
|
381
|
+
self.dumper_dest.cursor.execute(
|
|
382
|
+
f"truncate table {self.table_dest}"
|
|
383
|
+
)
|
|
384
|
+
|
|
385
|
+
if self.dumper_dest.__class__ is not NativeDumper:
|
|
386
|
+
self.dumper_dest.connect.commit()
|
|
387
|
+
|
|
388
|
+
self.logger.info("Clear table operation done")
|
|
389
|
+
|
|
366
390
|
self.dumper_dest.write_between(self.table_dest, self.table_temp)
|
|
367
391
|
|
|
368
392
|
self.logger.info(wrap_frame(f"Data moved into {self.table_dest}"))
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from re import split


# Matches a ";" only when the rest of the text after it contains an even
# number of single quotes, i.e. the semicolon is not inside a '...' literal.
# Only single quotes are considered: semicolons inside double-quoted
# identifiers, dollar-quoted strings or SQL comments are still treated as
# statement separators.
pattern = r";(?=(?:[^']*'[^']*')*[^']*$)"


def query_part(query: str) -> tuple[str, ...]:
    """Chunk multiquery to parts.

    Split *query* on every semicolon that is not enclosed in a
    single-quoted literal and return the resulting statements.

    Args:
        query: Text containing one or more ";"-separated statements.

    Returns:
        A tuple of statements with surrounding semicolons and whitespace
        removed. Empty fragments are dropped, so an empty or
        separator-only input yields an empty tuple.
    """

    # Normalise every fragment once, then keep only the non-empty ones.
    cleaned = (part.strip(";").strip() for part in split(pattern, query))

    # Materialise the result: the declared return type is a tuple, and a
    # tuple (unlike a generator) can be iterated repeatedly and measured.
    return tuple(part for part in cleaned if part)
|
dbhose_airflow/move_method.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dbhose_airflow
|
|
3
|
-
Version: 0.0.1.0
|
|
3
|
+
Version: 0.0.2.1
|
|
4
4
|
Summary: airflow class for exchanging data between DBMSs in native binary formats.
|
|
5
5
|
Home-page: https://github.com/0xMihalich/dbhose_airflow
|
|
6
6
|
Author: 0xMihalich
|
|
@@ -9,8 +9,8 @@ Description-Content-Type: text/markdown
|
|
|
9
9
|
License-File: README.md
|
|
10
10
|
License-File: CHANGELOG.md
|
|
11
11
|
Requires-Dist: apache-airflow>=2.4.3
|
|
12
|
-
Requires-Dist: native-dumper==0.3.2.3
|
|
13
|
-
Requires-Dist: pgpack-dumper==0.3.2.2
|
|
12
|
+
Requires-Dist: native-dumper==0.3.3.1
|
|
13
|
+
Requires-Dist: pgpack-dumper==0.3.3.1
|
|
14
14
|
Dynamic: author
|
|
15
15
|
Dynamic: author-email
|
|
16
16
|
Dynamic: description
|
|
@@ -84,7 +84,7 @@ DBHose(
|
|
|
84
84
|
- **`move_method`** (`MoveMethod`) - метод перемещения данных (по умолчанию `MoveMethod.replace`)
|
|
85
85
|
- **`custom_move`** (`str`, optional) - пользовательский SQL запрос для перемещения данных
|
|
86
86
|
- **`compress_method`** (`CompressionMethod`) - метод сжатия для дампов (по умолчанию `CompressionMethod.ZSTD`)
|
|
87
|
-
- **`timeout`** (`int`) - таймаут операций с БД в секундах (по умолчанию `DBMS_DEFAULT_TIMEOUT_SEC`)
|
|
87
|
+
- **`timeout`** (`int`) - таймаут операций с БД в секундах (по умолчанию `DBMS_DEFAULT_TIMEOUT_SEC` = 300)
|
|
88
88
|
|
|
89
89
|
## Методы
|
|
90
90
|
|
|
@@ -135,9 +135,14 @@ DBHose(
|
|
|
135
135
|
## Пример использования в DAG
|
|
136
136
|
|
|
137
137
|
```python
|
|
138
|
+
from datetime import datetime
|
|
139
|
+
|
|
138
140
|
from airflow import DAG
|
|
139
141
|
from airflow.operators.python import PythonOperator
|
|
140
|
-
from
|
|
142
|
+
from dbhose_airflow import (
|
|
143
|
+
DBHose,
|
|
144
|
+
MoveMethod,
|
|
145
|
+
)
|
|
141
146
|
|
|
142
147
|
def transfer_data():
|
|
143
148
|
# Перенос данных из PostgreSQL в ClickHouse
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
dbhose_airflow/__init__.py,sha256=4ezXu1GfNIk5Lzq2LR6EMe-vCeujt8W8kU8HJt2TeB8,15298
|
|
2
|
+
dbhose_airflow/airflow_connect.py,sha256=unsRItnK4Q_ieMiGKEsCw8Q_8wkaXdVOfaSWLNRyujM,906
|
|
3
|
+
dbhose_airflow/chunk_query.py,sha256=qtR6FM0SAEHzm08o6AzMZepyzJ3J8qd_itdFY0YJQRg,275
|
|
4
|
+
dbhose_airflow/dq_check.py,sha256=VoAw8qieA5LM1a7jaMPO3AQ7QXe_-ThZ8Gy868ozjHw,689
|
|
5
|
+
dbhose_airflow/dumper.py,sha256=9BEJ36yUJ9gH5PiVirLXymSKPOgABtp7Ee8U6MtEckY,1843
|
|
6
|
+
dbhose_airflow/move_method.py,sha256=EkrDy2VCbL78zfZZhwWH0gF4Ijno20FP1mRfjiABrkk,532
|
|
7
|
+
dbhose_airflow-0.0.2.1.dist-info/licenses/CHANGELOG.md,sha256=sQAbtKsJ8SwQCgbUoXHbb9P8Yl8-UocOhG0K8cMd70w,852
|
|
8
|
+
dbhose_airflow-0.0.2.1.dist-info/licenses/README.md,sha256=-TsSFVS-bdRMNM-xhtqiZUXyD6D_lb6Uiz8LKEPGlP0,8822
|
|
9
|
+
dbhose_airflow-0.0.2.1.dist-info/METADATA,sha256=erGKDeYmuZp3-Erz2e47c07AMg0FxFUwD6nAipugC3U,9434
|
|
10
|
+
dbhose_airflow-0.0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
dbhose_airflow-0.0.2.1.dist-info/top_level.txt,sha256=VlTXT0CLGGcVhbG9QPw2_a8H5UV03QMjvZ-NrPy6_jM,15
|
|
12
|
+
dbhose_airflow-0.0.2.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Version History
|
|
2
|
+
|
|
3
|
+
## 0.0.2.1
|
|
4
|
+
|
|
5
|
+
* Add MoveMethod.rewrite for fully rewriting a table with new data
|
|
6
|
+
* Add query_part function
|
|
7
|
+
* Change filter_by initialization list to string
|
|
8
|
+
* Fix Clickhouse MoveMethod.delete
|
|
9
|
+
* Improve execute custom query & MoveMethod operations
|
|
10
|
+
* Update depends native-dumper==0.3.3.1
|
|
11
|
+
* Update depends pgpack-dumper==0.3.3.1
|
|
12
|
+
|
|
13
|
+
## 0.0.2.0
|
|
14
|
+
|
|
15
|
+
* Update depends native-dumper==0.3.3.0
|
|
16
|
+
* Update depends pgpack-dumper==0.3.3.0
|
|
17
|
+
* Update README.md
|
|
18
|
+
* Add create partition into postgres and greenplum ddl queries
|
|
19
|
+
* Improve delete.sql for greenplum and postgres
|
|
20
|
+
|
|
21
|
+
## 0.0.1.0
|
|
22
|
+
|
|
23
|
+
* Update depends native-dumper==0.3.2.3
|
|
24
|
+
* Update depends pgpack-dumper==0.3.2.2
|
|
25
|
+
* Move old README.md into OLD_DOCS.md
|
|
26
|
+
* Create new README.md
|
|
27
|
+
* Delete dbhose-utils from depends
|
|
28
|
+
* Rename repository dbhose -> dbhose_airflow
|
|
29
|
+
|
|
30
|
+
## 0.0.0.1
|
|
31
|
+
|
|
32
|
+
First version of the library dbhose_airflow
|
|
@@ -63,7 +63,7 @@ DBHose(
|
|
|
63
63
|
- **`move_method`** (`MoveMethod`) - метод перемещения данных (по умолчанию `MoveMethod.replace`)
|
|
64
64
|
- **`custom_move`** (`str`, optional) - пользовательский SQL запрос для перемещения данных
|
|
65
65
|
- **`compress_method`** (`CompressionMethod`) - метод сжатия для дампов (по умолчанию `CompressionMethod.ZSTD`)
|
|
66
|
-
- **`timeout`** (`int`) - таймаут операций с БД в секундах (по умолчанию `DBMS_DEFAULT_TIMEOUT_SEC`)
|
|
66
|
+
- **`timeout`** (`int`) - таймаут операций с БД в секундах (по умолчанию `DBMS_DEFAULT_TIMEOUT_SEC` = 300)
|
|
67
67
|
|
|
68
68
|
## Методы
|
|
69
69
|
|
|
@@ -114,9 +114,14 @@ DBHose(
|
|
|
114
114
|
## Пример использования в DAG
|
|
115
115
|
|
|
116
116
|
```python
|
|
117
|
+
from datetime import datetime
|
|
118
|
+
|
|
117
119
|
from airflow import DAG
|
|
118
120
|
from airflow.operators.python import PythonOperator
|
|
119
|
-
from
|
|
121
|
+
from dbhose_airflow import (
|
|
122
|
+
DBHose,
|
|
123
|
+
MoveMethod,
|
|
124
|
+
)
|
|
120
125
|
|
|
121
126
|
def transfer_data():
|
|
122
127
|
# Перенос данных из PostgreSQL в ClickHouse
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
dbhose_airflow/__init__.py,sha256=bDVnzUxA_AD1-rrJqpJPGN76yPI76NDrkAO1DSavMYA,14334
|
|
2
|
-
dbhose_airflow/airflow_connect.py,sha256=unsRItnK4Q_ieMiGKEsCw8Q_8wkaXdVOfaSWLNRyujM,906
|
|
3
|
-
dbhose_airflow/dq_check.py,sha256=VoAw8qieA5LM1a7jaMPO3AQ7QXe_-ThZ8Gy868ozjHw,689
|
|
4
|
-
dbhose_airflow/dumper.py,sha256=9BEJ36yUJ9gH5PiVirLXymSKPOgABtp7Ee8U6MtEckY,1843
|
|
5
|
-
dbhose_airflow/move_method.py,sha256=c4g7wuiwDKudrKSWP4ov1atJGIFknHCgPnY9FMf9Ymc,477
|
|
6
|
-
dbhose_airflow-0.0.1.0.dist-info/licenses/CHANGELOG.md,sha256=ps8G7NfFWWW2CcXLPl6OcE_mVcx7dgahWK6J6Hh1xQI,309
|
|
7
|
-
dbhose_airflow-0.0.1.0.dist-info/licenses/README.md,sha256=SEcBu3s27QTwMLCVlQbXfErdRWsQtfFfbCE9b6FWwoI,8756
|
|
8
|
-
dbhose_airflow-0.0.1.0.dist-info/METADATA,sha256=V20eV-fvWe-aylcrsjaOVgq0CWS0ONwV_rnPnAwbhRM,9368
|
|
9
|
-
dbhose_airflow-0.0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
-
dbhose_airflow-0.0.1.0.dist-info/top_level.txt,sha256=VlTXT0CLGGcVhbG9QPw2_a8H5UV03QMjvZ-NrPy6_jM,15
|
|
11
|
-
dbhose_airflow-0.0.1.0.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
# Version History
|
|
2
|
-
|
|
3
|
-
## 0.0.1.0
|
|
4
|
-
|
|
5
|
-
* Update depends native-dumper==0.3.2.3
|
|
6
|
-
* Update depends pgpack-dumper==0.3.2.2
|
|
7
|
-
* Move old README.md into OLD_DOCS.md
|
|
8
|
-
* Create new README.md
|
|
9
|
-
* Delete dbhose-utils from depends
|
|
10
|
-
* Rename repository dbhose -> dbhose_airflow
|
|
11
|
-
|
|
12
|
-
## 0.0.0.1
|
|
13
|
-
|
|
14
|
-
First version of the library dbhose_airflow
|
|
File without changes
|
|
File without changes
|