dbhose-airflow 0.0.1.0__py3-none-any.whl → 0.0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ from pandas import DataFrame as PDFrame
 from polars import DataFrame as PLFrame
 
 from .airflow_connect import dbhose_dumper
+from .chunk_query import query_part
 from .dq_check import DQCheck
 from .move_method import MoveMethod
 
@@ -31,7 +32,7 @@ __all__ = (
     "dbhose_dumper",
 )
 __author__ = "0xMihalich"
-__version__ = "0.0.1.0"
+__version__ = "0.0.2.1"
 
 
 root_path = dirname(__file__)
@@ -104,7 +105,7 @@ class DBHose:
         self.connection_dest = connection_dest
         self.connection_src = connection_src
         self.dq_skip_check = dq_skip_check
-        self.filter_by = filter_by
+        self.filter_by = ", ".join(filter_by)
         self.drop_temp_table = drop_temp_table
         self.move_method = move_method
         self.custom_move = custom_move
@@ -333,12 +334,23 @@ class DBHose:
                 self.logger.error(wrap_frame(error_msg))
                 raise ValueError(error_msg)
 
-            self.dumper_dest.cursor.execute(self.custom_move)
+            for query in query_part(self.custom_move):
+                self.dumper_dest.cursor.execute(query)
 
             if self.dumper_dest.__class__ is not NativeDumper:
                 self.dumper_dest.connect.commit()
 
         elif self.move_method.have_sql:
+
+            if (
+                self.move_method is MoveMethod.delete
+                and self.dumper_dest.__class__ is NativeDumper
+                and len(self.filter_by.split(", ")) > 4
+            ):
+                error_msg = "Too many columns in filter_by (> 4)"
+                self.logger.error(wrap_frame(error_msg))
+                raise ValueError(error_msg)
+
             move_query = read_text(
                 mv_path.format(self.dumper_dest.dbname, self.move_method.name)
             )
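
Two related changes meet here: `__init__` now stores `filter_by` as a comma-joined string (see the `@@ -104,7 +105,7 @@` hunk above), and the new guard in the delete branch splits that same string to count filter columns before allowing `MoveMethod.delete` against a `NativeDumper` destination (the ClickHouse path, per the changelog). A standalone sketch of that round trip, with made-up column names:

```python
# Standalone illustration of the new filter_by handling; the column names
# are made up, and the 4-column cap mirrors the guard in the diff above.
filter_by = ["client_id", "event_date", "region", "channel", "source"]

filter_by_str = ", ".join(filter_by)            # stored form: "client_id, event_date, ..."
column_count = len(filter_by_str.split(", "))   # recovers the count: 5

if column_count > 4:
    raise ValueError("Too many columns in filter_by (> 4)")
```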
@@ -349,7 +361,7 @@ class DBHose:
             ))
             is_avaliable, move_query = tuple(*reader.to_rows())
 
-            if not is_avaliable:
+            if not is_avaliable or not move_query:
                 error_msg = (
                     f"Method {self.move_method.name} is not available for "
                     f"{self.table_dest}. Use another method."
@@ -357,12 +369,24 @@ class DBHose:
                 self.logger.error(wrap_frame(error_msg))
                 raise ValueError(error_msg)
 
-            self.dumper_dest.cursor.execute(move_query)
+            for query in query_part(move_query):
+                self.dumper_dest.cursor.execute(query)
 
             if self.dumper_dest.__class__ is not NativeDumper:
                 self.dumper_dest.connect.commit()
 
         else:
+            if self.move_method is MoveMethod.rewrite:
+                self.logger.info("Clear table operation start")
+                self.dumper_dest.cursor.execute(
+                    f"truncate table {self.table_dest}"
+                )
+
+                if self.dumper_dest.__class__ is not NativeDumper:
+                    self.dumper_dest.connect.commit()
+
+                self.logger.info("Clear table operation done")
+
             self.dumper_dest.write_between(self.table_dest, self.table_temp)
 
         self.logger.info(wrap_frame(f"Data moved into {self.table_dest}"))
@@ -0,0 +1,14 @@
+from re import split
+
+
+pattern = r";(?=(?:[^']*'[^']*')*[^']*$)"
+
+
+def query_part(query: str) -> tuple[str]:
+    """Chunk multiquery to parts."""
+
+    return (
+        part.strip(";").strip()
+        for part in split(pattern, query)
+        if part.strip(";").strip()
+    )
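
The regex in the new `chunk_query.py` splits a multi-statement query on semicolons only when they fall outside single-quoted literals, and the comprehension drops empty trailing parts. Note that, despite the `tuple[str]` annotation, the function returns a generator expression. A small usage sketch (the SQL text is illustrative):

```python
# Illustrative use of query_part; the ';' inside the quoted value 'a;b'
# is preserved, while the statement separators are split on and trimmed.
sql = "insert into t values ('a;b'); truncate table t2; "

for statement in query_part(sql):
    print(statement)
# -> insert into t values ('a;b')
# -> truncate table t2
```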
@@ -18,3 +18,4 @@ class MoveMethod(MoveType, Enum):
     custom = MoveType("custom", False, False, True)
     delete = MoveType("delete", True, True, False)
     replace = MoveType("replace", True, False, False)
+    rewrite = MoveType("rewrite", False, False, False)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dbhose_airflow
-Version: 0.0.1.0
+Version: 0.0.2.1
 Summary: airflow class for exchanging data between DBMSs in native binary formats.
 Home-page: https://github.com/0xMihalich/dbhose_airflow
 Author: 0xMihalich
@@ -9,8 +9,8 @@ Description-Content-Type: text/markdown
 License-File: README.md
 License-File: CHANGELOG.md
 Requires-Dist: apache-airflow>=2.4.3
-Requires-Dist: native-dumper==0.3.2.3
-Requires-Dist: pgpack-dumper==0.3.2.2
+Requires-Dist: native-dumper==0.3.3.1
+Requires-Dist: pgpack-dumper==0.3.3.1
 Dynamic: author
 Dynamic: author-email
 Dynamic: description
@@ -84,7 +84,7 @@ DBHose(
 - **`move_method`** (`MoveMethod`) - data move method (defaults to `MoveMethod.replace`)
 - **`custom_move`** (`str`, optional) - custom SQL query for moving the data
 - **`compress_method`** (`CompressionMethod`) - compression method for dumps (defaults to `CompressionMethod.ZSTD`)
-- **`timeout`** (`int`) - database operation timeout in seconds (defaults to `DBMS_DEFAULT_TIMEOUT_SEC`)
+- **`timeout`** (`int`) - database operation timeout in seconds (defaults to `DBMS_DEFAULT_TIMEOUT_SEC` = 300)
 
 ## Methods
 
@@ -135,9 +135,14 @@ DBHose(
 ## Example usage in a DAG
 
 ```python
+from datetime import datetime
+
 from airflow import DAG
 from airflow.operators.python import PythonOperator
-from datetime import datetime
+from dbhose_airflow import (
+    DBHose,
+    MoveMethod,
+)
 
 def transfer_data():
     # Transfer data from PostgreSQL to ClickHouse
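
The DAG example in the README is truncated by the hunk above, so the sketch below is not the package's documented example: the connection IDs, DAG wiring, and the exact `DBHose` call are assumptions for illustration, using only parameter names that appear in this diff (`connection_src`, `connection_dest`, `move_method`) plus the new `MoveMethod.rewrite`:

```python
# Hedged sketch of wiring DBHose into a DAG; connection IDs, dag_id, and the
# omitted DBHose parameters (tables, filters, etc.) are assumptions, not the
# package's documented example.
from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator
from dbhose_airflow import DBHose, MoveMethod


def transfer_data() -> None:
    # Fully rewrite the destination table (MoveMethod.rewrite, new in 0.0.2.1).
    DBHose(
        connection_src="postgres_src",      # assumed Airflow connection id
        connection_dest="clickhouse_dest",  # assumed Airflow connection id
        move_method=MoveMethod.rewrite,
        # ... remaining required parameters omitted; see the package README
    )


with DAG(
    dag_id="dbhose_transfer_example",
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
) as dag:
    PythonOperator(task_id="transfer_data", python_callable=transfer_data)
```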
@@ -0,0 +1,12 @@
+dbhose_airflow/__init__.py,sha256=4ezXu1GfNIk5Lzq2LR6EMe-vCeujt8W8kU8HJt2TeB8,15298
+dbhose_airflow/airflow_connect.py,sha256=unsRItnK4Q_ieMiGKEsCw8Q_8wkaXdVOfaSWLNRyujM,906
+dbhose_airflow/chunk_query.py,sha256=qtR6FM0SAEHzm08o6AzMZepyzJ3J8qd_itdFY0YJQRg,275
+dbhose_airflow/dq_check.py,sha256=VoAw8qieA5LM1a7jaMPO3AQ7QXe_-ThZ8Gy868ozjHw,689
+dbhose_airflow/dumper.py,sha256=9BEJ36yUJ9gH5PiVirLXymSKPOgABtp7Ee8U6MtEckY,1843
+dbhose_airflow/move_method.py,sha256=EkrDy2VCbL78zfZZhwWH0gF4Ijno20FP1mRfjiABrkk,532
+dbhose_airflow-0.0.2.1.dist-info/licenses/CHANGELOG.md,sha256=sQAbtKsJ8SwQCgbUoXHbb9P8Yl8-UocOhG0K8cMd70w,852
+dbhose_airflow-0.0.2.1.dist-info/licenses/README.md,sha256=-TsSFVS-bdRMNM-xhtqiZUXyD6D_lb6Uiz8LKEPGlP0,8822
+dbhose_airflow-0.0.2.1.dist-info/METADATA,sha256=erGKDeYmuZp3-Erz2e47c07AMg0FxFUwD6nAipugC3U,9434
+dbhose_airflow-0.0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dbhose_airflow-0.0.2.1.dist-info/top_level.txt,sha256=VlTXT0CLGGcVhbG9QPw2_a8H5UV03QMjvZ-NrPy6_jM,15
+dbhose_airflow-0.0.2.1.dist-info/RECORD,,
@@ -0,0 +1,32 @@
+# Version History
+
+## 0.0.2.1
+
+* Add MoveMethod.rewrite for full rewrite table with new data
+* Add query_part function
+* Change filter_by initialization list to string
+* Fix Clickhouse MoveMethod.delete
+* Improve execute custom query & MoveMethod operations
+* Update depends native-dumper==0.3.3.1
+* Update depends pgpack-dumper==0.3.3.1
+
+## 0.0.2.0
+
+* Update depends native-dumper==0.3.3.0
+* Update depends pgpack-dumper==0.3.3.0
+* Update README.md
+* Add create partition into postgres and greenplum ddl queryes
+* Improve delete.sql for greenplum and postgres
+
+## 0.0.1.0
+
+* Update depends native-dumper==0.3.2.3
+* Update depends pgpack-dumper==0.3.2.2
+* Move old README.md into OLD_DOCS.md
+* Create new README.md
+* Delete dbhose-utils from depends
+* Rename repository dbhose -> dbhose_airflow
+
+## 0.0.0.1
+
+First version of the library dbhose_airflow
@@ -63,7 +63,7 @@ DBHose(
 - **`move_method`** (`MoveMethod`) - data move method (defaults to `MoveMethod.replace`)
 - **`custom_move`** (`str`, optional) - custom SQL query for moving the data
 - **`compress_method`** (`CompressionMethod`) - compression method for dumps (defaults to `CompressionMethod.ZSTD`)
-- **`timeout`** (`int`) - database operation timeout in seconds (defaults to `DBMS_DEFAULT_TIMEOUT_SEC`)
+- **`timeout`** (`int`) - database operation timeout in seconds (defaults to `DBMS_DEFAULT_TIMEOUT_SEC` = 300)
 
 ## Methods
 
@@ -114,9 +114,14 @@ DBHose(
 ## Example usage in a DAG
 
 ```python
+from datetime import datetime
+
 from airflow import DAG
 from airflow.operators.python import PythonOperator
-from datetime import datetime
+from dbhose_airflow import (
+    DBHose,
+    MoveMethod,
+)
 
 def transfer_data():
     # Transfer data from PostgreSQL to ClickHouse
@@ -1,11 +0,0 @@
-dbhose_airflow/__init__.py,sha256=bDVnzUxA_AD1-rrJqpJPGN76yPI76NDrkAO1DSavMYA,14334
-dbhose_airflow/airflow_connect.py,sha256=unsRItnK4Q_ieMiGKEsCw8Q_8wkaXdVOfaSWLNRyujM,906
-dbhose_airflow/dq_check.py,sha256=VoAw8qieA5LM1a7jaMPO3AQ7QXe_-ThZ8Gy868ozjHw,689
-dbhose_airflow/dumper.py,sha256=9BEJ36yUJ9gH5PiVirLXymSKPOgABtp7Ee8U6MtEckY,1843
-dbhose_airflow/move_method.py,sha256=c4g7wuiwDKudrKSWP4ov1atJGIFknHCgPnY9FMf9Ymc,477
-dbhose_airflow-0.0.1.0.dist-info/licenses/CHANGELOG.md,sha256=ps8G7NfFWWW2CcXLPl6OcE_mVcx7dgahWK6J6Hh1xQI,309
-dbhose_airflow-0.0.1.0.dist-info/licenses/README.md,sha256=SEcBu3s27QTwMLCVlQbXfErdRWsQtfFfbCE9b6FWwoI,8756
-dbhose_airflow-0.0.1.0.dist-info/METADATA,sha256=V20eV-fvWe-aylcrsjaOVgq0CWS0ONwV_rnPnAwbhRM,9368
-dbhose_airflow-0.0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dbhose_airflow-0.0.1.0.dist-info/top_level.txt,sha256=VlTXT0CLGGcVhbG9QPw2_a8H5UV03QMjvZ-NrPy6_jM,15
-dbhose_airflow-0.0.1.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
-# Version History
-
-## 0.0.1.0
-
-* Update depends native-dumper==0.3.2.3
-* Update depends pgpack-dumper==0.3.2.2
-* Move old README.md into OLD_DOCS.md
-* Create new README.md
-* Delete dbhose-utils from depends
-* Rename repository dbhose -> dbhose_airflow
-
-## 0.0.0.1
-
-First version of the library dbhose_airflow