rdxz2-utill 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rdxz2-utill might be problematic.
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/METADATA +1 -1
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/RECORD +9 -9
- utill/my_bq.py +43 -44
- utill/my_file.py +1 -1
- utill/my_pg.py +13 -12
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/WHEEL +0 -0
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/entry_points.txt +0 -0
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/licenses/LICENSE +0 -0
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/top_level.txt +0 -0
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
-rdxz2_utill-0.0.…
+rdxz2_utill-0.0.11.dist-info/licenses/LICENSE,sha256=PF9CUvzP8XFYopEAzrMzSCovF7RdBdscPqJCDC6KjPc,1073
 utill/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-utill/my_bq.py,sha256=…
+utill/my_bq.py,sha256=eWHnCz-tPHLtK4Ac9uNRLw_iyoEvOEy-mk9OMxyWZGc,14771
 utill/my_compare.py,sha256=619QbVk3GihWxen95yVnivKHkah8GgPTLGiSkgHxykw,886
 utill/my_const.py,sha256=88dOqn6NPQ5-hfRqdkew5POoAIyO91XXOGvN76oNsdo,251
 utill/my_csv.py,sha256=svgu93R0pP7UW0B58eJMi0vuJnYhqMtafzCsTIk4yUU,2781
@@ -8,12 +8,12 @@ utill/my_datetime.py,sha256=KEZTplLk3tgVqqC3wClXFcsF_zo40fma_rtPg4kSJHc,2125
 utill/my_dict.py,sha256=jPaPfdn4WYpm0uIBPiYFinpHhx1jXpFVDJ9npmvxGZQ,391
 utill/my_encryption.py,sha256=SCF7PPur39cW4RHidsRhw-9BZP-ymUH-6LZ9nAHJDsY,2105
 utill/my_env.py,sha256=mREys72Ybg2p9p2s7ApOt0s_6F5-qxR8FyYEcSJ8pmU,2093
-utill/my_file.py,sha256=…
+utill/my_file.py,sha256=H2V8qGSCwnztBKiLYA38-4KUaGFQhznJz86cdilLtAE,1879
 utill/my_gcs.py,sha256=KUx89rZx2-dq-GV1LbbvbZ79Qr9NznjG1Zipop4hMZE,4216
 utill/my_input.py,sha256=OyKLoutXpwISReltuL_Gw2oojv16tYWJqQpqabBOQx4,350
 utill/my_json.py,sha256=WgW6mavGhfs4h1N5XbhsDnRk2dbh_ttJWdJUj4iWDN4,1473
 utill/my_mb.py,sha256=IyrySs92TqtjBUvPMeUN3P2kRK8EttTFRPZsv5Cr-xw,15090
-utill/my_pg.py,sha256=…
+utill/my_pg.py,sha256=J9USygc-oug4w7AkBacA9x043jHZrDfQPGFEqXavZAY,6799
 utill/my_queue.py,sha256=hINP4_yjmboSjHgo1J3CtPm2X9SE3HfczyED3ip7nfk,1930
 utill/my_string.py,sha256=pINYFR1ligTyVZYzV8P_FolCsZQwYE1jaFNTuQ3XS_8,833
 utill/my_style.py,sha256=Wy6j4WL9RgGeX6cS9hhlOrufc9UC4UPTQ5UJa0ZJ3Yo,900
@@ -28,8 +28,8 @@ utill/cmd/_pg.py,sha256=RVxEiSifyIwMDYDM69vt6WSLdVDr1cMzY6r4T2PzNRA,492
 utill/cmd/utill.py,sha256=TlHfiwOUcK1m58PrRCjX9sARiPYZUsoTk-KOTCOz1vM,3558
 utill/templates/mb.json,sha256=M46ZHSaSh4rbD_KGUViGr2B2ZV8_PC-O5Evqi35JK5g,59
 utill/templates/pg.json,sha256=LkJt0VV3zcyt7Tpn6gulsoVQgUc-9uImXOStvzu8cdU,271
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
+rdxz2_utill-0.0.11.dist-info/METADATA,sha256=ZGCUekaj1Zr5C8zjKwLBL6Q7tf1oPFEzuwS29Dsr7h0,4402
+rdxz2_utill-0.0.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rdxz2_utill-0.0.11.dist-info/entry_points.txt,sha256=9n5NWz5Wi9jDvYhB_81_4icgT5xABZ-QivHD8ibcafg,47
+rdxz2_utill-0.0.11.dist-info/top_level.txt,sha256=tuAYZoCsr02JYbpZj7I6fl1IIo53v3GG0uoj-_fINVk,6
+rdxz2_utill-0.0.11.dist-info/RECORD,,
utill/my_bq.py
CHANGED
@@ -1,21 +1,19 @@
-import humanize
-import math
-import os
-import shutil
-
-from enum import Enum
-from google.cloud import bigquery, storage
-from loguru import logger
-from textwrap import dedent
-
 from .my_const import ByteSize
-from .my_csv import read_header, combine as …
+from .my_csv import read_header, combine as compress
 from .my_datetime import current_datetime_str
 from .my_env import envs
 from .my_gcs import GCS
 from .my_queue import ThreadingQ
 from .my_string import replace_nonnumeric
 from .my_xlsx import csv_to_xlsx
+from enum import Enum
+from google.cloud import bigquery, storage
+from loguru import logger
+from textwrap import dedent
+import csv
+import humanize
+import math
+import os
 
 MAP__PYTHON_DTYPE__BQ_DTYPE = {
     int: 'INTEGER',
@@ -92,7 +90,6 @@ class BQ():
         logger.debug(f'🔎 Query:\n{query}')
         query_job_config = bigquery.QueryJobConfig(dry_run=dry_run, query_parameters=query_parameters)
         query_job = self.client.query(query, job_config=query_job_config)
-        query_job.result()  # Wait query execution
 
         if not multi:
             logger.debug(f'[Job ID] {query_job.job_id}, [Processed] {humanize.naturalsize(query_job.total_bytes_processed)}, [Billed] {humanize.naturalsize(query_job.total_bytes_billed)}, [Affected] {query_job.num_dml_affected_rows or 0} row(s)',)
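With `query_job.result()` removed, `execute_query` now returns the job as soon as it is submitted instead of blocking until completion; callers that need the results must wait explicitly, as the rewritten `download_csv` further down does. A minimal sketch of the new calling pattern (the `bq` instance and the query are illustrative, not from the package):

# Hypothetical caller; `bq` is an already-configured BQ wrapper instance.
query_job = bq.execute_query('SELECT 1 AS x')  # submits the job, no longer waits
rows = query_job.result()                      # the caller now blocks here explicitly
for row in rows:
    print(row.x)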
@@ -169,6 +166,7 @@ class BQ():
                 field_delimiter=',')
             AS (
                 {query}
+                ORDER BY 1
             );
         '''
     )
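The added `ORDER BY 1` sits inside the statement this template renders, so exported rows come out ordered by the first selected column rather than in arbitrary scan order. A sketch of what the rendered statement might look like, assuming a BigQuery `EXPORT DATA` template; the bucket URI and query are placeholders, not values from the package:

# Assumed shape of the rendered statement; only the captured fragments are certain.
query = 'SELECT id, name FROM `project.dataset.table`'
export_statement = f'''
    EXPORT DATA OPTIONS (
        uri='gs://example-bucket/export/*.csv',
        format='CSV',
        field_delimiter=',')
    AS (
        {query}
        ORDER BY 1
    );
'''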
@@ -248,40 +246,41 @@ class BQ():
 
         # END: Load to BQ ----->>
 
-    def download_csv(self, query: str, …
-        if not …
-            raise ValueError('Destination filename must ends with .csv…
+    def download_csv(self, query: str, dst_filepath: str, row_limit: int | None = None):
+        if not dst_filepath.endswith('.csv'):
+            raise ValueError('Destination filename must ends with .csv')
 
-…
+        dst_filepath = os.path.expanduser(dst_filepath)  # /path/to/file.csv
 
-…
+        query_job = self.execute_query(query)
+        query_job_result = query_job.result()
+        row_count = 0
+        file_index = 1
+
+        # Stream-download-split result
+        def open_file(f):
+            if f:
+                f.close()
+            dst_filepath_part = f'{dst_filepath.removesuffix(".csv")}_{file_index:06}.csv' if row_limit else dst_filepath
+            logger.info(f'Writing into file: {dst_filepath_part} ...')
+            f = open(dst_filepath_part, 'w', newline='', encoding='utf-8')
+            writer = csv.writer(f)
+            writer.writerow([field.name for field in query_job_result.schema])  # Write header
+
+            return f, writer
+
+        f, writer = open_file(None)
+        for row in query_job_result:
+            writer.writerow(row)
+
+            if row_limit:
+                row_count += 1
+                if row_count >= row_limit:
+                    row_count = 0
+                    file_index += 1
+                    f, writer = open_file(f)
+        if f:
+            f.close()
 
     def download_xlsx(self, src_table_fqn: str, dst_filename: str, xlsx_row_limit: int = 950000):
         if not dst_filename.endswith('.xlsx'):
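The new `download_csv` streams the query result to disk and, when `row_limit` is set, rotates to a numbered part file (`_000001.csv`, `_000002.csv`, ...) each time the limit is reached, writing a header row into every part. A hedged usage sketch; the constructor arguments, query, path, and limit are illustrative, not from the package:

from utill.my_bq import BQ

bq = BQ()  # assumes credentials/project are resolved however the package expects
# Produces ~/report_000001.csv, ~/report_000002.csv, ... with at most
# 1_000_000 data rows per file, each file starting with its own header row.
bq.download_csv(
    'SELECT * FROM `project.dataset.table`',
    '~/report.csv',
    row_limit=1_000_000,
)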
utill/my_file.py
CHANGED
utill/my_pg.py
CHANGED
@@ -1,3 +1,8 @@
+from .my_env import PG_FILENAME
+from .my_string import generate_random_string
+from .my_tunnel import establish_tunnel
+from loguru import logger
+from textwrap import dedent
 import csv
 import json
 import os
@@ -5,13 +10,6 @@ import psycopg
 import psycopg.conninfo
 import psycopg.rows
 
-from loguru import logger
-from textwrap import dedent
-
-from .my_env import PG_FILENAME
-from .my_string import generate_random_string
-from .my_tunnel import establish_tunnel
-
 
 class PG:
     def __init__(
@@ -100,6 +98,9 @@ class PG:
             f.write(data)
 
     def pg_to_pg(self, pg: "PG", src_table_name: str, dst_table_name: str, cols: list[str] = None) -> None:
+        self.ensure_table_exists(src_table_name)
+        pg.ensure_table_exists(dst_table_name)
+
         tmp_filename = generate_random_string(alphanum=True) + '.csv'
         cols_str = ','.join([f'"{x}"' for x in cols]) if (cols is not None and cols != []) else '*'
         try:
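`pg_to_pg` now validates both ends before moving any data: the source table on the current connection and the destination table on the target connection. A hedged sketch of a call; the `PG()` construction details and table names are placeholders:

# Hypothetical instances; PG() constructor arguments are package-specific.
src = PG()
dst = PG()
# Raises immediately if either table is missing, before the CSV round-trip starts.
src.pg_to_pg(dst, 'public.users', 'public.users')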
@@ -110,12 +111,12 @@ class PG:
         finally:
             os.remove(tmp_filename) if os.path.exists(tmp_filename) else None
 
-    def …
-        if not self.execute_query('''SELECT count(1) AS "cnt" FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;''', table_name).fetchone()[0]:
+    def ensure_table_exists(self, table_name: str) -> bool:
+        if not self.execute_query('''SELECT count(1) AS "cnt" FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;''', (table_name, )).fetchone()[0]:
             raise Exception(f'Target table \'{table_name}\' not created, please create it first!')
 
     def upload_tuples(self, cols: list[str], src_tuples: list[tuple], src_table_name: str) -> None:
-        self.…
+        self.ensure_table_exists(src_table_name)
 
         cols_str = ','.join([f'"{x}"' for x in cols])
         query = f'''COPY {src_table_name}({cols_str}) FROM STDIN'''
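Besides the rename to `ensure_table_exists`, the fix wraps the query parameter in a one-element tuple. psycopg expects a sequence (or mapping) of parameters, and a bare string is itself a sequence of characters, so the old call would typically fail with a placeholder/parameter count mismatch. A minimal sketch of the difference; the connection string and table name are placeholders:

import psycopg

conn = psycopg.connect('dbname=example')  # placeholder conninfo
sql = '''SELECT count(1) FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;'''
# Wrong: 'public.users' is iterated character by character,
# so psycopg sees many parameters for a single %s placeholder.
# conn.execute(sql, 'public.users')
# Right: a one-element tuple carries exactly one parameter.
print(conn.execute(sql, ('public.users',)).fetchone()[0])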
@@ -125,7 +126,7 @@ class PG:
             copy.write_row(row)
 
     def upload_list_of_dict(self, src_data: list[dict], dst_table_name: str) -> None:
-        self.…
+        self.ensure_table_exists(dst_table_name)
 
         if len(src_data) == 0:
             raise ValueError('No data to upload!')
@@ -141,7 +142,7 @@ class PG:
     def upload_csv(self, src_filename: str, dst_table_name: str) -> None:
         src_filename = os.path.expanduser(src_filename)
 
-        self.…
+        self.ensure_table_exists(dst_table_name)
 
         cols_str = ','.join([f'"{x}"' for x in next(csv.reader(open(src_filename, 'r')))])
         query = dedent(

{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/WHEEL
File without changes
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/entry_points.txt
File without changes
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/licenses/LICENSE
File without changes
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/top_level.txt
File without changes
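With the rename applied consistently, every upload path (`upload_tuples`, `upload_list_of_dict`, `upload_csv`, and `pg_to_pg`) calls `ensure_table_exists` first, so a missing destination table fails fast instead of partway through a COPY. A hedged usage sketch; construction details, file, and table names are placeholders:

from utill.my_pg import PG

pg = PG()  # assumes connection settings come from the package's pg.json template
pg.upload_csv('~/events.csv', 'public.events')  # raises early if public.events is absent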