rdxz2-utill 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rdxz2-utill might be problematic.
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/METADATA +1 -1
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/RECORD +9 -9
- utill/my_bq.py +43 -44
- utill/my_file.py +1 -1
- utill/my_pg.py +13 -12
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/WHEEL +0 -0
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/entry_points.txt +0 -0
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/licenses/LICENSE +0 -0
- {rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/top_level.txt +0 -0
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
-rdxz2_utill-0.0.…
+rdxz2_utill-0.0.11.dist-info/licenses/LICENSE,sha256=PF9CUvzP8XFYopEAzrMzSCovF7RdBdscPqJCDC6KjPc,1073
 utill/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-utill/my_bq.py,sha256=…
+utill/my_bq.py,sha256=eWHnCz-tPHLtK4Ac9uNRLw_iyoEvOEy-mk9OMxyWZGc,14771
 utill/my_compare.py,sha256=619QbVk3GihWxen95yVnivKHkah8GgPTLGiSkgHxykw,886
 utill/my_const.py,sha256=88dOqn6NPQ5-hfRqdkew5POoAIyO91XXOGvN76oNsdo,251
 utill/my_csv.py,sha256=svgu93R0pP7UW0B58eJMi0vuJnYhqMtafzCsTIk4yUU,2781
@@ -8,12 +8,12 @@ utill/my_datetime.py,sha256=KEZTplLk3tgVqqC3wClXFcsF_zo40fma_rtPg4kSJHc,2125
 utill/my_dict.py,sha256=jPaPfdn4WYpm0uIBPiYFinpHhx1jXpFVDJ9npmvxGZQ,391
 utill/my_encryption.py,sha256=SCF7PPur39cW4RHidsRhw-9BZP-ymUH-6LZ9nAHJDsY,2105
 utill/my_env.py,sha256=mREys72Ybg2p9p2s7ApOt0s_6F5-qxR8FyYEcSJ8pmU,2093
-utill/my_file.py,sha256=…
+utill/my_file.py,sha256=H2V8qGSCwnztBKiLYA38-4KUaGFQhznJz86cdilLtAE,1879
 utill/my_gcs.py,sha256=KUx89rZx2-dq-GV1LbbvbZ79Qr9NznjG1Zipop4hMZE,4216
 utill/my_input.py,sha256=OyKLoutXpwISReltuL_Gw2oojv16tYWJqQpqabBOQx4,350
 utill/my_json.py,sha256=WgW6mavGhfs4h1N5XbhsDnRk2dbh_ttJWdJUj4iWDN4,1473
 utill/my_mb.py,sha256=IyrySs92TqtjBUvPMeUN3P2kRK8EttTFRPZsv5Cr-xw,15090
-utill/my_pg.py,sha256=…
+utill/my_pg.py,sha256=J9USygc-oug4w7AkBacA9x043jHZrDfQPGFEqXavZAY,6799
 utill/my_queue.py,sha256=hINP4_yjmboSjHgo1J3CtPm2X9SE3HfczyED3ip7nfk,1930
 utill/my_string.py,sha256=pINYFR1ligTyVZYzV8P_FolCsZQwYE1jaFNTuQ3XS_8,833
 utill/my_style.py,sha256=Wy6j4WL9RgGeX6cS9hhlOrufc9UC4UPTQ5UJa0ZJ3Yo,900
@@ -28,8 +28,8 @@ utill/cmd/_pg.py,sha256=RVxEiSifyIwMDYDM69vt6WSLdVDr1cMzY6r4T2PzNRA,492
 utill/cmd/utill.py,sha256=TlHfiwOUcK1m58PrRCjX9sARiPYZUsoTk-KOTCOz1vM,3558
 utill/templates/mb.json,sha256=M46ZHSaSh4rbD_KGUViGr2B2ZV8_PC-O5Evqi35JK5g,59
 utill/templates/pg.json,sha256=LkJt0VV3zcyt7Tpn6gulsoVQgUc-9uImXOStvzu8cdU,271
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
-rdxz2_utill-0.0.…
+rdxz2_utill-0.0.11.dist-info/METADATA,sha256=ZGCUekaj1Zr5C8zjKwLBL6Q7tf1oPFEzuwS29Dsr7h0,4402
+rdxz2_utill-0.0.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rdxz2_utill-0.0.11.dist-info/entry_points.txt,sha256=9n5NWz5Wi9jDvYhB_81_4icgT5xABZ-QivHD8ibcafg,47
+rdxz2_utill-0.0.11.dist-info/top_level.txt,sha256=tuAYZoCsr02JYbpZj7I6fl1IIo53v3GG0uoj-_fINVk,6
+rdxz2_utill-0.0.11.dist-info/RECORD,,
utill/my_bq.py
CHANGED
@@ -1,21 +1,19 @@
-import humanize
-import math
-import os
-import shutil
-
-from enum import Enum
-from google.cloud import bigquery, storage
-from loguru import logger
-from textwrap import dedent
-
 from .my_const import ByteSize
-from .my_csv import read_header, combine as …
+from .my_csv import read_header, combine as compress
 from .my_datetime import current_datetime_str
 from .my_env import envs
 from .my_gcs import GCS
 from .my_queue import ThreadingQ
 from .my_string import replace_nonnumeric
 from .my_xlsx import csv_to_xlsx
+from enum import Enum
+from google.cloud import bigquery, storage
+from loguru import logger
+from textwrap import dedent
+import csv
+import humanize
+import math
+import os
 
 MAP__PYTHON_DTYPE__BQ_DTYPE = {
     int: 'INTEGER',
@@ -92,7 +90,6 @@ class BQ():
         logger.debug(f'🔎 Query:\n{query}')
         query_job_config = bigquery.QueryJobConfig(dry_run=dry_run, query_parameters=query_parameters)
         query_job = self.client.query(query, job_config=query_job_config)
-        query_job.result()  # Wait query execution
 
         if not multi:
             logger.debug(f'[Job ID] {query_job.job_id}, [Processed] {humanize.naturalsize(query_job.total_bytes_processed)}, [Billed] {humanize.naturalsize(query_job.total_bytes_billed)}, [Affected] {query_job.num_dml_affected_rows or 0} row(s)',)
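With `query_job.result()` removed, `execute_query` now returns the job as soon as it is submitted instead of blocking until completion; callers that need the results must wait explicitly, as the rewritten `download_csv` further down does. A minimal sketch of the new calling pattern (the `bq` instance and the query are illustrative, not from the package):

# Hypothetical caller; `bq` is an already-configured BQ wrapper instance.
query_job = bq.execute_query('SELECT 1 AS x')  # submits the job, no longer waits
rows = query_job.result()                      # the caller now blocks here explicitly
for row in rows:
    print(row.x)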
@@ -169,6 +166,7 @@ class BQ():
                 field_delimiter=',')
             AS (
                 {query}
+                ORDER BY 1
             );
         '''
     )
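The added `ORDER BY 1` sits inside the statement this template renders, so exported rows come out ordered by the first selected column rather than in arbitrary scan order. A sketch of what the rendered statement might look like, assuming a BigQuery `EXPORT DATA` template; the bucket URI and query are placeholders, not values from the package:

# Assumed shape of the rendered statement; only the captured fragments are certain.
query = 'SELECT id, name FROM `project.dataset.table`'
export_statement = f'''
    EXPORT DATA OPTIONS (
        uri='gs://example-bucket/export/*.csv',
        format='CSV',
        field_delimiter=',')
    AS (
        {query}
        ORDER BY 1
    );
'''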
@@ -248,40 +246,41 @@ class BQ():
 
         # END: Load to BQ ----->>
 
-    def download_csv(self, query: str, …
-        if not …
-            raise ValueError('Destination filename must ends with .csv…
+    def download_csv(self, query: str, dst_filepath: str, row_limit: int | None = None):
+        if not dst_filepath.endswith('.csv'):
+            raise ValueError('Destination filename must ends with .csv')
 
-…
+        dst_filepath = os.path.expanduser(dst_filepath)  # /path/to/file.csv
 
-…
+        query_job = self.execute_query(query)
+        query_job_result = query_job.result()
+        row_count = 0
+        file_index = 1
+
+        # Stream-download-split result
+        def open_file(f):
+            if f:
+                f.close()
+            dst_filepath_part = f'{dst_filepath.removesuffix(".csv")}_{file_index:06}.csv' if row_limit else dst_filepath
+            logger.info(f'Writing into file: {dst_filepath_part} ...')
+            f = open(dst_filepath_part, 'w', newline='', encoding='utf-8')
+            writer = csv.writer(f)
+            writer.writerow([field.name for field in query_job_result.schema])  # Write header
+
+            return f, writer
+
+        f, writer = open_file(None)
+        for row in query_job_result:
+            writer.writerow(row)
+
+            if row_limit:
+                row_count += 1
+                if row_count >= row_limit:
+                    row_count = 0
+                    file_index += 1
+                    f, writer = open_file(f)
+        if f:
+            f.close()
 
     def download_xlsx(self, src_table_fqn: str, dst_filename: str, xlsx_row_limit: int = 950000):
         if not dst_filename.endswith('.xlsx'):
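The new `download_csv` streams the query result to disk and, when `row_limit` is set, rotates to a numbered part file (`_000001.csv`, `_000002.csv`, ...) each time the limit is reached, writing a header row into every part. A hedged usage sketch; the constructor arguments, query, path, and limit are illustrative, not from the package:

from utill.my_bq import BQ

bq = BQ()  # assumes credentials/project are resolved however the package expects
# Produces ~/report_000001.csv, ~/report_000002.csv, ... with at most
# 1_000_000 data rows per file, each file starting with its own header row.
bq.download_csv(
    'SELECT * FROM `project.dataset.table`',
    '~/report.csv',
    row_limit=1_000_000,
)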
utill/my_file.py
CHANGED
utill/my_pg.py
CHANGED
@@ -1,3 +1,8 @@
+from .my_env import PG_FILENAME
+from .my_string import generate_random_string
+from .my_tunnel import establish_tunnel
+from loguru import logger
+from textwrap import dedent
 import csv
 import json
 import os
@@ -5,13 +10,6 @@ import psycopg
 import psycopg.conninfo
 import psycopg.rows
 
-from loguru import logger
-from textwrap import dedent
-
-from .my_env import PG_FILENAME
-from .my_string import generate_random_string
-from .my_tunnel import establish_tunnel
-
 
 class PG:
     def __init__(
@@ -100,6 +98,9 @@ class PG:
             f.write(data)
 
     def pg_to_pg(self, pg: "PG", src_table_name: str, dst_table_name: str, cols: list[str] = None) -> None:
+        self.ensure_table_exists(src_table_name)
+        pg.ensure_table_exists(dst_table_name)
+
         tmp_filename = generate_random_string(alphanum=True) + '.csv'
         cols_str = ','.join([f'"{x}"' for x in cols]) if (cols is not None and cols != []) else '*'
         try:
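`pg_to_pg` now validates both ends before moving any data: the source table on the current connection and the destination table on the target connection. A hedged sketch of a call; the `PG()` construction details and table names are placeholders:

# Hypothetical instances; PG() constructor arguments are package-specific.
src = PG()
dst = PG()
# Raises immediately if either table is missing, before the CSV round-trip starts.
src.pg_to_pg(dst, 'public.users', 'public.users')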
@@ -110,12 +111,12 @@ class PG:
         finally:
             os.remove(tmp_filename) if os.path.exists(tmp_filename) else None
 
-    def …
-        if not self.execute_query('''SELECT count(1) AS "cnt" FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;''', table_name).fetchone()[0]:
+    def ensure_table_exists(self, table_name: str) -> bool:
+        if not self.execute_query('''SELECT count(1) AS "cnt" FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;''', (table_name, )).fetchone()[0]:
             raise Exception(f'Target table \'{table_name}\' not created, please create it first!')
 
     def upload_tuples(self, cols: list[str], src_tuples: list[tuple], src_table_name: str) -> None:
-        self.…
+        self.ensure_table_exists(src_table_name)
 
         cols_str = ','.join([f'"{x}"' for x in cols])
         query = f'''COPY {src_table_name}({cols_str}) FROM STDIN'''
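Besides the rename to `ensure_table_exists`, the fix wraps the query parameter in a one-element tuple. psycopg expects a sequence (or mapping) of parameters, and a bare string is itself a sequence of characters, so the old call would typically fail with a placeholder/parameter count mismatch. A minimal sketch of the difference; the connection string and table name are placeholders:

import psycopg

conn = psycopg.connect('dbname=example')  # placeholder conninfo
sql = '''SELECT count(1) FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;'''
# Wrong: 'public.users' is iterated character by character,
# so psycopg sees many parameters for a single %s placeholder.
# conn.execute(sql, 'public.users')
# Right: a one-element tuple carries exactly one parameter.
print(conn.execute(sql, ('public.users',)).fetchone()[0])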
@@ -125,7 +126,7 @@ class PG:
             copy.write_row(row)
 
     def upload_list_of_dict(self, src_data: list[dict], dst_table_name: str) -> None:
-        self.…
+        self.ensure_table_exists(dst_table_name)
 
         if len(src_data) == 0:
             raise ValueError('No data to upload!')
@@ -141,7 +142,7 @@ class PG:
     def upload_csv(self, src_filename: str, dst_table_name: str) -> None:
         src_filename = os.path.expanduser(src_filename)
 
-        self.…
+        self.ensure_table_exists(dst_table_name)
 
         cols_str = ','.join([f'"{x}"' for x in next(csv.reader(open(src_filename, 'r')))])
         query = dedent(

{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/WHEEL
File without changes
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/entry_points.txt
File without changes
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/licenses/LICENSE
File without changes
{rdxz2_utill-0.0.10.dist-info → rdxz2_utill-0.0.11.dist-info}/top_level.txt
File without changes
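With the rename applied consistently, every upload path (`upload_tuples`, `upload_list_of_dict`, `upload_csv`, and `pg_to_pg`) calls `ensure_table_exists` first, so a missing destination table fails fast instead of partway through a COPY. A hedged usage sketch; construction details, file, and table names are placeholders:

from utill.my_pg import PG

pg = PG()  # assumes connection settings come from the package's pg.json template
pg.upload_csv('~/events.csv', 'public.events')  # raises early if public.events is absent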