pidatametrics1 0.3.6__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/PKG-INFO +1 -1
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/pyproject.toml +1 -1
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/src/pidatametrics/exporter.py +63 -22
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/README.md +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/src/pidatametrics/__init__.py +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/src/pidatametrics/client.py +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/src/pidatametrics/manager.py +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.4.0}/src/pidatametrics/parsers.py +0 -0
|
@@ -1,20 +1,34 @@
|
|
|
1
1
|
import csv
|
|
2
|
-
import
|
|
2
|
+
import re
|
|
3
|
+
import math
|
|
3
4
|
from google.cloud import bigquery
|
|
4
5
|
|
|
5
|
-
# Optional imports
|
|
6
|
+
# Optional imports
|
|
6
7
|
try:
|
|
7
8
|
import pandas as pd
|
|
9
|
+
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
|
|
8
10
|
except ImportError:
|
|
9
11
|
pd = None
|
|
12
|
+
ILLEGAL_CHARACTERS_RE = None
|
|
10
13
|
|
|
11
14
|
try:
|
|
12
15
|
import gspread
|
|
13
16
|
import google.auth
|
|
17
|
+
from gspread.exceptions import APIError
|
|
14
18
|
except ImportError:
|
|
15
19
|
gspread = None
|
|
16
20
|
|
|
17
21
|
class PiExporter:
|
|
22
|
+
|
|
23
|
+
@staticmethod
|
|
24
|
+
def _clean_for_excel(text):
|
|
25
|
+
"""Removes characters that cause Excel to crash/corrupt."""
|
|
26
|
+
if not isinstance(text, str):
|
|
27
|
+
return text
|
|
28
|
+
if ILLEGAL_CHARACTERS_RE:
|
|
29
|
+
return ILLEGAL_CHARACTERS_RE.sub('', text)
|
|
30
|
+
return text
|
|
31
|
+
|
|
18
32
|
@staticmethod
|
|
19
33
|
def to_csv(data, filename):
|
|
20
34
|
if not data:
|
|
@@ -33,9 +47,9 @@ class PiExporter:
|
|
|
33
47
|
|
|
34
48
|
@staticmethod
|
|
35
49
|
def to_excel(data, filename):
|
|
36
|
-
"""Exports
|
|
50
|
+
"""Exports to Excel with sanitization."""
|
|
37
51
|
if pd is None:
|
|
38
|
-
print("Error: Pandas not installed.
|
|
52
|
+
print("Error: Pandas/Openpyxl not installed.")
|
|
39
53
|
return
|
|
40
54
|
if not data:
|
|
41
55
|
print("No data to export.")
|
|
@@ -44,58 +58,85 @@ class PiExporter:
|
|
|
44
58
|
if not filename.endswith('.xlsx'):
|
|
45
59
|
filename += '.xlsx'
|
|
46
60
|
|
|
61
|
+
print(f"Preparing Excel file: {filename} ({len(data)} rows)...")
|
|
62
|
+
|
|
47
63
|
try:
|
|
48
64
|
df = pd.DataFrame(data)
|
|
49
|
-
|
|
65
|
+
|
|
66
|
+
# Sanitize Data (Fixes "Excel found a problem with content")
|
|
67
|
+
for col in df.select_dtypes(include=['object']).columns:
|
|
68
|
+
df[col] = df[col].apply(PiExporter._clean_for_excel)
|
|
69
|
+
|
|
70
|
+
df.to_excel(filename, index=False, engine='openpyxl')
|
|
50
71
|
print(f"Successfully saved {len(data)} rows to {filename}")
|
|
72
|
+
|
|
51
73
|
except Exception as e:
|
|
52
74
|
print(f"Excel Export Failed: {e}")
|
|
53
75
|
|
|
54
76
|
@staticmethod
|
|
55
77
|
def to_google_sheet(data, spreadsheet_name, tab_name="Sheet1"):
|
|
56
|
-
"""
|
|
57
|
-
Exports to Google Sheet using the Filename (not ID).
|
|
58
|
-
Uses the active Colab authentication.
|
|
59
|
-
"""
|
|
60
78
|
if gspread is None:
|
|
61
|
-
print("Error: gspread not installed.
|
|
79
|
+
print("Error: gspread not installed.")
|
|
62
80
|
return
|
|
63
81
|
if not data:
|
|
64
82
|
print("No data to upload.")
|
|
65
83
|
return
|
|
66
84
|
|
|
67
|
-
|
|
85
|
+
row_count = len(data)
|
|
86
|
+
print(f"Preparing Google Sheet upload: {row_count} rows...")
|
|
68
87
|
|
|
69
88
|
try:
|
|
70
|
-
# 1.
|
|
89
|
+
# 1. Auth
|
|
71
90
|
creds, _ = google.auth.default()
|
|
72
91
|
client = gspread.authorize(creds)
|
|
73
92
|
|
|
74
|
-
# 2. Open
|
|
93
|
+
# 2. Open Sheet
|
|
75
94
|
try:
|
|
76
95
|
sh = client.open(spreadsheet_name)
|
|
77
96
|
except gspread.SpreadsheetNotFound:
|
|
78
97
|
print(f"Sheet '{spreadsheet_name}' not found. Creating it...")
|
|
79
98
|
sh = client.create(spreadsheet_name)
|
|
80
99
|
|
|
81
|
-
# 3.
|
|
100
|
+
# 3. Setup Tab
|
|
82
101
|
try:
|
|
83
102
|
worksheet = sh.worksheet(tab_name)
|
|
84
|
-
worksheet.clear()
|
|
103
|
+
worksheet.clear()
|
|
85
104
|
except gspread.WorksheetNotFound:
|
|
86
|
-
worksheet = sh.add_worksheet(title=tab_name, rows=
|
|
105
|
+
worksheet = sh.add_worksheet(title=tab_name, rows=row_count+100, cols=20)
|
|
87
106
|
|
|
88
107
|
# 4. Prepare Data
|
|
89
108
|
headers = list(data[0].keys())
|
|
90
109
|
rows = [[row.get(col, '') for col in headers] for row in data]
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
110
|
+
|
|
111
|
+
# 5. Upload Headers
|
|
112
|
+
worksheet.update([headers], 'A1')
|
|
113
|
+
|
|
114
|
+
# 6. CHUNKED UPLOAD (Optimized for Speed)
|
|
115
|
+
# 30,000 is the sweet spot.
|
|
116
|
+
# 90k often causes "Payload Too Large" or "Timeout" errors.
|
|
117
|
+
chunk_size = 30000
|
|
118
|
+
total_chunks = math.ceil(len(rows) / chunk_size)
|
|
119
|
+
|
|
120
|
+
print(f"Starting upload in {total_chunks} chunks (Size: {chunk_size})...")
|
|
121
|
+
|
|
122
|
+
for i in range(total_chunks):
|
|
123
|
+
start = i * chunk_size
|
|
124
|
+
end = start + chunk_size
|
|
125
|
+
chunk = rows[start:end]
|
|
126
|
+
|
|
127
|
+
try:
|
|
128
|
+
# value_input_option='RAW' is faster than 'USER_ENTERED'
|
|
129
|
+
worksheet.append_rows(chunk, value_input_option='RAW')
|
|
130
|
+
print(f" - Uploaded chunk {i+1}/{total_chunks} ({len(chunk)} rows)")
|
|
131
|
+
except APIError as e:
|
|
132
|
+
print(f" ❌ Error on chunk {i+1}: {e}")
|
|
133
|
+
print(" (If this is a timeout, try reducing chunk_size in the code)")
|
|
134
|
+
raise e
|
|
135
|
+
|
|
136
|
+
print(f"✅ Successfully uploaded {row_count} rows to '{spreadsheet_name}' (Tab: {tab_name})")
|
|
96
137
|
|
|
97
138
|
except Exception as e:
|
|
98
|
-
print(f"Google Sheet Upload Failed: {e}")
|
|
139
|
+
print(f"❌ Google Sheet Upload Failed: {e}")
|
|
99
140
|
|
|
100
141
|
@staticmethod
|
|
101
142
|
def to_bigquery(data, project_id, dataset_id, table_id):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|