pidatametrics1-0.3.7-py2.py3-none-any.whl → pidatametrics1-0.4.0-py2.py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
pidatametrics/exporter.py
CHANGED
|
@@ -14,6 +14,7 @@ except ImportError:
|
|
|
14
14
|
try:
|
|
15
15
|
import gspread
|
|
16
16
|
import google.auth
|
|
17
|
+
from gspread.exceptions import APIError
|
|
17
18
|
except ImportError:
|
|
18
19
|
gspread = None
|
|
19
20
|
|
|
@@ -24,7 +25,6 @@ class PiExporter:
|
|
|
24
25
|
"""Removes characters that cause Excel to crash/corrupt."""
|
|
25
26
|
if not isinstance(text, str):
|
|
26
27
|
return text
|
|
27
|
-
# Remove illegal control characters (null bytes, etc.)
|
|
28
28
|
if ILLEGAL_CHARACTERS_RE:
|
|
29
29
|
return ILLEGAL_CHARACTERS_RE.sub('', text)
|
|
30
30
|
return text
|
|
@@ -47,9 +47,7 @@ class PiExporter:
|
|
|
47
47
|
|
|
48
48
|
@staticmethod
|
|
49
49
|
def to_excel(data, filename):
|
|
50
|
-
"""
|
|
51
|
-
Exports to Excel with sanitization to prevent 'Corrupted File' errors.
|
|
52
|
-
"""
|
|
50
|
+
"""Exports to Excel with sanitization."""
|
|
53
51
|
if pd is None:
|
|
54
52
|
print("Error: Pandas/Openpyxl not installed.")
|
|
55
53
|
return
|
|
@@ -63,15 +61,12 @@ class PiExporter:
|
|
|
63
61
|
print(f"Preparing Excel file: {filename} ({len(data)} rows)...")
|
|
64
62
|
|
|
65
63
|
try:
|
|
66
|
-
# 1. Create DataFrame
|
|
67
64
|
df = pd.DataFrame(data)
|
|
68
65
|
|
|
69
|
-
#
|
|
70
|
-
# Apply cleaning to all string columns
|
|
66
|
+
# Sanitize Data (Fixes "Excel found a problem with content")
|
|
71
67
|
for col in df.select_dtypes(include=['object']).columns:
|
|
72
68
|
df[col] = df[col].apply(PiExporter._clean_for_excel)
|
|
73
69
|
|
|
74
|
-
# 3. Export
|
|
75
70
|
df.to_excel(filename, index=False, engine='openpyxl')
|
|
76
71
|
print(f"Successfully saved {len(data)} rows to {filename}")
|
|
77
72
|
|
|
@@ -90,11 +85,6 @@ class PiExporter:
|
|
|
90
85
|
row_count = len(data)
|
|
91
86
|
print(f"Preparing Google Sheet upload: {row_count} rows...")
|
|
92
87
|
|
|
93
|
-
# --- WARNING FOR LARGE DATASETS ---
|
|
94
|
-
if row_count > 50000:
|
|
95
|
-
print(f"⚠️ WARNING: You are uploading {row_count} rows.")
|
|
96
|
-
print(" Google Sheets may become slow. Uploading in chunks...")
|
|
97
|
-
|
|
98
88
|
try:
|
|
99
89
|
# 1. Auth
|
|
100
90
|
creds, _ = google.auth.default()
|
|
@@ -118,24 +108,30 @@ class PiExporter:
|
|
|
118
108
|
headers = list(data[0].keys())
|
|
119
109
|
rows = [[row.get(col, '') for col in headers] for row in data]
|
|
120
110
|
|
|
121
|
-
# 5. Upload Headers
|
|
111
|
+
# 5. Upload Headers
|
|
122
112
|
worksheet.update([headers], 'A1')
|
|
123
113
|
|
|
124
|
-
# 6. CHUNKED UPLOAD (
|
|
125
|
-
|
|
114
|
+
# 6. CHUNKED UPLOAD (Optimized for Speed)
|
|
115
|
+
# 30,000 is the sweet spot.
|
|
116
|
+
# 90k often causes "Payload Too Large" or "Timeout" errors.
|
|
117
|
+
chunk_size = 30000
|
|
126
118
|
total_chunks = math.ceil(len(rows) / chunk_size)
|
|
127
119
|
|
|
128
|
-
print(f"Starting upload in {total_chunks} chunks...")
|
|
120
|
+
print(f"Starting upload in {total_chunks} chunks (Size: {chunk_size})...")
|
|
129
121
|
|
|
130
122
|
for i in range(total_chunks):
|
|
131
123
|
start = i * chunk_size
|
|
132
124
|
end = start + chunk_size
|
|
133
125
|
chunk = rows[start:end]
|
|
134
126
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
127
|
+
try:
|
|
128
|
+
# value_input_option='RAW' is faster than 'USER_ENTERED'
|
|
129
|
+
worksheet.append_rows(chunk, value_input_option='RAW')
|
|
130
|
+
print(f" - Uploaded chunk {i+1}/{total_chunks} ({len(chunk)} rows)")
|
|
131
|
+
except APIError as e:
|
|
132
|
+
print(f" ❌ Error on chunk {i+1}: {e}")
|
|
133
|
+
print(" (If this is a timeout, try reducing chunk_size in the code)")
|
|
134
|
+
raise e
|
|
139
135
|
|
|
140
136
|
print(f"✅ Successfully uploaded {row_count} rows to '{spreadsheet_name}' (Tab: {tab_name})")
|
|
141
137
|
|
|
{pidatametrics1-0.3.7.dist-info → pidatametrics1-0.4.0.dist-info}/RECORD
@@ -1,8 +1,8 @@
|
|
|
1
1
|
pidatametrics/__init__.py,sha256=cmNSHvjvMsYO1tMv0Nf-7LGjIJ8AFXmUIwiv8jQ34BI,137
|
|
2
2
|
pidatametrics/client.py,sha256=tHH0GV0rk2SizVqRdKepjdDQevkfdWlHOJHwsPR2PCk,4399
|
|
3
|
-
pidatametrics/exporter.py,sha256=
|
|
3
|
+
pidatametrics/exporter.py,sha256=PffQ0f515du1JnvUmh4xDM8YQ7ww8WgeFKSss233Y-I,5723
|
|
4
4
|
pidatametrics/manager.py,sha256=tUeeJ-wKAlhpWsaZEAjxtZCtA2EbQcTBB1JkXPEVV50,9101
|
|
5
5
|
pidatametrics/parsers.py,sha256=fiLx3080wNubT1VqSIeDvlrKT85KdqlKhY6FaB2XuC8,5989
|
|
6
|
-
pidatametrics1-0.
|
|
7
|
-
pidatametrics1-0.
|
|
8
|
-
pidatametrics1-0.
|
|
6
|
+
pidatametrics1-0.4.0.dist-info/METADATA,sha256=g4rCEmRmzUuuokAKK7v5EaAVpnredVEnPIE_ukOV0ao,293
|
|
7
|
+
pidatametrics1-0.4.0.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
|
|
8
|
+
pidatametrics1-0.4.0.dist-info/RECORD,,
|
|
File without changes
|