pidatametrics1 0.3.6__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/PKG-INFO +1 -1
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/pyproject.toml +1 -1
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/src/pidatametrics/exporter.py +66 -21
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/README.md +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/src/pidatametrics/__init__.py +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/src/pidatametrics/client.py +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/src/pidatametrics/manager.py +0 -0
- {pidatametrics1-0.3.6 → pidatametrics1-0.3.7}/src/pidatametrics/parsers.py +0 -0
src/pidatametrics/exporter.py

@@ -1,12 +1,15 @@
 import csv
-import
+import re
+import math
 from google.cloud import bigquery
 
-# Optional imports
+# Optional imports
 try:
     import pandas as pd
+    from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
 except ImportError:
     pd = None
+    ILLEGAL_CHARACTERS_RE = None
 
 try:
     import gspread
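These imports extend the module's existing optional-dependency pattern: extras are imported inside try/except and replaced with None sentinels, so each exporter can check availability at call time. A minimal sketch of that pattern, using a hypothetical dependency name:

    # `some_extra` is illustrative, not a real dependency of pidatametrics.
    try:
        import some_extra
    except ImportError:
        some_extra = None  # sentinel checked before use

    def needs_extra():
        if some_extra is None:
            print("Error: some_extra not installed.")
            return
        # ... use some_extra here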
@@ -15,6 +18,17 @@ except ImportError:
     gspread = None
 
 class PiExporter:
+
+    @staticmethod
+    def _clean_for_excel(text):
+        """Removes characters that cause Excel to crash/corrupt."""
+        if not isinstance(text, str):
+            return text
+        # Remove illegal control characters (null bytes, etc.)
+        if ILLEGAL_CHARACTERS_RE:
+            return ILLEGAL_CHARACTERS_RE.sub('', text)
+        return text
+
     @staticmethod
     def to_csv(data, filename):
         if not data:
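The helper added here leans on openpyxl's ILLEGAL_CHARACTERS_RE to strip control characters that make Excel report a corrupted workbook; when openpyxl is missing it returns the text untouched. A rough standalone sketch of the same idea, with a hand-written fallback pattern (the fallback regex is an assumption, not part of the package):

    import re

    # Approximation of the XML 1.0 control-character range; the real helper
    # uses openpyxl.cell.cell.ILLEGAL_CHARACTERS_RE when it is available.
    FALLBACK_ILLEGAL_RE = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')

    def clean_for_excel(text, pattern=FALLBACK_ILLEGAL_RE):
        if not isinstance(text, str):
            return text
        return pattern.sub('', text)

    print(clean_for_excel('ok\x00value'))  # -> okvalue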
@@ -33,9 +47,11 @@
 
     @staticmethod
     def to_excel(data, filename):
-        """
+        """
+        Exports to Excel with sanitization to prevent 'Corrupted File' errors.
+        """
         if pd is None:
-            print("Error: Pandas not installed.
+            print("Error: Pandas/Openpyxl not installed.")
             return
         if not data:
             print("No data to export.")
@@ -44,58 +60,87 @@
         if not filename.endswith('.xlsx'):
             filename += '.xlsx'
 
+        print(f"Preparing Excel file: {filename} ({len(data)} rows)...")
+
         try:
+            # 1. Create DataFrame
             df = pd.DataFrame(data)
-
+
+            # 2. Sanitize Data (Fixes "Excel found a problem with content")
+            # Apply cleaning to all string columns
+            for col in df.select_dtypes(include=['object']).columns:
+                df[col] = df[col].apply(PiExporter._clean_for_excel)
+
+            # 3. Export
+            df.to_excel(filename, index=False, engine='openpyxl')
             print(f"Successfully saved {len(data)} rows to {filename}")
+
         except Exception as e:
             print(f"Excel Export Failed: {e}")
 
     @staticmethod
     def to_google_sheet(data, spreadsheet_name, tab_name="Sheet1"):
-        """
-        Exports to Google Sheet using the Filename (not ID).
-        Uses the active Colab authentication.
-        """
         if gspread is None:
-            print("Error: gspread not installed.
+            print("Error: gspread not installed.")
             return
         if not data:
             print("No data to upload.")
             return
 
-
+        row_count = len(data)
+        print(f"Preparing Google Sheet upload: {row_count} rows...")
+
+        # --- WARNING FOR LARGE DATASETS ---
+        if row_count > 50000:
+            print(f"⚠️ WARNING: You are uploading {row_count} rows.")
+            print(" Google Sheets may become slow. Uploading in chunks...")
 
         try:
-            # 1.
+            # 1. Auth
             creds, _ = google.auth.default()
             client = gspread.authorize(creds)
 
-            # 2. Open
+            # 2. Open Sheet
             try:
                 sh = client.open(spreadsheet_name)
             except gspread.SpreadsheetNotFound:
                 print(f"Sheet '{spreadsheet_name}' not found. Creating it...")
                 sh = client.create(spreadsheet_name)
 
-            # 3.
+            # 3. Setup Tab
             try:
                 worksheet = sh.worksheet(tab_name)
-                worksheet.clear()
+                worksheet.clear()
             except gspread.WorksheetNotFound:
-                worksheet = sh.add_worksheet(title=tab_name, rows=
+                worksheet = sh.add_worksheet(title=tab_name, rows=row_count+100, cols=20)
 
             # 4. Prepare Data
             headers = list(data[0].keys())
             rows = [[row.get(col, '') for col in headers] for row in data]
-
+
+            # 5. Upload Headers first
+            worksheet.update([headers], 'A1')
+
+            # 6. CHUNKED UPLOAD (To prevent timeouts on large data)
+            chunk_size = 5000  # Safe limit for gspread
+            total_chunks = math.ceil(len(rows) / chunk_size)
+
+            print(f"Starting upload in {total_chunks} chunks...")
+
+            for i in range(total_chunks):
+                start = i * chunk_size
+                end = start + chunk_size
+                chunk = rows[start:end]
+
+                # Append rows is safer for large datasets than update range
+                worksheet.append_rows(chunk, value_input_option='RAW')
+
+                print(f" - Uploaded chunk {i+1}/{total_chunks} ({len(chunk)} rows)")
 
-
-            worksheet.update(all_values)
-            print(f"Successfully uploaded {len(data)} rows to '{spreadsheet_name}' (Tab: {tab_name})")
+            print(f"✅ Successfully uploaded {row_count} rows to '{spreadsheet_name}' (Tab: {tab_name})")
 
         except Exception as e:
-            print(f"Google Sheet Upload Failed: {e}")
+            print(f"❌ Google Sheet Upload Failed: {e}")
 
     @staticmethod
     def to_bigquery(data, project_id, dataset_id, table_id):
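The rewritten upload path sends rows with worksheet.append_rows in fixed-size chunks instead of a single large update call. The chunk arithmetic on its own, as a small illustrative sketch (the function and variable names below are not part of the package):

    import math

    def iter_chunks(rows, chunk_size=5000):
        # Yield consecutive slices of `rows`, each at most `chunk_size` long.
        total_chunks = math.ceil(len(rows) / chunk_size)
        for i in range(total_chunks):
            yield rows[i * chunk_size:(i + 1) * chunk_size]

    rows = [[n] for n in range(12345)]
    print([len(c) for c in iter_chunks(rows)])  # [5000, 5000, 2345]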