pidatametrics1 0.3.6__py2.py3-none-any.whl → 0.3.7__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pidatametrics/exporter.py CHANGED
@@ -1,12 +1,15 @@
1
1
  import csv
2
- import json
2
+ import re
3
+ import math
3
4
  from google.cloud import bigquery
4
5
 
5
- # Optional imports with error handling
6
+ # Optional imports
6
7
  try:
7
8
  import pandas as pd
9
+ from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
8
10
  except ImportError:
9
11
  pd = None
12
+ ILLEGAL_CHARACTERS_RE = None
10
13
 
11
14
  try:
12
15
  import gspread
@@ -15,6 +18,17 @@ except ImportError:
15
18
  gspread = None
16
19
 
17
20
  class PiExporter:
21
+
22
+ @staticmethod
23
+ def _clean_for_excel(text):
24
+ """Removes characters that cause Excel to crash/corrupt."""
25
+ if not isinstance(text, str):
26
+ return text
27
+ # Remove illegal control characters (null bytes, etc.)
28
+ if ILLEGAL_CHARACTERS_RE:
29
+ return ILLEGAL_CHARACTERS_RE.sub('', text)
30
+ return text
31
+
18
32
  @staticmethod
19
33
  def to_csv(data, filename):
20
34
  if not data:
@@ -33,9 +47,11 @@ class PiExporter:
33
47
 
34
48
  @staticmethod
35
49
  def to_excel(data, filename):
36
- """Exports data to Excel. Requires pandas and openpyxl."""
50
+ """
51
+ Exports to Excel with sanitization to prevent 'Corrupted File' errors.
52
+ """
37
53
  if pd is None:
38
- print("Error: Pandas not installed. Run: pip install pandas openpyxl")
54
+ print("Error: Pandas/Openpyxl not installed.")
39
55
  return
40
56
  if not data:
41
57
  print("No data to export.")
@@ -44,58 +60,87 @@ class PiExporter:
44
60
  if not filename.endswith('.xlsx'):
45
61
  filename += '.xlsx'
46
62
 
63
+ print(f"Preparing Excel file: {filename} ({len(data)} rows)...")
64
+
47
65
  try:
66
+ # 1. Create DataFrame
48
67
  df = pd.DataFrame(data)
49
- df.to_excel(filename, index=False)
68
+
69
+ # 2. Sanitize Data (Fixes "Excel found a problem with content")
70
+ # Apply cleaning to all string columns
71
+ for col in df.select_dtypes(include=['object']).columns:
72
+ df[col] = df[col].apply(PiExporter._clean_for_excel)
73
+
74
+ # 3. Export
75
+ df.to_excel(filename, index=False, engine='openpyxl')
50
76
  print(f"Successfully saved {len(data)} rows to {filename}")
77
+
51
78
  except Exception as e:
52
79
  print(f"Excel Export Failed: {e}")
53
80
 
54
81
  @staticmethod
55
82
  def to_google_sheet(data, spreadsheet_name, tab_name="Sheet1"):
56
- """
57
- Exports to Google Sheet using the Filename (not ID).
58
- Uses the active Colab authentication.
59
- """
60
83
  if gspread is None:
61
- print("Error: gspread not installed. Run: pip install gspread google-auth")
84
+ print("Error: gspread not installed.")
62
85
  return
63
86
  if not data:
64
87
  print("No data to upload.")
65
88
  return
66
89
 
67
- print(f"Connecting to Google Sheet: '{spreadsheet_name}'...")
90
+ row_count = len(data)
91
+ print(f"Preparing Google Sheet upload: {row_count} rows...")
92
+
93
+ # --- WARNING FOR LARGE DATASETS ---
94
+ if row_count > 50000:
95
+ print(f"⚠️ WARNING: You are uploading {row_count} rows.")
96
+ print(" Google Sheets may become slow. Uploading in chunks...")
68
97
 
69
98
  try:
70
- # 1. Get Default Credentials (works with Colab auth.authenticate_user)
99
+ # 1. Auth
71
100
  creds, _ = google.auth.default()
72
101
  client = gspread.authorize(creds)
73
102
 
74
- # 2. Open by Name (Title)
103
+ # 2. Open Sheet
75
104
  try:
76
105
  sh = client.open(spreadsheet_name)
77
106
  except gspread.SpreadsheetNotFound:
78
107
  print(f"Sheet '{spreadsheet_name}' not found. Creating it...")
79
108
  sh = client.create(spreadsheet_name)
80
109
 
81
- # 3. Select or Create Worksheet (Tab)
110
+ # 3. Setup Tab
82
111
  try:
83
112
  worksheet = sh.worksheet(tab_name)
84
- worksheet.clear() # Clear old data
113
+ worksheet.clear()
85
114
  except gspread.WorksheetNotFound:
86
- worksheet = sh.add_worksheet(title=tab_name, rows=len(data)+100, cols=20)
115
+ worksheet = sh.add_worksheet(title=tab_name, rows=row_count+100, cols=20)
87
116
 
88
117
  # 4. Prepare Data
89
118
  headers = list(data[0].keys())
90
119
  rows = [[row.get(col, '') for col in headers] for row in data]
91
- all_values = [headers] + rows
120
+
121
+ # 5. Upload Headers first
122
+ worksheet.update([headers], 'A1')
123
+
124
+ # 6. CHUNKED UPLOAD (To prevent timeouts on large data)
125
+ chunk_size = 5000 # Safe limit for gspread
126
+ total_chunks = math.ceil(len(rows) / chunk_size)
127
+
128
+ print(f"Starting upload in {total_chunks} chunks...")
129
+
130
+ for i in range(total_chunks):
131
+ start = i * chunk_size
132
+ end = start + chunk_size
133
+ chunk = rows[start:end]
134
+
135
+ # Append rows is safer for large datasets than update range
136
+ worksheet.append_rows(chunk, value_input_option='RAW')
137
+
138
+ print(f" - Uploaded chunk {i+1}/{total_chunks} ({len(chunk)} rows)")
92
139
 
93
- # 5. Update
94
- worksheet.update(all_values)
95
- print(f"Successfully uploaded {len(data)} rows to '{spreadsheet_name}' (Tab: {tab_name})")
140
+ print(f"✅ Successfully uploaded {row_count} rows to '{spreadsheet_name}' (Tab: {tab_name})")
96
141
 
97
142
  except Exception as e:
98
- print(f"Google Sheet Upload Failed: {e}")
143
+ print(f"Google Sheet Upload Failed: {e}")
99
144
 
100
145
  @staticmethod
101
146
  def to_bigquery(data, project_id, dataset_id, table_id):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pidatametrics1
3
- Version: 0.3.6
3
+ Version: 0.3.7
4
4
  Summary: A test wrapper for Pi Datametrics API with CSV and BigQuery support.
5
5
  Requires-Dist: google-auth
6
6
  Requires-Dist: google-cloud-bigquery
@@ -1,8 +1,8 @@
1
1
  pidatametrics/__init__.py,sha256=cmNSHvjvMsYO1tMv0Nf-7LGjIJ8AFXmUIwiv8jQ34BI,137
2
2
  pidatametrics/client.py,sha256=tHH0GV0rk2SizVqRdKepjdDQevkfdWlHOJHwsPR2PCk,4399
3
- pidatametrics/exporter.py,sha256=CcsdVhxI6rXi0zlQaYzFEGX0GL3ZaNV94Pj5r_WrZc4,4226
3
+ pidatametrics/exporter.py,sha256=yLN40kqwibHWs45gXKe_I1j9td9lJt-8LLFSS-Pk9-U,5813
4
4
  pidatametrics/manager.py,sha256=tUeeJ-wKAlhpWsaZEAjxtZCtA2EbQcTBB1JkXPEVV50,9101
5
5
  pidatametrics/parsers.py,sha256=fiLx3080wNubT1VqSIeDvlrKT85KdqlKhY6FaB2XuC8,5989
6
- pidatametrics1-0.3.6.dist-info/METADATA,sha256=DOlktdH-uHRwem6nbXWDIn3NLXmw67iENpiiX4f7--o,293
7
- pidatametrics1-0.3.6.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
8
- pidatametrics1-0.3.6.dist-info/RECORD,,
6
+ pidatametrics1-0.3.7.dist-info/METADATA,sha256=3rqys7d0-SNpsghd25f3eS99WSl-1QYzTY0HjbAYNak,293
7
+ pidatametrics1-0.3.7.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
8
+ pidatametrics1-0.3.7.dist-info/RECORD,,