pidatametrics1 0.3.7__tar.gz → 0.4.0__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pidatametrics1
3
- Version: 0.3.7
3
+ Version: 0.4.0
4
4
  Summary: A test wrapper for Pi Datametrics API with CSV and BigQuery support.
5
5
  Requires-Dist: google-auth
6
6
  Requires-Dist: google-cloud-bigquery
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "pidatametrics1"
7
- version = "0.3.7"
7
+ version = "0.4.0"
8
8
  description = "A test wrapper for Pi Datametrics API with CSV and BigQuery support."
9
9
  dependencies = [
10
10
  "requests",
@@ -14,6 +14,7 @@ except ImportError:
14
14
  try:
15
15
  import gspread
16
16
  import google.auth
17
+ from gspread.exceptions import APIError
17
18
  except ImportError:
18
19
  gspread = None
19
20
 
@@ -24,7 +25,6 @@ class PiExporter:
24
25
  """Removes characters that cause Excel to crash/corrupt."""
25
26
  if not isinstance(text, str):
26
27
  return text
27
- # Remove illegal control characters (null bytes, etc.)
28
28
  if ILLEGAL_CHARACTERS_RE:
29
29
  return ILLEGAL_CHARACTERS_RE.sub('', text)
30
30
  return text
@@ -47,9 +47,7 @@ class PiExporter:
47
47
 
48
48
  @staticmethod
49
49
  def to_excel(data, filename):
50
- """
51
- Exports to Excel with sanitization to prevent 'Corrupted File' errors.
52
- """
50
+ """Exports to Excel with sanitization."""
53
51
  if pd is None:
54
52
  print("Error: Pandas/Openpyxl not installed.")
55
53
  return
@@ -63,15 +61,12 @@ class PiExporter:
63
61
  print(f"Preparing Excel file: {filename} ({len(data)} rows)...")
64
62
 
65
63
  try:
66
- # 1. Create DataFrame
67
64
  df = pd.DataFrame(data)
68
65
 
69
- # 2. Sanitize Data (Fixes "Excel found a problem with content")
70
- # Apply cleaning to all string columns
66
+ # Sanitize Data (Fixes "Excel found a problem with content")
71
67
  for col in df.select_dtypes(include=['object']).columns:
72
68
  df[col] = df[col].apply(PiExporter._clean_for_excel)
73
69
 
74
- # 3. Export
75
70
  df.to_excel(filename, index=False, engine='openpyxl')
76
71
  print(f"Successfully saved {len(data)} rows to {filename}")
77
72
 
@@ -90,11 +85,6 @@ class PiExporter:
90
85
  row_count = len(data)
91
86
  print(f"Preparing Google Sheet upload: {row_count} rows...")
92
87
 
93
- # --- WARNING FOR LARGE DATASETS ---
94
- if row_count > 50000:
95
- print(f"⚠️ WARNING: You are uploading {row_count} rows.")
96
- print(" Google Sheets may become slow. Uploading in chunks...")
97
-
98
88
  try:
99
89
  # 1. Auth
100
90
  creds, _ = google.auth.default()
@@ -118,24 +108,30 @@ class PiExporter:
118
108
  headers = list(data[0].keys())
119
109
  rows = [[row.get(col, '') for col in headers] for row in data]
120
110
 
121
- # 5. Upload Headers first
111
+ # 5. Upload Headers
122
112
  worksheet.update([headers], 'A1')
123
113
 
124
- # 6. CHUNKED UPLOAD (To prevent timeouts on large data)
125
- chunk_size = 5000 # Safe limit for gspread
114
+ # 6. CHUNKED UPLOAD (Optimized for Speed)
115
+ # 30,000 is the sweet spot.
116
+ # 90k often causes "Payload Too Large" or "Timeout" errors.
117
+ chunk_size = 30000
126
118
  total_chunks = math.ceil(len(rows) / chunk_size)
127
119
 
128
- print(f"Starting upload in {total_chunks} chunks...")
120
+ print(f"Starting upload in {total_chunks} chunks (Size: {chunk_size})...")
129
121
 
130
122
  for i in range(total_chunks):
131
123
  start = i * chunk_size
132
124
  end = start + chunk_size
133
125
  chunk = rows[start:end]
134
126
 
135
- # Append rows is safer for large datasets than update range
136
- worksheet.append_rows(chunk, value_input_option='RAW')
137
-
138
- print(f" - Uploaded chunk {i+1}/{total_chunks} ({len(chunk)} rows)")
127
+ try:
128
+ # value_input_option='RAW' is faster than 'USER_ENTERED'
129
+ worksheet.append_rows(chunk, value_input_option='RAW')
130
+ print(f" - Uploaded chunk {i+1}/{total_chunks} ({len(chunk)} rows)")
131
+ except APIError as e:
132
+ print(f" ❌ Error on chunk {i+1}: {e}")
133
+ print(" (If this is a timeout, try reducing chunk_size in the code)")
134
+ raise e
139
135
 
140
136
  print(f"✅ Successfully uploaded {row_count} rows to '{spreadsheet_name}' (Tab: {tab_name})")
141
137
 
File without changes