pidatametrics1 0.3.5__py2.py3-none-any.whl → 0.3.7__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pidatametrics/exporter.py +66 -21
- pidatametrics/manager.py +39 -24
- {pidatametrics1-0.3.5.dist-info → pidatametrics1-0.3.7.dist-info}/METADATA +2 -2
- pidatametrics1-0.3.7.dist-info/RECORD +8 -0
- pidatametrics1-0.3.5.dist-info/RECORD +0 -8
- {pidatametrics1-0.3.5.dist-info → pidatametrics1-0.3.7.dist-info}/WHEEL +0 -0
pidatametrics/exporter.py
CHANGED
@@ -1,12 +1,15 @@
 import csv
-import
+import re
+import math
 from google.cloud import bigquery

-# Optional imports
+# Optional imports
 try:
     import pandas as pd
+    from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
 except ImportError:
     pd = None
+    ILLEGAL_CHARACTERS_RE = None

 try:
     import gspread
@@ -15,6 +18,17 @@ except ImportError:
     gspread = None

 class PiExporter:
+
+    @staticmethod
+    def _clean_for_excel(text):
+        """Removes characters that cause Excel to crash/corrupt."""
+        if not isinstance(text, str):
+            return text
+        # Remove illegal control characters (null bytes, etc.)
+        if ILLEGAL_CHARACTERS_RE:
+            return ILLEGAL_CHARACTERS_RE.sub('', text)
+        return text
+
     @staticmethod
     def to_csv(data, filename):
         if not data:
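For context on the new helper: openpyxl raises IllegalCharacterError when asked to write control characters (null bytes, backspaces, escape codes) that often survive in scraped text, which is what surfaces in Excel as the "found a problem with some content" dialog. A minimal standalone sketch of what _clean_for_excel does (the dirty string is made up):

from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE

dirty = "ACME\x00 Widgets\x08 (scraped title)"   # hypothetical scraped value
clean = ILLEGAL_CHARACTERS_RE.sub('', dirty)     # strip illegal control chars
print(clean)                                     # ACME Widgets (scraped title)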
@@ -33,9 +47,11 @@ class PiExporter:

     @staticmethod
     def to_excel(data, filename):
-        """
+        """
+        Exports to Excel with sanitization to prevent 'Corrupted File' errors.
+        """
         if pd is None:
-            print("Error: Pandas not installed.
+            print("Error: Pandas/Openpyxl not installed.")
             return
         if not data:
             print("No data to export.")
@@ -44,58 +60,87 @@ class PiExporter:
         if not filename.endswith('.xlsx'):
             filename += '.xlsx'

+        print(f"Preparing Excel file: {filename} ({len(data)} rows)...")
+
         try:
+            # 1. Create DataFrame
             df = pd.DataFrame(data)
-
+
+            # 2. Sanitize Data (Fixes "Excel found a problem with content")
+            # Apply cleaning to all string columns
+            for col in df.select_dtypes(include=['object']).columns:
+                df[col] = df[col].apply(PiExporter._clean_for_excel)
+
+            # 3. Export
+            df.to_excel(filename, index=False, engine='openpyxl')
             print(f"Successfully saved {len(data)} rows to {filename}")
+
         except Exception as e:
             print(f"Excel Export Failed: {e}")

     @staticmethod
     def to_google_sheet(data, spreadsheet_name, tab_name="Sheet1"):
-        """
-        Exports to Google Sheet using the Filename (not ID).
-        Uses the active Colab authentication.
-        """
         if gspread is None:
-            print("Error: gspread not installed.
+            print("Error: gspread not installed.")
             return
         if not data:
             print("No data to upload.")
             return

-
+        row_count = len(data)
+        print(f"Preparing Google Sheet upload: {row_count} rows...")
+
+        # --- WARNING FOR LARGE DATASETS ---
+        if row_count > 50000:
+            print(f"⚠️ WARNING: You are uploading {row_count} rows.")
+            print("   Google Sheets may become slow. Uploading in chunks...")

         try:
-            # 1.
+            # 1. Auth
             creds, _ = google.auth.default()
             client = gspread.authorize(creds)

-            # 2. Open
+            # 2. Open Sheet
             try:
                 sh = client.open(spreadsheet_name)
             except gspread.SpreadsheetNotFound:
                 print(f"Sheet '{spreadsheet_name}' not found. Creating it...")
                 sh = client.create(spreadsheet_name)

-            # 3.
+            # 3. Setup Tab
             try:
                 worksheet = sh.worksheet(tab_name)
-                worksheet.clear()
+                worksheet.clear()
             except gspread.WorksheetNotFound:
-                worksheet = sh.add_worksheet(title=tab_name, rows=
+                worksheet = sh.add_worksheet(title=tab_name, rows=row_count+100, cols=20)

             # 4. Prepare Data
             headers = list(data[0].keys())
             rows = [[row.get(col, '') for col in headers] for row in data]
-
+
+            # 5. Upload Headers first
+            worksheet.update([headers], 'A1')
+
+            # 6. CHUNKED UPLOAD (To prevent timeouts on large data)
+            chunk_size = 5000  # Safe limit for gspread
+            total_chunks = math.ceil(len(rows) / chunk_size)
+
+            print(f"Starting upload in {total_chunks} chunks...")
+
+            for i in range(total_chunks):
+                start = i * chunk_size
+                end = start + chunk_size
+                chunk = rows[start:end]
+
+                # Append rows is safer for large datasets than update range
+                worksheet.append_rows(chunk, value_input_option='RAW')
+
+                print(f"  - Uploaded chunk {i+1}/{total_chunks} ({len(chunk)} rows)")

-
-            worksheet.update(all_values)
-            print(f"Successfully uploaded {len(data)} rows to '{spreadsheet_name}' (Tab: {tab_name})")
+            print(f"✅ Successfully uploaded {row_count} rows to '{spreadsheet_name}' (Tab: {tab_name})")

         except Exception as e:
-            print(f"Google Sheet Upload Failed: {e}")
+            print(f"❌ Google Sheet Upload Failed: {e}")

     @staticmethod
     def to_bigquery(data, project_id, dataset_id, table_id):
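The chunk arithmetic above is straightforward: with chunk_size = 5000, a 12,345-row payload becomes ceil(12345 / 5000) = 3 append_rows calls of 5000, 5000 and 2345 rows. A standalone sketch of the same slicing (worksheet call commented out, payload made up):

import math

rows = [[i, f"term_{i}"] for i in range(12345)]   # hypothetical payload
chunk_size = 5000
total_chunks = math.ceil(len(rows) / chunk_size)  # 3

for i in range(total_chunks):
    chunk = rows[i * chunk_size:(i + 1) * chunk_size]
    # worksheet.append_rows(chunk, value_input_option='RAW')  # as in to_google_sheet
    print(f"chunk {i + 1}/{total_chunks}: {len(chunk)} rows")  # 5000, 5000, 2345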
pidatametrics/manager.py
CHANGED
@@ -6,22 +6,21 @@ from dateutil.relativedelta import relativedelta

 class PiReportManager(PiDataMetrics):

-    # --- HELPER: Generate Unique
-    def
+    # --- HELPER: Generate Unique Name ---
+    def _generate_unique_name(self, base_name, workspace_ref=None):
         """
-        Creates a
+        Creates a unique name like: Hist_51780_0502_1430
         (Base_WorkspaceID_Date_Time)
         """
         now = datetime.datetime.now()
-        timestamp = now.strftime("%d%m_%H%M") # e.g., 0502_1430
+        timestamp = now.strftime("%d%m_%H%M")  # e.g., 0502_1430

         ws_part = f"_{workspace_ref}" if workspace_ref else ""

-        #
-        # Timestamp (9) + Base (approx 10) leaves ~10 for ID.
+        # Combine parts
         full_name = f"{base_name}{ws_part}_{timestamp}"

-        # Truncate to 31 chars
+        # Truncate to 31 chars (Google Sheets limit) just in case
         return full_name[:31]

     def _resolve_workspaces(self, ids_str=None, name_pattern=None):
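A worked example of the naming scheme: with base "Hist", workspace 51780 and a run at 14:30 on 5 February, strftime("%d%m_%H%M") gives "0502_1430", so the result is Hist_51780_0502_1430 (20 characters, inside the 31-character cap). A reproducible sketch (the pinned datetime is illustrative):

import datetime

base_name, workspace_ref = "Hist", 51780
now = datetime.datetime(2025, 2, 5, 14, 30)   # pinned so the output is stable
timestamp = now.strftime("%d%m_%H%M")         # "0502_1430"
ws_part = f"_{workspace_ref}" if workspace_ref else ""
full_name = f"{base_name}{ws_part}_{timestamp}"
print(full_name[:31])                         # Hist_51780_0502_1430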
@@ -62,20 +61,34 @@ class PiReportManager(PiDataMetrics):
             current_date -= relativedelta(months=1)
         return dates

-
+    # --- UPDATED EXPORT LOGIC ---
+    def _export_data(self, data, output_mode, bq_config, spreadsheet_name, unique_name):
+        """
+        Handles export routing.
+        unique_name is used for:
+        - CSV Filename
+        - Excel Filename
+        - Google Sheet Tab Name
+        """
         if not data:
             print("No data to export.")
             return

         if output_mode == 'bigquery' and bq_config:
+            # BigQuery doesn't use filenames, it uses the Table ID in config
             PiExporter.to_bigquery(data, bq_config['project'], bq_config['dataset'], bq_config['table'])
+
         elif output_mode == 'excel':
-
+            # Use unique_name as filename
+            PiExporter.to_excel(data, unique_name)
+
         elif output_mode == 'gsheet' and spreadsheet_name:
-            #
-            PiExporter.to_google_sheet(data, spreadsheet_name, tab_name)
+            # Use unique_name as Tab Name
+            PiExporter.to_google_sheet(data, spreadsheet_name, tab_name=unique_name)
+
         else:
-
+            # Default to CSV, use unique_name as filename
+            PiExporter.to_csv(data, unique_name)

     def run_volume_report(self, filename, workspace_ids=None, workspace_name=None, output_mode='csv', bq_config=None, spreadsheet_name=None):
         targets = self._resolve_workspaces(workspace_ids, workspace_name)
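The effect of the routing is that every report runner just hands _export_data a mode string plus the generated unique_name. A hedged sketch of the resulting exporter calls (the import path and values are assumptions, not shown in this diff):

from pidatametrics.exporter import PiExporter  # assumed import path

data = [{"term": "running shoes", "rank": 3}]  # hypothetical rows
unique_name = "Vol_51780_0502_1430"

# output_mode='excel' -> to_excel appends .xlsx, writing Vol_51780_0502_1430.xlsx
PiExporter.to_excel(data, unique_name)

# output_mode='gsheet' -> the unique name becomes the tab, not the spreadsheet
PiExporter.to_google_sheet(data, "Pi Reports", tab_name=unique_name)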
@@ -89,11 +102,11 @@ class PiReportManager(PiDataMetrics):
                 rows = PiParsers.parse_volume_data(vol_data, stg['name'], terms, ws_name)
                 all_rows.extend(rows)

-        #
+        # Generate Unique Name
         ws_ref = list(targets.keys())[0] if targets else "Multi"
-
+        unique_name = self._generate_unique_name("Vol", ws_ref)

-        self._export_data(all_rows, output_mode,
+        self._export_data(all_rows, output_mode, bq_config, spreadsheet_name, unique_name)

     def run_serp_report(self, data_sources, output_mode='csv', bq_config=None, filename=None, manual_duplication=None, spreadsheet_name=None):
         yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
@@ -105,11 +118,11 @@ class PiReportManager(PiDataMetrics):
             rows = PiParsers.parse_serp_response(raw_data, market, w_name, se_name, yesterday, cat_map, manual_duplication)
             all_rows.extend(rows)

-        #
+        # Generate Unique Name
         ws_ref = data_sources[0][1] if data_sources else "All"
-
+        unique_name = self._generate_unique_name("SERP", ws_ref)

-        self._export_data(all_rows, output_mode,
+        self._export_data(all_rows, output_mode, bq_config, spreadsheet_name, unique_name)

     def run_historical_serp_report(self, data_sources, duration, frequency, start_date=None, features=None, num_results=25, output_mode='csv', bq_config=None, filename="historical_data", spreadsheet_name=None):
         if features is None:
@@ -141,19 +154,21 @@ class PiReportManager(PiDataMetrics):
             except Exception as e:
                 print(f"Failed to fetch {w_name} on {date}: {e}")

+            # BigQuery uploads immediately per day
             if output_mode == 'bigquery' and bq_config:
                 if daily_rows:
                     print(f"Uploading {len(daily_rows)} rows for {date} to BigQuery...")
                     PiExporter.to_bigquery(daily_rows, bq_config['project'], bq_config['dataset'], bq_config['table'])
+            # Others accumulate
             elif output_mode in ['csv', 'excel', 'gsheet']:
                 all_file_rows.extend(daily_rows)

+        # Final Export for Files
         if output_mode in ['csv', 'excel', 'gsheet']:
-            # Use the Workspace ID from the first data source
             ws_ref = data_sources[0][1] if data_sources else "All"
-
+            unique_name = self._generate_unique_name("Hist", ws_ref)

-            self._export_data(all_file_rows, output_mode,
+            self._export_data(all_file_rows, output_mode, bq_config, spreadsheet_name, unique_name)

     def run_llm_report(self, data_sources, start_period, end_period, stg_ids=None, output_mode='csv', bq_config=None, filename="llm_output", spreadsheet_name=None):
         all_rows = []
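Note the split in the historical runner: with output_mode='bigquery' each day's daily_rows are pushed as soon as they are parsed, while csv/excel/gsheet accumulate into all_file_rows for one final export. The expected bq_config shape is inferred from the keys the code reads (values here are hypothetical):

bq_config = {
    'project': 'my-gcp-project',    # hypothetical GCP project id
    'dataset': 'pi_datametrics',    # hypothetical dataset name
    'table': 'historical_serps',    # rows are appended here day by day
}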
@@ -170,8 +185,8 @@ class PiReportManager(PiDataMetrics):
             except Exception as e:
                 print(f"Failed to fetch LLM data for {w_name}: {e}")

-        #
+        # Generate Unique Name
         ws_ref = data_sources[0][1] if data_sources else "All"
-
+        unique_name = self._generate_unique_name("LLM", ws_ref)

-        self._export_data(all_rows, output_mode,
+        self._export_data(all_rows, output_mode, bq_config, spreadsheet_name, unique_name)
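Putting the manager changes together, a hedged end-to-end sketch (the constructor signature is not part of this diff, so the credentials argument and import path are assumptions):

from pidatametrics.manager import PiReportManager  # assumed import path

mgr = PiReportManager(api_key="...")  # hypothetical constructor

# gsheet mode: rows land in a tab named like Vol_<workspace>_<ddmm_HHMM>
mgr.run_volume_report(
    filename=None,              # the generated unique name now drives file/tab naming
    workspace_ids="51780",      # _resolve_workspaces takes an ids string
    output_mode='gsheet',
    spreadsheet_name="Pi Reports",
)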
{pidatametrics1-0.3.5.dist-info → pidatametrics1-0.3.7.dist-info}/METADATA
CHANGED

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: pidatametrics1
-Version: 0.3.5
-Summary: A wrapper for Pi Datametrics API with CSV and BigQuery support.
+Version: 0.3.7
+Summary: A test wrapper for Pi Datametrics API with CSV and BigQuery support.
 Requires-Dist: google-auth
 Requires-Dist: google-cloud-bigquery
 Requires-Dist: gspread
pidatametrics1-0.3.7.dist-info/RECORD
ADDED

@@ -0,0 +1,8 @@
+pidatametrics/__init__.py,sha256=cmNSHvjvMsYO1tMv0Nf-7LGjIJ8AFXmUIwiv8jQ34BI,137
+pidatametrics/client.py,sha256=tHH0GV0rk2SizVqRdKepjdDQevkfdWlHOJHwsPR2PCk,4399
+pidatametrics/exporter.py,sha256=yLN40kqwibHWs45gXKe_I1j9td9lJt-8LLFSS-Pk9-U,5813
+pidatametrics/manager.py,sha256=tUeeJ-wKAlhpWsaZEAjxtZCtA2EbQcTBB1JkXPEVV50,9101
+pidatametrics/parsers.py,sha256=fiLx3080wNubT1VqSIeDvlrKT85KdqlKhY6FaB2XuC8,5989
+pidatametrics1-0.3.7.dist-info/METADATA,sha256=3rqys7d0-SNpsghd25f3eS99WSl-1QYzTY0HjbAYNak,293
+pidatametrics1-0.3.7.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
+pidatametrics1-0.3.7.dist-info/RECORD,,
pidatametrics1-0.3.5.dist-info/RECORD
REMOVED

@@ -1,8 +0,0 @@
-pidatametrics/__init__.py,sha256=cmNSHvjvMsYO1tMv0Nf-7LGjIJ8AFXmUIwiv8jQ34BI,137
-pidatametrics/client.py,sha256=tHH0GV0rk2SizVqRdKepjdDQevkfdWlHOJHwsPR2PCk,4399
-pidatametrics/exporter.py,sha256=CcsdVhxI6rXi0zlQaYzFEGX0GL3ZaNV94Pj5r_WrZc4,4226
-pidatametrics/manager.py,sha256=Sz4ecxwtY-lVjDjXsYO_rmLbK5_o9ZU0Fdv1MK50r40,8899
-pidatametrics/parsers.py,sha256=fiLx3080wNubT1VqSIeDvlrKT85KdqlKhY6FaB2XuC8,5989
-pidatametrics1-0.3.5.dist-info/METADATA,sha256=GhLbrT6GUpcUq5F2PIzW5ThcnR-ApKkCL1WNkCBACzM,288
-pidatametrics1-0.3.5.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
-pidatametrics1-0.3.5.dist-info/RECORD,,
{pidatametrics1-0.3.5.dist-info → pidatametrics1-0.3.7.dist-info}/WHEEL
File without changes