pidatametrics1-0.3.4-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pidatametrics/__init__.py +4 -0
- pidatametrics/client.py +104 -0
- pidatametrics/exporter.py +124 -0
- pidatametrics/manager.py +156 -0
- pidatametrics/parsers.py +156 -0
- pidatametrics1-0.3.4.dist-info/METADATA +10 -0
- pidatametrics1-0.3.4.dist-info/RECORD +8 -0
- pidatametrics1-0.3.4.dist-info/WHEEL +5 -0
pidatametrics/client.py
ADDED
@@ -0,0 +1,104 @@
# --- START OF FILE client.py ---

import requests
from requests.auth import HTTPBasicAuth
from urllib.parse import urlencode

class PiDataMetrics:
    def __init__(self, client_id, client_secret, account_id=1377):
        self.account_id = account_id
        self.auth_url = "https://app.pi-datametrics.com/api/auth"
        self.base_url = f"https://app.pi-datametrics.com/api/accounts/{account_id}"
        self.access_token = self._get_access_token(client_id, client_secret)
        self.headers = {
            'Authorization': f'Bearer {self.access_token}',
            'Content-Type': 'application/json'
        }

    def _get_access_token(self, client_id, client_secret):
        data = {"grant_type": "client_credentials"}
        auth = HTTPBasicAuth(client_id, client_secret)
        try:
            response = requests.post(self.auth_url, data=data, auth=auth)
            response.raise_for_status()
            return response.json()['access_token']
        except requests.exceptions.RequestException as e:
            raise SystemExit(f"Authentication Failed: {e}")

    def fetch_endpoint(self, endpoint_path, params=None):
        url = f"{self.base_url}/{endpoint_path}"

        # --- FIX: Manually encode params to preserve brackets [] and order ---
        if params:
            # safe='[]' prevents encoding brackets to %5B%5D
            # doseq=True handles lists correctly (e.g. key[]=val1&key[]=val2)
            query_string = urlencode(params, doseq=True, safe='[]')
            url = f"{url}?{query_string}"
            params = None  # Clear params so requests doesn't append them again
        # -----------------------------------------------------------

        response = requests.get(url, headers=self.headers, params=params)

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            print(f"API Error: {e}")
            print(f"Requested URL: {response.url}")
            print(f"Response Body: {response.text}")
            raise e

        return response.json().get('data', [])

    def get_workspaces(self):
        return self.fetch_endpoint("workspaces")

    def get_stgs(self, workspace_id):
        return self.fetch_endpoint(f"workspaces/{workspace_id}/search-term-groups")

    def get_search_terms(self, workspace_id, stg_id):
        return self.fetch_endpoint(f"workspaces/{workspace_id}/search-term-groups/{stg_id}/search-terms")

    def get_bulk_serp_data(self, workspace_id, search_engine_id, period, **kwargs):
        # The order here matters:
        # 1. search-engine-id
        # 2. period
        # 3. kwargs (which now has number-of-results FIRST, then serp-feature[])
        params = {"search-engine-id": search_engine_id, "period": period}
        params.update(kwargs)
        return self.fetch_endpoint(f"workspaces/{workspace_id}/search-data/bulk-search-results", params=params)

    def get_bulk_volume(self, workspace_id, start_date=None, end_date=None):
        params = {}
        if start_date and end_date:
            params = {'start-period': start_date, 'end-period': end_date}
        return self.fetch_endpoint(f"workspaces/{workspace_id}/volume-data/bulk-search-volume", params=params)

    def get_llm_mentions(self, workspace_id, search_engine_id, start_period, end_period, stg_ids=None):
        url = "https://app.pi-datametrics.com/api/data/llm/mentions"

        params = {
            "account-id": self.account_id,
            "workspace-id": workspace_id,
            "start-period": start_period,
            "end-period": end_period,
            "search-engine-id": search_engine_id
        }

        if stg_ids:
            params["search-term-group-id[]"] = stg_ids

        # Apply the same fix here for consistency
        query_string = urlencode(params, doseq=True, safe='[]')
        url = f"{url}?{query_string}"

        response = requests.get(url, headers=self.headers)

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            print(f"API Error: {e}")
            print(f"Requested URL: {response.url}")
            print(f"Response Body: {response.text}")
            raise e

        return response.json().get('data', [])
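
For orientation, a minimal usage sketch of the client above. The credentials, account ID, workspace ID, and search engine ID are placeholders, not values shipped with the package; the hyphenated parameter names mirror what fetch_endpoint encodes.

# Hypothetical usage sketch for PiDataMetrics (placeholder credentials and IDs).
from pidatametrics.client import PiDataMetrics

# Constructing the client performs the OAuth client-credentials exchange immediately.
client = PiDataMetrics(client_id="YOUR_CLIENT_ID",
                       client_secret="YOUR_CLIENT_SECRET",
                       account_id=1377)

# List workspaces, then pull a day's top-10 classic links for one of them.
workspaces = client.get_workspaces()
print([(ws.get('id'), ws.get('name')) for ws in workspaces])

serp_rows = client.get_bulk_serp_data(
    12345,                       # placeholder workspace ID
    1,                           # placeholder search engine ID
    "2024-01-01",                # period as a YYYY-MM-DD string
    **{"number-of-results": 10, "serp-feature[]": ["classicLink"]}
)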
pidatametrics/exporter.py
ADDED
@@ -0,0 +1,124 @@
import csv
import json
from google.cloud import bigquery

# Optional imports with error handling
try:
    import pandas as pd
except ImportError:
    pd = None

try:
    import gspread
    import google.auth
except ImportError:
    gspread = None

class PiExporter:
    @staticmethod
    def to_csv(data, filename):
        if not data:
            print("No data to export.")
            return

        if not filename.endswith('.csv'):
            filename += '.csv'

        keys = data[0].keys()
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(data)
        print(f"Successfully saved {len(data)} rows to {filename}")

    @staticmethod
    def to_excel(data, filename):
        """Exports data to Excel. Requires pandas and openpyxl."""
        if pd is None:
            print("Error: Pandas not installed. Run: pip install pandas openpyxl")
            return
        if not data:
            print("No data to export.")
            return

        if not filename.endswith('.xlsx'):
            filename += '.xlsx'

        try:
            df = pd.DataFrame(data)
            df.to_excel(filename, index=False)
            print(f"Successfully saved {len(data)} rows to {filename}")
        except Exception as e:
            print(f"Excel Export Failed: {e}")

    @staticmethod
    def to_google_sheet(data, spreadsheet_name, tab_name="Sheet1"):
        """
        Exports to a Google Sheet, opened by spreadsheet name (title), not by ID.
        Uses the active Colab authentication.
        """
        if gspread is None:
            print("Error: gspread not installed. Run: pip install gspread google-auth")
            return
        if not data:
            print("No data to upload.")
            return

        print(f"Connecting to Google Sheet: '{spreadsheet_name}'...")

        try:
            # 1. Get default credentials (works with Colab auth.authenticate_user)
            creds, _ = google.auth.default()
            client = gspread.authorize(creds)

            # 2. Open by name (title)
            try:
                sh = client.open(spreadsheet_name)
            except gspread.SpreadsheetNotFound:
                print(f"Sheet '{spreadsheet_name}' not found. Creating it...")
                sh = client.create(spreadsheet_name)

            # 3. Select or create worksheet (tab)
            try:
                worksheet = sh.worksheet(tab_name)
                worksheet.clear()  # Clear old data
            except gspread.WorksheetNotFound:
                worksheet = sh.add_worksheet(title=tab_name, rows=len(data)+100, cols=20)

            # 4. Prepare data
            headers = list(data[0].keys())
            rows = [[row.get(col, '') for col in headers] for row in data]
            all_values = [headers] + rows

            # 5. Update
            worksheet.update(all_values)
            print(f"Successfully uploaded {len(data)} rows to '{spreadsheet_name}' (Tab: {tab_name})")

        except Exception as e:
            print(f"Google Sheet Upload Failed: {e}")

    @staticmethod
    def to_bigquery(data, project_id, dataset_id, table_id):
        if not data:
            print("No data to upload.")
            return

        client = bigquery.Client(project=project_id)
        table_ref = f"{project_id}.{dataset_id}.{table_id}"

        print(f"Uploading {len(data)} rows to BigQuery table {table_ref}...")

        job_config = bigquery.LoadJobConfig(
            autodetect=True,
            source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
            write_disposition=bigquery.WriteDisposition.WRITE_APPEND
        )

        try:
            job = client.load_table_from_json(data, table_ref, job_config=job_config)
            job.result()
            print(f"Upload successful. Loaded {job.output_rows} rows.")
        except Exception as e:
            print(f"BigQuery Upload Failed: {e}")
            if hasattr(e, 'errors'):
                print(e.errors)
pidatametrics/manager.py
ADDED
@@ -0,0 +1,156 @@
from .client import PiDataMetrics
from .parsers import PiParsers
from .exporter import PiExporter
import datetime
from dateutil.relativedelta import relativedelta

class PiReportManager(PiDataMetrics):
    def _resolve_workspaces(self, ids_str=None, name_pattern=None):
        all_ws = self.get_workspaces()
        targets = {}
        if ids_str and ids_str.strip():
            target_ids = [int(x.strip()) for x in ids_str.split(',') if x.strip().isdigit()]
            for ws in all_ws:
                if ws['id'] in target_ids:
                    targets[ws['id']] = ws['name']
        elif name_pattern:
            for ws in all_ws:
                if ws.get('tracked') and name_pattern.lower() in ws['name'].lower():
                    targets[ws['id']] = ws['name']
        return targets

    def _generate_historical_dates(self, start_date_str, duration, frequency):
        dates = []
        try:
            current_date = datetime.datetime.strptime(start_date_str, "%Y-%m-%d")
        except ValueError:
            print(f"Error: Invalid date format {start_date_str}. Using yesterday.")
            current_date = datetime.datetime.now() - datetime.timedelta(days=1)
        if current_date > datetime.datetime.now():
            print(f"WARNING: Start date {current_date.strftime('%Y-%m-%d')} is in the future!")
        if frequency == 'weekly':
            days_since_sunday = (current_date.weekday() + 1) % 7
            if days_since_sunday > 0:
                current_date -= datetime.timedelta(days=days_since_sunday)
                print(f"Note: Adjusted start date to previous Sunday: {current_date.strftime('%Y-%m-%d')}")
        for _ in range(int(duration)):
            dates.append(current_date.strftime("%Y-%m-%d"))
            if frequency == 'daily':
                current_date -= datetime.timedelta(days=1)
            elif frequency == 'weekly':
                current_date -= datetime.timedelta(weeks=1)
            elif frequency == 'monthly':
                current_date -= relativedelta(months=1)
        return dates

    # --- HELPER METHOD FOR EXPORTING ---
    def _export_data(self, data, output_mode, filename, bq_config, spreadsheet_name, tab_name="Sheet1"):
        if not data:
            print("No data to export.")
            return

        if output_mode == 'bigquery' and bq_config:
            PiExporter.to_bigquery(data, bq_config['project'], bq_config['dataset'], bq_config['table'])
        elif output_mode == 'excel':
            PiExporter.to_excel(data, filename)
        elif output_mode == 'gsheet' and spreadsheet_name:
            PiExporter.to_google_sheet(data, spreadsheet_name, tab_name)
        else:
            # Default to CSV
            PiExporter.to_csv(data, filename)

    def run_volume_report(self, filename, workspace_ids=None, workspace_name=None, output_mode='csv', bq_config=None, spreadsheet_name=None):
        targets = self._resolve_workspaces(workspace_ids, workspace_name)
        if not targets:
            return
        all_rows = []
        for ws_id, ws_name in targets.items():
            vol_data = self.get_bulk_volume(ws_id)
            stgs = self.get_stgs(ws_id)
            for stg in stgs:
                terms = self.get_search_terms(ws_id, stg['id'])
                rows = PiParsers.parse_volume_data(vol_data, stg['name'], terms, ws_name)
                all_rows.extend(rows)

        self._export_data(all_rows, output_mode, filename, bq_config, spreadsheet_name, tab_name="Volume")

    def run_serp_report(self, data_sources, output_mode='csv', bq_config=None, filename=None, manual_duplication=None, spreadsheet_name=None):
        yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        all_rows = []
        for source in data_sources:
            market, w_id, w_name, se_id, se_name = source
            raw_data = self.get_bulk_serp_data(w_id, se_id, yesterday)
            cat_map = PiParsers.build_category_map(self, w_id)
            rows = PiParsers.parse_serp_response(raw_data, market, w_name, se_name, yesterday, cat_map, manual_duplication)
            all_rows.extend(rows)

        self._export_data(all_rows, output_mode, filename or "serp_output", bq_config, spreadsheet_name, tab_name="SERP")

    def run_historical_serp_report(self, data_sources, duration, frequency, start_date=None, features=None, num_results=25, output_mode='csv', bq_config=None, filename="historical_data", spreadsheet_name=None):
        if features is None:
            features = ['classicLink', 'popularProducts']

        if not start_date:
            start_date = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")

        target_dates = self._generate_historical_dates(start_date, duration, frequency)

        print(f"Starting Historical Report ({frequency}) for last {duration} periods...")

        all_file_rows = []  # Used for CSV, Excel, and GSheet

        for i, date in enumerate(target_dates):
            print(f"[{i+1}/{len(target_dates)}] Processing Date: {date}...")

            daily_rows = []  # Reset container for this specific date

            for source in data_sources:
                market, w_id, w_name, se_id, se_name = source
                try:
                    params = {
                        'number-of-results': num_results,
                        'serp-feature[]': features
                    }
                    raw_data = self.get_bulk_serp_data(w_id, se_id, date, **params)

                    rows = PiParsers.parse_serp_response(
                        raw_data, market, w_name, se_name, date, category_map=None
                    )

                    daily_rows.extend(rows)

                except Exception as e:
                    print(f"Failed to fetch {w_name} on {date}: {e}")

            # --- UPLOAD LOGIC: PER DATE (BigQuery only) ---
            if output_mode == 'bigquery' and bq_config:
                if daily_rows:
                    print(f"Uploading {len(daily_rows)} rows for {date} to BigQuery...")
                    PiExporter.to_bigquery(daily_rows, bq_config['project'], bq_config['dataset'], bq_config['table'])
                else:
                    print(f"No data found for {date}, skipping upload.")

            # --- FILE LOGIC: ACCUMULATE (CSV, Excel, GSheet) ---
            elif output_mode in ['csv', 'excel', 'gsheet']:
                all_file_rows.extend(daily_rows)

        # Final save for file-based outputs
        if output_mode in ['csv', 'excel', 'gsheet']:
            self._export_data(all_file_rows, output_mode, filename, bq_config, spreadsheet_name, tab_name="Historical")

    def run_llm_report(self, data_sources, start_period, end_period, stg_ids=None, output_mode='csv', bq_config=None, filename="llm_output", spreadsheet_name=None):
        all_rows = []
        print(f"Starting LLM Report from {start_period} to {end_period}...")

        for source in data_sources:
            market, w_id, w_name, se_id, se_name = source
            try:
                print(f"Fetching LLM data for {w_name} ({se_name})...")
                raw_data = self.get_llm_mentions(w_id, se_id, start_period, end_period, stg_ids)

                rows = PiParsers.parse_llm_response(raw_data, market, w_name, se_name)
                all_rows.extend(rows)
                print(f"Found {len(rows)} mentions/queries.")
            except Exception as e:
                print(f"Failed to fetch LLM data for {w_name}: {e}")

        self._export_data(all_rows, output_mode, filename, bq_config, spreadsheet_name, tab_name="LLM_Mentions")
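
A sketch of how the report manager above might be driven end to end. The data_sources tuples mirror the (market, workspace_id, workspace_name, search_engine_id, search_engine_name) unpacking used in the methods; all credentials, IDs, and BigQuery names are placeholders.

# Hypothetical usage sketch for PiReportManager (placeholder credentials, IDs, and config).
from pidatametrics.manager import PiReportManager

manager = PiReportManager(client_id="YOUR_CLIENT_ID", client_secret="YOUR_CLIENT_SECRET")

# Each source is (market, workspace_id, workspace_name, search_engine_id, search_engine_name).
data_sources = [
    ("UK", 12345, "UK Workspace", 1, "google.co.uk"),
]

# Single-day SERP snapshot written to CSV (the default output_mode).
manager.run_serp_report(data_sources, filename="serp_output")

# Twelve weekly snapshots, uploaded to BigQuery one date at a time.
manager.run_historical_serp_report(
    data_sources,
    duration=12,
    frequency="weekly",
    output_mode="bigquery",
    bq_config={"project": "my-project", "dataset": "my_dataset", "table": "serp_history"},
)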
pidatametrics/parsers.py
ADDED
@@ -0,0 +1,156 @@
import json

class PiParsers:
    @staticmethod
    def parse_serp_response(raw_data, market, workspace_name, search_engine_name, date, category_map=None, manual_duplication=None):
        """
        Transforms the nested 'Bulk Search Results' JSON into a flat list of dicts.
        """
        flat_rows = []

        for item in raw_data:
            search_term = item.get('searchTerm')

            category = ""
            if category_map:
                category = category_map.get(search_term, "")

            results = item.get('results', [])
            previous_pos = None

            if results:
                for res in results:
                    feature_type = res.get('feature')
                    title = res.get('title')

                    # If feature is popularProducts AND title is null/empty, skip this row.
                    if feature_type == 'popularProducts' and not title:
                        continue

                    # --- Logic for position fill-down ---
                    pos = res.get('position')
                    if pos is None:
                        pos = previous_pos

                    # --- Logic for attributes (Popular Products) ---
                    attributes = res.get('attributes', {})
                    price = None
                    site_name = None

                    if isinstance(attributes, dict):
                        price = attributes.get('price')
                        site_name = attributes.get('site')
                        attr_str = json.dumps(attributes)
                    else:
                        attr_str = None

                    # Create row
                    row = {
                        'Date': date,
                        'Market': market,
                        'SearchTerm': search_term,
                        'URL': res.get('url'),
                        'Position': pos,
                        'SERPFeature': feature_type,
                        'PageTitle': title,
                        'Price': price,
                        'SiteName': site_name,
                        'SearchEngine': search_engine_name,
                        'Attributes': attr_str,
                        'Category': category,
                        'Workspace': workspace_name
                    }

                    flat_rows.append(row)
                    previous_pos = pos
            else:
                pass

        return flat_rows

    @staticmethod
    def parse_volume_data(volume_data, stg_name, stg_terms, workspace_name):
        rows = []
        volume_lookup = {item.get('search-term'): item for item in volume_data}

        for term in stg_terms:
            term_text = term if isinstance(term, str) else term.get('term', '')

            if term_text in volume_lookup:
                item = volume_lookup[term_text]
                cpc = item.get('cpc', '')
                monthly_volume = item.get('monthly-volume', {})

                for month, vol in monthly_volume.items():
                    rows.append({
                        "Workspace": workspace_name,
                        "STG": stg_name,
                        "Search Term": term_text,
                        "Month": month,
                        "Search Volume": vol,
                        "CPC": cpc
                    })
        return rows

    @staticmethod
    def build_category_map(pi_client, workspace_id):
        mapping = {}
        stgs = pi_client.get_stgs(workspace_id)
        for stg in stgs:
            terms = pi_client.get_search_terms(workspace_id, stg['id'])
            for term in terms:
                t_text = term if isinstance(term, str) else term.get('term')
                mapping[t_text] = stg['name']
        return mapping

    # --- NEW: LLM Parser ---
    @staticmethod
    def parse_llm_response(raw_data, market, workspace_name, search_engine_name):
        """
        Flattens LLM mentions data.
        Each mention becomes a row. If no mentions exist for a query, the query is still recorded.
        """
        rows = []
        for item in raw_data:
            period = item.get('period')
            query = item.get('query')
            response_text = item.get('response')
            mentions = item.get('mentions', [])

            base_row = {
                'Period': period,
                'Market': market,
                'Workspace': workspace_name,
                'SearchEngine': search_engine_name,
                'Query': query,
                'LLM_Response': response_text
            }

            if mentions:
                for m in mentions:
                    row = base_row.copy()
                    row.update({
                        'Entity': m.get('entity'),
                        'Citation': m.get('citation'),
                        'Source_Site': m.get('source_site'),
                        'Source_Domain': m.get('source_domain'),
                        'Source_URL': m.get('source_url'),
                        'Sentiment': m.get('sentiment'),
                        'Sentiment_Category': m.get('sentiment_category')
                    })
                    rows.append(row)
            else:
                # Record the query even if no citations found
                row = base_row.copy()
                row.update({
                    'Entity': None,
                    'Citation': None,
                    'Source_Site': None,
                    'Source_Domain': None,
                    'Source_URL': None,
                    'Sentiment': None,
                    'Sentiment_Category': None
                })
                rows.append(row)

        return rows
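
To illustrate the flattening and position fill-down performed by parse_serp_response, here is a small sketch with a made-up payload shaped like the fields the parser reads (searchTerm, results, feature, position, title, attributes); the values are illustrative only.

# Hypothetical input shaped like the fields parse_serp_response reads (values are made up).
from pidatametrics.parsers import PiParsers

raw_data = [{
    "searchTerm": "running shoes",
    "results": [
        {"feature": "classicLink", "position": 1, "url": "https://example.com/a", "title": "Shoes A"},
        {"feature": "popularProducts", "position": None, "title": "Shoe B",
         "attributes": {"price": "59.99", "site": "example.com"}},
    ],
}]

rows = PiParsers.parse_serp_response(raw_data, "UK", "UK Workspace", "google.co.uk", "2024-01-01")
# The second row inherits Position 1 (fill-down) and carries Price/SiteName from attributes.
print(rows[1]["Position"], rows[1]["Price"], rows[1]["SiteName"])  # -> 1 59.99 example.com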
pidatametrics1-0.3.4.dist-info/METADATA
ADDED
@@ -0,0 +1,10 @@
Metadata-Version: 2.4
Name: pidatametrics1
Version: 0.3.4
Summary: A wrapper for the Pi Datametrics API with CSV and BigQuery support.
Requires-Dist: google-auth
Requires-Dist: google-cloud-bigquery
Requires-Dist: gspread
Requires-Dist: openpyxl
Requires-Dist: pandas
Requires-Dist: requests
pidatametrics1-0.3.4.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
pidatametrics/__init__.py,sha256=cmNSHvjvMsYO1tMv0Nf-7LGjIJ8AFXmUIwiv8jQ34BI,137
pidatametrics/client.py,sha256=tHH0GV0rk2SizVqRdKepjdDQevkfdWlHOJHwsPR2PCk,4399
pidatametrics/exporter.py,sha256=CcsdVhxI6rXi0zlQaYzFEGX0GL3ZaNV94Pj5r_WrZc4,4226
pidatametrics/manager.py,sha256=WbOQ-iZMhabkE49h3Rha0AyloqQgqdgiWBMc5S7v9jk,7888
pidatametrics/parsers.py,sha256=fiLx3080wNubT1VqSIeDvlrKT85KdqlKhY6FaB2XuC8,5989
pidatametrics1-0.3.4.dist-info/METADATA,sha256=PVNEkux2CMTwXSE0mVI-tmZu9LWzNTBzoke2WwsC9bk,288
pidatametrics1-0.3.4.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
pidatametrics1-0.3.4.dist-info/RECORD,,