pidatametrics-0.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
+ Metadata-Version: 2.4
+ Name: pidatametrics
+ Version: 0.3.0
+ Summary: A wrapper for Pi Datametrics API with CSV and BigQuery support.
+ Requires-Dist: google-cloud-bigquery
+ Requires-Dist: requests
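
For reference, these metadata fields can be read back at runtime once the package is installed; a minimal sketch using the standard library (assumes `pip install pidatametrics` has already been run):

from importlib.metadata import metadata, requires

meta = metadata("pidatametrics")
print(meta["Name"], meta["Version"])   # pidatametrics 0.3.0
print(requires("pidatametrics"))       # e.g. ['google-cloud-bigquery', 'requests']
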
File without changes
@@ -0,0 +1,15 @@
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "pidatametrics"
+ version = "0.3.0"
+ description = "A wrapper for Pi Datametrics API with CSV and BigQuery support."
+ dependencies = [
+     "requests",
+     "google-cloud-bigquery"
+ ]
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["src/pidatametrics"]
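
The hatchling configuration above implies a src/ layout; a sketch of the expected tree, inferred from the modules in this diff. Note that manager.py imports dateutil.relativedelta, so python-dateutil must be available at runtime even though it is not listed under [project].dependencies:

# Layout implied by packages = ["src/pidatametrics"] (inferred, not part of the package):
#
#   pyproject.toml
#   src/
#       pidatametrics/
#           __init__.py
#           client.py
#           parsers.py
#           exporter.py
#           manager.py
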
@@ -0,0 +1,4 @@
+ from .client import PiDataMetrics
+ from .parsers import PiParsers
+ from .exporter import PiExporter
+ from .manager import PiReportManager
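
Because __init__.py re-exports the four classes, callers can import them directly from the package root; a minimal sketch:

# Package-level imports exposed by __init__.py
from pidatametrics import PiDataMetrics, PiParsers, PiExporter, PiReportManager
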
@@ -0,0 +1,76 @@
+ import requests
+ from requests.auth import HTTPBasicAuth
+
+ class PiDataMetrics:
+     def __init__(self, client_id, client_secret, account_id=1377):
+         self.account_id = account_id  # Store account_id for global endpoints
+         self.auth_url = "https://app.pi-datametrics.com/api/auth"
+         self.base_url = f"https://app.pi-datametrics.com/api/accounts/{account_id}"
+         self.access_token = self._get_access_token(client_id, client_secret)
+         self.headers = {
+             'Authorization': f'Bearer {self.access_token}',
+             'Content-Type': 'application/json'
+         }
+
+     def _get_access_token(self, client_id, client_secret):
+         data = {"grant_type": "client_credentials"}
+         auth = HTTPBasicAuth(client_id, client_secret)
+         try:
+             response = requests.post(self.auth_url, data=data, auth=auth)
+             response.raise_for_status()
+             return response.json()['access_token']
+         except requests.exceptions.RequestException as e:
+             raise SystemExit(f"Authentication Failed: {e}")
+
+     # --- Generic Request Handler ---
+     def fetch_endpoint(self, endpoint_path, params=None):
+         """Generic method to fetch any endpoint relative to account base URL"""
+         url = f"{self.base_url}/{endpoint_path}"
+         response = requests.get(url, headers=self.headers, params=params)
+         response.raise_for_status()
+         return response.json().get('data', [])
+
+     # --- Specific Endpoint Wrappers ---
+     def get_workspaces(self):
+         return self.fetch_endpoint("workspaces")
+
+     def get_stgs(self, workspace_id):
+         return self.fetch_endpoint(f"workspaces/{workspace_id}/search-term-groups")
+
+     def get_search_terms(self, workspace_id, stg_id):
+         return self.fetch_endpoint(f"workspaces/{workspace_id}/search-term-groups/{stg_id}/search-terms")
+
+     def get_bulk_serp_data(self, workspace_id, search_engine_id, period, **kwargs):
+         params = {"search-engine-id": search_engine_id, "period": period}
+         params.update(kwargs)
+         return self.fetch_endpoint(f"workspaces/{workspace_id}/search-data/bulk-search-results", params=params)
+
+     def get_bulk_volume(self, workspace_id, start_date=None, end_date=None):
+         params = {}
+         if start_date and end_date:
+             params = {'start-period': start_date, 'end-period': end_date}
+         return self.fetch_endpoint(f"workspaces/{workspace_id}/volume-data/bulk-search-volume", params=params)
+
+     # --- NEW: LLM Mentions Endpoint ---
+     def get_llm_mentions(self, workspace_id, search_engine_id, start_period, end_period, stg_ids=None):
+         """
+         Fetches LLM citation data.
+         Note: This endpoint uses a different base path (/api/data/) than the standard account endpoints.
+         """
+         url = "https://app.pi-datametrics.com/api/data/llm/mentions"
+
+         params = {
+             "account-id": self.account_id,
+             "workspace-id": workspace_id,
+             "search-engine-id": search_engine_id,
+             "start-period": start_period,
+             "end-period": end_period
+         }
+
+         if stg_ids:
+             # API expects array of integers
+             params["search-term-group-id[]"] = stg_ids
+
+         response = requests.get(url, headers=self.headers, params=params)
+         response.raise_for_status()
+         return response.json().get('data', [])
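
A usage sketch for the client; the credentials, workspace index, search-engine ID, and dates below are placeholders, and instantiation performs a live token request against the auth endpoint:

from pidatametrics import PiDataMetrics

# Placeholder credentials -- constructing the client fetches an OAuth token immediately.
client = PiDataMetrics(client_id="YOUR_CLIENT_ID", client_secret="YOUR_CLIENT_SECRET")

# Walk the account hierarchy: workspaces -> search term groups -> search terms.
workspaces = client.get_workspaces()
ws_id = workspaces[0]['id']
stgs = client.get_stgs(ws_id)
terms = client.get_search_terms(ws_id, stgs[0]['id'])

# Bulk SERP results for a single day (period as YYYY-MM-DD; search_engine_id is a placeholder).
serp = client.get_bulk_serp_data(ws_id, search_engine_id=1, period="2024-01-01")

# LLM mentions over a date range, optionally filtered to specific search term groups.
mentions = client.get_llm_mentions(ws_id, search_engine_id=1,
                                   start_period="2024-01-01", end_period="2024-01-31",
                                   stg_ids=[stgs[0]['id']])
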
@@ -0,0 +1,62 @@
+ import csv
+ from google.cloud import bigquery
+
+ class PiExporter:
+     @staticmethod
+     def to_csv(data, filename):
+         if not data:
+             print("No data to export.")
+             return
+
+         if not filename.endswith('.csv'):
+             filename += '.csv'
+
+         keys = data[0].keys()
+         with open(filename, 'w', newline='', encoding='utf-8') as f:
+             writer = csv.DictWriter(f, fieldnames=keys)
+             writer.writeheader()
+             writer.writerows(data)
+         print(f"Successfully saved {len(data)} rows to {filename}")
+
+     @staticmethod
+     def to_bigquery(data, project_id, dataset_id, table_id):
+         if not data:
+             print("No data to upload.")
+             return
+
+         client = bigquery.Client(project=project_id)
+         table_ref = f"{project_id}.{dataset_id}.{table_id}"
+
+         print(f"Uploading {len(data)} rows to BigQuery table {table_ref}...")
+
+         # Auto-detect schema is usually fine for JSON inserts,
+         # but explicit schema is safer. For generic use, we try auto-detect first.
+         job_config = bigquery.LoadJobConfig(
+             autodetect=True,
+             source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
+             write_disposition=bigquery.WriteDisposition.WRITE_APPEND
+         )
+
+         try:
+             # --- CHANGE STARTED HERE ---
+             # Old Code (Caused SSL Error):
+             # errors = client.insert_rows_json(table_ref, data)
+
+             # New Code (Uses Batch Load + your job_config):
+             job = client.load_table_from_json(
+                 data,
+                 table_ref,
+                 job_config=job_config
+             )
+
+             # Wait for the job to complete (this is required for batch loads)
+             job.result()
+
+             print(f"Upload successful. Loaded {job.output_rows} rows.")
+             # --- CHANGE ENDED HERE ---
+
+         except Exception as e:
+             print(f"BigQuery Upload Failed: {e}")
+             # Optional: Print detailed error list if available in exception
+             if hasattr(e, 'errors'):
+                 print(e.errors)
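
A short sketch of both export paths; the rows are a made-up sample and the BigQuery project/dataset/table names are placeholders (to_bigquery assumes Google Cloud credentials are available, e.g. via Application Default Credentials):

from pidatametrics import PiExporter

rows = [
    {"SearchTerm": "example query", "Position": 1, "URL": "https://example.com"},
    {"SearchTerm": "example query", "Position": 2, "URL": "https://example.org"},
]

# CSV: a ".csv" extension is appended automatically if missing.
PiExporter.to_csv(rows, "serp_sample")

# BigQuery: batch load with schema autodetection, appending to the table.
PiExporter.to_bigquery(rows, project_id="my-gcp-project", dataset_id="pi_data", table_id="serp_sample")
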
@@ -0,0 +1,159 @@
+ from .client import PiDataMetrics
+ from .parsers import PiParsers
+ from .exporter import PiExporter
+ import datetime
+ from dateutil.relativedelta import relativedelta
+
+ class PiReportManager(PiDataMetrics):
+     def _resolve_workspaces(self, ids_str=None, name_pattern=None):
+         all_ws = self.get_workspaces()
+         targets = {}
+         if ids_str and ids_str.strip():
+             target_ids = [int(x.strip()) for x in ids_str.split(',') if x.strip().isdigit()]
+             for ws in all_ws:
+                 if ws['id'] in target_ids:
+                     targets[ws['id']] = ws['name']
+         elif name_pattern:
+             for ws in all_ws:
+                 if ws.get('tracked') and name_pattern.lower() in ws['name'].lower():
+                     targets[ws['id']] = ws['name']
+         return targets
+
+     def _generate_historical_dates(self, start_date_str, duration, frequency):
+         dates = []
+         try:
+             current_date = datetime.datetime.strptime(start_date_str, "%Y-%m-%d")
+         except ValueError:
+             print(f"Error: Invalid date format {start_date_str}. Using yesterday.")
+             current_date = datetime.datetime.now() - datetime.timedelta(days=1)
+         if current_date > datetime.datetime.now():
+             print(f"WARNING: Start date {current_date.strftime('%Y-%m-%d')} is in the future!")
+         if frequency == 'weekly':
+             days_since_sunday = (current_date.weekday() + 1) % 7
+             if days_since_sunday > 0:
+                 current_date -= datetime.timedelta(days=days_since_sunday)
+                 print(f"Note: Adjusted start date to previous Sunday: {current_date.strftime('%Y-%m-%d')}")
+         for _ in range(int(duration)):
+             dates.append(current_date.strftime("%Y-%m-%d"))
+             if frequency == 'daily':
+                 current_date -= datetime.timedelta(days=1)
+             elif frequency == 'weekly':
+                 current_date -= datetime.timedelta(weeks=1)
+             elif frequency == 'monthly':
+                 current_date -= relativedelta(months=1)
+         return dates
+
+     def run_volume_report(self, filename, workspace_ids=None, workspace_name=None, output_mode='csv', bq_config=None):
+         targets = self._resolve_workspaces(workspace_ids, workspace_name)
+         if not targets: return
+         all_rows = []
+         for ws_id, ws_name in targets.items():
+             vol_data = self.get_bulk_volume(ws_id)
+             stgs = self.get_stgs(ws_id)
+             for stg in stgs:
+                 terms = self.get_search_terms(ws_id, stg['id'])
+                 rows = PiParsers.parse_volume_data(vol_data, stg['name'], terms, ws_name)
+                 all_rows.extend(rows)
+
+         if output_mode == 'bigquery' and bq_config:
+             PiExporter.to_bigquery(all_rows, bq_config['project'], bq_config['dataset'], bq_config['table'])
+         else:
+             PiExporter.to_csv(all_rows, filename)
+
+     def run_serp_report(self, data_sources, output_mode='csv', bq_config=None, filename=None, manual_duplication=None):
+         yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
+         all_rows = []
+         for source in data_sources:
+             market, w_id, w_name, se_id, se_name = source
+             raw_data = self.get_bulk_serp_data(w_id, se_id, yesterday)
+             cat_map = PiParsers.build_category_map(self, w_id)
+             rows = PiParsers.parse_serp_response(raw_data, market, w_name, se_name, yesterday, cat_map, manual_duplication)
+             all_rows.extend(rows)
+
+         if output_mode == 'bigquery' and bq_config:
+             PiExporter.to_bigquery(all_rows, bq_config['project'], bq_config['dataset'], bq_config['table'])
+         else:
+             PiExporter.to_csv(all_rows, filename or "serp_output.csv")
+
+     def run_historical_serp_report(self, data_sources, duration, frequency, start_date=None, features=None, num_results=25, output_mode='csv', bq_config=None, filename="historical_data.csv"):
+         if features is None:
+             features = ['classicLink', 'popularProducts']
+
+         if not start_date:
+             start_date = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
+
+         target_dates = self._generate_historical_dates(start_date, duration, frequency)
+
+         print(f"Starting Historical Report ({frequency}) for last {duration} periods...")
+
+         all_csv_rows = []  # Only used if output_mode is CSV
+
+         for i, date in enumerate(target_dates):
+             print(f"[{i+1}/{len(target_dates)}] Processing Date: {date}...")
+
+             daily_rows = []  # Reset container for this specific date
+
+             for source in data_sources:
+                 market, w_id, w_name, se_id, se_name = source
+                 try:
+                     params = {
+                         'serp-feature[]': features,
+                         'number-of-results': num_results
+                     }
+                     raw_data = self.get_bulk_serp_data(w_id, se_id, date, **params)
+
+                     # Parser handles filtering null titles
+                     rows = PiParsers.parse_serp_response(
+                         raw_data, market, w_name, se_name, date, category_map=None
+                     )
+
+                     daily_rows.extend(rows)
+
+                 except Exception as e:
+                     print(f"Failed to fetch {w_name} on {date}: {e}")
+
+             # --- UPLOAD LOGIC: PER DATE ---
+             if output_mode == 'bigquery' and bq_config:
+                 if daily_rows:
+                     print(f"Uploading {len(daily_rows)} rows for {date} to BigQuery...")
+                     PiExporter.to_bigquery(daily_rows, bq_config['project'], bq_config['dataset'], bq_config['table'])
+                 else:
+                     print(f"No data found for {date}, skipping upload.")
+
+             # --- CSV LOGIC: ACCUMULATE ---
+             elif output_mode == 'csv':
+                 all_csv_rows.extend(daily_rows)
+
+         # Final Save for CSV only
+         if output_mode == 'csv':
+             PiExporter.to_csv(all_csv_rows, filename)
+
+     # --- NEW: LLM Report ---
+     def run_llm_report(self, data_sources, start_period, end_period, stg_ids=None, output_mode='csv', bq_config=None, filename="llm_output.csv"):
+         """
+         Runs the LLM Mentions report for the specified data sources and date range.
+
+         :param data_sources: List of tuples (market, workspace_id, workspace_name, search_engine_id, search_engine_name)
+         :param start_period: YYYY-MM-DD or YYYY-WWW
+         :param end_period: YYYY-MM-DD or YYYY-WWW
+         :param stg_ids: Optional list of integers to filter by Search Term Group
+         """
+         all_rows = []
+         print(f"Starting LLM Report from {start_period} to {end_period}...")
+
+         for source in data_sources:
+             market, w_id, w_name, se_id, se_name = source
+             try:
+                 print(f"Fetching LLM data for {w_name} ({se_name})...")
+                 raw_data = self.get_llm_mentions(w_id, se_id, start_period, end_period, stg_ids)
+
+                 rows = PiParsers.parse_llm_response(raw_data, market, w_name, se_name)
+                 all_rows.extend(rows)
+                 print(f"Found {len(rows)} mentions/queries.")
+             except Exception as e:
+                 print(f"Failed to fetch LLM data for {w_name}: {e}")
+
+         if output_mode == 'bigquery' and bq_config:
+             PiExporter.to_bigquery(all_rows, bq_config['project'], bq_config['dataset'], bq_config['table'])
+         else:
+             PiExporter.to_csv(all_rows, filename)
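
A usage sketch for the report manager; the credentials, workspace/search-engine IDs, and BigQuery settings are placeholders, and each data_sources entry follows the (market, workspace_id, workspace_name, search_engine_id, search_engine_name) tuple shape documented in run_llm_report:

from pidatametrics import PiReportManager

manager = PiReportManager(client_id="YOUR_CLIENT_ID", client_secret="YOUR_CLIENT_SECRET")

# (market, workspace_id, workspace_name, search_engine_id, search_engine_name) -- placeholder values.
sources = [("UK", 123, "UK Workspace", 1, "google.co.uk")]

# Yesterday's SERPs straight to CSV.
manager.run_serp_report(sources, output_mode='csv', filename="serp_output.csv")

# Twelve weekly snapshots, uploaded to BigQuery one date at a time.
bq = {"project": "my-gcp-project", "dataset": "pi_data", "table": "historical_serps"}
manager.run_historical_serp_report(sources, duration=12, frequency='weekly',
                                   output_mode='bigquery', bq_config=bq)

# LLM mentions for January, written to CSV.
manager.run_llm_report(sources, start_period="2024-01-01", end_period="2024-01-31",
                       filename="llm_output.csv")
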
@@ -0,0 +1,156 @@
+ import json
+
+ class PiParsers:
+     @staticmethod
+     def parse_serp_response(raw_data, market, workspace_name, search_engine_name, date, category_map=None, manual_duplication=None):
+         """
+         Transforms the nested 'Bulk Search Results' JSON into a flat list of dicts.
+         """
+         flat_rows = []
+
+         for item in raw_data:
+             search_term = item.get('searchTerm')
+
+             category = ""
+             if category_map:
+                 category = category_map.get(search_term, "")
+
+             results = item.get('results', [])
+             previous_pos = None
+
+             if results:
+                 for res in results:
+                     feature_type = res.get('feature')
+                     title = res.get('title')
+
+                     # If feature is popularProducts AND title is null/empty, SKIP this row.
+                     if feature_type == 'popularProducts' and not title:
+                         continue
+
+                     # --- Logic for Position Fill-Down ---
+                     pos = res.get('position')
+                     if pos is None:
+                         pos = previous_pos
+
+                     # --- Logic for Attributes (Popular Products) ---
+                     attributes = res.get('attributes', {})
+                     price = None
+                     site_name = None
+
+                     if isinstance(attributes, dict):
+                         price = attributes.get('price')
+                         site_name = attributes.get('site')
+                         attr_str = json.dumps(attributes)
+                     else:
+                         attr_str = None
+
+                     # Create Row
+                     row = {
+                         'Date': date,
+                         'Market': market,
+                         'SearchTerm': search_term,
+                         'URL': res.get('url'),
+                         'Position': pos,
+                         'SERPFeature': feature_type,
+                         'PageTitle': title,
+                         'Price': price,
+                         'SiteName': site_name,
+                         'SearchEngine': search_engine_name,
+                         'Attributes': attr_str,
+                         'Category': category,
+                         'Workspace': workspace_name
+                     }
+
+                     flat_rows.append(row)
+                     previous_pos = pos
+             else:
+                 pass
+
+         return flat_rows
+
+     @staticmethod
+     def parse_volume_data(volume_data, stg_name, stg_terms, workspace_name):
+         rows = []
+         volume_lookup = {item.get('search-term'): item for item in volume_data}
+
+         for term in stg_terms:
+             term_text = term if isinstance(term, str) else term.get('term', '')
+
+             if term_text in volume_lookup:
+                 item = volume_lookup[term_text]
+                 cpc = item.get('cpc', '')
+                 monthly_volume = item.get('monthly-volume', {})
+
+                 for month, vol in monthly_volume.items():
+                     rows.append({
+                         "Workspace": workspace_name,
+                         "STG": stg_name,
+                         "Search Term": term_text,
+                         "Month": month,
+                         "Search Volume": vol,
+                         "CPC": cpc
+                     })
+         return rows
+
+     @staticmethod
+     def build_category_map(pi_client, workspace_id):
+         mapping = {}
+         stgs = pi_client.get_stgs(workspace_id)
+         for stg in stgs:
+             terms = pi_client.get_search_terms(workspace_id, stg['id'])
+             for term in terms:
+                 t_text = term if isinstance(term, str) else term.get('term')
+                 mapping[t_text] = stg['name']
+         return mapping
+
+     # --- NEW: LLM Parser ---
+     @staticmethod
+     def parse_llm_response(raw_data, market, workspace_name, search_engine_name):
+         """
+         Flattens LLM mentions data.
+         Each mention becomes a row. If no mentions exist for a query, the query is still recorded.
+         """
+         rows = []
+         for item in raw_data:
+             period = item.get('period')
+             query = item.get('query')
+             response_text = item.get('response')
+             mentions = item.get('mentions', [])
+
+             base_row = {
+                 'Period': period,
+                 'Market': market,
+                 'Workspace': workspace_name,
+                 'SearchEngine': search_engine_name,
+                 'Query': query,
+                 'LLM_Response': response_text
+             }
+
+             if mentions:
+                 for m in mentions:
+                     row = base_row.copy()
+                     row.update({
+                         'Entity': m.get('entity'),
+                         'Citation': m.get('citation'),
+                         'Source_Site': m.get('source_site'),
+                         'Source_Domain': m.get('source_domain'),
+                         'Source_URL': m.get('source_url'),
+                         'Sentiment': m.get('sentiment'),
+                         'Sentiment_Category': m.get('sentiment_category')
+                     })
+                     rows.append(row)
+             else:
+                 # Record the query even if no citations found
+                 row = base_row.copy()
+                 row.update({
+                     'Entity': None,
+                     'Citation': None,
+                     'Source_Site': None,
+                     'Source_Domain': None,
+                     'Source_URL': None,
+                     'Sentiment': None,
+                     'Sentiment_Category': None
+                 })
+                 rows.append(row)
+
+         return rows
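
A small sketch of parse_serp_response's flattening, null-title filtering, and position fill-down, using a hand-made payload shaped like the fields the parser reads:

from pidatametrics import PiParsers

# Hand-made sample (not real API output) shaped like the fields the parser reads.
raw = [{
    "searchTerm": "running shoes",
    "results": [
        {"feature": "classicLink", "title": "Best Running Shoes", "position": 1, "url": "https://example.com"},
        # No position: inherits the previous result's position (fill-down).
        {"feature": "classicLink", "title": "Shoe Guide", "url": "https://example.org"},
        # popularProducts with a null title is skipped entirely.
        {"feature": "popularProducts", "title": None, "position": 2},
    ],
}]

rows = PiParsers.parse_serp_response(raw, market="UK", workspace_name="Demo",
                                     search_engine_name="google.co.uk", date="2024-01-01")
print(len(rows))            # 2 -- the null-title popularProducts row was dropped
print(rows[1]['Position'])  # 1 -- filled down from the previous result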