PyPI - gitlytics - Versions diffs - 0.1.6__tar.gz → 0.2.0__tar.gz - Mend

gitlytics 0.1.6.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{gitlytics-0.1.6 → gitlytics-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gitlytics
-Version: 0.1.6
+Version: 0.2.0
 Summary: Monitor and automate your GitHub repository traffic analytics.
 Author-email: Ameya Chopade <ameyaccod171@gmail.com>
 License: Apache-2.0
@@ -96,7 +96,7 @@ Dynamic: license-file
 The full Gitlytics ecosystem spans across a few repositories. If you are looking for the live web dashboard or the automation cron job, check out the links below:
-- **[Gitlytics Web Ecosystem](https://github.com/ameyac11/gitlytics-deployement)**: The production landing page, React Dashboard, and React Documentation site.
+- 📊 **[Gitlytics Live Dashboard](https://dashboard.gitlytics.dev)**: The production web interface to visualize repository traffic analytics, trends, and historical charts.
 - ⚙️ **[Gitlytics Automation](https://github.com/ameyac11/gitlytics-github-traffic-automation)**: The GitHub Action companion tool that automates fetching and saving to defeat GitHub's 14-day traffic limit.
 ---

{gitlytics-0.1.6 → gitlytics-0.2.0}/README.md RENAMED Viewed

@@ -64,7 +64,7 @@
 The full Gitlytics ecosystem spans across a few repositories. If you are looking for the live web dashboard or the automation cron job, check out the links below:
-- **[Gitlytics Web Ecosystem](https://github.com/ameyac11/gitlytics-deployement)**: The production landing page, React Dashboard, and React Documentation site.
+- 📊 **[Gitlytics Live Dashboard](https://dashboard.gitlytics.dev)**: The production web interface to visualize repository traffic analytics, trends, and historical charts.
 - ⚙️ **[Gitlytics Automation](https://github.com/ameyac11/gitlytics-github-traffic-automation)**: The GitHub Action companion tool that automates fetching and saving to defeat GitHub's 14-day traffic limit.
 ---

{gitlytics-0.1.6 → gitlytics-0.2.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "gitlytics"
-version = "0.1.6"
+version = "0.2.0"
 description = "Monitor and automate your GitHub repository traffic analytics."
 readme = "README.md"
 requires-python = ">=3.9"

{gitlytics-0.1.6 → gitlytics-0.2.0}/src/gitlytics/__init__.py RENAMED Viewed

@@ -8,7 +8,7 @@ import json
 # Single source of truth for the package version.
 # Mirrors the version in pyproject.toml — keep them in sync.
-__version__ = "0.1.6"
+__version__ = "0.2.0"
 __all__ = ["fetch_traffic", "sync", "serve_dashboard", "__version__"]
@@ -98,6 +98,11 @@ def sync(token: str, repo_name=None, data_dir: str = "./data", output_mode: str
             exported JSON — acts as a security firewall.
         metrics: Optional list of metrics to fetch (e.g., ``["views", "clones"]``).
     """
+    if data_dir and not os.path.isabs(data_dir) and not os.path.exists(data_dir):
+        parent_dir = os.path.join("..", data_dir)
+        if os.path.exists(parent_dir):
+            data_dir = parent_dir
     # Hand off to the automation engine — it handles deduplication and schema migration
     run_sync(
         token=token,
@@ -142,7 +147,17 @@ def serve_dashboard(host: str = "127.0.0.1", port: int = 8000, token: str = None
         if token:
             os.environ["GITLYTICS_TOKEN"] = token
         if data_dir:
-            os.environ["GITLYTICS_DATA_DIR"] = os.path.abspath(data_dir)
+            from pathlib import Path
+            abs_data_dir = os.path.abspath(data_dir)
+            if not os.path.exists(abs_data_dir) and not os.path.isabs(data_dir):
+                parent_dir = os.path.abspath(os.path.join("..", data_dir))
+                if os.path.exists(parent_dir):
+                    abs_data_dir = parent_dir
+            if not os.path.exists(abs_data_dir):
+                print(f"⚠️ Warning: The specified data directory '{data_dir}' (resolved to '{abs_data_dir}') does not exist.")
+            elif not any(Path(abs_data_dir).glob("traffic_*.csv")):
+                print(f"⚠️ Warning: No traffic_*.csv database files found in '{data_dir}' (resolved to '{abs_data_dir}').")
+            os.environ["GITLYTICS_DATA_DIR"] = abs_data_dir
         uvicorn.run("gitlytics.api:app", host=host, port=port, reload=False)
     finally:
         if _orig_token is None:

{gitlytics-0.1.6 → gitlytics-0.2.0}/src/gitlytics/api.py RENAMED Viewed

@@ -135,22 +135,38 @@ def get_traffic(token: str = Body("", embed=True)):
     if not ok:
         raise HTTPException(status_code=401, detail="Invalid token")
     data_dir = os.environ.get("GITLYTICS_DATA_DIR")
+    dfs = []
     if data_dir:
         data_dir_path = Path(data_dir)
-        csv_files = list(data_dir_path.glob("traffic_*.csv")) if data_dir_path.exists() else []
-        dfs = []
-        for f in csv_files:
-            try:
-                dfs.append(pd.read_csv(f))
-            except Exception as exc:
-                logger.warning(f"Skipping unreadable CSV '{f}': {exc}")
-        if dfs:
-            df = pd.concat(dfs, ignore_index=True)
-            df = df.drop_duplicates(subset=["date", "repository"], keep="last")
+        if not data_dir_path.exists():
+            logger.warning(f"Data directory '{data_dir}' does not exist.")
         else:
-            df = fetch_traffic_data(active_token)
+            csv_files = list(data_dir_path.glob("traffic_*.csv"))
+            if not csv_files:
+                logger.warning(f"No traffic_*.csv files found in '{data_dir}'.")
+            for f in csv_files:
+                try:
+                    dfs.append(pd.read_csv(f))
+                except Exception as exc:
+                    logger.warning(f"Skipping unreadable CSV '{f}': {exc}")
+    try:
+        live_df = fetch_traffic_data(active_token)
+    except Exception as exc:
+        logger.warning(f"Failed to fetch live traffic: {exc}")
+        live_df = pd.DataFrame()
+    if dfs:
+        csv_df = pd.concat(dfs, ignore_index=True)
+        if not live_df.empty:
+            df = pd.concat([csv_df, live_df], ignore_index=True)
+        else:
+            df = csv_df
     else:
-        df = fetch_traffic_data(active_token)
+        df = live_df
+    if not df.empty:
+        df = df.drop_duplicates(subset=["date", "repository"], keep="last")
     df = df.replace([float('inf'), float('-inf')], None).where(pd.notnull(df), None)
@@ -171,8 +187,17 @@ def get_traffic(token: str = Body("", embed=True)):
 @app.post("/api/upload-csv")
 def upload_csv(file: UploadFile = File(...)):
-    # Accept a user-uploaded CSV — deep stats not available in CSV mode
     try:
+        data_dir = os.environ.get("GITLYTICS_DATA_DIR")
+        if data_dir:
+            data_dir_path = Path(data_dir)
+            data_dir_path.mkdir(parents=True, exist_ok=True)
+            file.file.seek(0)
+            content = file.file.read()
+            file.file.seek(0)
+            dest = data_dir_path / f"traffic_uploaded_{int(_time.time())}.csv"
+            with open(dest, "wb") as f:
+                f.write(content)
         df = process_uploaded_csv(file.file)
         df = df.replace([float('inf'), float('-inf')], None).where(pd.notnull(df), None)
         payload = build_react_payload(df, deep_stats=None)
@@ -192,7 +217,7 @@ def serve_index():
         return FileResponse(index_file)
     return JSONResponse(
         status_code=503,
-        content={"error": "Dashboard not found. Run 'npm run build' in the dashboard directory."}
+        content={"error": "Dashboard assets not found in the package installation."}
     )
@@ -209,7 +234,7 @@ def serve_spa_fallback(full_path: str):
     return JSONResponse(
         status_code=503,
-        content={"error": "Dashboard not found. Run 'npm run build' in the dashboard directory."}
+        content={"error": "Dashboard assets not found in the package installation."}
     )

{gitlytics-0.1.6 → gitlytics-0.2.0}/src/gitlytics/automation.py RENAMED Viewed

@@ -130,13 +130,15 @@ def run_sync_cycle(token: str, repo_names=None, data_dir="./data", output_mode="
     else:
         existing_fields = new_fields
-    # Overwrite existing rows with fresh data, and add brand-new day rows
+    # Merge fresh data into existing rows — preserves columns not present in this sync run
     new_records_added = 0
     for _, row in df.iterrows():
         key = (str(row["repository"]), str(row["date"]))
         if key not in existing_data:
             new_records_added += 1
-        existing_data[key] = row.to_dict()
+            existing_data[key] = row.to_dict()
+        else:
+            existing_data[key].update(row.to_dict())
     # Sort all rows by date and repo name before writing back to disk
     final_rows = []

{gitlytics-0.1.6 → gitlytics-0.2.0}/src/gitlytics/core.py RENAMED Viewed

@@ -221,8 +221,8 @@ def get_deep_repo_stats(token: str, full_name: str) -> dict:
         "has_code_of_conduct": None,
     }
-    # Commit activity — GitHub computes this async and returns 202 when not ready
-    # We do not block the thread pool worker with sleep; accept None on 202 (C-1)
+    # Commit activity — GitHub computes this async and returns 202 when not ready.
+    # total_commits here is the trailing 52-week (12-month) sum, NOT lifetime.
     ca_url = f"{BASE}/repos/{full_name}/stats/commit_activity"
     ca_data, status = _safe_get(ca_url, h)
     if status == 202:
@@ -254,12 +254,35 @@ def get_deep_repo_stats(token: str, full_name: str) -> dict:
         stats["has_contributing"] = bool(files.get("contributing"))
         stats["has_code_of_conduct"] = bool(files.get("code_of_conduct"))
-    # Releases — count and most recent publish date
-    rel_data, _ = _safe_get(f"{BASE}/repos/{full_name}/releases", h, params={"per_page": 100})
-    if isinstance(rel_data, list):
-        stats["total_releases"] = len(rel_data)
-        if rel_data:
-            stats["last_release_at"] = rel_data[0].get("published_at") or rel_data[0].get("created_at")
+    # Releases — count real total using Link header pagination, then get latest date
+    try:
+        rel_resp = requests.get(
+            f"{BASE}/repos/{full_name}/releases",
+            headers=h,
+            params={"per_page": 1},
+            timeout=10,
+        )
+        if rel_resp.status_code == 200:
+            link = rel_resp.headers.get("Link", "")
+            if 'rel="last"' in link:
+                import re as _re
+                m = _re.search(r'page=(\d+)>; rel="last"', link)
+                stats["total_releases"] = int(m.group(1)) if m else 1
+            else:
+                # Only one page — count items in this page
+                items = rel_resp.json()
+                stats["total_releases"] = len(items) if isinstance(items, list) else 0
+            # Fetch the latest release separately for its date
+            latest_resp = requests.get(
+                f"{BASE}/repos/{full_name}/releases/latest",
+                headers=h,
+                timeout=10,
+            )
+            if latest_resp.status_code == 200:
+                latest = latest_resp.json()
+                stats["last_release_at"] = latest.get("published_at") or latest.get("created_at")
+    except Exception as exc:
+        logger.warning(f"Could not fetch releases for {full_name}: {exc}")
     return stats

{gitlytics-0.1.6 → gitlytics-0.2.0}/src/gitlytics/process.py RENAMED Viewed

@@ -35,12 +35,12 @@ def build_json_payload(df: pd.DataFrame, return_format: str = "timeseries", expo
     for repo, group in df.groupby("repository"):
         group = group.sort_values("date")
-        r_views = int(group["views"].sum()) if "views" in group.columns else 0
-        r_clones = int(group["clones"].sum()) if "clones" in group.columns else 0
-        r_unique_v = int(group["unique_visitors"].sum()) if "unique_visitors" in group.columns else 0
-        r_unique_c = int(group["unique_cloners"].sum()) if "unique_cloners" in group.columns else 0
-        r_stars = int(group["stars"].dropna().iloc[-1]) if "stars" in group.columns and not group["stars"].dropna().empty else 0
-        r_forks = int(group["forks"].dropna().iloc[-1]) if "forks" in group.columns and not group["forks"].dropna().empty else 0
+        r_views = _safe_int(group["views"].sum()) if "views" in group.columns else 0
+        r_clones = _safe_int(group["clones"].sum()) if "clones" in group.columns else 0
+        r_unique_v = _safe_int(group["unique_visitors"].sum()) if "unique_visitors" in group.columns else 0
+        r_unique_c = _safe_int(group["unique_cloners"].sum()) if "unique_cloners" in group.columns else 0
+        r_stars = _safe_int(group["stars"].dropna().iloc[-1]) if "stars" in group.columns and not group["stars"].dropna().empty else 0
+        r_forks = _safe_int(group["forks"].dropna().iloc[-1]) if "forks" in group.columns and not group["forks"].dropna().empty else 0
         r_is_private = bool(group["is_private"].dropna().iloc[-1]) if "is_private" in group.columns and not group["is_private"].dropna().empty else False
         top_ref = str(group["top_referrer"].dropna().iloc[-1]) if "top_referrer" in group.columns and not group["top_referrer"].dropna().empty else ""
@@ -103,6 +103,11 @@ def build_json_payload(df: pd.DataFrame, return_format: str = "timeseries", expo
 def process_uploaded_csv(uploaded_file) -> pd.DataFrame:
     """Reads a user-uploaded CSV and normalises column names to match our tidy schema."""
     raw_df = pd.read_csv(uploaded_file)
+    # Normalise capitalized "Date" column before any other checks
+    if "Date" in raw_df.columns and "date" not in raw_df.columns:
+        raw_df = raw_df.rename(columns={"Date": "date"})
     if "repository" not in raw_df.columns:
         if "repo_name" in raw_df.columns:
             raw_df = raw_df.rename(columns={"repo_name": "repository"})
@@ -113,7 +118,7 @@ def process_uploaded_csv(uploaded_file) -> pd.DataFrame:
             })
         else:
             raise ValueError("Invalid CSV format: missing 'repository' column")
-    # M-5: validate date column exists after renaming so callers get 400 not a 500 KeyError later
     if "date" not in raw_df.columns:
         raise ValueError("Invalid CSV format: missing required 'date' column")
     return raw_df
@@ -161,10 +166,10 @@ def build_react_payload(df: pd.DataFrame, deep_stats: dict = None) -> list:
     for repo, group in df.groupby("repository"):
         group = group.sort_values("date")
-        r_views = int(group["views"].sum()) if "views" in group.columns else 0
-        r_clones = int(group["clones"].sum()) if "clones" in group.columns else 0
-        r_unique_v = int(group["unique_visitors"].sum()) if "unique_visitors" in group.columns else 0
-        r_unique_c = int(group["unique_cloners"].sum()) if "unique_cloners" in group.columns else 0
+        r_views = _safe_int(group["views"].sum()) if "views" in group.columns else 0
+        r_clones = _safe_int(group["clones"].sum()) if "clones" in group.columns else 0
+        r_unique_v = _safe_int(group["unique_visitors"].sum()) if "unique_visitors" in group.columns else 0
+        r_unique_c = _safe_int(group["unique_cloners"].sum()) if "unique_cloners" in group.columns else 0
         r_stars = _safe_int(group["stars"].dropna().iloc[-1]) if "stars" in group.columns and not group["stars"].dropna().empty else 0
         r_forks = _safe_int(group["forks"].dropna().iloc[-1]) if "forks" in group.columns and not group["forks"].dropna().empty else 0
         r_is_private = bool(group["is_private"].dropna().iloc[-1]) if "is_private" in group.columns and not group["is_private"].dropna().empty else False
@@ -194,13 +199,13 @@ def build_react_payload(df: pd.DataFrame, deep_stats: dict = None) -> list:
             date_str = str(row["date"])
             daily_views.append({
                 "timestamp": date_str,
-                "count": int(row.get("views", 0)),
-                "uniques": int(row.get("unique_visitors", 0))
+                "count": _safe_int(row.get("views", 0)),
+                "uniques": _safe_int(row.get("unique_visitors", 0))
             })
             daily_clones.append({
                 "timestamp": date_str,
-                "count": int(row.get("clones", 0)),
-                "uniques": int(row.get("unique_cloners", 0))  # fixed typo
+                "count": _safe_int(row.get("clones", 0)),
+                "uniques": _safe_int(row.get("unique_cloners", 0))  # fixed typo
             })
         raw_refs_val = group["_raw_referrers"].iloc[-1] if "_raw_referrers" in group.columns else None

gitlytics 0.1.6__tar.gz → 0.2.0__tar.gz

gitlytics 0.1.6.tar.gz → 0.2.0.tar.gz