explainio-airflow-agent 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- explainio_airflow_agent-0.1.1/MANIFEST.in +1 -0
- explainio_airflow_agent-0.1.1/PKG-INFO +44 -0
- explainio_airflow_agent-0.1.1/README.md +119 -0
- explainio_airflow_agent-0.1.1/cost_agent/README.md +24 -0
- explainio_airflow_agent-0.1.1/cost_agent/__init__.py +6 -0
- explainio_airflow_agent-0.1.1/cost_agent/client.py +38 -0
- explainio_airflow_agent-0.1.1/cost_agent/extractors.py +45 -0
- explainio_airflow_agent-0.1.1/cost_agent/listener.py +24 -0
- explainio_airflow_agent-0.1.1/explainio_airflow_agent.egg-info/PKG-INFO +44 -0
- explainio_airflow_agent-0.1.1/explainio_airflow_agent.egg-info/SOURCES.txt +17 -0
- explainio_airflow_agent-0.1.1/explainio_airflow_agent.egg-info/dependency_links.txt +1 -0
- explainio_airflow_agent-0.1.1/explainio_airflow_agent.egg-info/requires.txt +1 -0
- explainio_airflow_agent-0.1.1/explainio_airflow_agent.egg-info/top_level.txt +1 -0
- explainio_airflow_agent-0.1.1/setup.cfg +4 -0
- explainio_airflow_agent-0.1.1/setup.py +21 -0
- explainio_airflow_agent-0.1.1/tests/test_aggregator.py +96 -0
- explainio_airflow_agent-0.1.1/tests/test_extractors.py +60 -0
- explainio_airflow_agent-0.1.1/tests/test_mailer.py +82 -0
- explainio_airflow_agent-0.1.1/tests/test_scheduler.py +86 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include cost_agent/README.md
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: explainio-airflow-agent
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Explain.io FinOps Airflow Cost Agent
|
|
5
|
+
Home-page: https://explain.io
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Framework :: Apache Airflow
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
Dynamic: classifier
|
|
14
|
+
Dynamic: description
|
|
15
|
+
Dynamic: description-content-type
|
|
16
|
+
Dynamic: home-page
|
|
17
|
+
Dynamic: requires-dist
|
|
18
|
+
Dynamic: requires-python
|
|
19
|
+
Dynamic: summary
|
|
20
|
+
|
|
21
|
+
# 💸 Explain.io - Airflow Cost Agent
|
|
22
|
+
|
|
23
|
+
[]()
|
|
24
|
+
[]()
|
|
25
|
+
[]()
|
|
26
|
+
|
|
27
|
+
**Explain.io** is a FinOps tool for Data Engineers. This lightweight Airflow plugin automatically attributes Google Cloud BigQuery compute costs to the exact DAG and Task that triggered them.
|
|
28
|
+
|
|
29
|
+
No more guessing which pipeline caused the bill spike. No more changing your SQL or Python code to add tracking tags.
|
|
30
|
+
|
|
31
|
+
## ✨ Features
|
|
32
|
+
* **Zero Code Changes:** Just add a single `on_success_callback` to your DAG definition.
|
|
33
|
+
* **Zero Blast Radius:** Network requests run asynchronously in a separate thread. If the Explain.io API goes down, your DAG **will still succeed**. Exceptions are silently swallowed and logged.
|
|
34
|
+
* **Instant Dashboard:** View your pipeline costs, projected monthly spend, and heaviest DAGs at [explain-io.vercel.app](https://explain-io.vercel.app) *(Update this link to your actual domain later)*.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## 🚀 Installation
|
|
39
|
+
|
|
40
|
+
Install the package directly from GitHub into your Airflow environment:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install explainio-airflow-agent
|
|
44
|
+
```
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Explain.io - Core Platform (Monorepo)
|
|
2
|
+
|
|
3
|
+
Explain.io is a B2B SaaS platform that provides active cost attribution for data pipelines (Airflow + BigQuery).
|
|
4
|
+
|
|
5
|
+
This repository contains the core infrastructure: the FastAPI backend that ingests webhook payloads from Airflow agents, and the Next.js frontend dashboard where users view their cost analytics.
|
|
6
|
+
|
|
7
|
+
> **🤖 AI Assistant / LLM Notice:**
|
|
8
|
+
> Before writing any code, please read the `agents.md` file in the root directory for strict architectural guidelines, security rules, and tech stack versions.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## 🏗 Repository Structure
|
|
13
|
+
|
|
14
|
+
```text
|
|
15
|
+
explain-io/
|
|
16
|
+
├── backend/ # FastAPI backend & Background Workers
|
|
17
|
+
│ ├── main.py # API entry point & webhook ingestion
|
|
18
|
+
│ ├── worker.py # apscheduler tasks (GCP cost calculation)
|
|
19
|
+
│ ├── pyproject.toml # Python dependencies (managed via uv)
|
|
20
|
+
│ └── .env # Backend secrets (Service Role)
|
|
21
|
+
├── frontend/ # Next.js App Router Dashboard
|
|
22
|
+
│ ├── app/ # React components & pages
|
|
23
|
+
│ ├── utils/supabase/ # @supabase/ssr client/server utilities
|
|
24
|
+
│ ├── package.json # Node dependencies
|
|
25
|
+
│ └── .env.local # Frontend secrets (Anon Key)
|
|
26
|
+
├── database/ # SQL Migrations
|
|
27
|
+
│ └── 0001_initial.sql # Supabase schema & RLS policies
|
|
28
|
+
└── agents.md # Strict context and rules for AI agents
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## 🚀 Tech Stack
|
|
32
|
+
|
|
33
|
+
* **Frontend**: Next.js (App Router), React, TypeScript, Tailwind CSS, Recharts.
|
|
34
|
+
* **Backend**: FastAPI, Python 3.12+, `uv` (Package Manager), `apscheduler`.
|
|
35
|
+
* **Database & Auth**: Supabase (PostgreSQL), strict Row Level Security (RLS).
|
|
36
|
+
* **Hosting**: Vercel (Frontend), Render (Backend).
|
|
37
|
+
|
|
38
|
+
## 💻 Local Development Setup
|
|
39
|
+
|
|
40
|
+
### Prerequisites
|
|
41
|
+
|
|
42
|
+
* Node.js (v18+)
|
|
43
|
+
* Python (3.12+)
|
|
44
|
+
* `uv` (Extremely fast Python package manager)
|
|
45
|
+
* A Supabase project with the schema applied from `database/0001_initial.sql`.
|
|
46
|
+
|
|
47
|
+
### 1. Database Setup
|
|
48
|
+
|
|
49
|
+
1. Create a Supabase project.
|
|
50
|
+
2. Run the SQL from `database/0001_initial.sql` in the SQL Editor to create tables and RLS policies.
|
|
51
|
+
3. Grab your anon public key and service_role secret key from the Supabase API settings.
|
|
52
|
+
|
|
53
|
+
### 2. Backend Setup (`/backend`)
|
|
54
|
+
|
|
55
|
+
The backend is responsible for ingesting agent data and querying GCP APIs.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
cd backend
|
|
59
|
+
|
|
60
|
+
# 1. Install dependencies using uv
|
|
61
|
+
uv sync
|
|
62
|
+
|
|
63
|
+
# 2. Set up environment variables
|
|
64
|
+
cp .env.example .env
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Backend `.env` format:
|
|
68
|
+
|
|
69
|
+
```ini
|
|
70
|
+
SUPABASE_URL="https://your-project.supabase.co"
|
|
71
|
+
SUPABASE_KEY="your_secret_SERVICE_ROLE_key" # Bypass RLS
|
|
72
|
+
GOOGLE_APPLICATION_CREDENTIALS="/path/to/your/gcp-service-account.json"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Run the backend locally:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
uv run uvicorn main:app --reload --port 8000
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### 3. Frontend Setup (`/frontend`)
|
|
82
|
+
|
|
83
|
+
The frontend is the customer-facing dashboard.
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
cd frontend
|
|
87
|
+
|
|
88
|
+
# 1. Install dependencies
|
|
89
|
+
npm install
|
|
90
|
+
|
|
91
|
+
# 2. Set up environment variables
|
|
92
|
+
cp .env.example .env.local
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Frontend `.env.local` format:
|
|
96
|
+
|
|
97
|
+
```ini
|
|
98
|
+
NEXT_PUBLIC_SUPABASE_URL="https://your-project.supabase.co"
|
|
99
|
+
NEXT_PUBLIC_SUPABASE_ANON_KEY="your_public_ANON_key" # Obeys RLS
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Run the frontend locally:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
npm run dev
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
The dashboard will be available at `http://localhost:3000`.
|
|
109
|
+
|
|
110
|
+
## 🔒 Security Posture
|
|
111
|
+
|
|
112
|
+
* **Frontend Authentication**: Handled via `@supabase/ssr`.
|
|
113
|
+
* **Database Security**: Locked down completely by Postgres RLS. The API keys verify identity, but RLS guarantees tenant isolation (a user can only see rows in `cost_ledger` matching their `user_id` in `user_projects`).
|
|
114
|
+
* **Backend Ingestion**: Uses user-generated API keys (stored in `api_keys` table) passed via Bearer token to map incoming DAG runs to the correct `project_id`.
|
|
115
|
+
|
|
116
|
+
## 🚢 Deployment
|
|
117
|
+
|
|
118
|
+
* **Frontend**: Auto-deploys via Vercel integration on pushes to `main`.
|
|
119
|
+
* **Backend**: Auto-deploys via Render integration on pushes to `main`. Ensure the Render Root Directory is set to `backend/`.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# 💸 Explain.io - Airflow Cost Agent
|
|
2
|
+
|
|
3
|
+
[]()
|
|
4
|
+
[]()
|
|
5
|
+
[]()
|
|
6
|
+
|
|
7
|
+
**Explain.io** is a FinOps tool for Data Engineers. This lightweight Airflow plugin automatically attributes Google Cloud BigQuery compute costs to the exact DAG and Task that triggered them.
|
|
8
|
+
|
|
9
|
+
No more guessing which pipeline caused the bill spike. No more changing your SQL or Python code to add tracking tags.
|
|
10
|
+
|
|
11
|
+
## ✨ Features
|
|
12
|
+
* **Zero Code Changes:** Just add a single `on_success_callback` to your DAG definition.
|
|
13
|
+
* **Zero Blast Radius:** Network requests run asynchronously in a separate thread. If the Explain.io API goes down, your DAG **will still succeed**. Exceptions are caught and logged; they are never propagated to your pipeline.
|
|
14
|
+
* **Instant Dashboard:** View your pipeline costs, projected monthly spend, and heaviest DAGs at [explain-io.vercel.app](https://explain-io.vercel.app).
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## 🚀 Installation
|
|
19
|
+
|
|
20
|
+
Install the package from PyPI into your Airflow environment:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install explainio-airflow-agent
|
|
24
|
+
```
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
log = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
# Your SaaS Backend URL
|
|
9
|
+
# Default points to the production API, can be overridden by EXPLAIN_IO_API_URL for local dev
|
|
10
|
+
API_URL = os.getenv("EXPLAIN_IO_API_URL", "https://api.explain.io/api/v1/ingest")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _send_async(payload, api_key):
    """POST one cost event to the ingest endpoint; never raises.

    Meant to run on a background thread so the caller is not blocked by
    network latency.

    Args:
        payload: JSON-serialisable event body.
        api_key: Value sent as the ``Authorization: Bearer`` token.

    Returns:
        None. Every failure is logged and swallowed so that a network or
        server problem can never crash the user's pipeline.
    """
    try:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        log.info("Cost Agent: Sending payload inside _send_async")
        log.info("%s", payload)
        response = requests.post(API_URL, json=payload, headers=headers, timeout=5)
        # Any 2xx status means the server accepted the event; only statuses
        # outside that range are reported as a rejection.
        if not 200 <= response.status_code < 300:
            log.warning("Cost Agent: Server rejected payload %s", response.status_code)
    except Exception as e:
        # Never crash the user's pipeline because of a network error.
        # Logged at WARNING (same level as a server rejection) so the failure
        # is visible with default logging configuration.
        log.warning("Cost Agent: Failed to send event: %s", e)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def send_event(payload):
    """Hand ``payload`` to a background daemon thread for delivery.

    The API key is read from the ``EXPLAIN_IO_API_KEY`` environment variable
    on every call. When it is unset or empty the event is dropped, an info
    message is logged, and no thread is started.
    """
    api_key = os.getenv("EXPLAIN_IO_API_KEY")
    if api_key:
        # Run the HTTP call off-thread so the caller returns immediately.
        worker = threading.Thread(
            target=_send_async,
            args=(payload, api_key),
            daemon=True,
        )
        worker.start()
        return

    # No key configured: the plugin is treated as disabled.
    log.info("Cost Agent: Plugin disabled (no EXPLAIN_IO_API_KEY found)")
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
# Dual support for Airflow 2.x and 3.x
|
|
4
|
+
try:
|
|
5
|
+
from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance as TI
|
|
6
|
+
IS_V3 = True
|
|
7
|
+
except ImportError:
|
|
8
|
+
from airflow.models.taskinstance import TaskInstance as TI
|
|
9
|
+
IS_V3 = False
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
def extract_job_metadata(task_instance):
    """Return external cloud-job metadata for a task instance, or ``None``.

    The operator is identified by ``task.operator_name`` when present and
    truthy, otherwise by the task's class name. Only operators whose name
    contains ``"BigQuery"`` are handled; anything else yields ``None``.

    Args:
        task_instance: Object exposing ``task``, ``task_id`` and
            ``xcom_pull`` (duck-typed; no Airflow type is required here).

    Returns:
        A dict with ``provider``, ``job_id``, ``project_id`` and ``location``
        keys, or ``None`` when no job id could be determined.
    """
    task = task_instance.task
    operator_name = getattr(task, "operator_name", None) or task.__class__.__name__

    if "BigQuery" in operator_name:
        return _extract_bigquery_id(task_instance)
    return None


def _extract_bigquery_id(ti):
    """Build the BigQuery metadata dict from the task's own XCom return value.

    Returns ``None`` when the pulled value is empty or not a string, or when
    anything raises while reading it (the error is logged at DEBUG).
    """
    try:
        # The job id is expected to be the task's own ``return_value`` XCom.
        job_id = ti.xcom_pull(task_ids=ti.task_id, key="return_value")

        task = ti.task
        project_id = getattr(task, "project_id", None)
        # Fall back to "US" both when the attribute is missing and when it is
        # present but None/empty; a plain getattr default only covers the
        # missing-attribute case.
        location = getattr(task, "location", None) or "US"

        if job_id and isinstance(job_id, str):
            return {
                "provider": "gcp_bigquery",
                "job_id": job_id,
                "project_id": project_id,
                "location": location,
            }
    except Exception as e:
        # getattr with a default: reading task_id must not raise again while
        # we are already handling a failure.
        log.debug(
            "Could not extract BQ ID for %s: %s",
            getattr(ti, "task_id", "<unknown>"),
            e,
        )
    return None
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from airflow.listeners import hookimpl
|
|
3
|
+
from cost_agent.extractors import extract_job_metadata
|
|
4
|
+
from cost_agent.client import send_event
|
|
5
|
+
|
|
6
|
+
log = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
class CostAgentListener:
    """Listener that forwards cloud-job metadata of successful tasks."""

    @hookimpl
    def on_task_instance_success(self, previous_state, task_instance):
        """Send one event for ``task_instance`` if job metadata is found.

        Nothing is sent when ``extract_job_metadata`` returns a falsy value.
        Any exception raised here is logged with its traceback and
        suppressed, so the hook itself never raises.
        """
        try:
            cloud_metadata = extract_job_metadata(task_instance)
            if cloud_metadata:
                send_event(
                    {
                        "dag_id": task_instance.dag_id,
                        "task_id": task_instance.task_id,
                        # Populated from the task instance's start_date.
                        "execution_date": task_instance.start_date.isoformat(),
                        "cloud_metadata": cloud_metadata,
                    }
                )
        except Exception:
            log.exception("Cost Agent: unexpected error in on_task_instance_success — swallowed to protect pipeline")
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: explainio-airflow-agent
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Explain.io FinOps Airflow Cost Agent
|
|
5
|
+
Home-page: https://explain.io
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Framework :: Apache Airflow
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
Dynamic: classifier
|
|
14
|
+
Dynamic: description
|
|
15
|
+
Dynamic: description-content-type
|
|
16
|
+
Dynamic: home-page
|
|
17
|
+
Dynamic: requires-dist
|
|
18
|
+
Dynamic: requires-python
|
|
19
|
+
Dynamic: summary
|
|
20
|
+
|
|
21
|
+
# 💸 Explain.io - Airflow Cost Agent
|
|
22
|
+
|
|
23
|
+
[]()
|
|
24
|
+
[]()
|
|
25
|
+
[]()
|
|
26
|
+
|
|
27
|
+
**Explain.io** is a FinOps tool for Data Engineers. This lightweight Airflow plugin automatically attributes Google Cloud BigQuery compute costs to the exact DAG and Task that triggered them.
|
|
28
|
+
|
|
29
|
+
No more guessing which pipeline caused the bill spike. No more changing your SQL or Python code to add tracking tags.
|
|
30
|
+
|
|
31
|
+
## ✨ Features
|
|
32
|
+
* **Zero Code Changes:** Just add a single `on_success_callback` to your DAG definition.
|
|
33
|
+
* **Zero Blast Radius:** Network requests run asynchronously in a separate thread. If the Explain.io API goes down, your DAG **will still succeed**. Exceptions are silently swallowed and logged.
|
|
34
|
+
* **Instant Dashboard:** View your pipeline costs, projected monthly spend, and heaviest DAGs at [explain-io.vercel.app](https://explain-io.vercel.app) *(Update this link to your actual domain later)*.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## 🚀 Installation
|
|
39
|
+
|
|
40
|
+
Install the package directly from GitHub into your Airflow environment:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install explainio-airflow-agent
|
|
44
|
+
```
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
MANIFEST.in
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
cost_agent/README.md
|
|
5
|
+
cost_agent/__init__.py
|
|
6
|
+
cost_agent/client.py
|
|
7
|
+
cost_agent/extractors.py
|
|
8
|
+
cost_agent/listener.py
|
|
9
|
+
explainio_airflow_agent.egg-info/PKG-INFO
|
|
10
|
+
explainio_airflow_agent.egg-info/SOURCES.txt
|
|
11
|
+
explainio_airflow_agent.egg-info/dependency_links.txt
|
|
12
|
+
explainio_airflow_agent.egg-info/requires.txt
|
|
13
|
+
explainio_airflow_agent.egg-info/top_level.txt
|
|
14
|
+
tests/test_aggregator.py
|
|
15
|
+
tests/test_extractors.py
|
|
16
|
+
tests/test_mailer.py
|
|
17
|
+
tests/test_scheduler.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cost_agent
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
# Read the long description explicitly as UTF-8 and close the file promptly:
# the README contains non-ASCII characters (emoji), which would fail to
# decode under a non-UTF-8 platform default encoding.
with open("cost_agent/README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="explainio-airflow-agent",
    version="0.1.1",
    packages=find_packages(include=["cost_agent", "cost_agent.*"]),
    install_requires=[
        "requests",
    ],
    description="Explain.io FinOps Airflow Cost Agent",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://explain.io",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Framework :: Apache Airflow",
    ],
    python_requires=">=3.9",
)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import MagicMock
|
|
3
|
+
from datetime import date, datetime, timedelta
|
|
4
|
+
from backend.aggregator import get_daily_summary
|
|
5
|
+
|
|
6
|
+
@pytest.fixture
def mock_supabase():
    """Provide a fresh ``MagicMock`` standing in for the Supabase client."""
    supabase_double = MagicMock()
    return supabase_double
|
|
9
|
+
|
|
10
|
+
def test_get_daily_summary_calculates_correctly(mock_supabase):
    """Check total cost, DAG ordering and the week-over-week percentage.

    The mocked RPC ``execute()`` yields today's summary on its first call and
    last week's summary on its second call.
    """
    # Arrange
    account_id = "test-user-id"
    report_day = date(2026, 4, 15)

    current_payload = {
        "total_cost": 70.0,
        "top_dags": [
            {"dag_id": "dag3", "cost": 30.0},
            {"dag_id": "dag2", "cost": 20.0},
            {"dag_id": "dag1", "cost": 15.0},
        ],
    }
    previous_payload = {
        "total_cost": 50.0,
        "top_dags": [{"dag_id": "dag1", "cost": 50.0}],
    }
    mock_supabase.rpc.return_value.execute.side_effect = [
        MagicMock(data=[current_payload]),
        MagicMock(data=[previous_payload]),
    ]

    # Act
    result = get_daily_summary(mock_supabase, account_id, report_day)

    # Assert
    assert result["total_cost"] == 70.0
    ranked = result["top_dags"]
    assert len(ranked) == 3
    assert ranked[0]["dag_id"] == "dag3"
    assert ranked[1]["dag_id"] == "dag2"
    assert ranked[2]["dag_id"] == "dag1"

    # Week-over-week change: (70 - 50) / 50 * 100 = 40%
    assert result["prev_week_comparison"] == 40.0
|
|
54
|
+
|
|
55
|
+
def test_get_daily_summary_handles_no_data(mock_supabase):
    """An empty RPC result for every call must yield an all-zero summary."""
    # Arrange: every execute() call returns a response with no rows.
    account_id = "test-user-id"
    report_day = date(2026, 4, 15)
    mock_supabase.rpc.return_value.execute.return_value = MagicMock(data=[])

    # Act
    result = get_daily_summary(mock_supabase, account_id, report_day)

    # Assert
    assert result["total_cost"] == 0.0
    assert result["top_dags"] == []
    assert result["prev_week_comparison"] == 0.0
|
|
72
|
+
|
|
73
|
+
def test_get_daily_summary_handles_division_by_zero(mock_supabase):
    """With cost today but no rows for last week, the comparison is 100.0."""
    # Arrange
    account_id = "test-user-id"
    report_day = date(2026, 4, 15)

    # First execute() call: data for today. Second call: nothing for last week.
    today_rows = [{"total_cost": 10.0, "top_dags": [{"dag_id": "dag1", "cost": 10.0}]}]
    mock_supabase.rpc.return_value.execute.side_effect = [
        MagicMock(data=today_rows),
        MagicMock(data=[]),
    ]

    # Act
    result = get_daily_summary(mock_supabase, account_id, report_day)

    # Assert
    assert result["total_cost"] == 10.0
    assert result["prev_week_comparison"] == 100.0
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
|
+
|
|
4
|
+
def test_extract_job_metadata_v2_mock():
    """Extract BigQuery metadata from a mock standing in for an Airflow 2.x TaskInstance."""
    # Mocked operator: the extractor reads operator_name, project_id and
    # location from the task object attached to the task instance.
    bq_task = MagicMock()
    bq_task.operator_name = "BigQueryInsertJobOperator"
    bq_task.project_id = "my-project"
    bq_task.location = "EU"

    # Mocked task instance whose XCom pull yields the job id.
    task_instance = MagicMock()
    task_instance.task_id = "test_bq_task"
    task_instance.xcom_pull.return_value = "job_12345"
    task_instance.task = bq_task

    from cost_agent.extractors import extract_job_metadata

    metadata = extract_job_metadata(task_instance)

    print(f"V2 Metadata: {metadata}")
    assert metadata is not None
    assert metadata["job_id"] == "job_12345"
    assert metadata["project_id"] == "my-project"
    assert metadata["location"] == "EU"
|
|
26
|
+
|
|
27
|
+
def test_extract_job_metadata_v3_mock():
    """Extract BigQuery metadata from a mock standing in for an Airflow 3.x RuntimeTaskInstance."""
    # The extractor is duck-typed, so the mock has the same shape as the
    # Airflow 2.x variant of this test.
    bq_task = MagicMock()
    bq_task.operator_name = "BigQueryInsertJobOperator"
    bq_task.project_id = "my-v3-project"
    bq_task.location = "US"

    task_instance = MagicMock()
    task_instance.task_id = "test_bq_task_v3"
    task_instance.xcom_pull.return_value = "job_v3_6789"
    task_instance.task = bq_task

    from cost_agent.extractors import extract_job_metadata

    metadata = extract_job_metadata(task_instance)

    print(f"V3 Metadata: {metadata}")
    assert metadata is not None
    assert metadata["job_id"] == "job_v3_6789"
    assert metadata["project_id"] == "my-v3-project"
    assert metadata["location"] == "US"
|
|
49
|
+
|
|
50
|
+
if __name__ == "__main__":
    # Allow running this file directly, without pytest: run each check in
    # order, report it, and exit non-zero on the first failure.
    checks = (
        (test_extract_job_metadata_v2_mock, "✅ Airflow 2.x Mock Test Passed"),
        (test_extract_job_metadata_v3_mock, "✅ Airflow 3.x Mock Test Passed"),
    )
    try:
        for run_check, success_message in checks:
            run_check()
            print(success_message)
    except Exception as e:
        print(f"❌ Test Failed: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import os
|
|
3
|
+
from unittest.mock import patch, MagicMock
|
|
4
|
+
from backend.mailer import send_digest_email
|
|
5
|
+
|
|
6
|
+
@patch("resend.Emails.send")
@patch.dict(os.environ, {"RESEND_API_KEY": "re_test_key"})
def test_send_digest_email_calls_resend(mock_send):
    """The digest is sent once, with the expected recipient, subject and HTML content."""
    # Arrange
    recipient = "test@example.com"
    digest = {
        "total_cost": 75.50,
        "top_dags": [
            {"dag_id": "daily_ingestion", "cost": 45.00},
            {"dag_id": "user_sync", "cost": 20.50},
            {"dag_id": "backup_job", "cost": 10.00},
        ],
        "prev_week_comparison": 15.5,
    }

    # Act
    send_digest_email(recipient, digest)

    # Assert: inspect the keyword arguments of the single send() call.
    mock_send.assert_called_once()
    sent = mock_send.call_args[1]

    assert sent["to"] == recipient
    assert "Explain.io" in sent["subject"]
    assert "$75.50" in sent["html"]
    assert "daily_ingestion" in sent["html"]
    assert "15.5%" in sent["html"]
    assert "https://explain.io/dashboard" in sent["html"]
|
|
34
|
+
|
|
35
|
+
@patch("resend.Emails.send")
@patch.dict(os.environ, {"RESEND_API_KEY": "re_test_key"})
def test_send_digest_email_handles_negative_comparison(mock_send):
    """A negative week-over-week value appears in the HTML with its minus sign."""
    # Arrange
    recipient = "test@example.com"
    digest = {
        "total_cost": 40.00,
        "top_dags": [
            {"dag_id": "daily_ingestion", "cost": 40.00},
        ],
        "prev_week_comparison": -10.0,
    }

    # Act
    send_digest_email(recipient, digest)

    # Assert
    mock_send.assert_called_once()
    sent = mock_send.call_args[1]

    assert "-10.0%" in sent["html"]
|
|
56
|
+
|
|
57
|
+
@patch("resend.Emails.send")
@patch.dict(os.environ, {"RESEND_API_KEY": "re_test_key"})
def test_send_digest_email_handles_exception(mock_send):
    """An error raised by the send call is absorbed and ``None`` is returned."""
    # Arrange: the patched send() raises on every call.
    mock_send.side_effect = Exception("API Error")
    recipient = "test@example.com"
    digest = {"total_cost": 10.0, "top_dags": [], "prev_week_comparison": 0.0}

    # Act
    outcome = send_digest_email(recipient, digest)

    # Assert
    assert outcome is None
    mock_send.assert_called_once()
|
|
71
|
+
|
|
72
|
+
@patch.dict(os.environ, {}, clear=True)
def test_send_digest_email_handles_missing_api_key():
    """With an empty environment (no ``RESEND_API_KEY``) the call returns ``None``."""
    # Arrange
    recipient = "test@example.com"
    digest = {"total_cost": 10.0, "top_dags": [], "prev_week_comparison": 0.0}

    # Act
    outcome = send_digest_email(recipient, digest)

    # Assert
    assert outcome is None
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import patch, MagicMock
|
|
3
|
+
from datetime import datetime, timedelta, timezone
|
|
4
|
+
from backend.main import dispatch_daily_digests
|
|
5
|
+
|
|
6
|
+
@patch("backend.main.supabase")
@patch("backend.main.aggregator.get_daily_summary")
@patch("backend.main.mailer.send_digest_email")
def test_dispatch_daily_digests_success(mock_send, mock_summary, mock_supabase):
    """Summaries are requested for every user; mail goes only to the user with data."""
    # Arrange
    yesterday = datetime.now(timezone.utc).date() - timedelta(days=1)

    digest_users = [
        {"user_id": "user1", "email_digest_enabled": True, "email": "user1@example.com"},
        {"user_id": "user2", "email_digest_enabled": True, "email": "user2@example.com"},
    ]

    # Stub for supabase.table("user_settings").select(...).eq(...).execute()
    settings_query = MagicMock()
    settings_query.execute.return_value = MagicMock(data=digest_users)
    mock_supabase.table.return_value.select.return_value.eq.return_value = settings_query

    # Aggregator results, in user order: user1 has data, user2 has none.
    mock_summary.side_effect = [
        {"total_cost": 10.5, "top_dags": [{"dag_id": "dag1", "cost": 10.5}], "prev_week_comparison": 5.0},
        {"total_cost": 0.0, "top_dags": [], "prev_week_comparison": 0.0},
    ]

    # Act
    dispatch_daily_digests()

    # Assert: a summary was requested for both users...
    assert mock_summary.call_count == 2
    for expected_user in ("user1", "user2"):
        mock_summary.assert_any_call(mock_supabase, expected_user, yesterday)

    # ...but only user1 (the one with data) was emailed.
    assert mock_send.call_count == 1
    mock_send.assert_called_once_with(
        "user1@example.com",
        {"total_cost": 10.5, "top_dags": [{"dag_id": "dag1", "cost": 10.5}], "prev_week_comparison": 5.0},
    )
|
|
42
|
+
|
|
43
|
+
@patch("backend.main.supabase")
@patch("backend.main.aggregator.get_daily_summary")
@patch("backend.main.mailer.send_digest_email")
def test_dispatch_daily_digests_no_users(mock_send, mock_summary, mock_supabase):
    """With no users returned by the settings query, nothing is aggregated or sent."""
    # Arrange: the user-settings query returns an empty result set.
    no_users = MagicMock(data=[])
    mock_supabase.table.return_value.select.return_value.eq.return_value.execute.return_value = no_users

    # Act
    dispatch_daily_digests()

    # Assert
    assert mock_summary.call_count == 0
    assert mock_send.call_count == 0
|
|
56
|
+
|
|
57
|
+
@patch("backend.main.supabase")
@patch("backend.main.aggregator.get_daily_summary")
@patch("backend.main.mailer.send_digest_email")
def test_dispatch_daily_digests_failure_isolation(mock_send, mock_summary, mock_supabase):
    """A failure while summarising one user must not stop the next user's digest."""
    # Arrange: the first user's summary raises, the second user's succeeds.
    mock_users = [
        {"user_id": "fail_user", "email": "fail@example.com"},
        {"user_id": "success_user", "email": "success@example.com"},
    ]

    mock_query = MagicMock()
    mock_query.execute.return_value = MagicMock(data=mock_users)
    mock_supabase.table.return_value.select.return_value.eq.return_value = mock_query

    mock_summary.side_effect = [
        Exception("DB Error for user1"),
        {"total_cost": 20.0, "top_dags": [], "prev_week_comparison": 0.0},
    ]

    # Act
    dispatch_daily_digests()

    # Assert: both users were attempted despite the first one failing.
    assert mock_summary.call_count == 2
    # Email should be sent for success_user despite fail_user error
    assert mock_send.call_count == 1
    mock_send.assert_called_once_with(
        "success@example.com",
        {"total_cost": 20.0, "top_dags": [], "prev_week_comparison": 0.0},
    )
|