interloper-agent 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- interloper_agent/__init__.py +4 -0
- interloper_agent/agent.py +90 -0
- interloper_agent/context.py +103 -0
- interloper_agent/prompts.py +113 -0
- interloper_agent/tools/__init__.py +0 -0
- interloper_agent/tools/actions.py +133 -0
- interloper_agent/tools/analytics.py +158 -0
- interloper_agent/tools/catalog.py +261 -0
- interloper_agent/tools/lineage.py +244 -0
- interloper_agent/tools/operations.py +171 -0
- interloper_agent-0.2.0.dist-info/METADATA +16 -0
- interloper_agent-0.2.0.dist-info/RECORD +13 -0
- interloper_agent-0.2.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Interloper Agent — multi-agent system for asset discovery, lineage, and operations."""
|
|
2
|
+
|
|
3
|
+
from google.adk.agents import Agent
|
|
4
|
+
|
|
5
|
+
from interloper_agent.prompts import (
|
|
6
|
+
ACTION_INSTRUCTION,
|
|
7
|
+
ANALYTICS_INSTRUCTION,
|
|
8
|
+
CATALOG_INSTRUCTION,
|
|
9
|
+
LINEAGE_INSTRUCTION,
|
|
10
|
+
OPERATIONS_INSTRUCTION,
|
|
11
|
+
ROOT_INSTRUCTION,
|
|
12
|
+
)
|
|
13
|
+
from interloper_agent.tools import actions, analytics, catalog, lineage, operations
|
|
14
|
+
|
|
15
|
+
# Catalog specialist sub-agent; its tools are the callables exported by
# interloper_agent.tools.catalog.
catalog_agent = Agent(
    name="CatalogAgent",
    description="Discovers sources, inspects asset schemas, searches fields across the catalog, and compares schemas.",
    instruction=CATALOG_INSTRUCTION,
    model="gemini-2.5-flash",
    tools=[
        catalog.list_sources,
        catalog.get_source_detail,
        catalog.get_asset_schema,
        catalog.search_fields,
        catalog.compare_schemas,
        catalog.list_destinations,
    ],
)
|
|
29
|
+
|
|
30
|
+
# Lineage specialist sub-agent; its tools are the callables exported by
# interloper_agent.tools.lineage.
lineage_agent = Agent(
    name="LineageAgent",
    description="Analyzes asset dependencies — upstream/downstream traversal, impact analysis, and cross-source edges.",
    instruction=LINEAGE_INSTRUCTION,
    model="gemini-2.5-flash",
    tools=[
        lineage.get_upstream,
        lineage.get_downstream,
        lineage.get_full_lineage,
        lineage.impact_analysis,
        lineage.cross_source_dependencies,
    ],
)
|
|
43
|
+
|
|
44
|
+
# Operations specialist sub-agent; its tools are the callables exported by
# interloper_agent.tools.operations.
operations_agent = Agent(
    name="OperationsAgent",
    description="Monitors run health, recent failures, job schedules, and backfill progress.",
    instruction=OPERATIONS_INSTRUCTION,
    model="gemini-2.5-flash",
    tools=[
        operations.list_recent_runs,
        operations.get_run_detail,
        operations.list_failures,
        operations.get_job_health,
        operations.list_jobs,
        operations.list_backfills,
    ],
)
|
|
58
|
+
|
|
59
|
+
# Analytics specialist sub-agent; its tools are the callables exported by
# interloper_agent.tools.analytics.
analytics_agent = Agent(
    name="AnalyticsAgent",
    description="Provides run statistics, partition coverage analysis, and data freshness checks.",
    instruction=ANALYTICS_INSTRUCTION,
    model="gemini-2.5-flash",
    tools=[
        analytics.run_history_summary,
        analytics.partition_coverage,
        analytics.freshness_check,
    ],
)
|
|
70
|
+
|
|
71
|
+
# Action specialist sub-agent; the only agent here whose tools (from
# interloper_agent.tools.actions) create runs/backfills or toggle state.
action_agent = Agent(
    name="ActionAgent",
    description="Triggers runs, starts backfills, and toggles jobs or assets on/off.",
    instruction=ACTION_INSTRUCTION,
    model="gemini-2.5-flash",
    tools=[
        actions.trigger_run,
        actions.trigger_backfill,
        actions.toggle_job,
        actions.toggle_asset,
    ],
)
|
|
83
|
+
|
|
84
|
+
# Top-level agent: has no tools of its own and lists the five specialist
# agents as sub_agents.
root_agent = Agent(
    name="InterloperAgent",
    description="Main Interloper assistant that routes queries to specialized sub-agents.",
    instruction=ROOT_INSTRUCTION,
    model="gemini-2.5-flash",
    sub_agents=[
        catalog_agent,
        lineage_agent,
        operations_agent,
        analytics_agent,
        action_agent,
    ],
)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Store, catalog, and session context for agent tools."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
from google.adk.tools.tool_context import ToolContext
|
|
10
|
+
from interloper.catalog.base import Catalog
|
|
11
|
+
from interloper_db import Store, init_engine
|
|
12
|
+
|
|
13
|
+
# Module-level Store shared by every tool; None until init() or set_store()
# assigns it. Read through get_store().
_store: Store | None = None
# Module-level Catalog shared by every tool; None until init() or
# set_catalog() assigns it. Read through get_catalog().
_catalog: Catalog | None = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def init(database_url: str, catalog: Catalog) -> None:
    """Initialize the agent context with a database connection and catalog.

    Calls ``init_engine`` with the URL, then stores the catalog and a new
    ``Store`` built from it in the module-level globals.

    Args:
        database_url: PostgreSQL connection string.
        catalog: Catalog instance.
    """
    global _store, _catalog  # noqa: PLW0603
    # Engine setup runs first: if it raises, neither global is modified.
    init_engine(database_url)
    _catalog = catalog
    _store = Store(catalog=catalog)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def set_store(store: Store) -> None:
    """Set the global Store instance (used by interloper-api integration).

    Replaces whatever Store was previously held; the catalog global is not
    touched.

    Args:
        store: An already-initialized Store.
    """
    global _store  # noqa: PLW0603
    _store = store
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def set_catalog(catalog: Catalog) -> None:
    """Set the global catalog instance (used by interloper-api integration).

    Replaces whatever catalog was previously held; the Store global is not
    touched.

    Args:
        catalog: Catalog instance.
    """
    global _catalog  # noqa: PLW0603
    _catalog = catalog
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_store() -> Store:
    """Return the module-level Store.

    Raises:
        RuntimeError: If no Store has been assigned yet.
    """
    store = _store
    if store is not None:
        return store
    raise RuntimeError("Agent context not initialized. Call init() or set_store() first.")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_catalog() -> dict[str, Any]:
    """Return the module-level catalog in its ``dump()`` form.

    ``dump()`` is invoked on every call, so the result reflects the catalog
    object as it is at call time.

    Raises:
        RuntimeError: If no catalog has been assigned yet.
    """
    current = _catalog
    if current is not None:
        return current.dump()
    raise RuntimeError("Agent context not initialized. Call init() or set_catalog() first.")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_org_id(tool_context: ToolContext) -> UUID:
    """Read the organisation ID out of the ADK session state.

    The value stored under ``state["org_id"]`` is passed through ``str`` and
    parsed as a UUID, so both UUID objects and their string form are accepted.

    Args:
        tool_context: Injected by ADK; only its ``state`` mapping is read.

    Raises:
        ValueError: If ``org_id`` is absent (or None) in the session state, or
            if the stored value is not a valid UUID string.
    """
    stored = tool_context.state.get("org_id")
    if stored is not None:
        return UUID(str(stored))
    raise ValueError("org_id not set in session state")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def serialize(obj: Any) -> Any:
    """Convert a SQLModel instance (or collection) to a JSON-safe structure.

    Recursively handles UUIDs, datetimes, dates, enums, dicts, lists, tuples,
    sets, and any object exposing ``model_dump()``.

    Args:
        obj: A SQLModel row, dict, list, or primitive.

    Returns:
        ``None`` and ``str``/``int``/``float``/``bool`` values unchanged;
        UUIDs as strings; dates/datetimes as ISO-8601 strings; enum members as
        their (serialized) value; dicts with JSON-compatible keys and
        serialized values; lists, tuples and sets as lists; objects with
        ``model_dump()`` as the serialized dump; anything else as ``str(obj)``.
    """
    import enum  # function-scope import: the module does not import enum

    primitives = (str, int, float, bool)
    # Checked first so that str/int-mixin enums keep passing through as-is.
    if obj is None or isinstance(obj, primitives):
        return obj
    if isinstance(obj, enum.Enum):
        # str(member) would give "Class.MEMBER"; the value is what is useful.
        return serialize(obj.value)
    if isinstance(obj, UUID):
        return str(obj)
    # datetime.datetime is a subclass of datetime.date; one check covers both.
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    if isinstance(obj, dict):
        # JSON object keys must be primitives; stringify anything else
        # (e.g. UUID keys) instead of leaving a key json.dumps would reject.
        return {
            (k if k is None or isinstance(k, primitives) else str(serialize(k))): serialize(v)
            for k, v in obj.items()
        }
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [serialize(item) for item in obj]
    # SQLModel / Pydantic BaseModel
    if hasattr(obj, "model_dump"):
        return serialize(obj.model_dump())
    return str(obj)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Instruction strings for all agents."""
|
|
2
|
+
|
|
3
|
+
ROOT_INSTRUCTION = """\
|
|
4
|
+
You are Interloper Assistant, an AI agent for the Interloper data asset platform.
|
|
5
|
+
|
|
6
|
+
You help users understand their data catalog, asset dependencies, operational health,
|
|
7
|
+
and can take actions like triggering runs or backfills.
|
|
8
|
+
|
|
9
|
+
Route questions to the appropriate specialist:
|
|
10
|
+
|
|
11
|
+
- **CatalogAgent** — "What sources do we have?", "Show me the schema for X",
|
|
12
|
+
"Which assets have a spend field?", "Compare Facebook and TikTok schemas"
|
|
13
|
+
- **LineageAgent** — "What depends on X?", "What's upstream of Y?",
|
|
14
|
+
"If Google Ads breaks, what's affected?", "Show cross-source dependencies"
|
|
15
|
+
- **OperationsAgent** — "Did last night's runs succeed?", "Which assets failed?",
|
|
16
|
+
"What's the cron schedule?", "Show backfill progress"
|
|
17
|
+
- **AnalyticsAgent** — "How often do runs fail?", "Any partition gaps?",
|
|
18
|
+
"When was the last successful run for each job?"
|
|
19
|
+
- **ActionAgent** — "Re-run the Facebook job for yesterday", "Backfill March 1-15",
|
|
20
|
+
"Disable the campaign_matcher job"
|
|
21
|
+
|
|
22
|
+
Always be concise and present data in a structured way. Use tables when listing
|
|
23
|
+
multiple items. When referencing assets, use the qualified key (source_key.asset_key).
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
CATALOG_INSTRUCTION = """\
|
|
27
|
+
You are the Catalog specialist for Interloper.
|
|
28
|
+
|
|
29
|
+
You help users discover sources, understand asset schemas, find fields across the
|
|
30
|
+
catalog, and compare schemas between different assets.
|
|
31
|
+
|
|
32
|
+
When presenting schemas:
|
|
33
|
+
- List field names, types, and descriptions clearly
|
|
34
|
+
- Note which fields are required vs optional
|
|
35
|
+
- Highlight partition columns when present
|
|
36
|
+
|
|
37
|
+
When listing sources:
|
|
38
|
+
- Include their asset count and key
|
|
39
|
+
- Note which are configured (have DB instances) vs only in the catalog
|
|
40
|
+
|
|
41
|
+
When comparing schemas:
|
|
42
|
+
- Show shared fields, fields unique to each, and any type mismatches
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
LINEAGE_INSTRUCTION = """\
|
|
46
|
+
You are the Lineage specialist for Interloper.
|
|
47
|
+
|
|
48
|
+
You help users understand data dependencies between assets — which assets feed
|
|
49
|
+
into which, cross-source dependency edges, and impact analysis.
|
|
50
|
+
|
|
51
|
+
When showing lineage:
|
|
52
|
+
- Present it as a clear chain or tree
|
|
53
|
+
- Use qualified keys (source_key.asset_key)
|
|
54
|
+
- Distinguish required vs optional dependencies
|
|
55
|
+
|
|
56
|
+
For impact analysis:
|
|
57
|
+
- Emphasize the total number of affected downstream assets
|
|
58
|
+
- Group by source for clarity
|
|
59
|
+
- Note which assets are leaves (final outputs)
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
OPERATIONS_INSTRUCTION = """\
|
|
63
|
+
You are the Operations specialist for Interloper.
|
|
64
|
+
|
|
65
|
+
You help users understand run health, recent failures, job schedules, and backfill
|
|
66
|
+
progress.
|
|
67
|
+
|
|
68
|
+
When showing failures:
|
|
69
|
+
- Always include the error message from events
|
|
70
|
+
- Note which specific asset within the run failed
|
|
71
|
+
- Summarize patterns (e.g., "3 of last 5 runs failed")
|
|
72
|
+
|
|
73
|
+
When showing job status:
|
|
74
|
+
- Decode cron expressions to human-readable schedules
|
|
75
|
+
- Show last_run_at and next_run_at
|
|
76
|
+
- Compute success rate from recent runs
|
|
77
|
+
|
|
78
|
+
Present timestamps in a human-readable format relative to now when useful
|
|
79
|
+
(e.g., "2 hours ago").
|
|
80
|
+
"""
|
|
81
|
+
|
|
82
|
+
ANALYTICS_INSTRUCTION = """\
|
|
83
|
+
You are the Analytics specialist for Interloper.
|
|
84
|
+
|
|
85
|
+
You help users understand trends in run performance, partition coverage gaps,
|
|
86
|
+
and data freshness across their jobs.
|
|
87
|
+
|
|
88
|
+
When presenting statistics:
|
|
89
|
+
- Include counts, percentages, and averages
|
|
90
|
+
- Flag concerning trends (rising failure rates, growing gaps)
|
|
91
|
+
- Compare against recent history for context
|
|
92
|
+
|
|
93
|
+
For partition coverage:
|
|
94
|
+
- Clearly list missing dates in a range
|
|
95
|
+
- Calculate the coverage percentage
|
|
96
|
+
|
|
97
|
+
For freshness:
|
|
98
|
+
- Flag any job that hasn't run successfully in over 24 hours
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
ACTION_INSTRUCTION = """\
|
|
102
|
+
You are the Action specialist for Interloper.
|
|
103
|
+
|
|
104
|
+
You can trigger runs, start backfills, and toggle jobs or assets on or off.
|
|
105
|
+
|
|
106
|
+
Important rules:
|
|
107
|
+
- Always confirm what you are about to do before executing
|
|
108
|
+
- Describe the action clearly: which job, which dates, what will change
|
|
109
|
+
- After executing, report the result including the created run/backfill ID
|
|
110
|
+
- For backfills, confirm the date range and concurrency settings
|
|
111
|
+
|
|
112
|
+
Never execute destructive actions without explicit user confirmation.
|
|
113
|
+
"""
|
|
File without changes
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Action tools — trigger runs, backfills, and toggle state."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
from google.adk.tools.tool_context import ToolContext
|
|
10
|
+
|
|
11
|
+
from interloper_agent.context import get_org_id, get_store, serialize
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def trigger_run(
    job_id: str,
    partition_date: str | None = None,
    tool_context: ToolContext = None,  # type: ignore[assignment]
) -> dict[str, Any]:
    """Queue a single run for a job.

    Args:
        job_id: UUID of the job to run.
        partition_date: Optional partition date in ISO format (YYYY-MM-DD).
    """
    # NOTE: the docstring above is left verbatim — ADK exposes tool docstrings
    # to the model as the tool description.
    # Any failure (missing org_id, bad UUID/date, store error) is reported as
    # an error payload rather than raised.
    try:
        org_id = get_org_id(tool_context)
        store = get_store()
        if partition_date:
            target_date = datetime.date.fromisoformat(partition_date)
        else:
            target_date = None
        created = store.create_run(org_id, job_id=UUID(job_id), partition_date=target_date)
        payload: dict[str, Any] = {"status": "success"}
        payload["message"] = "Run queued successfully"
        payload["run"] = serialize(created)
        return payload
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def trigger_backfill(
    job_id: str,
    start_date: str,
    end_date: str,
    concurrency: int = 1,
    fail_fast: bool = False,
    tool_context: ToolContext = None,  # type: ignore[assignment]
) -> dict[str, Any]:
    """Start a backfill for a job over a date range.

    Args:
        job_id: UUID of the job.
        start_date: Start date in ISO format (YYYY-MM-DD).
        end_date: End date in ISO format (YYYY-MM-DD), inclusive.
        concurrency: Max number of runs in-flight at once (default 1).
        fail_fast: If true, cancel remaining runs on first failure (default false).
    """
    # The range and concurrency are passed to the store as given; no
    # validation (e.g. start <= end) happens in this function.
    try:
        org_id = get_org_id(tool_context)
        store = get_store()
        # Parsed in the same order as the arguments so the first invalid
        # input is the one reported.
        job_uuid = UUID(job_id)
        first_day = datetime.date.fromisoformat(start_date)
        last_day = datetime.date.fromisoformat(end_date)
        created = store.create_backfill(
            org_id,
            job_id=job_uuid,
            start_date=first_day,
            end_date=last_day,
            concurrency=concurrency,
            fail_fast=fail_fast,
        )
        payload: dict[str, Any] = {"status": "success"}
        payload["message"] = "Backfill created successfully"
        payload["backfill"] = serialize(created)
        return payload
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def toggle_job(
    job_id: str,
    enabled: bool,
    tool_context: ToolContext = None,  # type: ignore[assignment]
) -> dict[str, Any]:
    """Enable or disable a scheduled job.

    Args:
        job_id: UUID of the job.
        enabled: True to enable, false to disable.
    """
    # NOTE: the docstring above is left verbatim — ADK exposes tool docstrings
    # to the model as the tool description.
    # NOTE(review): unlike trigger_run/trigger_backfill, this tool never reads
    # org_id from tool_context, so nothing in this function scopes the job
    # lookup to the caller's organisation — confirm the Store enforces it.
    try:
        store = get_store()
        jid = UUID(job_id)
        job = store.get_job(jid)
        if job is None:
            # Explicit message instead of the AttributeError text that
            # `job.name` would otherwise produce.
            return {"status": "error", "error": f"Job '{job_id}' not found"}

        # update_job is called with every field, so all attributes other than
        # `enabled` are copied over from the current job unchanged.
        source_ids = [s.id for s in job.sources if s.id] if job.sources else None
        asset_ids = [a.id for a in job.assets if a.id] if job.assets else None
        updated = store.update_job(
            jid,
            name=job.name,
            cron=job.cron,
            source_ids=source_ids,
            asset_ids=asset_ids,
            tags=job.tags,
            enabled=enabled,
            partitioned=job.partitioned,
            backfill_days=job.backfill_days,
        )
        action = "enabled" if enabled else "disabled"
        return {
            "status": "success",
            "message": f"Job '{job.name}' {action}",
            "job": serialize(updated),
        }
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def toggle_asset(
    asset_id: str,
    materializable: bool,
    tool_context: ToolContext = None,  # type: ignore[assignment]
) -> dict[str, Any]:
    """Enable or disable materialization for an asset.

    Args:
        asset_id: UUID of the asset.
        materializable: True to enable materialization, false to disable.
    """
    # NOTE(review): tool_context / org_id is not consulted here, so this
    # function does not itself restrict the update to the caller's
    # organisation — confirm the Store enforces it.
    try:
        store = get_store()
        asset = store.update_asset(UUID(asset_id), materializable=materializable)
        if materializable:
            state = "enabled"
        else:
            state = "disabled"
        payload: dict[str, Any] = {"status": "success"}
        payload["message"] = f"Asset '{asset.key}' materialization {state}"
        payload["asset"] = serialize(asset)
        return payload
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Analytics tools — run statistics, partition coverage, and data freshness."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
from google.adk.tools.tool_context import ToolContext
|
|
10
|
+
|
|
11
|
+
from interloper_agent.context import get_org_id, get_store, serialize
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def run_history_summary(
    job_id: str | None = None,
    days: int = 7,
    tool_context: ToolContext = None,  # type: ignore[assignment]
) -> dict[str, Any]:
    """Summarize run statistics over a period.

    Args:
        job_id: Filter to a specific job UUID (optional, all jobs if omitted).
        days: Number of days to look back (default 7).

    Returns aggregate counts (total, success, failed, canceled),
    success rate, and average duration.
    """
    # NOTE: the docstring above is left verbatim — ADK exposes tool docstrings
    # to the model as the tool description.

    def _as_utc(ts: datetime.datetime) -> datetime.datetime:
        # Comparing a naive timestamp with the aware cutoff raises TypeError,
        # which would fail the whole tool. Assumes naive values coming from
        # the store are UTC — TODO confirm against the Store's column types.
        if ts.tzinfo is None:
            return ts.replace(tzinfo=datetime.timezone.utc)
        return ts

    try:
        org_id = get_org_id(tool_context)
        store = get_store()
        # At most 500 runs are fetched; the `days` window is applied here
        # afterwards, so busy organisations may be summarized from a
        # truncated set.
        runs = store.list_runs(
            org_id,
            job_id=UUID(job_id) if job_id else None,
            limit=500,
        )

        cutoff = datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(days=days)
        recent = [r for r in runs if r.created_at and _as_utc(r.created_at) >= cutoff]

        total = len(recent)
        by_status: dict[str, int] = {}
        durations: list[float] = []
        for r in recent:
            by_status[r.status] = by_status.get(r.status, 0) + 1
            # Only runs with both timestamps contribute to the average.
            if r.started_at and r.completed_at:
                elapsed = _as_utc(r.completed_at) - _as_utc(r.started_at)
                durations.append(elapsed.total_seconds())

        success = by_status.get("success", 0)
        return {
            "status": "success",
            "period_days": days,
            "job_id": job_id,
            "total_runs": total,
            "by_status": by_status,
            # None (not 0) when there is nothing to average over.
            "success_rate": round(success / total, 2) if total > 0 else None,
            "avg_duration_seconds": round(sum(durations) / len(durations), 1) if durations else None,
        }
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def partition_coverage(
    job_id: str,
    start_date: str,
    end_date: str,
    tool_context: ToolContext = None,  # type: ignore[assignment]
) -> dict[str, Any]:
    """Check partition coverage for a job over a date range.

    Args:
        job_id: UUID of the job.
        start_date: Start date in ISO format (YYYY-MM-DD).
        end_date: End date in ISO format (YYYY-MM-DD), inclusive.

    Returns which dates have successful runs and which are missing.
    """
    try:
        org_id = get_org_id(tool_context)
        store = get_store()
        # Only the 1000 runs returned by the store are inspected.
        job_runs = store.list_runs(org_id, job_id=UUID(job_id), limit=1000)

        first_day = datetime.date.fromisoformat(start_date)
        last_day = datetime.date.fromisoformat(end_date)

        # Partition dates inside the range that have a successful run.
        covered: set[datetime.date] = {
            run.partition_date
            for run in job_runs
            if run.status == "success"
            and run.partition_date
            and first_day <= run.partition_date <= last_day
        }

        # Every calendar day in [first_day, last_day]; empty when the range
        # is reversed.
        expected: list[datetime.date] = []
        one_day = datetime.timedelta(days=1)
        day = first_day
        while day <= last_day:
            expected.append(day)
            day += one_day

        # `expected` is already ascending and unique, so filtering keeps order.
        missing = [d for d in expected if d not in covered]
        if expected:
            coverage_pct = round(len(covered) / len(expected) * 100, 1)
        else:
            coverage_pct = 100.0

        summary: dict[str, Any] = {"status": "success"}
        summary["job_id"] = job_id
        summary["start_date"] = start_date
        summary["end_date"] = end_date
        summary["total_days"] = len(expected)
        summary["covered_days"] = len(covered)
        summary["missing_days"] = len(missing)
        summary["coverage_percent"] = coverage_pct
        summary["missing_dates"] = [d.isoformat() for d in missing]
        return summary
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def freshness_check(tool_context: ToolContext) -> dict[str, Any]:
    """Check data freshness for all jobs.

    Returns the last successful run timestamp for each job and flags
    any that haven't succeeded in over 24 hours.
    """
    # NOTE: the docstring above is left verbatim — ADK exposes tool docstrings
    # to the model as the tool description.
    try:
        org_id = get_org_id(tool_context)
        store = get_store()
        jobs = store.list_jobs(org_id)
        utc = datetime.timezone.utc
        now = datetime.datetime.now(tz=utc)

        results = []
        for job in jobs:
            # Disabled jobs are left out of the report entirely.
            if not job.enabled:
                continue
            job_id = job.id  # type: ignore[assignment]
            # presumably list_runs returns newest first, so limit=1 yields the
            # latest success — verify against the Store implementation.
            runs = store.list_runs(org_id, job_id=job_id, status="success", limit=1)
            last_success = runs[0] if runs else None

            hours_since = None
            if last_success and last_success.completed_at:
                completed = last_success.completed_at
                # Subtracting a naive timestamp from the aware `now` raises
                # TypeError and would fail the whole check. Assumes naive
                # values from the store are UTC — TODO confirm.
                if completed.tzinfo is None:
                    completed = completed.replace(tzinfo=utc)
                hours_since = round((now - completed).total_seconds() / 3600, 1)

            results.append({
                "job": serialize(job),
                "last_success_at": serialize(last_success.completed_at) if last_success else None,
                "hours_since_success": hours_since,
                # A job with no recorded success counts as stale.
                "stale": hours_since is None or hours_since > 24,
            })

        stale_count = sum(1 for r in results if r["stale"])
        return {
            "status": "success",
            "total_jobs": len(results),
            "stale_count": stale_count,
            "jobs": results,
        }
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""Catalog tools — discovery, schema inspection, and field search."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from google.adk.tools.tool_context import ToolContext
|
|
8
|
+
|
|
9
|
+
from interloper_agent.context import get_catalog, get_org_id, get_store, serialize
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def list_sources(tool_context: ToolContext) -> dict[str, Any]:
    """List all sources registered in the organisation.

    Returns each source with its key, name, asset count, and creation date.
    Also indicates which catalog sources have configured DB instances.
    """
    try:
        org_id = get_org_id(tool_context)
        store = get_store()
        catalog = get_catalog()

        # Index the organisation's DB sources by key; a later source with the
        # same key replaces an earlier one.
        instances: dict[str, Any] = {}
        for db_source in store.list_sources(org_id):
            record = serialize(db_source)
            if db_source.assets:
                record["asset_count"] = len(db_source.assets)
            else:
                record["asset_count"] = 0
            instances[db_source.key] = record

        # The catalog drives the listing: only catalog entries of kind
        # "source" are emitted, each annotated with its DB instance if any.
        sources = []
        for key, defn in catalog.items():
            if defn.get("kind") == "source":
                summary: dict[str, Any] = {
                    "key": key,
                    "name": defn.get("name", key),
                    "description": defn.get("description"),
                    "icon": defn.get("icon"),
                    "catalog_asset_count": len(defn.get("assets", [])),
                    "tags": defn.get("tags", []),
                }
                is_configured = key in instances
                summary["configured"] = is_configured
                if is_configured:
                    summary["instance"] = instances[key]
                sources.append(summary)

        return {"status": "success", "sources": sources}
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_source_detail(source_key: str, tool_context: ToolContext) -> dict[str, Any]:
    """Get full catalog detail for a source type.

    Args:
        source_key: The source key (e.g. 'facebook_ads').

    Returns the source definition including config schema, resource types,
    destination types, and a list of all its assets with their schemas.
    """
    # tool_context is accepted but not read: the lookup uses only the catalog.
    try:
        defn = get_catalog().get(source_key)
        # The entry must exist and be of kind "source" to count as a hit.
        if defn is not None and defn.get("kind") == "source":
            return {"status": "success", "source": defn}
        return {"status": "error", "error": f"Source '{source_key}' not found in catalog"}
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_asset_schema(source_key: str, asset_key: str, tool_context: ToolContext) -> dict[str, Any]:
    """Get the JSON schema for a specific asset within a source.

    Args:
        source_key: The source key (e.g. 'facebook_ads').
        asset_key: The asset key within the source (e.g. 'ad_insights').

    Returns the asset schema with field names, types, and descriptions.
    """
    # NOTE: the docstring above is left verbatim — ADK exposes tool docstrings
    # to the model as the tool description.
    # tool_context is accepted but not read: the lookup uses only the catalog.
    try:
        catalog = get_catalog()
        defn = catalog.get(source_key)
        if defn is None or defn.get("kind") != "source":
            return {"status": "error", "error": f"Source '{source_key}' not found in catalog"}

        qualified_key = f"{source_key}.{asset_key}"
        for asset_def in defn.get("assets", []):
            # An asset matches on its plain key or on its qualified key; both
            # cases produce the same payload, so they share a single branch.
            if asset_def.get("key") == asset_key or asset_def.get("qualified_key") == qualified_key:
                return {
                    "status": "success",
                    "source_key": source_key,
                    "asset_key": asset_key,
                    "qualified_key": qualified_key,
                    "schema": asset_def.get("asset_schema"),
                    "partitioning": asset_def.get("partitioning"),
                    "tags": asset_def.get("tags", []),
                    "requires": asset_def.get("requires", {}),
                    "optional_requires": asset_def.get("optional_requires", {}),
                }

        return {"status": "error", "error": f"Asset '{asset_key}' not found in source '{source_key}'"}
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def search_fields(query: str, tool_context: ToolContext) -> dict[str, Any]:
    """Search for fields across all asset schemas matching a query string.

    Args:
        query: Substring to search for in field names and descriptions (case-insensitive).

    Returns matching fields grouped by source and asset, with field type and description.
    """
    # NOTE: the docstring above is left verbatim — ADK exposes tool docstrings
    # to the model as the tool description.
    # tool_context is accepted but not read: the search uses only the catalog.
    try:
        catalog = get_catalog()
        query_lower = query.lower()
        matches: list[dict[str, Any]] = []

        for source_key, defn in catalog.items():
            if defn.get("kind") != "source":
                continue
            for asset_def in defn.get("assets", []):
                asset_key = asset_def.get("key", "")
                schema = asset_def.get("asset_schema")
                # Assets without a schema, or whose schema has no
                # "properties" entry, have nothing to search.
                if not schema or "properties" not in schema:
                    continue
                for field_name, field_info in schema["properties"].items():
                    # `or ""` also covers a description that is present but
                    # None, which would otherwise make `.lower()` raise and
                    # abort the entire search.
                    field_desc = field_info.get("description") or ""
                    if query_lower in field_name.lower() or query_lower in field_desc.lower():
                        matches.append({
                            "source_key": source_key,
                            "asset_key": asset_key,
                            "qualified_key": f"{source_key}.{asset_key}",
                            "field_name": field_name,
                            "field_type": _extract_type(field_info),
                            "description": field_desc,
                        })

        return {"status": "success", "query": query, "match_count": len(matches), "matches": matches}
    except Exception as e:
        return {"status": "error", "error": str(e)}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def compare_schemas(
    source_key_a: str,
    asset_key_a: str,
    source_key_b: str,
    asset_key_b: str,
    tool_context: ToolContext,
) -> dict[str, Any]:
    """Compare the schemas of two assets side by side.

    Args:
        source_key_a: Source key for the first asset.
        asset_key_a: Asset key for the first asset.
        source_key_b: Source key for the second asset.
        asset_key_b: Asset key for the second asset.

    Returns shared fields, fields unique to each asset, and any type mismatches.
    """

    def _lookup_error(result: dict[str, Any]) -> str | None:
        # _get_schema_properties signals failure with exactly {"error": "<message>"}.
        # A real properties mapping may legitimately contain a field called "error",
        # but its value is a field definition (a dict), never a plain string.
        if set(result) == {"error"} and isinstance(result["error"], str):
            return result["error"]
        return None

    try:
        schema_a = _get_schema_properties(source_key_a, asset_key_a)
        schema_b = _get_schema_properties(source_key_b, asset_key_b)

        # Report the first asset's lookup failure before the second's.
        for candidate in (schema_a, schema_b):
            message = _lookup_error(candidate)
            if message is not None:
                return {"status": "error", "error": message}

        fields_a = set(schema_a)
        fields_b = set(schema_b)
        shared = fields_a & fields_b
        only_a = fields_a - fields_b
        only_b = fields_b - fields_a

        shared_details = []
        for field in sorted(shared):
            type_a = _extract_type(schema_a[field])
            type_b = _extract_type(schema_b[field])
            shared_details.append({
                "field": field,
                "type_a": type_a,
                "type_b": type_b,
                # False marks a type mismatch between the two assets.
                "type_match": type_a == type_b,
            })

        return {
            "status": "success",
            "asset_a": f"{source_key_a}.{asset_key_a}",
            "asset_b": f"{source_key_b}.{asset_key_b}",
            "shared_count": len(shared),
            "only_a_count": len(only_a),
            "only_b_count": len(only_b),
            "shared_fields": shared_details,
            "only_in_a": sorted(only_a),
            "only_in_b": sorted(only_b),
        }
    except Exception as e:
        # Tools report failures as data instead of raising.
        return {"status": "error", "error": str(e)}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def list_destinations(tool_context: ToolContext) -> dict[str, Any]:
    """List all configured destinations in the organisation.

    Returns each destination with its key, name, config, and resource bindings.
    """
    try:
        organisation = get_org_id(tool_context)
        found = get_store().list_destinations(organisation)
        # Each record goes through the shared serialize helper before being returned.
        payload = list(map(serialize, found))
        return {"status": "success", "destinations": payload}
    except Exception as exc:
        # Failures are returned as data rather than raised.
        return {"status": "error", "error": str(exc)}
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
# Helpers
|
|
238
|
+
# ---------------------------------------------------------------------------
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _extract_type(field_info: dict[str, Any]) -> str:
|
|
242
|
+
"""Extract a human-readable type string from a JSON schema field definition."""
|
|
243
|
+
if "anyOf" in field_info:
|
|
244
|
+
types = [t.get("type", "unknown") for t in field_info["anyOf"] if t.get("type") != "null"]
|
|
245
|
+
return types[0] if len(types) == 1 else " | ".join(types) if types else "any"
|
|
246
|
+
return field_info.get("type", "unknown")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _get_schema_properties(source_key: str, asset_key: str) -> dict[str, Any]:
    """Look up the ``properties`` mapping of one asset's JSON schema in the catalog.

    Never raises for a missing source, asset or schema: those cases come back as a
    single-key ``{"error": "<message>"}`` dict that the caller has to recognise.
    """
    source_def = get_catalog().get(source_key)
    if source_def is None or source_def.get("kind") != "source":
        return {"error": f"Source '{source_key}' not found in catalog"}

    # First asset with a matching key wins.
    wanted = next(
        (candidate for candidate in source_def.get("assets", []) if candidate.get("key") == asset_key),
        None,
    )
    if wanted is None:
        return {"error": f"Asset '{asset_key}' not found in source '{source_key}'"}

    asset_schema = wanted.get("asset_schema")
    if asset_schema and "properties" in asset_schema:
        return asset_schema["properties"]
    return {"error": f"Asset '{source_key}.{asset_key}' has no schema"}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""Lineage tools — dependency analysis, impact assessment, and DAG traversal."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
from google.adk.tools.tool_context import ToolContext
|
|
10
|
+
|
|
11
|
+
from interloper_agent.context import get_org_id, get_store
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_upstream(asset_id: str, tool_context: ToolContext) -> dict[str, Any]:
    """Get the direct upstream dependencies of an asset.

    Args:
        asset_id: UUID of the asset to inspect.

    Returns the list of upstream assets that this asset depends on,
    including the parameter name used for each dependency.
    """
    try:
        # Parse first so a malformed id is reported before any store round-trip.
        target = UUID(asset_id)
        org_id = get_org_id(tool_context)
        store = get_store()

        upstream = []
        for dep in store.list_dependencies(org_id):
            if dep.asset_id != target:
                continue
            asset = store.get_asset(dep.upstream_asset_id)
            upstream.append({
                "upstream_asset_id": str(dep.upstream_asset_id),
                "param_name": dep.param_name,
                "asset_key": asset.key,
                # "" rather than the literal "None" for a source-less asset, the same
                # convention cross_source_dependencies and _build_adjacency use.
                "source_id": str(asset.source_id) if asset.source_id else "",
            })

        return {"status": "success", "asset_id": asset_id, "upstream": upstream}
    except Exception as e:
        # Tools report failures as data instead of raising.
        return {"status": "error", "error": str(e)}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_downstream(asset_id: str, tool_context: ToolContext) -> dict[str, Any]:
    """Get the direct downstream dependents of an asset.

    Args:
        asset_id: UUID of the asset to inspect.

    Returns the list of assets that directly depend on this asset.
    """
    try:
        # Parse first so a malformed id is reported before any store round-trip.
        target = UUID(asset_id)
        org_id = get_org_id(tool_context)
        store = get_store()

        downstream = []
        for dep in store.list_dependencies(org_id):
            if dep.upstream_asset_id != target:
                continue
            asset = store.get_asset(dep.asset_id)
            downstream.append({
                "asset_id": str(dep.asset_id),
                "param_name": dep.param_name,
                "asset_key": asset.key,
                # "" rather than the literal "None" for a source-less asset, the same
                # convention cross_source_dependencies and _build_adjacency use.
                "source_id": str(asset.source_id) if asset.source_id else "",
            })

        return {"status": "success", "asset_id": asset_id, "downstream": downstream}
    except Exception as e:
        # Tools report failures as data instead of raising.
        return {"status": "error", "error": str(e)}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def get_full_lineage(asset_id: str, direction: str = "upstream", tool_context: ToolContext = None) -> dict[str, Any]:  # type: ignore[assignment]
    """Recursively traverse the full lineage of an asset.

    Args:
        asset_id: UUID of the asset to start from.
        direction: Either 'upstream' (ancestors) or 'downstream' (dependents).

    Returns an ordered list of assets in the lineage chain with depth levels.
    """
    from collections import deque  # local import: the module only imports defaultdict

    try:
        # _build_adjacency treats anything that is not "upstream" as downstream, so an
        # unexpected value would silently produce the wrong graph. Reject it here.
        if direction not in ("upstream", "downstream"):
            return {
                "status": "error",
                "error": f"Invalid direction '{direction}': expected 'upstream' or 'downstream'",
            }

        # Parse before touching the store so a malformed id fails fast.
        target = UUID(asset_id)
        org_id = get_org_id(tool_context)
        adj, asset_info = _build_adjacency(org_id, direction)

        visited: set[UUID] = set()
        result: list[dict[str, Any]] = []
        # Breadth-first walk: the first time a node is dequeued is at its minimum depth.
        queue: deque[tuple[UUID, int]] = deque([(target, 0)])

        while queue:
            current, depth = queue.popleft()
            if current in visited:
                continue
            visited.add(current)
            if current != target:
                # The starting asset is not part of its own lineage.
                info = asset_info.get(current, {})
                result.append({"asset_id": str(current), "depth": depth, **info})
            for neighbor in adj.get(current, []):
                if neighbor not in visited:
                    queue.append((neighbor, depth + 1))

        return {
            "status": "success",
            "asset_id": asset_id,
            "direction": direction,
            "lineage_count": len(result),
            "lineage": result,
        }
    except Exception as e:
        # Tools report failures as data instead of raising.
        return {"status": "error", "error": str(e)}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def impact_analysis(asset_id: str, tool_context: ToolContext) -> dict[str, Any]:
    """Analyze the downstream impact if an asset fails or is disabled.

    Args:
        asset_id: UUID of the asset to analyze.

    Returns all downstream assets grouped by source, with total affected count.
    """
    from collections import deque  # local import: the module only imports defaultdict

    try:
        # Parse before touching the store so a malformed id fails fast.
        target = UUID(asset_id)
        org_id = get_org_id(tool_context)
        adj, asset_info = _build_adjacency(org_id, "downstream")

        visited: set[UUID] = set()
        affected: list[dict[str, Any]] = []
        # Breadth-first walk: the first time a node is dequeued is at its minimum depth.
        queue: deque[tuple[UUID, int]] = deque([(target, 0)])

        while queue:
            current, depth = queue.popleft()
            if current in visited:
                continue
            visited.add(current)
            if current != target:
                # The analysed asset itself is not counted as affected.
                info = asset_info.get(current, {})
                affected.append({"asset_id": str(current), "depth": depth, **info})
            for neighbor in adj.get(current, []):
                if neighbor not in visited:
                    queue.append((neighbor, depth + 1))

        # Group by source; assets with no entry in asset_info fall under "unknown".
        by_source: dict[str, list[dict[str, Any]]] = defaultdict(list)
        for item in affected:
            by_source[item.get("source_key", "unknown")].append(item)

        return {
            "status": "success",
            "asset_id": asset_id,
            "total_affected": len(affected),
            # Hand back a plain dict rather than the defaultdict.
            "by_source": dict(by_source),
        }
    except Exception as e:
        # Tools report failures as data instead of raising.
        return {"status": "error", "error": str(e)}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def cross_source_dependencies(tool_context: ToolContext) -> dict[str, Any]:
    """List all dependency edges that cross source boundaries.

    Returns edges where the upstream and downstream assets belong to
    different sources.
    """
    try:
        organisation = get_org_id(tool_context)
        store = get_store()
        edges = store.list_dependencies(organisation)
        # Assets without an id cannot be referenced by an edge, so they are dropped.
        known_assets = [asset for asset in store.list_assets(organisation) if asset.id]

        source_of: dict[UUID, UUID | None] = {asset.id: asset.source_id for asset in known_assets}
        summary_of: dict[UUID, dict[str, str]] = {
            asset.id: {"asset_key": asset.key, "source_id": str(asset.source_id) if asset.source_id else ""}
            for asset in known_assets
        }

        crossing = []
        for edge in edges:
            downstream_source = source_of.get(edge.asset_id)
            upstream_source = source_of.get(edge.upstream_asset_id)
            # Both ends need a known source, and the two sources must differ.
            if not (downstream_source and upstream_source):
                continue
            if downstream_source == upstream_source:
                continue
            crossing.append({
                "downstream_asset_id": str(edge.asset_id),
                "downstream": summary_of.get(edge.asset_id, {}),
                "upstream_asset_id": str(edge.upstream_asset_id),
                "upstream": summary_of.get(edge.upstream_asset_id, {}),
                "param_name": edge.param_name,
            })

        return {"status": "success", "cross_source_count": len(crossing), "dependencies": crossing}
    except Exception as exc:
        # Failures are returned as data rather than raised.
        return {"status": "error", "error": str(exc)}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# ---------------------------------------------------------------------------
|
|
199
|
+
# Helpers
|
|
200
|
+
# ---------------------------------------------------------------------------
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _build_adjacency(
    org_id: UUID,
    direction: str,
) -> tuple[dict[UUID, list[UUID]], dict[UUID, dict[str, Any]]]:
    """Load every dependency and asset of an organisation and index them for traversal.

    Args:
        org_id: Organisation scope.
        direction: 'upstream' maps each asset to the assets it depends on; any other
            value maps each asset to the assets that depend on it.

    Returns:
        (adjacency_map, asset_info_map), where each asset_info entry carries
        ``asset_key``, ``source_id`` and ``source_key``.
    """
    store = get_store()
    edges = store.list_dependencies(org_id)
    all_assets = store.list_assets(org_id)

    asset_info: dict[UUID, dict[str, Any]] = {}
    source_keys: dict[UUID, str] = {}
    for asset in all_assets:
        if not asset.id:
            continue
        asset_info[asset.id] = {
            "asset_key": asset.key,
            "source_id": str(asset.source_id) if asset.source_id else "",
        }
        if asset.source and asset.source_id:
            source_keys[asset.source_id] = asset.source.key

    # Second pass: a source key may only be discovered on a later asset of the same
    # source, so names are resolved once the whole source_keys table is built.
    for info in asset_info.values():
        source_id_text = info["source_id"]
        info["source_key"] = source_keys.get(UUID(source_id_text), "unknown") if source_id_text else ""

    walk_up = direction == "upstream"
    adj: dict[UUID, list[UUID]] = defaultdict(list)
    for edge in edges:
        if walk_up:
            origin, destination = edge.asset_id, edge.upstream_asset_id
        else:
            origin, destination = edge.upstream_asset_id, edge.asset_id
        adj[origin].append(destination)

    return adj, asset_info
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""Operations tools — run health, job status, failures, and backfill monitoring."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
from uuid import UUID
|
|
7
|
+
|
|
8
|
+
from google.adk.tools.tool_context import ToolContext
|
|
9
|
+
|
|
10
|
+
from interloper_agent.context import get_org_id, get_store, serialize
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def list_recent_runs(
    job_id: str | None = None,
    status: str | None = None,
    limit: int = 20,
    tool_context: ToolContext = None,  # type: ignore[assignment]
) -> dict[str, Any]:
    """List recent runs with optional filters.

    Args:
        job_id: Filter by job UUID (optional).
        status: Filter by status: 'queued', 'running', 'success', 'failed', 'canceled' (optional).
        limit: Maximum number of runs to return (default 20).
    """
    try:
        organisation = get_org_id(tool_context)
        store = get_store()
        # An empty or missing job_id means "no job filter".
        filters = {
            "job_id": UUID(job_id) if job_id else None,
            "status": status,
            "limit": limit,
        }
        found = store.list_runs(organisation, **filters)
        return {"status": "success", "count": len(found), "runs": [serialize(item) for item in found]}
    except Exception as exc:
        # Failures are returned as data rather than raised.
        return {"status": "error", "error": str(exc)}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_run_detail(run_id: str, tool_context: ToolContext) -> dict[str, Any]:
    """Get full detail for a single run including events and per-asset execution status.

    Args:
        run_id: UUID of the run.

    Returns the run metadata, event timeline, and per-asset execution summary.
    """
    # NOTE(review): unlike the list_* tools in this module, this lookup is keyed by run
    # id alone and never reads tool_context / the organisation id. Confirm the store
    # enforces organisation scoping for get_run, list_events and list_asset_executions.
    try:
        store = get_store()
        parsed_id = UUID(run_id)
        run_record = store.get_run(parsed_id)
        timeline = store.list_events(run_id=parsed_id)
        per_asset = store.list_asset_executions(parsed_id)

        detail: dict[str, Any] = {"status": "success"}
        detail["run"] = serialize(run_record)
        detail["events"] = [serialize(event) for event in timeline]
        # Asset executions are passed through exactly as the store returns them.
        detail["asset_executions"] = per_asset
        return detail
    except Exception as exc:
        # Failures are returned as data rather than raised.
        return {"status": "error", "error": str(exc)}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def list_failures(limit: int = 20, tool_context: ToolContext = None) -> dict[str, Any]:  # type: ignore[assignment]
    """List recent failed runs with their error events.

    Args:
        limit: Maximum number of failed runs to return (default 20).

    Returns failed runs along with the error messages from their events.
    """
    try:
        organisation = get_org_id(tool_context)
        store = get_store()

        def _error_events(run_id: Any) -> list[dict[str, Any]]:
            # Keep only the events of this run that carry an error.
            return [
                {"asset_key": event.asset_key, "error": event.error, "timestamp": serialize(event.timestamp)}
                for event in store.list_events(run_id=run_id)
                if event.error
            ]

        failures = []
        for failed in store.list_runs(organisation, status="failed", limit=limit):
            # One events query per failed run.
            errors = _error_events(failed.id)
            failures.append({"run": serialize(failed), "errors": errors})

        return {"status": "success", "count": len(failures), "failures": failures}
    except Exception as exc:
        # Failures are returned as data rather than raised.
        return {"status": "error", "error": str(exc)}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def get_job_health(job_id: str, tool_context: ToolContext) -> dict[str, Any]:
    """Get health summary for a job: metadata, recent success/failure rate.

    Args:
        job_id: UUID of the job to inspect.

    Returns job metadata plus success rate computed from the last 20 runs.
    """
    # NOTE(review): get_job is keyed by job id only, while list_runs is scoped by the
    # organisation id. Confirm the store restricts get_job to the caller's organisation.
    try:
        # Parse first so a malformed id is reported before any store round-trip.
        jid = UUID(job_id)
        org_id = get_org_id(tool_context)
        store = get_store()
        job = store.get_job(jid)
        runs = store.list_runs(org_id, job_id=jid, limit=20)

        total = len(runs)
        success = sum(1 for r in runs if r.status == "success")
        failed = sum(1 for r in runs if r.status == "failed")

        # Only runs with both timestamps contribute to the average duration.
        durations = [
            (r.completed_at - r.started_at).total_seconds()
            for r in runs
            if r.started_at and r.completed_at
        ]
        avg_duration_seconds = sum(durations) / len(durations) if durations else None

        return {
            "status": "success",
            "job": serialize(job),
            "health": {
                "total_recent_runs": total,
                "success_count": success,
                "failed_count": failed,
                # The rate is over all recent runs, including queued or running ones.
                "success_rate": round(success / total, 2) if total > 0 else None,
                # `is not None`, not truthiness: an average of 0.0 seconds is a real
                # measurement and must not be reported as missing.
                "avg_duration_seconds": (
                    round(avg_duration_seconds, 1) if avg_duration_seconds is not None else None
                ),
            },
        }
    except Exception as e:
        # Tools report failures as data instead of raising.
        return {"status": "error", "error": str(e)}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def list_jobs(tool_context: ToolContext) -> dict[str, Any]:
    """List all scheduled jobs in the organisation.

    Returns each job with its name, cron expression, enabled status,
    last_run_at, and next_run_at.
    """
    try:
        organisation = get_org_id(tool_context)
        found = get_store().list_jobs(organisation)
        # Each record goes through the shared serialize helper before being returned.
        payload = list(map(serialize, found))
        return {"status": "success", "count": len(found), "jobs": payload}
    except Exception as exc:
        # Failures are returned as data rather than raised.
        return {"status": "error", "error": str(exc)}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def list_backfills(active_only: bool = True, tool_context: ToolContext = None) -> dict[str, Any]:  # type: ignore[assignment]
    """List backfills, optionally filtered to active ones only.

    Args:
        active_only: If true, only return running/queued backfills (default true).

    Returns backfills with their status, date range, and partition progress.
    """
    try:
        organisation = get_org_id(tool_context)
        store = get_store()
        # Pick the store query once, then call it with the organisation scope.
        fetch = store.list_active_backfills if active_only else store.list_backfills
        found = fetch(organisation)
        return {"status": "success", "count": len(found), "backfills": [serialize(item) for item in found]}
    except Exception as exc:
        # Failures are returned as data rather than raised.
        return {"status": "error", "error": str(exc)}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: interloper-agent
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Interloper AI agent powered by Google ADK
|
|
5
|
+
Author: Guillaume Onfroy
|
|
6
|
+
Author-email: Guillaume Onfroy <guillaume@digitlcloud.com>
|
|
7
|
+
Requires-Dist: interloper-core
|
|
8
|
+
Requires-Dist: interloper-db
|
|
9
|
+
Requires-Dist: interloper-assets
|
|
10
|
+
Requires-Dist: google-adk>=1.0.0
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# interloper-agent
|
|
15
|
+
|
|
16
|
+
Interloper AI agent powered by [Google ADK](https://google.github.io/adk-docs/).
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
interloper_agent/__init__.py,sha256=Toi-D_RUs3W5-TjHwWQgUNVFdlXseGa1fSlHOrEOI_Y,158
|
|
2
|
+
interloper_agent/agent.py,sha256=wnySEoZh3MxUS6UzCMq0b5Fg4JgMP3yylieMhkPNQXE,2740
|
|
3
|
+
interloper_agent/context.py,sha256=WP-_I9uHR4LGM1q8YWr3rzKUJrcUsS81P0_4fhEuCNA,2927
|
|
4
|
+
interloper_agent/prompts.py,sha256=hy35kpi9huU5PGUDx4f2bODL7lc0W05IDwpLt-k0Kn4,4027
|
|
5
|
+
interloper_agent/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
interloper_agent/tools/actions.py,sha256=woGQtTFcbQv3FSqf-RwicbXAcSJFiwQBZBunjDGXp88,4184
|
|
7
|
+
interloper_agent/tools/analytics.py,sha256=tdjCscOefrcJrjq4ehbRSULNhV7DLGg4Mowjw9LW4VY,5401
|
|
8
|
+
interloper_agent/tools/catalog.py,sha256=PEjfUbnNPPteIHeXy-Zq760FPtWtu8kMMWqMGAioYcg,10355
|
|
9
|
+
interloper_agent/tools/lineage.py,sha256=5flThYz5Ar1zlF98A2CtTRIgTjU_3VtU3o_l85pwRJQ,8476
|
|
10
|
+
interloper_agent/tools/operations.py,sha256=8rfeNExTe9LLh-qj812EAdVKqlUZ8HAPuIjDBJcL-0I,5913
|
|
11
|
+
interloper_agent-0.2.0.dist-info/WHEEL,sha256=f5fWSvWsg5Knq5GWa6t1nJIug0Tqo69GqAWD_9LbBKw,81
|
|
12
|
+
interloper_agent-0.2.0.dist-info/METADATA,sha256=T78qnLme1EUVxeoRI_6fKXDTw7hUolWxAFrXiJhA-M8,487
|
|
13
|
+
interloper_agent-0.2.0.dist-info/RECORD,,
|