clusop 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clusop/__init__.py +23 -0
- clusop/analysis/__init__.py +1 -0
- clusop/analysis/confidence.py +71 -0
- clusop/analysis/parser.py +110 -0
- clusop/analysis/signal.py +38 -0
- clusop/analysis/waste.py +36 -0
- clusop/jars/.gitkeep +1 -0
- clusop/jars/photon_listener.jar +0 -0
- clusop/pricing/__init__.py +1 -0
- clusop/pricing/price_table.json +34 -0
- clusop/pricing/provider.py +61 -0
- clusop/runtime/__init__.py +1 -0
- clusop/runtime/bootstrap.py +117 -0
- clusop/runtime/detect.py +106 -0
- clusop/service/__init__.py +1 -0
- clusop/service/aggregator.py +69 -0
- clusop/service/onboard.py +34 -0
- clusop/service/resolver.py +59 -0
- clusop/service/suppression.py +40 -0
- clusop/service/teams.py +44 -0
- clusop-0.0.1.dist-info/METADATA +107 -0
- clusop-0.0.1.dist-info/RECORD +24 -0
- clusop-0.0.1.dist-info/WHEEL +4 -0
- clusop_autoload.pth +1 -0
clusop/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""clusop — Photon Fallback Analyzer.
|
|
2
|
+
|
|
3
|
+
Zero-touch Databricks cost forensics. Installed as one PyPI wheel (``pip install
|
|
4
|
+
clusop``) that bundles a Scala QueryExecutionListener JAR; a ``.pth`` self-arms it at
|
|
5
|
+
interpreter startup. It detects when Photon silently falls back to the JVM, models
|
|
6
|
+
the wasted DBU premium, and proposes a human-approved fix — it never auto-applies.
|
|
7
|
+
|
|
8
|
+
`import clusop` is intentionally light and does NOT arm anything. The self-arming
|
|
9
|
+
tripwire is installed by ``clusop_autoload.pth`` at interpreter startup (it runs
|
|
10
|
+
``import clusop.runtime.bootstrap``).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
__version__ = "0.0.1"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def diagnose(plan_text: str, *, runtime_seconds: float = 0.0):
    """Analyze a physical-plan string offline.

    Thin wrapper over ``clusop.analysis.parser.analyze_plan``. The parser is
    imported at call time, so a bare ``import clusop`` does not load it.

    Args:
        plan_text: Physical-plan text (e.g. from EXPLAIN FORMATTED).
        runtime_seconds: Query runtime in seconds, forwarded unchanged.

    Returns:
        Whatever ``analyze_plan`` returns for this plan.
    """
    from clusop.analysis import parser as _parser

    analysis = _parser.analyze_plan(plan_text, runtime_seconds=runtime_seconds)
    return analysis
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""clusop — Photon Fallback Analyzer (zero-touch Databricks cost forensics)."""
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Decomposed confidence — four independent axes, never one MIN'd number.
|
|
2
|
+
|
|
3
|
+
A card must never assert more certainty about its *recommendation* than it has on
|
|
4
|
+
the weakest leg supporting it. One number collapses four distinct questions:
|
|
5
|
+
parse — did we read the plan correctly?
|
|
6
|
+
diagnosis — do we know WHY it fell back?
|
|
7
|
+
cost — how trustworthy is the dollar figure?
|
|
8
|
+
recommendation (DERIVED) — bounded by the legs the action actually depends on.
|
|
9
|
+
|
|
10
|
+
Example: a FIX-the-UDF rec doesn't depend on the $ figure, so it isn't capped by a
|
|
11
|
+
weak cost leg; a DISABLE-Photon rec IS a dollar decision, so it is.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from enum import IntEnum
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Level(IntEnum):
    """Ordered confidence level; a larger integer value means more confidence."""

    LOW = 0
    MEDIUM = 1
    HIGH = 2

    def label(self) -> str:
        """Return the member's name (``"LOW"``, ``"MEDIUM"`` or ``"HIGH"``)."""
        member_name = self.name
        return member_name
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _min(*levels: Level) -> Level:
    """Return the lowest of the given levels, i.e. the weakest supporting leg."""
    weakest = min(map(int, levels))
    return Level(weakest)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class Confidence:
    """Decomposed confidence: one level per axis instead of a single number."""

    parse: Level           # was the plan read correctly?
    diagnosis: Level       # is the reason for the fallback known?
    cost: Level            # how trustworthy is the dollar figure?
    recommendation: Level  # derived from the legs the action depends on

    def to_dict(self) -> dict:
        """Return ``{axis: label}`` for the four axes, in declaration order."""
        return {
            "parse": self.parse.label(),
            "diagnosis": self.diagnosis.label(),
            "cost": self.cost.label(),
            "recommendation": self.recommendation.label(),
        }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def score(analysis, *, version_certified: Level, cost_tier_billed: bool,
          attribution: Level) -> Confidence:
    """Derive the four confidence axes for one plan analysis.

    Args:
        analysis: Object exposing ``aqe_ok``, ``partial``, ``udf_fallback``,
            ``round_trips``, ``fallback_weight`` and ``verdict.kind``
            (a PlanAnalysis).
        version_certified: Certification level for the runtime in use.
        cost_tier_billed: True when the cost was reconciled from billing data
            rather than modeled.
        attribution: Concurrency-aware confidence in the cost attribution.

    Returns:
        A ``Confidence`` whose ``recommendation`` is the minimum of exactly the
        legs the verdict's action depends on, or LOW when there is no action.
    """
    # parse: HIGH only when the plan was final and not truncated.
    plan_read_cleanly = analysis.aqe_ok and not analysis.partial
    parse = Level.HIGH if plan_read_cleanly else Level.LOW

    # diagnosis: a UDF or a round-trip is a clear signature of the cause.
    has_clear_signature = analysis.udf_fallback or analysis.round_trips > 0
    signature_level = Level.HIGH if has_clear_signature else Level.MEDIUM
    diagnosis = _min(version_certified, signature_level)

    # cost: billed vs modeled, whether any severity was measured, attribution.
    basis_level = Level.HIGH if cost_tier_billed else Level.MEDIUM
    severity_level = Level.HIGH if analysis.fallback_weight > 0 else Level.LOW
    cost = _min(basis_level, severity_level, attribution)

    # recommendation: bounded only by the legs the action actually needs.
    kind = analysis.verdict.kind
    if kind == "fix_fallback":
        legs = (parse, diagnosis)            # not a dollar decision
    elif kind == "disable_photon":
        legs = (parse, diagnosis, cost)      # a dollar decision, needs cost too
    else:
        legs = ()
    rec = _min(*legs) if legs else Level.LOW

    return Confidence(parse=parse, diagnosis=diagnosis, cost=cost, recommendation=rec)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Version-resilient, STRUCTURAL physical-plan parser — the detection moat.
|
|
2
|
+
|
|
3
|
+
Why structural (not a raw count): a fully-Photon query STILL ends with one
|
|
4
|
+
ColumnarToRow at the root, to hand columnar batches back to the driver as rows.
|
|
5
|
+
That terminal transition is NORMAL, not waste. Real fallback is a *mid-plan*
|
|
6
|
+
ColumnarToRow (Photon -> rows -> JVM op), often with a RowToColumnar round-trip
|
|
7
|
+
back into Photon, or a (Batch|Arrow)EvalPython for a Python UDF. Counting raw
|
|
8
|
+
occurrences false-positives on every healthy Photon query — so we distinguish the
|
|
9
|
+
ROOT transition from interior ones.
|
|
10
|
+
|
|
11
|
+
Operator names are internal Spark/DBR strings and change across runtimes, so we
|
|
12
|
+
match FAMILIES via regex, never exact class names. # ASSUMPTION: verified per DBR
|
|
13
|
+
by the certification harness.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import re
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
|
|
21
|
+
# Operator FAMILIES, matched case-insensitively rather than by exact class name.
_PHOTON = re.compile(r"(?i)\bPhoton[A-Za-z]+")        # any Photon* operator
_COL2ROW = re.compile(r"(?i)ColumnarToRow")           # columnar -> row transition
_ROW2COL = re.compile(r"(?i)RowToColumnar")           # row -> columnar transition
_UDF = re.compile(r"(?i)(?:Batch|Arrow)EvalPython")   # Python UDF evaluation node

# Adaptive plan that has not been finalized yet.
_AQE_NOT_FINAL = re.compile(r"(?i)AdaptiveSparkPlan\s+isFinalPlan=false")

# Pathological-plan guard (P99 latency budget): lines beyond this are dropped.
_NODE_CAP = 4000
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class Verdict:
    """Outcome of a plan analysis: what to do, why, and a one-line summary."""

    # "none" | "fix_fallback" | "disable_photon"
    kind: str
    # "none" | "python_udf" | "unsupported_op"
    cause: str
    # Human-readable explanation; empty when not provided.
    summary: str = ""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class PlanAnalysis:
    """Counters and verdict produced by parsing one physical plan."""

    photon_nodes: int = 0            # number of Photon* operator matches
    terminal_col2row: bool = False   # ColumnarToRow found at the plan root
    mid_plan_col2row: int = 0        # ColumnarToRow occurrences that are not the root one
    round_trips: int = 0             # RowToColumnar = JVM->Photon hop = definite fallback
    udf_fallback: bool = False       # a (Batch|Arrow)EvalPython node was seen
    aqe_ok: bool = True              # False when the adaptive plan is not final
    partial: bool = False            # plan truncated / over the node cap
    photon_ratio: float = 1.0        # photon nodes / (photon nodes + fallback signals)
    fallback_weight: float = 0.0     # 0..1 severity (drives $ and cost confidence)
    verdict: Verdict = field(default_factory=lambda: Verdict("none", "none"))

    @property
    def has_fallback(self) -> bool:
        """True when the verdict is anything other than ``"none"``."""
        return not (self.verdict.kind == "none")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Section banners printed around plans, e.g. "== Physical Plan ==" or "+- == Final Plan ==".
_PLAN_BANNER = re.compile(r"^[\s:+|\-]*==.*==\s*$")
# The adaptive-execution wrapper node; the real root operator is printed beneath it.
_AQE_WRAPPER = re.compile(r"(?i)\bAdaptiveSparkPlan\b")


def _count_root_col2row(lines: list[str]) -> int:
    """Count ColumnarToRow transitions sitting at the root of the plan or of a
    ``== ... ==`` section, skipping banner lines and the AdaptiveSparkPlan wrapper."""
    count = 0
    at_root = True
    for ln in lines:
        if _PLAN_BANNER.match(ln):
            at_root = True       # the next operator line starts a new tree
            continue
        if _AQE_WRAPPER.search(ln):
            continue             # wrapper only; keep looking for the real root
        if at_root:
            if _COL2ROW.search(ln):
                count += 1
            at_root = False
    return count


def analyze_plan(plan_text: str, runtime_seconds: float = 0.0,
                 photon_enabled: bool | None = None) -> PlanAnalysis:
    """Parse a physical-plan string and classify any Photon fallback.

    Args:
        plan_text: Plan text, one operator per line. Blank lines are ignored and
            at most ``_NODE_CAP`` lines are examined (``partial`` is set beyond).
        runtime_seconds: Accepted for interface compatibility; not used here.
        photon_enabled: True when the cluster is known to run Photon. Lets a plan
            with no Photon operators at all be reported as a full fallback.

    Returns:
        A populated ``PlanAnalysis``. An empty or whitespace-only plan yields the
        default (no-verdict) analysis.
    """
    a = PlanAnalysis()
    if not plan_text:
        return a
    lines = [ln for ln in plan_text.splitlines() if ln.strip()]
    if not lines:
        # Whitespace-only text carries no plan; treat it like an empty string
        # instead of reporting a "full fallback" on a Photon-enabled cluster.
        return a
    if len(lines) > _NODE_CAP:
        a.partial = True
        lines = lines[:_NODE_CAP]
    text = "\n".join(lines)

    a.aqe_ok = not bool(_AQE_NOT_FINAL.search(text))
    a.photon_nodes = len(_PHOTON.findall(text))
    total_col2row = len(_COL2ROW.findall(text))
    a.round_trips = len(_ROW2COL.findall(text))
    a.udf_fallback = bool(_UDF.search(text))

    # A ColumnarToRow at the ROOT is the normal result-boundary transition and must
    # NOT count as fallback. The root is not necessarily the first line: EXPLAIN
    # output starts with a "== Physical Plan ==" banner and adaptive plans wrap the
    # tree in AdaptiveSparkPlan, so those lines are skipped when locating it.
    # NOTE(review): formats that list each operator a second time in a details
    # section are still counted twice by the raw totals - confirm against callers.
    root_col2row = _count_root_col2row(lines)
    a.terminal_col2row = root_col2row > 0
    a.mid_plan_col2row = max(total_col2row - root_col2row, 0)

    photon_seen = a.photon_nodes > 0 or (photon_enabled is True)
    jvm_fallback = a.mid_plan_col2row + a.round_trips + (1 if a.udf_fallback else 0)

    # no Photon ops at all on a Photon-enabled cluster -> the whole query ran on JVM.
    full_fallback = (photon_enabled is True) and a.photon_nodes == 0

    if not photon_seen:
        a.verdict = Verdict("none", "none", "not a Photon plan")
        a.photon_ratio = 1.0
        return a

    denom = a.photon_nodes + jvm_fallback
    if full_fallback:
        # Nothing ran on Photon; never report a ratio of 1.0 for this case.
        a.photon_ratio = 0.0
    else:
        a.photon_ratio = (a.photon_nodes / denom) if denom else 1.0

    if jvm_fallback == 0 and not full_fallback:
        a.verdict = Verdict("none", "none", "clean Photon execution")
        a.fallback_weight = 0.0
        return a

    # severity: how much of the work bypassed the C++ engine.
    weight = 1.0 - a.photon_ratio
    if full_fallback:
        weight = 1.0
    if a.udf_fallback and a.photon_ratio < 0.5:
        weight = max(weight, 0.9)  # query bottlenecked by the JVM/Python transition
    a.fallback_weight = round(min(max(weight, 0.0), 1.0), 3)

    if a.udf_fallback:
        a.verdict = Verdict("fix_fallback", "python_udf",
                            "Photon premium paid but execution fell back to the JVM via a Python UDF.")
    else:
        a.verdict = Verdict("disable_photon", "unsupported_op",
                            "Photon fell back to the JVM (unsupported operator); premium largely wasted.")
    return a
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""The compact signal record the driver emits and the central service aggregates.
|
|
2
|
+
|
|
3
|
+
Driver emits SIGNALS (not cards). The signature is what collapses 500 statements
|
|
4
|
+
hitting the same fallback into one accumulating record (driver-local dedup).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def fallback_signature(plan_shape_hint: str, fallback_family: str, source_table: str) -> str:
    """Return a 16-hex-character key identifying one fallback problem.

    The three parts are joined with ``|``, lower-cased and hashed with SHA-256,
    so inputs differing only in letter case map to the same signature.
    """
    parts = (plan_shape_hint, fallback_family, source_table)
    key = "|".join(f"{part}" for part in parts).lower()
    digest = hashlib.sha256(key.encode()).hexdigest()
    return digest[:16]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class FallbackSignal:
    """Compact record describing one fallback signature seen on a cluster."""

    signature: str
    cluster_id: str
    job_id: str
    job_run_id: str
    cloud: str
    dbr: str
    cause: str                  # python_udf | unsupported_op
    verdict_kind: str           # fix_fallback | disable_photon
    fallback_weight: float
    runtime_seconds: float
    wasted_usd_modeled: float
    occurrences: int = 1
    confidence: dict = field(default_factory=dict)  # decomposed, from analysis
    evidence: str = ""          # short plan snippet (NO full plan retained)

    def to_dict(self) -> dict:
        """Return a shallow copy of the instance attributes as a plain dict."""
        return dict(vars(self))
|
clusop/analysis/waste.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Modeled premium-waste — dimensionally correct, cluster-size aware.
|
|
2
|
+
|
|
3
|
+
waste$ = runtime_hours
|
|
4
|
+
x Sum(node DBU/hr) # cluster size matters (2 nodes != 40 nodes)
|
|
5
|
+
x dbu_$_rate(compute_type)
|
|
6
|
+
x (photon_premium_multiplier - 1) # the PREMIUM, not the whole bill
|
|
7
|
+
x fallback_weight # 0..1 severity from the plan
|
|
8
|
+
|
|
9
|
+
i.e. of the Photon premium you paid for this run, the fraction that bought you
|
|
10
|
+
nothing because execution fell back to the JVM. Modeled (Tier-0); the central
|
|
11
|
+
cost-tier resolver upgrades it to billed when system.billing is readable.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class WasteEstimate:
    """Premium paid for a run and the share of it attributed to fallback."""

    # total Photon premium for the run
    premium_usd: float
    # premium x fallback_weight
    wasted_usd: float
    # "modeled" | "billed"
    basis: str
    # severity used for the estimate
    fallback_weight: float
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def modeled_waste(analysis, shape, provider, runtime_seconds: float) -> WasteEstimate:
    """Model the Photon premium wasted by a run.

    Args:
        analysis: Object exposing ``fallback_weight`` (a PlanAnalysis).
        shape: Object exposing ``driver_node_type``, ``worker_node_type``,
            ``num_workers`` and ``compute_type`` (a detect.ClusterShape).
        provider: Object exposing ``photon_premium_usd(...)`` (a PricingProvider).
        runtime_seconds: Run duration; negative values are treated as zero.

    Returns:
        A ``WasteEstimate`` with basis ``"modeled"``; ``wasted_usd`` is the
        premium multiplied by the fallback weight, rounded to 4 decimals.
    """
    runtime_hours = max(runtime_seconds, 0.0) / 3600.0
    total_premium = provider.photon_premium_usd(
        shape.driver_node_type,
        shape.worker_node_type,
        shape.num_workers,
        shape.compute_type,
        runtime_hours,
    )
    wasted_share = round(total_premium * analysis.fallback_weight, 4)
    return WasteEstimate(
        premium_usd=total_premium,
        wasted_usd=wasted_share,
        basis="modeled",
        fallback_weight=analysis.fallback_weight,
    )
|
clusop/jars/.gitkeep
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# The compiled Scala listener JAR is built by CI (sbt) and copied here before packaging.
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""clusop — Photon Fallback Analyzer (zero-touch Databricks cost forensics)."""
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_comment": "EXAMPLE list prices — VERIFY per cloud/region/tier before trusting dollar figures. cloud_usd_per_hour should be auto-refreshed from the public AWS Pricing API / Azure Retail Prices API / GCP Catalog; dbu_per_hour + photon_premium_multiplier come from the Databricks price list. The agent AUTO-DETECTS cloud at runtime and selects the right block.",
|
|
3
|
+
"currency": "USD",
|
|
4
|
+
"default_contract_discount": 0.0,
|
|
5
|
+
"clouds": {
|
|
6
|
+
"aws": {
|
|
7
|
+
"dbu_usd_rate": { "jobs": 0.15, "all_purpose": 0.55, "sql_serverless": 0.70 },
|
|
8
|
+
"photon_premium_multiplier": 2.0,
|
|
9
|
+
"nodes": {
|
|
10
|
+
"m5.large": { "dbu_per_hour": 0.40, "cloud_usd_per_hour": 0.096 },
|
|
11
|
+
"m5d.large": { "dbu_per_hour": 0.40, "cloud_usd_per_hour": 0.113 },
|
|
12
|
+
"m5.xlarge": { "dbu_per_hour": 0.75, "cloud_usd_per_hour": 0.192 },
|
|
13
|
+
"m5.2xlarge": { "dbu_per_hour": 1.50, "cloud_usd_per_hour": 0.384 },
|
|
14
|
+
"m5.4xlarge": { "dbu_per_hour": 3.00, "cloud_usd_per_hour": 0.768 },
|
|
15
|
+
"i3.xlarge": { "dbu_per_hour": 1.00, "cloud_usd_per_hour": 0.312 }
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"azure": {
|
|
19
|
+
"dbu_usd_rate": { "jobs": 0.15, "all_purpose": 0.55, "sql_serverless": 0.70 },
|
|
20
|
+
"photon_premium_multiplier": 2.0,
|
|
21
|
+
"nodes": {
|
|
22
|
+
"standard_ds3_v2": { "dbu_per_hour": 0.75, "cloud_usd_per_hour": 0.229 },
|
|
23
|
+
"standard_d4ds_v5": { "dbu_per_hour": 1.00, "cloud_usd_per_hour": 0.272 }
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"gcp": {
|
|
27
|
+
"dbu_usd_rate": { "jobs": 0.15, "all_purpose": 0.55, "sql_serverless": 0.70 },
|
|
28
|
+
"photon_premium_multiplier": 2.0,
|
|
29
|
+
"nodes": {
|
|
30
|
+
"n2-standard-4": { "dbu_per_hour": 1.00, "cloud_usd_per_hour": 0.194 }
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""PricingProvider — modeled cost, cloud-aware, zero external dependency.
|
|
2
|
+
|
|
3
|
+
Cost = usage (measured) x rate (reference). We measure usage in-process; the rate
|
|
4
|
+
comes from the bundled per-cloud price table (cloud auto-detected). Modeled cost is
|
|
5
|
+
DIRECTIONAL — good enough to rank waste, not to invoice. If system.billing.usage is
|
|
6
|
+
granted, the central service reconciles modeled -> billed (cost-tier resolver).
|
|
7
|
+
|
|
8
|
+
The Photon angle: the premium is the EXTRA DBUs Photon costs vs standard. Wasted
|
|
9
|
+
premium = the premium you paid for the fraction of work that fell back to the JVM.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
# Price table location: CLSO_PRICE_TABLE when set and non-empty, otherwise the
# price_table.json that sits next to this module.
_BUNDLED_TABLE = os.path.join(os.path.dirname(__file__), "price_table.json")
_TABLE_PATH = os.getenv("CLSO_PRICE_TABLE") or _BUNDLED_TABLE
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class PricingProvider:
    """Modeled pricing backed by the JSON price table at ``_TABLE_PATH``.

    Attributes:
        cloud: Key of the per-cloud block to use; an unknown or empty block
            falls back to the ``"aws"`` block.
        contract_discount: Fraction taken off the gross premium. When None it is
            read from the table's ``default_contract_discount`` (default 0.0).
    """

    cloud: str = "aws"
    contract_discount: float | None = None
    _table: dict = None  # type: ignore  # loaded in __post_init__

    def __post_init__(self):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(_TABLE_PATH, encoding="utf-8") as f:
            self._table = json.load(f)
        if self.contract_discount is None:
            self.contract_discount = float(self._table.get("default_contract_discount", 0.0))

    def _block(self) -> dict:
        """Return the price block for ``self.cloud``, else the ``aws`` block."""
        clouds = self._table.get("clouds", {})
        return clouds.get(self.cloud) or clouds.get("aws", {})

    def _node(self, nt: str) -> dict:
        """Return the table entry for node type ``nt``; unknown types get zero rates."""
        return self._block().get("nodes", {}).get(nt, {"dbu_per_hour": 0.0, "cloud_usd_per_hour": 0.0})

    def _node_dbu(self, nt: str) -> float:
        """DBU/hr of one node; 0.0 when the entry has no ``dbu_per_hour``."""
        return float(self._node(nt).get("dbu_per_hour", 0.0))

    def _dbu_rate(self, compute_type: str) -> float:
        """USD per DBU for ``compute_type``, falling back to the ``jobs`` rate."""
        rates = self._block().get("dbu_usd_rate", {})
        return float(rates.get(compute_type, rates.get("jobs", 0.0)))

    def photon_premium_multiplier(self) -> float:
        """How much MORE DBU/hr Photon costs vs standard (e.g. 2.0 = +100%)."""
        return float(self._block().get("photon_premium_multiplier", 1.0))

    def cluster_dbu_per_hour(self, driver: str, worker: str, num_workers: int) -> float:
        """Total DBU/hr of one driver plus ``num_workers`` workers (negative -> 0)."""
        return self._node_dbu(driver) + max(num_workers, 0) * self._node_dbu(worker)

    def photon_premium_usd(self, driver: str, worker: str, num_workers: int,
                           compute_type: str, runtime_hours: float) -> float:
        """Dollars of Photon premium for a run of this shape and duration.

        Computed as hours x cluster DBU/hr x USD-per-DBU x (multiplier - 1), less
        the contract discount, rounded to 4 decimals. The result is never
        negative: a discount above 1 or negative hours yield 0.0.
        """
        dbu_hr = self.cluster_dbu_per_hour(driver, worker, num_workers)
        rate = self._dbu_rate(compute_type)
        extra_factor = max(self.photon_premium_multiplier() - 1.0, 0.0)
        gross = runtime_hours * dbu_hr * rate * extra_factor
        return max(0.0, round(gross * (1.0 - self.contract_discount), 4))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""clusop — Photon Fallback Analyzer (zero-touch Databricks cost forensics)."""
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Self-arming tripwire — runs at interpreter startup via clusop_autoload.pth.
|
|
2
|
+
|
|
3
|
+
CARDINAL RULE: best-effort, non-blocking, must NEVER break or slow the customer's
|
|
4
|
+
job. Every step is wrapped to swallow exceptions and return fast. clusop degrading
|
|
5
|
+
silently is fine; clusop delaying a production run is not.
|
|
6
|
+
|
|
7
|
+
What activate() does, once a SparkSession exists and the JVM is reachable:
|
|
8
|
+
1. add the bundled listener JAR to the JVM (sparkContext.addJar) — "one pip" delivery,
|
|
9
|
+
2. instantiate + register com.clusop.listener.PhotonFallbackListener via py4j.
|
|
10
|
+
|
|
11
|
+
On Spark Connect / USER_ISOLATION clusters the JVM is sealed -> we no-op cleanly
|
|
12
|
+
(the listener can't run there; that's the documented scope boundary).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
|
|
20
|
+
# Module-wide flag: set to True by activate() once the listener is registered.
_ARMED: bool = False
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _log(msg: str) -> None:
    """Write one ``[clusop]``-prefixed line to stderr; never raises."""
    try:
        stream = sys.stderr
        stream.write(f"[clusop] {msg}\n")
    except Exception:
        # Logging is best-effort: a broken or closed stderr must not surface.
        return
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _bundled_jar(jars_dir: str | None = None) -> str | None:
    """Return the absolute path of the bundled listener JAR, or None.

    Args:
        jars_dir: Directory to search. Defaults to the ``jars`` directory that is
            a sibling of this module's package directory.

    Returns:
        The first ``*.jar`` in name order (glob results are unordered, so they
        are sorted to keep the choice deterministic), or None when there is no
        JAR or the lookup fails for any reason.
    """
    try:
        import glob

        if jars_dir is None:
            here = os.path.dirname(os.path.abspath(__file__))
            jars_dir = os.path.join(here, "..", "jars")
        jars = sorted(glob.glob(os.path.join(jars_dir, "*.jar")))
        return os.path.abspath(jars[0]) if jars else None
    except Exception:
        return None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _active_spark():
    """Return the current SparkSession if one exists, else None; never raises.

    Prefers ``SparkSession.getActiveSession()`` and falls back to the class's
    ``_instantiatedSession`` attribute. Any failure (including pyspark not being
    importable) yields None.
    """
    try:
        from pyspark.sql import SparkSession

        session = SparkSession.getActiveSession()
        if session:
            return session
        return SparkSession._instantiatedSession
    except Exception:
        return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def activate() -> bool:
    """Arm the listener. Idempotent and fail-open.

    Steps, once a SparkSession exists and the JVM is reachable: add the bundled
    JAR (best-effort), then construct and register the listener via py4j.

    Environment:
        CLSO_DISABLE: ``"1"`` turns arming off.
        CLSO_PHOTON_PREMIUM: float passed to the listener (default 1.0). An
            unparsable value is ignored with a log line instead of aborting.
        CLSO_SIGNAL_ENDPOINT: string passed to the listener (default "").

    Returns:
        True only on the call that registers the listener. False when already
        armed, disabled, no session yet (a watcher is installed), the JVM is not
        reachable, or anything fails.
    """
    global _ARMED
    if _ARMED or os.environ.get("CLSO_DISABLE") == "1":
        return False
    try:
        spark = _active_spark()
        if spark is None:
            # session not up yet - a watcher retries; don't block startup.
            _install_watcher()
            return False

        from clusop.runtime.detect import jvm_available
        if not jvm_available(spark):
            _log("JVM not reachable (Spark Connect / Shared cluster) — listener not armed. "
                 "clusop detection requires a dedicated / single-user cluster.")
            return False

        jar = _bundled_jar()
        if jar:
            try:
                spark.sparkContext.addJar(jar)
            except Exception as ex:
                _log(f"addJar failed ({str(ex)[:80]}) — JAR may already be on the classpath")

        # A typo in the env var must not silently disable arming: use the default.
        raw_premium = os.getenv("CLSO_PHOTON_PREMIUM", "1.0")
        try:
            premium = float(raw_premium)
        except ValueError:
            _log(f"ignoring invalid CLSO_PHOTON_PREMIUM={raw_premium[:40]!r}; using 1.0")
            premium = 1.0
        endpoint = os.getenv("CLSO_SIGNAL_ENDPOINT", "")
        try:
            listener = spark._jvm.com.clusop.listener.PhotonFallbackListener(premium, endpoint)
            spark._jsparkSession.listenerManager().register(listener)
            _ARMED = True
            _log("armed — PhotonFallbackListener registered.")
            return True
        except Exception as ex:
            _log(f"listener registration failed: {str(ex)[:120]}")
            return False
    except Exception as ex:  # absolute backstop - never propagate into the job
        _log(f"activate suppressed error: {str(ex)[:120]}")
        return False
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _install_watcher() -> None:
    """Poll for a SparkSession on a daemon thread and arm once it appears.

    Polls every 0.2s for up to 150 iterations (~30s). As soon as a session
    exists, ``activate()`` is attempted exactly once and polling stops whatever
    the outcome: a sealed JVM or a missing listener class will not change
    between polls, and retrying would only repeat the log line and the addJar
    call on every tick. Never raises.
    """
    try:
        import threading
        import time

        def _poll():
            for _ in range(150):  # ~30s, then give up
                try:
                    if _active_spark() is not None:
                        activate()
                        return
                except Exception:
                    pass
                time.sleep(0.2)

        threading.Thread(target=_poll, name="clusop-arm", daemon=True).start()
    except Exception:
        pass
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Fire on import: the .pth file imports this module at interpreter startup.
try:
    # Only the driver arms, never Spark executors. DB_IS_DRIVER being unset
    # counts as "driver"; when set it must equal TRUE (case-insensitive).
    _is_driver = os.environ.get("DB_IS_DRIVER")
    if not (_is_driver is not None and _is_driver.upper() != "TRUE"):
        activate()
except Exception:
    pass
|
clusop/runtime/detect.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Runtime auto-detection — cloud, DBR, cluster shape, access mode.
|
|
2
|
+
|
|
3
|
+
Everything here is discovered in-process from the live SparkSession / env, so a NEW
|
|
4
|
+
customer needs zero manual config. Values are only *overridable* (env vars), never
|
|
5
|
+
required. All lookups are best-effort and never raise.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
# instance-prefix -> cloud (extend as needed; verified per cloud).
|
|
14
|
+
_CLOUD_BY_PREFIX = {
    # AWS EC2 families
    **dict.fromkeys(("m5", "m5d", "m6", "c5", "r5", "i3", "i4"), "aws"),
    # Azure
    **dict.fromkeys(("standard_d", "standard_e", "standard_f", "standard_l"), "azure"),
    # GCP
    **dict.fromkeys(("n1", "n2", "n2d", "e2", "c2"), "gcp"),
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _conf(spark, key, default=None):
    """Return ``spark.conf.get(key)``, or ``default`` if that lookup raises."""
    try:
        value = spark.conf.get(key)
    except Exception:
        value = default
    return value
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def detect_cloud(spark=None, node_type: str | None = None, host: str | None = None) -> str:
    """Return ``aws`` | ``azure`` | ``gcp`` | ``unknown``.

    Resolution order:
      1. ``CLSO_CLOUD`` env override, trimmed and lower-cased so that a value
         such as ``"Azure"`` still matches lower-case lookups downstream.
      2. Longest matching instance-type prefix in ``_CLOUD_BY_PREFIX``.
      3. Workspace host pattern (``host`` argument, else ``DATABRICKS_HOST``).

    Args:
        spark: Unused; kept for interface compatibility.
        node_type: Instance type, matched case-insensitively.
        host: Workspace host/URL, matched case-insensitively.
    """
    override = (os.getenv("CLSO_CLOUD") or "").strip().lower()
    if override:
        return override

    nt = (node_type or "").lower()
    # Longest prefix first so that e.g. "m5d" is tried before "m5".
    for pref in sorted(_CLOUD_BY_PREFIX, key=len, reverse=True):
        if nt.startswith(pref):
            return _CLOUD_BY_PREFIX[pref]

    h = (host or os.getenv("DATABRICKS_HOST") or "").lower()
    # "gcp.databricks.com" must be tested before the broader "cloud.databricks.com".
    if "azuredatabricks.net" in h:
        return "azure"
    if "gcp.databricks.com" in h:
        return "gcp"
    if "cloud.databricks.com" in h:
        return "aws"
    return "unknown"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def detect_dbr() -> str:
    """Return the runtime version string from the environment.

    ``CLSO_DBR`` takes precedence over ``DATABRICKS_RUNTIME_VERSION``; an unset
    or empty variable is skipped. Returns ``"unknown"`` when neither is usable.
    """
    for var in ("CLSO_DBR", "DATABRICKS_RUNTIME_VERSION"):
        value = os.getenv(var)
        if value:
            return value
    return "unknown"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class ClusterShape:
    """Immutable snapshot of the detected cluster configuration."""

    cloud: str
    dbr: str
    driver_node_type: str
    worker_node_type: str
    num_workers: int
    photon_enabled: bool
    # SINGLE_USER | USER_ISOLATION | ... (drives JVM availability)
    data_security_mode: str
    # jobs | all_purpose
    compute_type: str
    cluster_id: str
    cluster_name: str
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def detect_shape(spark=None) -> ClusterShape:
    """Build a best-effort ``ClusterShape`` from cluster usage tags and env vars.

    Tags are read from ``spark.databricks.clusterUsageTags.*`` when ``spark`` is
    given; each value falls back to an environment variable and then to
    ``"unknown"`` (or 0 workers).

    Photon counts as enabled when the runtime string contains "photon" or when
    ``spark.databricks.photon.enabled`` is "true" in any letter case.
    """
    def tag(name, default=None):
        # Without a session there is nothing to read; hand back the default.
        if not spark:
            return default
        return _conf(spark, f"spark.databricks.clusterUsageTags.{name}", default)

    driver = tag("driverNodeType") or tag("clusterNodeType") or os.getenv("CLSO_NODE_TYPE") or "unknown"
    worker = tag("workerNodeType") or tag("clusterNodeType") or driver
    try:
        # int(float(...)) also accepts values written like "2.0".
        workers = int(float(tag("clusterWorkers", "0") or 0))
    except Exception:
        workers = 0
    dbr = detect_dbr()
    # Compare case-insensitively so "True"/"TRUE" are not read as disabled.
    photon_flag = str(_conf(spark, "spark.databricks.photon.enabled", "false")).strip().lower()
    photon = "photon" in dbr.lower() or photon_flag == "true"
    dsm = tag("clusterDataSecurityMode") or os.getenv("CLSO_DATA_SECURITY_MODE") or "unknown"
    return ClusterShape(
        cloud=detect_cloud(spark, node_type=driver),
        dbr=dbr,
        driver_node_type=driver,
        worker_node_type=worker,
        num_workers=workers,
        photon_enabled=bool(photon),
        data_security_mode=dsm,
        # A job id from either source marks jobs compute; otherwise all-purpose.
        compute_type=("jobs" if (tag("jobId") or os.getenv("DB_JOB_ID")) else "all_purpose"),
        cluster_id=tag("clusterId") or os.getenv("DB_CLUSTER_ID") or "unknown",
        cluster_name=tag("clusterName") or os.getenv("DB_CLUSTER_NAME") or "unknown",
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def jvm_available(spark=None) -> bool:
    """Report whether ``spark`` exposes both ``_jvm`` and ``sparkContext``.

    Returns False if reading either attribute raises for any reason, which
    includes ``spark`` being None.
    """
    try:
        getattr(spark, "_jvm")
        getattr(spark, "sparkContext")
    except Exception:
        return False
    return True
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""clusop — Photon Fallback Analyzer (zero-touch Databricks cost forensics)."""
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Two-tier aggregator.
|
|
2
|
+
|
|
3
|
+
Tier 1 — DriverBatcher (ephemeral, in the driver): dedup by signature within the
|
|
4
|
+
session, accumulate waste + occurrences, bound memory, flush compact signals to the
|
|
5
|
+
central sink. Dies with the cluster, holds nothing durable (a driver crash loses at
|
|
6
|
+
most the unflushed batch — acceptable for an estimation product).
|
|
7
|
+
|
|
8
|
+
Tier 2 — CentralAggregator (durable, off-cluster): window by job_run (closed on an
|
|
9
|
+
idle timeout — job-run completion isn't observable from the listener, an explicit
|
|
10
|
+
approximation), idempotent upsert by {job_run_id, signature}, cross-run suppression,
|
|
11
|
+
prioritization. The real scaling locus.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# ---- Tier 1: driver-local ---------------------------------------------------
|
|
20
|
+
@dataclass
class DriverBatcher:
    """Driver-local accumulator that merges signals sharing a signature.

    Attributes:
        max_signatures: Upper bound on distinct signatures held at once. With a
            bound of zero or less nothing is stored and every new signature is
            counted as evicted.
        evicted: Number of signatures dropped to respect the bound.
    """

    max_signatures: int = 1000
    _acc: dict = field(default_factory=dict)
    evicted: int = 0

    def add(self, signal) -> None:
        """Merge ``signal`` into its existing record or store it as a new one.

        Merging adds occurrences, runtime and modeled waste (rounded to 4
        decimals) onto the stored record. The stored record is the first signal
        object seen for that signature and is mutated in place.
        """
        sig = signal.signature
        cur = self._acc.get(sig)
        if cur is not None:
            cur.occurrences += signal.occurrences
            cur.wasted_usd_modeled = round(cur.wasted_usd_modeled + signal.wasted_usd_modeled, 4)
            cur.runtime_seconds += signal.runtime_seconds
            return

        if self.max_signatures <= 0:
            # No capacity at all: dropping is the only way to stay bounded
            # (and avoids calling min() on an empty accumulator).
            self.evicted += 1
            return

        if len(self._acc) >= self.max_signatures:
            # Evict the lowest-waste signature; delete by stored key so a record
            # whose .signature attribute was changed cannot cause a KeyError.
            victim_key = min(self._acc, key=lambda k: self._acc[k].wasted_usd_modeled)
            del self._acc[victim_key]
            self.evicted += 1
        self._acc[sig] = signal

    def flush(self) -> list:
        """Return all accumulated signals in insertion order and clear the batch."""
        out = list(self._acc.values())
        self._acc.clear()
        return out
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ---- Tier 2: central, durable ----------------------------------------------
|
|
48
|
+
class CentralAggregator:
    """In-memory reference implementation of the central aggregation contract.

    Signals are keyed by ``(job_run_id, signature)``. A repeated key is treated
    as a duplicate delivery and collapsed rather than summed.
    """

    def __init__(self, store=None, resolver=None):
        """Use ``store`` (a mapping) when given, else a fresh dict; ``resolver``
        is an optional object with a ``reconcile(signal)`` method."""
        if store is None:
            store = {}
        self._store = store        # {(job_run_id, signature): signal}
        self._resolver = resolver  # CostTierResolver | None

    def ingest(self, signal) -> None:
        """Store a new signal, or fold a duplicate into the existing record.

        New keys are passed to the resolver (if any) before being stored. For an
        existing key the larger of the old and new ``occurrences`` and
        ``wasted_usd_modeled`` is kept, so retries never double count.
        """
        key = (signal.job_run_id, signal.signature)
        existing = self._store.get(key)
        if existing is not None:
            existing.occurrences = max(existing.occurrences, signal.occurrences)
            existing.wasted_usd_modeled = max(existing.wasted_usd_modeled, signal.wasted_usd_modeled)
            return
        if self._resolver is not None:
            self._resolver.reconcile(signal)  # modeled -> billed if granted
        self._store[key] = signal

    def open_for_run(self, job_run_id: str) -> list:
        """Return every stored signal whose key belongs to ``job_run_id``."""
        matches = []
        for (run_id, _), stored in self._store.items():
            if run_id == job_run_id:
                matches.append(stored)
        return matches
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""clusop onboarding (central service side).
|
|
2
|
+
|
|
3
|
+
Creates the clusop telemetry schema + signals table, probes the cost tier, and reports
|
|
4
|
+
exactly what the customer is getting. Fail-LOUD only if it can't create its own schema
|
|
5
|
+
or reach the webhook; never fails on missing system.* access (that just keeps cost
|
|
6
|
+
modeled). Pass a sql_runner (a Databricks SQL warehouse client) + teams webhook.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from clusop.service.resolver import CostTierResolver
|
|
12
|
+
|
|
13
|
+
_DDL = """
|
|
14
|
+
CREATE TABLE IF NOT EXISTS {catalog}.{schema}.clusop_signals (
|
|
15
|
+
signature STRING, cluster_id STRING, job_id STRING, job_run_id STRING,
|
|
16
|
+
cloud STRING, dbr STRING, cause STRING, verdict_kind STRING,
|
|
17
|
+
fallback_weight DOUBLE, runtime_seconds DOUBLE,
|
|
18
|
+
wasted_usd DOUBLE, cost_basis STRING, occurrences INT,
|
|
19
|
+
confidence STRING, evidence STRING, captured_at TIMESTAMP, dt DATE
|
|
20
|
+
) USING DELTA PARTITIONED BY (dt)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def onboard(sql_runner, *, catalog="main", schema="clusop", teams_webhook=None) -> dict:
    """Create the clusop schema and signals table, then report the cost tier.

    Args:
        sql_runner: Callable that executes one SQL statement given as a string.
            Any exception it raises while creating the schema or table
            propagates to the caller.
        catalog: Catalog name interpolated into the SQL statements.
        schema: Schema name interpolated into the SQL statements.
        teams_webhook: Optional webhook; this function only reports whether one
            was supplied and does not contact it.

    Returns:
        A dict with ``ok``, ``cost_tier_billed``, a human-readable ``message``
        and ``webhook_set``.
    """
    target = f"{catalog}.{schema}"
    sql_runner(f"CREATE SCHEMA IF NOT EXISTS {target}")
    sql_runner(_DDL.format(catalog=catalog, schema=schema))

    tier = CostTierResolver(sql_runner).probe()
    tier_label = "BILLED (Tier-1)" if tier.billed else "MODELED (Tier-0)"
    parts = [
        f"clusop onboarded -> {target}.clusop_signals. Cost tier: "
        f"{tier_label} — {tier.reason}."
    ]
    if not tier.billed:
        # Tell the customer what to grant to move off the modeled tier.
        parts.append(" To enable billed reconciliation, grant the clusop service principal "
                     "SELECT on system.billing + system.compute.")

    return {
        "ok": True,
        "cost_tier_billed": tier.billed,
        "message": "".join(parts),
        "webhook_set": bool(teams_webhook),
    }
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Cost-tier resolver (central service, off-cluster).
|
|
2
|
+
|
|
3
|
+
Detection is ALWAYS in-process (system tables can't see the physical plan). This
|
|
4
|
+
resolver only upgrades the COST leg: probe read access to system.billing; if present,
|
|
5
|
+
reconcile modeled -> billed and raise cost confidence to HIGH; else stay modeled.
|
|
6
|
+
Read-only, cheap, cached, fail-open. NEVER a detection dependency.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class CostTier:
    """Result of probing whether billed cost data is readable."""

    # True when the probe query against system.billing.usage succeeded.
    billed: bool
    # Human-readable explanation of the outcome.
    reason: str


class CostTierResolver:
    """Decide whether waste stays modeled or can be marked billed-reconciled.

    The probe result is cached on the instance after the first call. Both the
    probe and the reconciliation swallow errors from ``sql_runner`` so a failing
    warehouse never propagates to the caller.
    """

    def __init__(self, sql_runner=None):
        # sql_runner: callable(str)->rows, e.g. a Databricks SQL warehouse client.
        # None => no system access => modeled only.
        self._sql = sql_runner
        self._cached: CostTier | None = None

    def probe(self) -> CostTier:
        """Return the cached cost tier, running the probe query on first use."""
        if self._cached is not None:
            return self._cached
        if self._sql is None:
            self._cached = CostTier(False, "no SQL access configured — modeled cost (Tier-0)")
            return self._cached
        try:
            self._sql("SELECT 1 FROM system.billing.usage LIMIT 1")
            self._cached = CostTier(True, "system.billing readable — billed reconciliation (Tier-1)")
        except Exception as ex:
            # Any failure (permissions, connectivity, ...) keeps the modeled tier;
            # the message is truncated to keep the reason short.
            self._cached = CostTier(False, f"system.billing not readable ({str(ex)[:60]}) — modeled (Tier-0)")
        return self._cached

    @staticmethod
    def _is_safe_cluster_id(value) -> bool:
        """Return True if ``value`` is safe to embed in a SQL string literal.

        Only non-empty ASCII strings made of letters, digits, ``-`` and ``_``
        are accepted, which rules out quotes, backslashes and whitespace.
        """
        if not isinstance(value, str) or not value:
            return False
        return value.isascii() and value.replace("-", "").replace("_", "").isalnum()

    @staticmethod
    def _billed_dbus(rows):
        """Extract the ``dbus`` aggregate from the first result row, or None.

        The row shape depends on the ``sql_runner`` in use, so name-based,
        positional and attribute access are tried in turn.
        """
        if not rows:
            return None
        first = rows[0]
        getters = (
            lambda row: row["dbus"],
            lambda row: row[0],
            lambda row: getattr(row, "dbus"),
        )
        for getter in getters:
            try:
                return getter(first)
            except (KeyError, IndexError, TypeError, AttributeError):
                continue
        return None

    def reconcile(self, signal) -> None:
        """Mark ``signal`` as billed-reconciled when billing data backs it.

        Does nothing when the probe reports the modeled tier. Otherwise it
        queries Photon usage for the signal's cluster and, only if the
        aggregate is non-NULL, sets ``signal.confidence["cost"]`` to ``"HIGH"``
        and appends ``" | billed-reconciled"`` to ``signal.evidence``.
        Mutates ``signal`` in place; never raises.
        """
        tier = self.probe()
        if not tier.billed or self._sql is None:
            return
        try:
            cluster_id = signal.cluster_id
            if not self._is_safe_cluster_id(cluster_id):
                # The id is spliced into SQL text below; refuse anything that
                # could break out of the string literal and keep the modeled cost.
                return
            rows = self._sql(
                "SELECT SUM(usage_quantity) AS dbus FROM system.billing.usage u "
                "JOIN system.billing.list_prices p USING (sku_name) "
                f"WHERE u.usage_metadata.cluster_id = '{cluster_id}' "
                "AND u.sku_name ILIKE '%PHOTON%' "
                "AND u.usage_date >= current_date() - INTERVAL 1 DAY LIMIT 1"
            )
            # (production: derive premium share of billed DBUs and scale by fallback_weight)
            # An aggregate without GROUP BY yields one row even when nothing
            # matched (SUM is NULL), so row presence alone is not evidence of
            # billed usage; require a non-NULL value.
            if self._billed_dbus(rows) is not None:
                signal.confidence["cost"] = "HIGH"
                signal.evidence += " | billed-reconciled"
        except Exception:
            pass  # fail-open: keep modeled
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Suppression + prioritization — turns thousands of signals into the few cards a
|
|
2
|
+
human should actually see. At fleet scale the bottleneck is human attention, not
|
|
3
|
+
parser throughput, so surfacing 10,000 cards == surfacing none.
|
|
4
|
+
|
|
5
|
+
All parameters are CALIBRATION OUTPUTS of the false-positive study, not constants.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
# Ordinal rank of each confidence label (LOW < MEDIUM < HIGH) so labels can be compared numerically.
_CONF_RANK = {"LOW": 0, "MEDIUM": 1, "HIGH": 2}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class Gate:
    """Immutable bundle of thresholds that decide whether a card is surfaced."""

    # Fallback must persist across this many runs.
    sustained_runs_n: int = 3
    # Fewer history runs than this -> insufficient_data, no card.
    min_history_runs: int = 3
    # Dollar floor on modeled monthly waste.
    min_waste_usd_per_month: float = 50.0
    # Lowest recommendation-confidence label that may surface.
    min_recommendation_conf: str = "MEDIUM"
    # Don't re-surface an accepted/snoozed signature for 7 days.
    cooldown_hours: float = 168.0
    # Alert-storm guard.
    per_team_daily_cap: int = 25
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def passes_gate(*, sustained_runs: int, history_runs: int, monthly_waste_usd: float,
                rec_conf: str, gate: Gate = Gate()) -> tuple[bool, str]:
    """Check a candidate against the gate thresholds, in a fixed order.

    The checks run in this order and the first failure wins: history depth,
    sustained runs, dollar floor, recommendation confidence. An unrecognised
    ``rec_conf`` label is ranked like ``"LOW"``.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise ``(False, reason)``.
    """
    rejection = None
    if history_runs < gate.min_history_runs:
        rejection = "insufficient_data"
    elif sustained_runs < gate.sustained_runs_n:
        rejection = "not yet sustained"
    elif monthly_waste_usd < gate.min_waste_usd_per_month:
        rejection = "below dollar floor"
    elif _CONF_RANK.get(rec_conf, 0) < _CONF_RANK[gate.min_recommendation_conf]:
        rejection = "recommendation confidence too low"

    if rejection is None:
        return True, "ok"
    return False, rejection
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def priority(monthly_waste_usd: float, rec_conf: str) -> float:
    """Score a candidate as monthly waste scaled by its confidence rank.

    The multiplier is ``1 + rank`` (so LOW -> 1, MEDIUM -> 2, HIGH -> 3); an
    unrecognised label is ranked like LOW.
    """
    conf_multiplier = 1 + _CONF_RANK.get(rec_conf, 0)
    return monthly_waste_usd * conf_multiplier
|
clusop/service/teams.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Propose-never-apply Teams Adaptive Card.
|
|
2
|
+
|
|
3
|
+
Surfaces the fallback, the modeled (or billed) wasted premium, the decomposed
|
|
4
|
+
confidence (headline badge = recommendation leg, never parse), and the action.
|
|
5
|
+
On Approve the control plane flips runtime_engine PHOTON->STANDARD for FUTURE runs
|
|
6
|
+
only, or files the UDF-rewrite — never auto-applied, always reversible + audited.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_card(signal, *, view_url: str = "") -> dict:
    """Build the Teams message payload (one Adaptive Card) for a fallback signal.

    Args:
        signal: Object exposing ``confidence`` (a dict of confidence legs),
            ``verdict_kind``, ``job_run_id``, ``cluster_id``, ``cloud``, ``dbr``,
            ``wasted_usd_modeled``, ``fallback_weight`` and ``cause``.
        view_url: Target of the "View analysis" button; ``"#"`` is used when empty.

    Returns:
        A dict of type ``"message"`` with a single adaptive-card attachment.
    """
    confidence = signal.confidence
    rec = confidence.get("recommendation", "MEDIUM")

    if signal.verdict_kind == "fix_fallback":
        action = ("Rewrite the Python UDF in native PySpark/SQL (or register it as a UC "
                  "Python UDF), or disable Photon on this job.")
    else:
        action = ("Disable Photon on this job cluster — execution fell back to the JVM, "
                  "so the premium is yielding little/no speedup.")

    # The waste is labelled "billed" only when the cost leg is HIGH.
    cost_basis = "billed" if confidence.get("cost") == "HIGH" else "modeled"
    # round(), not int(): int() truncates float noise, e.g. 0.29 * 100 is
    # 28.999999999999996 and would be shown as 28%.
    severity_pct = round(signal.fallback_weight * 100)

    body = [
        {"type": "TextBlock", "size": "Large", "weight": "Bolder", "wrap": True,
         "text": "\U0001f9ea Photon Fallback Detected"},
        {"type": "TextBlock", "isSubtle": True, "wrap": True,
         "text": f"job_run `{signal.job_run_id}` · cluster `{signal.cluster_id}` · {signal.cloud}/{signal.dbr}"},
        {"type": "TextBlock", "wrap": True,
         "text": f"**Wasted Photon premium (~):** ${signal.wasted_usd_modeled} "
                 f"({cost_basis}) · "
                 f"fallback severity {severity_pct}%"},
        {"type": "TextBlock", "wrap": True, "text": f"**Cause:** {signal.cause}"},
        {"type": "FactSet", "facts": [
            {"title": "parse", "value": confidence.get("parse", "?")},
            {"title": "diagnosis", "value": confidence.get("diagnosis", "?")},
            {"title": "cost", "value": confidence.get("cost", "?")},
            {"title": "recommendation", "value": rec},
        ]},
        {"type": "TextBlock", "wrap": True, "text": f"**Action:** {action}"},
        {"type": "TextBlock", "wrap": True, "isSubtle": True,
         "text": "Propose only — nothing changes until you approve; applies to future runs."},
    ]
    actions = [{"type": "Action.OpenUrl", "title": "\U0001f50d View analysis",
                "url": view_url or "#"}]
    content = {"$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
               "type": "AdaptiveCard", "version": "1.4", "body": body, "actions": actions}
    return {"type": "message", "attachments": [
        {"contentType": "application/vnd.microsoft.card.adaptive", "content": content}]}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: clusop
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Photon Fallback Analyzer — zero-touch Databricks cost forensics (.pth self-arm + bundled JVM listener)
|
|
5
|
+
Author: yogasathyandrun
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Keywords: cost,databricks,finops,photon,spark
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
11
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
12
|
+
Provides-Extra: service
|
|
13
|
+
Requires-Dist: databricks-sdk>=0.30; extra == 'service'
|
|
14
|
+
Requires-Dist: requests>=2.31; extra == 'service'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# clusop — Photon Fallback Analyzer
|
|
18
|
+
|
|
19
|
+
`pip install clusop`
|
|
20
|
+
|
|
21
|
+
clusop finds the money Databricks Photon quietly burns. When Photon hits an operator it
|
|
22
|
+
can't run (a Python UDF, a struct-IN filter, an unsupported Delta feature), it silently
|
|
23
|
+
**falls back** to the JVM — you keep paying the 2–2.9× Photon DBU premium while getting
|
|
24
|
+
JVM speed. clusop detects those fallbacks from the executed plan, estimates the wasted
|
|
25
|
+
spend, and proposes a fix. It never touches your job.
|
|
26
|
+
|
|
27
|
+
## One install, two halves, zero config
|
|
28
|
+
|
|
29
|
+
A single `pip install clusop` ships both halves and arms itself:
|
|
30
|
+
|
|
31
|
+
- a **Python `.pth`** (`clusop_autoload.pth`) that Python's `site` machinery runs at
|
|
32
|
+
interpreter startup → imports `clusop.runtime.bootstrap` → arms automatically. You do
|
|
33
|
+
not `import clusop` anywhere in your job.
|
|
34
|
+
- a bundled **Scala JAR** (`clusop/jars/photon_listener.jar`) — the actual
|
|
35
|
+
`QueryExecutionListener`. The bootstrap `addJar`s it and registers it over Py4J.
|
|
36
|
+
|
|
37
|
+
Install it as a **cluster/job library** (production path) so every interpreter that
|
|
38
|
+
starts already has it. `%pip install clusop` + `dbutils.library.restartPython()` works for
|
|
39
|
+
dev.
|
|
40
|
+
|
|
41
|
+
Everything is **auto-detected** at runtime — cloud (from the instance type), DBR (from
|
|
42
|
+
`DATABRICKS_RUNTIME_VERSION`), cluster shape, and whether the JVM is reachable. No
|
|
43
|
+
per-user setup; a new customer pip-installs and it works.
|
|
44
|
+
|
|
45
|
+
## What it costs you: nothing if it can't run safely
|
|
46
|
+
|
|
47
|
+
- **Fail-open everywhere.** Any error in arming, listening, parsing, or dispatch is
|
|
48
|
+
swallowed. clusop never breaks, slows, or blocks a customer query.
|
|
49
|
+
- **Needs a SINGLE_USER / dedicated cluster** for the JVM listener. On USER_ISOLATION
|
|
50
|
+
(Shared) clusters the JVM is sealed behind Spark Connect — clusop detects this and stays
|
|
51
|
+
dormant rather than failing.
|
|
52
|
+
- **Propose-never-apply.** clusop emits a signal and a Teams card. A human decides.
|
|
53
|
+
|
|
54
|
+
## How a signal is born
|
|
55
|
+
|
|
56
|
+
1. The JVM listener sees a finished query and reads its **executed plan**.
|
|
57
|
+
2. The **structural parser** ([src/clusop/analysis/parser.py](src/clusop/analysis/parser.py),
|
|
58
|
+
mirrored in Scala) decides if this is a *real* fallback. The hard part: a clean Photon
|
|
59
|
+
query always ends in one **terminal** `ColumnarToRow` (the normal result boundary) —
|
|
60
|
+
that is **not** a fallback. Real fallback is **mid-plan** `ColumnarToRow`,
|
|
61
|
+
`RowToColumnar` round-trips, or `BatchEvalPython`/`ArrowEvalPython`. Counting raw
|
|
62
|
+
occurrences false-positives; clusop doesn't.
|
|
63
|
+
3. The **waste model** ([waste.py](src/clusop/analysis/waste.py)) sizes the loss:
|
|
64
|
+
`runtime × Σ(node DBU/hr) × $rate × (photon_premium−1) × fallback_weight`.
|
|
65
|
+
4. **Confidence** ([confidence.py](src/clusop/analysis/confidence.py)) is decomposed into
|
|
66
|
+
four legs — parse / diagnosis / cost / recommendation. A *fix* (rewrite the UDF) needs
|
|
67
|
+
only parse+diagnosis; a *disable-Photon* recommendation is a dollar decision and is
|
|
68
|
+
capped by the cost leg.
|
|
69
|
+
5. The signal flows to a **driver batcher** (dedup by signature) → **central aggregator**
|
|
70
|
+
(idempotent upsert, suppression, prioritization) → Teams card.
|
|
71
|
+
|
|
72
|
+
## Cost: modeled now, billed if granted
|
|
73
|
+
|
|
74
|
+
clusop always works with a **modeled** cost (public price table → MEDIUM cost confidence).
|
|
75
|
+
If the workspace can read `system.billing.usage`, the
|
|
76
|
+
[CostTierResolver](src/clusop/service/resolver.py) reconciles the estimate to **billed**
|
|
77
|
+
dollars (HIGH confidence). Detection never depends on system tables — they only upgrade
|
|
78
|
+
the cost layer. Prices are reference data, not something clusop invents per-row.
|
|
79
|
+
|
|
80
|
+
## Certifying it's real
|
|
81
|
+
|
|
82
|
+
[harness/certify.py](harness/certify.py) runs on a dedicated cluster, captures real
|
|
83
|
+
executed plans for known clean / known-fallback fixtures, and reports catch-rate plus a
|
|
84
|
+
**DBR-stamped Delta feature-support matrix** — derived, never hand-maintained.
|
|
85
|
+
|
|
86
|
+
## Layout
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
src/clusop/
|
|
90
|
+
runtime/ bootstrap (arming), detect (cloud/DBR/shape/jvm)
|
|
91
|
+
analysis/ parser · waste · confidence · signal
|
|
92
|
+
pricing/ price_table.json · provider
|
|
93
|
+
service/ aggregator · resolver · suppression · teams · onboard
|
|
94
|
+
jars/ photon_listener.jar (baked in by the release workflow)
|
|
95
|
+
scala/ the QueryExecutionListener (sbt; mirror of the Python parser)
|
|
96
|
+
harness/ certify.py (catch-rate + Delta matrix, run on a dedicated cluster)
|
|
97
|
+
tests/ parser / waste / confidence
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
See [SPEC.md](SPEC.md) for the full design and invariants.
|
|
101
|
+
|
|
102
|
+
## Release
|
|
103
|
+
|
|
104
|
+
Push-button: the [Publish workflow](.github/workflows/release-pypi.yml) auto-versions
|
|
105
|
+
(`max(PyPI, tags)+1`), builds the JAR with sbt, bakes it into the wheel, lints+tests,
|
|
106
|
+
gates on *(.pth at wheel root AND JAR bundled)*, then publishes via PyPI Trusted
|
|
107
|
+
Publishing (OIDC — no stored token) and tags the bump.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
clusop/__init__.py,sha256=Fb6IqaqvJe1IeYKawOAElXXgTu_joXp78y9Q7QMBLuM,1066
|
|
2
|
+
clusop/analysis/__init__.py,sha256=2HKrWyp5lTZHU921rI2-nVGyxgKEgh5DiMDIyI1pzHo,82
|
|
3
|
+
clusop/analysis/confidence.py,sha256=NmkAiyuNj6GDzRhLEBqtJi6DpK5YXgOtS68VQXVFsJk,2661
|
|
4
|
+
clusop/analysis/parser.py,sha256=j1Yz51uiATlF2i5Z-sbdH99uE9G00XbJHXLw79_cRzo,4390
|
|
5
|
+
clusop/analysis/signal.py,sha256=ZSah6nlCVP2rOIOQaJM-8uTMRtmXtfvKEqzGkjpIvNE,1327
|
|
6
|
+
clusop/analysis/waste.py,sha256=lfmYPWbMOxDWwGPim12SGUuyuij3o8vVt5Q2aVIZKTA,1491
|
|
7
|
+
clusop/jars/.gitkeep,sha256=Sd7zg-Ir9eq3LY53l8RJ9uFiEgt9g7deppu-qtF3NVk,89
|
|
8
|
+
clusop/jars/photon_listener.jar,sha256=TUhbw87uk8Jo6YLuYg6iKxdFa6SVYeXWLqEjcMG7tq8,19171
|
|
9
|
+
clusop/pricing/__init__.py,sha256=2HKrWyp5lTZHU921rI2-nVGyxgKEgh5DiMDIyI1pzHo,82
|
|
10
|
+
clusop/pricing/price_table.json,sha256=YUQq0QIK3zWooOPAO4rY-K9HuoVn9enW3m7cxsnCfM0,1660
|
|
11
|
+
clusop/pricing/provider.py,sha256=ULfovKkvUjDl6q6n-oZsi5niaxvVJn1VXibc5SPi_k4,2749
|
|
12
|
+
clusop/runtime/__init__.py,sha256=2HKrWyp5lTZHU921rI2-nVGyxgKEgh5DiMDIyI1pzHo,82
|
|
13
|
+
clusop/runtime/bootstrap.py,sha256=I3yb4gLfFQLPyd9gmVN8877s4iuhH1iUVqmy7A23oPs,4033
|
|
14
|
+
clusop/runtime/detect.py,sha256=J3hRAg2mCjbDk-l6r9EYZzK9LJ9L_YD4lwMhJNqiTvY,3977
|
|
15
|
+
clusop/service/__init__.py,sha256=2HKrWyp5lTZHU921rI2-nVGyxgKEgh5DiMDIyI1pzHo,82
|
|
16
|
+
clusop/service/aggregator.py,sha256=t0aEWUE-4zocWBXymgxsAAcyuGrAlGNgk_b2eNF3DB0,2955
|
|
17
|
+
clusop/service/onboard.py,sha256=K93zle_MVBqdFth5PNXzpSocI2m3EabB9UBLy8zCNGo,1647
|
|
18
|
+
clusop/service/resolver.py,sha256=jZEkSBdjPzViYa7JQzP3ziIr-2DI6waokVQ7w4ZbgVo,2421
|
|
19
|
+
clusop/service/suppression.py,sha256=RRDzTqReRgJPauc9thMwk8XcR4Orp149dhRmNH97tx0,1680
|
|
20
|
+
clusop/service/teams.py,sha256=TLMRmkUJpoi4dXfnE8s4SKHRdqLXLCcmtbHp7v90M9c,2612
|
|
21
|
+
clusop_autoload.pth,sha256=PzWzzxAddUNX7qpdsYXiWHXcJHkhYb_nPQMAbtERTbU,32
|
|
22
|
+
clusop-0.0.1.dist-info/METADATA,sha256=Pfp-v2-GOQTvJxSe8o9gJ9drtINGarxpUEKW2mfpy34,5296
|
|
23
|
+
clusop-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
24
|
+
clusop-0.0.1.dist-info/RECORD,,
|
clusop_autoload.pth
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import clusop.runtime.bootstrap
|