apiris 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apiris/__init__.py +5 -0
- apiris/ai/__init__.py +1 -0
- apiris/ai/anomaly_model.py +215 -0
- apiris/ai/loader.py +15 -0
- apiris/ai/predictive_model.py +23 -0
- apiris/ai/tradeoff_model.py +25 -0
- apiris/cache.py +13 -0
- apiris/cli.py +499 -0
- apiris/client.py +237 -0
- apiris/config.py +76 -0
- apiris/decision_engine.py +596 -0
- apiris/evaluator.py +281 -0
- apiris/explain/explainer.py +133 -0
- apiris/intelligence/__init__.py +24 -0
- apiris/intelligence/cve_advisory.py +217 -0
- apiris/interceptor.py +135 -0
- apiris/log_utils.py +37 -0
- apiris/logging.py +37 -0
- apiris/policy/__init__.py +7 -0
- apiris/policy/policy_loader.py +58 -0
- apiris/policy/policy_manager.py +49 -0
- apiris/policy/policy_validator.py +36 -0
- apiris/storage/__init__.py +5 -0
- apiris/storage/sqlite_store.py +333 -0
- apiris-1.0.0.dist-info/METADATA +607 -0
- apiris-1.0.0.dist-info/RECORD +30 -0
- apiris-1.0.0.dist-info/WHEEL +5 -0
- apiris-1.0.0.dist-info/entry_points.txt +2 -0
- apiris-1.0.0.dist-info/licenses/LICENSE +62 -0
- apiris-1.0.0.dist-info/top_level.txt +1 -0
apiris/__init__.py
ADDED
apiris/ai/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""AI helpers for Apiris runtime."""
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import math
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
from .loader import load_json
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _hash_text(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of *text* encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _extract_schema_paths(value: Any, prefix: str = "", depth: int = 0, max_depth: int = 6, paths: Optional[set] = None) -> set:
    """Collect the structural paths present in a decoded JSON-like value.

    Dict keys are joined with "." and every list contributes a "[]" segment.
    Only the first element of a list is inspected. Nothing is recorded once
    *depth* exceeds *max_depth*.

    Args:
        value: The value to walk (dict, list or scalar).
        prefix: Path of *value* itself; empty for the root.
        depth: Current nesting level of *value*.
        max_depth: Deepest level that is still walked.
        paths: Accumulator shared across recursive calls; created when None.

    Returns:
        The accumulator set (the same object as *paths* when one was given).
    """
    collected = set() if paths is None else paths
    if depth > max_depth:
        return collected

    if isinstance(value, list):
        # prefix is always a str, so this also yields "[]" for the root list.
        list_path = prefix + "[]"
        collected.add(list_path)
        if value:
            _extract_schema_paths(value[0], list_path, depth + 1, max_depth, collected)
    elif isinstance(value, dict):
        for key, child in value.items():
            child_path = f"{prefix}.{key}" if prefix else str(key)
            collected.add(child_path)
            _extract_schema_paths(child, child_path, depth + 1, max_depth, collected)

    # Scalars contribute no path of their own.
    return collected
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _collect_stats(value: Any, depth: int = 0, stats: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Accumulate structural statistics over a decoded JSON-like value.

    Walks *value* recursively and updates the accumulator with:

    - ``field_count``: number of dict entries seen
    - ``max_depth``: deepest level at which a dict or list was found
    - ``array_count``: number of lists seen
    - ``value_count`` / ``null_count``: scalar leaves, and how many were None
    - ``numeric_values``: finite numeric leaves, converted to float

    Booleans are not counted as numbers even though ``bool`` subclasses
    ``int``, so JSON ``true``/``false`` do not skew the numeric statistics.
    Integers too large to be represented as a float are skipped as well.

    Args:
        value: The value to walk (dict, list or scalar).
        depth: Nesting level of *value*.
        stats: Accumulator shared across recursive calls; a fresh one is
            created when it is None or empty.

    Returns:
        The accumulator dict.
    """
    current = stats
    if not current:
        current = {
            "field_count": 0,
            "max_depth": 0,
            "array_count": 0,
            "null_count": 0,
            "value_count": 0,
            "numeric_values": [],
        }

    if isinstance(value, list):
        current["array_count"] += 1
        current["max_depth"] = max(current["max_depth"], depth)
        for item in value:
            _collect_stats(item, depth + 1, current)
        return current

    if isinstance(value, dict):
        current["max_depth"] = max(current["max_depth"], depth)
        for child in value.values():
            current["field_count"] += 1
            _collect_stats(child, depth + 1, current)
        return current

    # Scalar leaf.
    current["value_count"] += 1
    if value is None:
        current["null_count"] += 1
    elif isinstance(value, (int, float)) and not isinstance(value, bool):
        try:
            number = float(value)
        except OverflowError:
            # Huge ints (json.loads accepts them) have no float representation.
            number = math.inf
        if math.isfinite(number):
            current["numeric_values"].append(number)
    return current
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _numeric_stats(values: List[float]) -> Dict[str, float]:
    """Summarise *values* as mean, population standard deviation, min and max.

    An empty list yields 0.0 for every statistic.
    """
    count = len(values)
    if count == 0:
        return dict.fromkeys(("mean", "std", "min", "max"), 0.0)

    average = sum(values) / count
    # Population variance: divide by the number of samples, not count - 1.
    squared_error = sum((sample - average) ** 2 for sample in values)
    return {
        "mean": average,
        "std": math.sqrt(squared_error / count),
        "min": min(values),
        "max": max(values),
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _compute_feature_vector(parsed: Any, context: Dict[str, Any]) -> Dict[str, Any]:
    """Build the feature map describing one response.

    Args:
        parsed: The decoded response body, or None when there is none.
        context: Per-call inputs. The keys read here are ``core_fields``,
            ``response_hash``, ``last_response_hash``,
            ``last_response_timestamp``, ``last_numeric_mean``,
            ``repeat_count`` and ``now``.

    Returns:
        A dict with ``schemaHash`` (hash of the sorted schema paths, or None
        when there are none), ``responseHash`` (copied from *context*) and
        ``featureMap`` (the named feature values).
    """
    schema_paths = [] if parsed is None else sorted(_extract_schema_paths(parsed))
    schema_hash = _hash_text("|".join(schema_paths)) if schema_paths else None

    stats = _collect_stats(parsed)
    numeric = _numeric_stats(stats["numeric_values"])

    core_fields = context.get("core_fields") or []
    missing_core = 0
    # NOTE(review): when the body yields no schema paths at all, no core field
    # is counted as missing -- confirm this matches how the model was trained.
    if core_fields and schema_paths:
        known_paths = set(schema_paths)
        missing_core = sum(1 for field in core_fields if field not in known_paths)

    response_hash = context.get("response_hash")
    previous_hash = context.get("last_response_hash")
    same_as_previous = response_hash == previous_hash

    if previous_hash and same_as_previous:
        repeat_count = (context.get("repeat_count") or 0) + 1
    else:
        repeat_count = 0

    last_seen = context.get("last_response_timestamp")
    if response_hash and last_seen and same_as_previous:
        # Presumably milliseconds -> seconds; the timestamps' unit is set by the caller.
        time_since_last = (context.get("now") - last_seen) / 1000
    else:
        time_since_last = -1

    previous_mean = context.get("last_numeric_mean")
    numeric_jump = 0 if previous_mean is None else abs(numeric["mean"] - previous_mean)

    value_count = stats["value_count"]
    feature_map = {
        "field_count": stats["field_count"],
        "max_depth": stats["max_depth"],
        "array_count": stats["array_count"],
        "null_ratio": (stats["null_count"] / value_count) if value_count else 0,
        "numeric_mean": numeric["mean"],
        "numeric_std": numeric["std"],
        "numeric_min": numeric["min"],
        "numeric_max": numeric["max"],
        "numeric_jump": numeric_jump,
        "missing_core_ratio": (missing_core / len(core_fields)) if core_fields else 0,
        "repeat_count": repeat_count,
        "time_since_identical": time_since_last,
    }
    return {
        "schemaHash": schema_hash,
        "responseHash": response_hash,
        "featureMap": feature_map,
    }
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _harmonic(n: int) -> float:
    """Return the n-th harmonic number 1 + 1/2 + ... + 1/n (0 when n < 1)."""
    reciprocals = (1.0 / denominator for denominator in range(1, n + 1))
    return sum(reciprocals)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _c_factor(n: int) -> float:
    """Return the normalising term ``2 * H(n - 1) - 2 * (n - 1) / n``.

    ``H`` is the harmonic number computed by ``_harmonic``. The result is 0.0
    for ``n <= 1``.
    """
    if n <= 1:
        return 0.0
    smaller = n - 1
    return 2.0 * _harmonic(smaller) - (2.0 * smaller) / n
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _path_length(row: List[float], node: Dict[str, Any], depth: int) -> float:
    """Return the adjusted path length of *row* through one isolation tree.

    Starting at *node*, the walk goes to ``left`` when ``row[feature] <= split``
    and to ``right`` otherwise, until it reaches a node whose ``leaf`` entry is
    truthy. The result is the depth of that leaf plus ``_c_factor`` of its
    ``size`` (1 when absent).

    Args:
        row: Feature values, indexed by each node's ``feature``.
        node: Tree node to start from.
        depth: Depth of *node* within the tree.
    """
    current = node
    level = depth
    while not current.get("leaf"):
        goes_left = row[current.get("feature")] <= current.get("split")
        current = current.get("left") if goes_left else current.get("right")
        level += 1
    return level + _c_factor(current.get("size", 1))
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def score_isolation_forest(row: List[float], forest: Dict[str, Any]) -> float:
    """Return the isolation-forest score of *row*.

    The score is ``2 ** (-mean_path_length / c)``. ``mean_path_length`` is the
    average of ``_path_length`` over ``forest["trees"]``, and ``c`` is
    ``_c_factor(forest["sampleSize"])``.

    Args:
        row: Feature values in the order the trees index them.
        forest: Serialized forest with ``trees`` and ``sampleSize`` entries.

    Returns:
        The score, or 0.0 when no score can be computed: the forest has no
        trees, or ``sampleSize`` is missing or <= 1. In the latter case the
        normaliser is 0 and the division used to raise ZeroDivisionError.
    """
    trees = forest.get("trees") or []
    if not trees:
        return 0.0

    normalizer = _c_factor(forest.get("sampleSize") or 1)
    if normalizer <= 0:
        # Same neutral fallback as for an empty forest.
        return 0.0

    lengths = [_path_length(row, tree, 0) for tree in trees]
    average = sum(lengths) / len(lengths)
    return math.pow(2, -average / normalizer)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class AnomalyScorer:
    """Score API responses with per-API isolation-forest models.

    The model document is expected to provide a ``featureNames`` list and a
    ``models`` mapping keyed by API name. Each API model may carry
    ``coreFields``, ``mean``, ``std`` and ``forest`` entries. When
    ``load_json`` returns nothing the scorer has no models and ``score``
    returns None for every API.
    """

    def __init__(self, model_path: str, soft_threshold: float, strong_threshold: float) -> None:
        """Load the model at *model_path* and store the two flag thresholds."""
        self.model = load_json(model_path) or {}
        self.feature_names = self.model.get("featureNames") or []
        self.models = self.model.get("models") or {}
        self.soft_threshold = soft_threshold
        self.strong_threshold = strong_threshold

    def score(self, api: str, parsed: Any, response_text: Optional[str], runtime_state: Dict[str, Dict[str, Any]], now_ms: int) -> Optional[Dict[str, Any]]:
        """Score one response and update the per-API runtime state.

        Args:
            api: Name of the API; selects the model and the state entry.
            parsed: Decoded response body (or None).
            response_text: Raw response text, hashed to detect repeats.
            runtime_state: Mutable mapping of API name to the state kept
                between calls; an entry is created on first use.
            now_ms: Current timestamp, stored as the last response timestamp.

        Returns:
            None when there is no model for *api*. Otherwise a dict with
            ``anomalyScore``, ``anomalyFlag`` ("strong", "soft" or "none"),
            ``schemaHash``, ``responseHash``, ``topFeatures`` (the three
            features with the largest absolute z-score) and ``features``.
        """
        api_model = self.models.get(api)
        if not api_model:
            return None

        state = runtime_state.setdefault(api, {
            "last_response_hash": None,
            "last_response_timestamp": None,
            "last_numeric_mean": None,
            "repeat_count": 0,
        })

        response_hash = _hash_text(response_text) if response_text else None
        feature_data = _compute_feature_vector(parsed, {
            "core_fields": api_model.get("coreFields") or [],
            "response_hash": response_hash,
            "last_response_hash": state.get("last_response_hash"),
            "last_response_timestamp": state.get("last_response_timestamp"),
            "last_numeric_mean": state.get("last_numeric_mean"),
            "repeat_count": state.get("repeat_count"),
            "now": now_ms,
        })
        feature_map = feature_data["featureMap"]

        row = [feature_map.get(name, 0) for name in self.feature_names]
        mean = api_model.get("mean") or []
        std = api_model.get("std") or []

        def z_score(idx: int, value: float) -> Optional[float]:
            """Return the z-score of column *idx*, or None without a mean."""
            if idx >= len(mean):
                return None
            # A missing, zero or null std falls back to 1 (no scaling) instead
            # of raising IndexError or dividing by zero.
            scale = std[idx] if idx < len(std) else 0
            return (value - mean[idx]) / (scale or 1)

        z_scores = [z_score(idx, value) for idx, value in enumerate(row)]

        # Columns without a mean are passed to the forest unscaled.
        standardized = [value if z is None else z for value, z in zip(row, z_scores)]
        anomaly_score = score_isolation_forest(standardized, api_model.get("forest") or {})
        if anomaly_score >= self.strong_threshold:
            anomaly_flag = "strong"
        elif anomaly_score >= self.soft_threshold:
            anomaly_flag = "soft"
        else:
            anomaly_flag = "none"

        # Columns without a mean rank last (z = 0) in the explanation.
        top_features = sorted(
            (
                {"feature": name, "value": value, "z": 0 if z is None else abs(z)}
                for name, value, z in zip(self.feature_names, row, z_scores)
            ),
            key=lambda item: item["z"],
            reverse=True,
        )[:3]

        if response_hash and response_hash == state.get("last_response_hash"):
            # Tolerate caller-supplied state without a usable counter, as the
            # feature computation already does.
            state["repeat_count"] = (state.get("repeat_count") or 0) + 1
        else:
            state["repeat_count"] = 0
        state["last_response_hash"] = response_hash
        state["last_response_timestamp"] = now_ms
        state["last_numeric_mean"] = feature_map.get("numeric_mean", 0)

        return {
            "anomalyScore": anomaly_score,
            "anomalyFlag": anomaly_flag,
            "schemaHash": feature_data.get("schemaHash"),
            "responseHash": response_hash,
            "topFeatures": top_features,
            "features": feature_data.get("featureMap"),
        }
|
apiris/ai/loader.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_json(path: str) -> Optional[Dict[str, Any]]:
    """Read and decode the UTF-8 JSON document stored at *path*.

    Loading is best-effort: the file is read directly rather than checked
    with ``exists()`` first, so a file that disappears in between, or a path
    that is a directory or unreadable, yields None instead of raising.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded document, or None when the file is missing, cannot be
        read, is not valid UTF-8, or does not contain valid JSON.
    """
    try:
        text = Path(path).read_text(encoding="utf-8")
    except (OSError, ValueError):
        # OSError: missing file, directory, permissions.
        # ValueError: invalid UTF-8 (UnicodeDecodeError) or a malformed path.
        return None
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return None
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from typing import List, Dict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _sigmoid(z: float) -> float:
    """Return the logistic function ``1 / (1 + e**-z)`` for any finite *z*.

    The negative branch uses the algebraically equal form
    ``e**z / (1 + e**z)``. ``math.exp(-z)`` raises OverflowError once ``-z``
    exceeds roughly 709, whereas ``math.exp(z)`` merely underflows to 0.
    """
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    scaled = math.exp(z)
    return scaled / (1.0 + scaled)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def predict_probability(model: Dict[str, List[float]], row: List[float]) -> float:
    """Return the logistic-regression probability for *row*.

    ``model["weights"][0]`` is the intercept and ``weights[i + 1]`` multiplies
    feature ``i``. Features are standardised with ``model["mean"]`` and
    ``model["std"]`` first. A feature without a mean is used unscaled, and a
    missing, zero or null std counts as 1. Features without a weight do not
    contribute.

    Args:
        model: Mapping with optional ``mean``, ``std`` and ``weights`` lists;
            absent or null entries are treated as empty.
        row: Raw feature values.

    Returns:
        The sigmoid of the weighted sum.
    """
    mean = model.get("mean") or []
    std = model.get("std") or []
    weights = model.get("weights") or []

    z = weights[0] if weights else 0.0
    # zip() stops at the shorter sequence, pairing row[i] with weights[i + 1].
    for idx, (value, weight) in enumerate(zip(row, weights[1:])):
        if idx < len(mean):
            # Guard against a std vector shorter than mean instead of raising.
            scale = std[idx] if idx < len(std) else 0
            value = (value - mean[idx]) / (scale or 1)
        z += weight * value
    return _sigmoid(z)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List
|
|
4
|
+
|
|
5
|
+
from .predictive_model import predict_probability
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def predict_tradeoff(models: Dict[str, Dict], row: List[float]) -> Dict[str, object]:
    """Pick the trade-off whose model gives *row* the highest probability.

    Args:
        models: Mapping of trade-off name to the model passed to
            ``predict_probability``.
        row: Feature values scored by every model.

    Returns:
        A dict with ``tradeoff`` (the winning name, or "none" when *models*
        is empty), ``confidence`` (its probability, 0.0 when empty) and
        ``scores`` (every name mapped to its probability).
    """
    scores: Dict[str, float] = {
        name: predict_probability(candidate, row) for name, candidate in models.items()
    }
    if not scores:
        return {"tradeoff": "none", "confidence": 0.0, "scores": scores}

    # Stable descending sort: on ties the earliest model in *models* wins.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    winner, confidence = ranked[0]
    return {"tradeoff": winner, "confidence": confidence, "scores": scores}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def top_contributors(model: Dict[str, List[float]], feature_names: List[str], row: List[float], count: int = 3) -> List[Dict[str, float]]:
    """Rank features by the magnitude of ``weight * value``.

    ``model["weights"][0]`` is skipped, so feature ``i`` uses
    ``weights[i + 1]``. A feature with no weight or no value in *row*
    gets 0 for the missing part.

    Args:
        model: Mapping with an optional ``weights`` list.
        feature_names: Names of the features, in row order.
        row: Feature values aligned with *feature_names*.
        count: Number of entries to keep.

    Returns:
        Up to *count* dicts with ``feature``, ``weight``, ``value`` and
        ``impact``, largest impact first.
    """
    weights = model.get("weights", [])
    weight_total = len(weights)
    value_total = len(row)

    ranked = []
    for position, feature in enumerate(feature_names):
        coefficient = weights[position + 1] if position + 1 < weight_total else 0
        observed = row[position] if position < value_total else 0
        ranked.append(
            {
                "feature": feature,
                "weight": coefficient,
                "value": observed,
                "impact": abs(coefficient * observed),
            }
        )

    # In-place stable sort; equal impacts keep their feature order.
    ranked.sort(key=lambda entry: entry["impact"], reverse=True)
    return ranked[:count]
|
apiris/cache.py
ADDED