apiris 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
apiris/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Apiris runtime package."""
2
+
3
+ from .client import CADClient
4
+
5
+ __all__ = ["CADClient"]
apiris/ai/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """AI helpers for Apiris runtime."""
@@ -0,0 +1,215 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import math
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from .loader import load_json
8
+
9
+
10
def _hash_text(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of *text* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
12
+
13
+
14
def _extract_schema_paths(value: Any, prefix: str = "", depth: int = 0, max_depth: int = 6, paths: Optional[set] = None) -> set:
    """Collect the structural paths found in a parsed JSON-like value.

    Dict keys contribute dotted paths (``"a.b"``); lists contribute the
    enclosing path followed by ``[]`` and only their first element is
    inspected. Scalars contribute nothing.

    Args:
        value: The value to walk (dict, list, or scalar).
        prefix: Path accumulated so far; empty at the top level.
        depth: Current recursion depth.
        max_depth: Values nested deeper than this are not descended into.
        paths: Accumulator shared across recursive calls; created on demand.

    Returns:
        The set of paths. A set is always returned, including for scalar
        input, so callers can safely pass the result to ``sorted``.
    """
    if paths is None:
        paths = set()
    if depth > max_depth:
        return paths

    if isinstance(value, list):
        array_prefix = f"{prefix}[]" if prefix else "[]"
        paths.add(array_prefix)
        if value:
            # Only the first element is sampled to describe the array's shape.
            _extract_schema_paths(value[0], array_prefix, depth + 1, max_depth, paths)
    elif isinstance(value, dict):
        for key, val in value.items():
            next_prefix = f"{prefix}.{key}" if prefix else str(key)
            paths.add(next_prefix)
            _extract_schema_paths(val, next_prefix, depth + 1, max_depth, paths)

    # Previously scalars fell off the end of the function and yielded None.
    return paths
31
+
32
+
33
def _collect_stats(value: Any, depth: int = 0, stats: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Accumulate structural and numeric statistics for a JSON-like value.

    Args:
        value: The value to walk (dict, list, or scalar).
        depth: Nesting depth of *value*; 0 at the top level.
        stats: Accumulator shared across recursive calls. A fresh one is
            created when it is missing or empty.

    Returns:
        The accumulator dict with keys ``field_count`` (dict entries seen),
        ``max_depth`` (deepest dict/list), ``array_count``, ``null_count``,
        ``value_count`` (scalars seen) and ``numeric_values`` (finite
        numbers as floats, in traversal order).
    """
    current = stats or {
        "field_count": 0,
        "max_depth": 0,
        "array_count": 0,
        "null_count": 0,
        "value_count": 0,
        "numeric_values": [],
    }

    if isinstance(value, list):
        current["array_count"] += 1
        current["max_depth"] = max(current["max_depth"], depth)
        for item in value:
            _collect_stats(item, depth + 1, current)
        return current

    if isinstance(value, dict):
        current["max_depth"] = max(current["max_depth"], depth)
        for val in value.values():
            current["field_count"] += 1
            _collect_stats(val, depth + 1, current)
        return current

    current["value_count"] += 1
    if value is None:
        current["null_count"] += 1
    elif isinstance(value, (int, float)) and not isinstance(value, bool):
        # bool is a subclass of int; JSON true/false are not numeric samples.
        try:
            number = float(value)
        except OverflowError:
            # Integers too large for a float are treated like non-finite values.
            number = math.inf
        if math.isfinite(number):
            current["numeric_values"].append(number)
    return current
61
+
62
+
63
def _numeric_stats(values: List[float]) -> Dict[str, float]:
    """Summarise *values* as mean, population standard deviation, min and max.

    An empty input yields 0.0 for every statistic.
    """
    count = len(values)
    if count == 0:
        return dict(mean=0.0, std=0.0, min=0.0, max=0.0)

    average = sum(values) / count
    # Population variance: squared deviations divided by the sample count.
    spread = sum((sample - average) ** 2 for sample in values) / count
    summary = {"mean": average, "std": math.sqrt(spread)}
    summary["min"] = min(values)
    summary["max"] = max(values)
    return summary
74
+
75
+
76
def _compute_feature_vector(parsed: Any, context: Dict[str, Any]) -> Dict[str, Any]:
    """Build the feature map describing one parsed response.

    Args:
        parsed: The parsed response body, or ``None``.
        context: Per-call inputs read with ``.get``: ``core_fields``,
            ``response_hash``, ``last_response_hash``,
            ``last_response_timestamp``, ``last_numeric_mean``,
            ``repeat_count`` and ``now``.

    Returns:
        A dict with ``schemaHash`` (hash of the sorted schema paths, or
        ``None`` when there are none), ``responseHash`` (echoed from the
        context) and ``featureMap`` (the numeric features).
    """
    # Structural fingerprint of the payload.
    if parsed is None:
        schema_paths = []
    else:
        schema_paths = sorted(_extract_schema_paths(parsed))
    schema_hash = None
    if schema_paths:
        schema_hash = _hash_text("|".join(schema_paths))

    stats = _collect_stats(parsed)
    numeric = _numeric_stats(stats["numeric_values"])

    # Core fields are only checked when the payload exposed at least one path.
    core_fields = context.get("core_fields") or []
    missing_core = 0
    if core_fields and schema_paths:
        known_paths = set(schema_paths)
        missing_core = sum(1 for name in core_fields if name not in known_paths)

    # Repetition tracking against the previously seen response.
    response_hash = context.get("response_hash")
    previous_hash = context.get("last_response_hash")
    if previous_hash and response_hash == previous_hash:
        repeat_count = (context.get("repeat_count") or 0) + 1
    else:
        repeat_count = 0

    last_seen = context.get("last_response_timestamp")
    time_since_last = -1  # stays -1 unless the response repeats the previous one
    if response_hash and last_seen and response_hash == previous_hash:
        time_since_last = (context.get("now") - last_seen) / 1000

    last_mean = context.get("last_numeric_mean")
    numeric_jump = 0 if last_mean is None else abs(numeric["mean"] - last_mean)

    value_count = stats["value_count"]
    null_ratio = (stats["null_count"] / value_count) if value_count else 0
    missing_core_ratio = (missing_core / len(core_fields)) if core_fields else 0

    feature_map = {
        "field_count": stats["field_count"],
        "max_depth": stats["max_depth"],
        "array_count": stats["array_count"],
        "null_ratio": null_ratio,
        "numeric_mean": numeric["mean"],
        "numeric_std": numeric["std"],
        "numeric_min": numeric["min"],
        "numeric_max": numeric["max"],
        "numeric_jump": numeric_jump,
        "missing_core_ratio": missing_core_ratio,
        "repeat_count": repeat_count,
        "time_since_identical": time_since_last,
    }
    return {
        "schemaHash": schema_hash,
        "responseHash": response_hash,
        "featureMap": feature_map,
    }
119
+
120
+
121
def _harmonic(n: int) -> float:
    """Return the n-th harmonic number, 1 + 1/2 + ... + 1/n."""
    reciprocals = [1.0 / k for k in range(1, n + 1)]
    return sum(reciprocals)
123
+
124
+
125
def _c_factor(n: int) -> float:
    """Return the path-length normalisation term for a sample of size *n*.

    Computed as ``2 * H(n - 1) - 2 * (n - 1) / n`` where ``H`` is the
    harmonic number; sizes of one or less yield 0.0.
    """
    return 0.0 if n <= 1 else 2.0 * _harmonic(n - 1) - (2.0 * (n - 1)) / n
129
+
130
+
131
def _path_length(row: List[float], node: Dict[str, Any], depth: int) -> float:
    """Return the path length of *row* through the tree rooted at *node*.

    Internal nodes route on ``row[feature] <= split`` (left when true,
    right otherwise). On reaching a node flagged ``leaf`` the result is the
    depth travelled plus ``_c_factor`` of the leaf's ``size`` (default 1).
    """
    current = node
    level = depth
    while not current.get("leaf"):
        feature = current.get("feature")
        split = current.get("split")
        branch = "left" if row[feature] <= split else "right"
        current = current.get(branch)
        level += 1
    return level + _c_factor(current.get("size", 1))
139
+
140
+
141
def score_isolation_forest(row: List[float], forest: Dict[str, Any]) -> float:
    """Score *row* against a serialised isolation forest.

    Args:
        row: Feature values indexed by each tree node's ``feature``.
        forest: Dict with ``trees`` (list of node dicts) and ``sampleSize``.

    Returns:
        ``2 ** (-mean_path_length / c(sampleSize))``. Returns 0.0 when the
        forest has no trees, or when ``sampleSize`` is missing or <= 1 so
        that the normaliser would be zero (previously a ZeroDivisionError).
    """
    trees = forest.get("trees") or []
    lengths = [_path_length(row, tree, 0) for tree in trees]
    if not lengths:
        return 0.0

    normaliser = _c_factor(forest.get("sampleSize") or 1)
    if normaliser <= 0:
        # No meaningful normalisation is possible; report "no signal",
        # matching the fallback used for an empty forest.
        return 0.0

    avg = sum(lengths) / len(lengths)
    return math.pow(2, -avg / normaliser)
147
+
148
+
149
class AnomalyScorer:
    """Score parsed API responses against per-API models loaded from JSON."""

    def __init__(self, model_path: str, soft_threshold: float, strong_threshold: float) -> None:
        """Load the model file and remember the flag thresholds.

        A missing or unreadable model (``load_json`` returning a falsy
        value) leaves the scorer with no feature names and no models.
        """
        loaded = load_json(model_path) or {}
        self.model = loaded
        self.feature_names = loaded.get("featureNames") or []
        self.models = loaded.get("models") or {}
        self.soft_threshold = soft_threshold
        self.strong_threshold = strong_threshold

    def score(self, api: str, parsed: Any, response_text: Optional[str], runtime_state: Dict[str, Dict[str, Any]], now_ms: int) -> Optional[Dict[str, Any]]:
        """Score one response for *api* and update its runtime state.

        Args:
            api: Key selecting the per-API model and runtime state entry.
            parsed: Parsed response body.
            response_text: Raw response text, hashed when non-empty.
            runtime_state: Mutable mapping of per-API state; an entry is
                created for *api* on first use and updated on every call.
            now_ms: Current time passed through as the ``now`` context value.

        Returns:
            ``None`` when no model exists for *api*; otherwise a dict with
            ``anomalyScore``, ``anomalyFlag`` (``"strong"``, ``"soft"`` or
            ``"none"``), ``schemaHash``, ``responseHash``, ``topFeatures``
            (up to three entries ranked by absolute z-score) and ``features``.
        """
        api_model = self.models.get(api)
        if not api_model:
            return None

        state = runtime_state.setdefault(api, {
            "last_response_hash": None,
            "last_response_timestamp": None,
            "last_numeric_mean": None,
            "repeat_count": 0,
        })

        if response_text:
            response_hash = _hash_text(response_text)
        else:
            response_hash = None

        context = {
            "core_fields": api_model.get("coreFields") or [],
            "response_hash": response_hash,
            "last_response_hash": state.get("last_response_hash"),
            "last_response_timestamp": state.get("last_response_timestamp"),
            "last_numeric_mean": state.get("last_numeric_mean"),
            "repeat_count": state.get("repeat_count"),
            "now": now_ms,
        }
        feature_data = _compute_feature_vector(parsed, context)
        feature_map = feature_data["featureMap"]

        # Standardise only the leading features that have a recorded mean;
        # the rest are passed through untouched.
        row = [feature_map.get(name, 0) for name in self.feature_names]
        mean = api_model.get("mean") or []
        std = api_model.get("std") or []
        covered = len(mean)
        standardized = []
        for idx, raw in enumerate(row):
            if idx < covered:
                standardized.append((raw - mean[idx]) / (std[idx] or 1))
            else:
                standardized.append(raw)

        anomaly_score = score_isolation_forest(standardized, api_model.get("forest") or {})
        if anomaly_score >= self.strong_threshold:
            anomaly_flag = "strong"
        elif anomaly_score >= self.soft_threshold:
            anomaly_flag = "soft"
        else:
            anomaly_flag = "none"

        # Features without a recorded mean get a z-score of 0.
        ranked = [
            {
                "feature": name,
                "value": row[idx],
                "z": abs(standardized[idx] if idx < covered else 0),
            }
            for idx, name in enumerate(self.feature_names)
        ]
        ranked.sort(key=lambda item: item["z"], reverse=True)
        top_features = ranked[:3]

        repeated = response_hash and response_hash == state.get("last_response_hash")
        if repeated:
            state["repeat_count"] += 1
        else:
            state["repeat_count"] = 0
        state["last_response_hash"] = response_hash
        state["last_response_timestamp"] = now_ms
        state["last_numeric_mean"] = feature_map.get("numeric_mean", 0)

        return {
            "anomalyScore": anomaly_score,
            "anomalyFlag": anomaly_flag,
            "schemaHash": feature_data.get("schemaHash"),
            "responseHash": response_hash,
            "topFeatures": top_features,
            "features": feature_data.get("featureMap"),
        }
apiris/ai/loader.py ADDED
@@ -0,0 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Optional
6
+
7
+
8
def load_json(path: str) -> Optional[Dict[str, Any]]:
    """Read and parse a UTF-8 JSON file, returning ``None`` on any failure.

    Args:
        path: Filesystem path of the JSON document.

    Returns:
        The parsed JSON value, or ``None`` when the file is missing, cannot
        be read (directory, permissions, other OS errors), is not valid
        UTF-8, or does not contain valid JSON.
    """
    try:
        # Read directly instead of checking exists() first: this avoids the
        # check-then-read race and covers unreadable paths as well.
        raw = Path(path).read_text(encoding="utf-8")
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError and malformed path strings.
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return None
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from typing import List, Dict
5
+
6
+
7
def _sigmoid(z: float) -> float:
    """Return the logistic function ``1 / (1 + e**-z)`` without overflowing.

    For negative *z* the algebraically equivalent form ``e**z / (1 + e**z)``
    is used, so large-magnitude inputs saturate to 0.0 or 1.0 instead of
    raising ``OverflowError`` from ``math.exp``.
    """
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    exp_z = math.exp(z)
    return exp_z / (1.0 + exp_z)
9
+
10
+
11
def predict_probability(model: Dict[str, List[float]], row: List[float]) -> float:
    """Return the logistic-model probability for *row*.

    Args:
        model: Dict with ``mean`` and ``std`` (per-feature standardisation)
            and ``weights`` (bias first, then one weight per feature).
        row: Raw feature values.

    Returns:
        The sigmoid of the bias plus the weighted standardised features.
        Features beyond the length of ``mean`` are used unstandardised and
        features without a matching weight are ignored.
    """
    mean = model.get("mean", [])
    std = model.get("std", [])
    weights = model.get("weights", [])

    covered = len(mean)
    standardized = []
    for idx, raw in enumerate(row):
        if idx < covered:
            # A zero std falls back to 1 so the division is always defined.
            standardized.append((raw - mean[idx]) / (std[idx] or 1))
        else:
            standardized.append(raw)

    z = weights[0] if weights else 0.0
    # weights[0] is the bias; pairing stops at the shorter sequence.
    for weight, value in zip(weights[1:], standardized):
        z += weight * value
    return _sigmoid(z)
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, List
4
+
5
+ from .predictive_model import predict_probability
6
+
7
+
8
def predict_tradeoff(models: Dict[str, Dict], row: List[float]) -> Dict[str, object]:
    """Pick the tradeoff whose model gives *row* the highest probability.

    Returns:
        A dict with ``tradeoff`` (the winning key, or ``"none"`` when
        *models* is empty), ``confidence`` (its probability, or 0.0) and
        ``scores`` (every key's probability).
    """
    scores: Dict[str, float] = {
        name: predict_probability(candidate, row) for name, candidate in models.items()
    }
    if not scores:
        return {"tradeoff": "none", "confidence": 0.0, "scores": scores}

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    best_name, best_score = ranked[0]
    return {"tradeoff": best_name, "confidence": best_score, "scores": scores}
16
+
17
+
18
def top_contributors(model: Dict[str, List[float]], feature_names: List[str], row: List[float], count: int = 3) -> List[Dict[str, float]]:
    """Rank features by the magnitude of ``weight * value``.

    Args:
        model: Dict whose ``weights`` list holds the bias first, then one
            weight per feature.
        feature_names: Feature names, positionally aligned with *row*.
        row: Feature values.
        count: Maximum number of entries to return.

    Returns:
        Up to *count* dicts (``feature``, ``weight``, ``value``, ``impact``)
        ordered by descending impact. A missing weight or value counts as 0.
    """
    weights = model.get("weights", [])

    def describe(position: int, feature: str) -> Dict[str, float]:
        # Offset by one because weights[0] is the bias term.
        weight = weights[position + 1] if position + 1 < len(weights) else 0
        value = row[position] if position < len(row) else 0
        return {"feature": feature, "weight": weight, "value": value, "impact": abs(weight * value)}

    ranked = [describe(position, feature) for position, feature in enumerate(feature_names)]
    ranked.sort(key=lambda item: item["impact"], reverse=True)
    return ranked[:count]
apiris/cache.py ADDED
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Optional
5
+
6
+
7
@dataclass
class ResponseCache:
    """Record holding one cached response."""

    # NOTE(review): presumably the time the entry was stored; units are not
    # visible here, confirm against the code that creates entries.
    ts: float
    # Numeric status of the cached response.
    status: int
    # Header name/value pairs of the cached response.
    headers: Dict[str, str]
    # Response body as text.
    body: str
    # Content type of the body, or None when it is not set.
    content_type: Optional[str]