procela-analysis 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ """
2
+ Procela analysis tools for epistemic simulation audit.
3
+
4
+ Provides memory reading, mechanism profiling, regime detection,
5
+ policy analysis, ecological modeling of competing theories,
6
+ visualization, and self-contained HTML audit report generation.
7
+ """
8
+
9
+ __version__ = "0.1.0"
10
+
11
+ from .mechanisms import MechanismEcology, MechanismProfiler
12
+ from .memory import (
13
+ ERRORS_SCHEMA,
14
+ HYPOTHESES_SCHEMA,
15
+ RESOLUTIONS_SCHEMA,
16
+ MemoryReader,
17
+ confidence_spread,
18
+ coverage,
19
+ disagreement_index,
20
+ errors_frame,
21
+ fragility,
22
+ hypotheses_frame,
23
+ resolutions_frame,
24
+ rolling_error,
25
+ validate_errors,
26
+ validate_hypotheses,
27
+ validate_resolutions,
28
+ )
29
+ from .policies import PolicyStability
30
+ from .regimes import RegimeDetector, TransitionAnalyzer
31
+ from .reports import AuditReport
32
+ from .viz import (
33
+ coverage_timeline,
34
+ diversity_timeline,
35
+ dominance_heatmap,
36
+ dominance_timeline,
37
+ error_timeline,
38
+ fragility_timeline,
39
+ per_mechanism_error,
40
+ regime_bands,
41
+ turnover_timeline,
42
+ )
43
+
44
+ __all__ = [
45
+ # Mechanisms
46
+ "MechanismEcology",
47
+ "MechanismProfiler",
48
+ # Memory
49
+ "hypotheses_frame",
50
+ "resolutions_frame",
51
+ "errors_frame",
52
+ "validate_hypotheses",
53
+ "validate_resolutions",
54
+ "validate_errors",
55
+ "HYPOTHESES_SCHEMA",
56
+ "RESOLUTIONS_SCHEMA",
57
+ "ERRORS_SCHEMA",
58
+ "rolling_error",
59
+ "coverage",
60
+ "fragility",
61
+ "disagreement_index",
62
+ "confidence_spread",
63
+ "MemoryReader",
64
+ # Policies
65
+ "PolicyStability",
66
+ # Regimes
67
+ "RegimeDetector",
68
+ "TransitionAnalyzer",
69
+ # Reports
70
+ "AuditReport",
71
+ # Viz
72
+ "dominance_timeline",
73
+ "dominance_heatmap",
74
+ "error_timeline",
75
+ "per_mechanism_error",
76
+ "fragility_timeline",
77
+ "coverage_timeline",
78
+ "regime_bands",
79
+ "diversity_timeline",
80
+ "turnover_timeline",
81
+ ]
@@ -0,0 +1,17 @@
1
+ """
2
+ Mechanism analysis for Procela hypothesis memory.
3
+
4
+ Provides per-mechanism performance profiling and population-level
5
+ ecological analysis of competing theories. MechanismProfiler focuses
6
+ on individual accuracy, falsifiability, and redundancy.
7
+ MechanismEcology studies dominance, diversity, extinction, and
8
+ turnover dynamics across the mechanism population.
9
+ """
10
+
11
+ from .ecology import MechanismEcology
12
+ from .profiler import MechanismProfiler
13
+
14
+ __all__ = [
15
+ "MechanismEcology",
16
+ "MechanismProfiler",
17
+ ]
@@ -0,0 +1,365 @@
1
+ """
2
+ Mechanism ecology for population-level analysis of competing theories.
3
+
4
+ Studies the dynamics of mechanism populations: which mechanisms
5
+ dominate, which are redundant, which go extinct after regime shifts.
6
+ Treats mechanisms as species competing in an epistemic niche.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import pandas as pd
12
+
13
+
14
class MechanismEcology:
    """
    Analyze mechanism population dynamics from hypothesis memory.

    Treats mechanisms as competing species. Dominance is measured
    by confidence share. Niche overlap is measured by proposal
    correlation. Extinction occurs when a mechanism's influence
    drops below a threshold and does not recover within a bounded
    window.

    Parameters
    ----------
    hypotheses : pd.DataFrame
        As produced by ``MemoryReader.hypotheses()``. The methods of
        this class read the ``variable``, ``step``, ``mechanism``,
        ``confidence`` and ``proposed`` columns.
    errors : pd.DataFrame
        As produced by ``MemoryReader.errors()``. Stored on the
        instance; none of the methods below read it.
    """

    def __init__(
        self,
        hypotheses: pd.DataFrame,
        errors: pd.DataFrame,
    ) -> None:
        """Store the hypothesis and error frames for later analysis."""
        self._hypotheses = hypotheses
        self._errors = errors

    def dominance_curve(
        self,
        variable: str,
    ) -> pd.DataFrame:
        """
        Compute confidence share over time for each mechanism.

        At each step, confidence share is a mechanism's confidence
        divided by the sum of all confidences for that variable.
        If all confidences at a step are zero, shares are set to
        zero for all mechanisms at that step.

        Parameters
        ----------
        variable : str
            Variable name to filter on.

        Returns
        -------
        pd.DataFrame
            Columns: ``step``, ``mechanism``, ``confidence_share``.
            Sorted by step, then by confidence share descending.

        Raises
        ------
        ValueError
            If no hypotheses found for the variable.
        """
        hyp = self._hypotheses[self._hypotheses["variable"] == variable].copy()

        if hyp.empty:
            raise ValueError(f"No hypotheses found for variable '{variable}'.")

        # A zero total would divide by zero; route it through NaN so the
        # affected rows end up with a share of exactly 0.0.
        step_totals = hyp.groupby("step")["confidence"].transform("sum")
        hyp["confidence_share"] = (
            hyp["confidence"] / step_totals.replace(0, float("nan"))
        ).fillna(0.0)

        result = hyp[["step", "mechanism", "confidence_share"]].copy()
        return result.sort_values(
            ["step", "confidence_share"], ascending=[True, False]
        ).reset_index(drop=True)

    @staticmethod
    def _overlap_strength(r: float) -> str:
        """Map a Pearson r to 'high', 'moderate', 'low' or 'unknown' (NaN)."""
        if pd.isna(r):
            return "unknown"
        magnitude = abs(r)
        if magnitude > 0.8:
            return "high"
        if magnitude > 0.5:
            return "moderate"
        return "low"

    def niche_overlap(self, variable: str) -> pd.DataFrame:
        """
        Compute pairwise proposal correlation between mechanisms.

        High correlation suggests mechanisms encode similar theories
        (high niche overlap). Low correlation indicates genuinely
        distinct causal hypotheses (distinct niches).

        Parameters
        ----------
        variable : str
            Variable name to filter on.

        Returns
        -------
        pd.DataFrame
            Columns: ``mechanism_a``, ``mechanism_b``, ``pearson_r``,
            ``overlap_strength``. One row per unordered mechanism pair.
            ``overlap_strength`` is ``|pearson_r|`` categorized as
            'high' (>0.8), 'moderate' (>0.5 up to 0.8), 'low' (<=0.5),
            or 'unknown' when the pair shares fewer than three steps
            or the correlation is NaN.

        Raises
        ------
        ValueError
            If no hypotheses are found for the variable, or fewer than
            two mechanisms proposed values for it.
        """
        hyp = self._hypotheses[self._hypotheses["variable"] == variable]

        # Fail early with the same message the sibling methods use rather
        # than relying on how pivot_table treats an empty frame.
        if hyp.empty:
            raise ValueError(f"No hypotheses found for variable '{variable}'.")

        # step x mechanism table of proposed values.
        pivot = hyp.pivot_table(
            index="step",
            columns="mechanism",
            values="proposed",
            aggfunc="first",
        )

        mechanisms = pivot.columns.tolist()

        if len(mechanisms) < 2:
            raise ValueError(
                f"Need at least two mechanisms for niche overlap analysis "
                f"on variable '{variable}'. Found: {len(mechanisms)}."
            )

        pairs: list[dict[str, str | float]] = []

        for i, ma in enumerate(mechanisms):
            for mb in mechanisms[i + 1 :]:
                # Only steps where both mechanisms proposed a value count.
                valid = pivot[[ma, mb]].dropna()
                if len(valid) < 3:
                    r = float("nan")
                else:
                    r = float(valid[ma].corr(valid[mb]))

                pairs.append(
                    {
                        "mechanism_a": ma,
                        "mechanism_b": mb,
                        "pearson_r": r,
                        "overlap_strength": self._overlap_strength(r),
                    }
                )

        return pd.DataFrame(pairs)

    def extinction_events(
        self,
        variable: str,
        threshold: float = 0.05,
        recovery_window: int = 10,
    ) -> pd.DataFrame:
        """
        Detect mechanisms that went functionally extinct.

        A mechanism is considered extinct when its confidence share
        drops below the threshold and does not get back to at least
        the threshold within ``recovery_window`` steps. Every drop
        (a below-threshold observation that follows an at-or-above
        observation, or that is the mechanism's first observation) is
        examined in step order; the first drop without recovery is
        reported, so an early dip that recovered does not hide a later
        permanent collapse. At most one event is reported per
        mechanism.

        A drop with no later observations inside the window counts as
        extinct, because nothing shows a recovery.

        Parameters
        ----------
        variable : str
            Variable name to filter on.
        threshold : float
            Confidence share below which a mechanism is considered
            at risk of extinction. Default 0.05.
        recovery_window : int
            Number of steps after the drop to check for recovery. If
            the mechanism stays below threshold for this entire
            window, it is declared extinct. Default 10.

        Returns
        -------
        pd.DataFrame
            Columns: ``mechanism``, ``extinction_step``,
            ``last_confidence_share``, ``steps_active_after_extinction``.
            ``last_confidence_share`` is the share observed at the
            extinction step. ``steps_active_after_extinction`` is the
            number of observations of the mechanism after that step.
            The frame has these columns and no rows if no extinctions
            are detected.

        Raises
        ------
        ValueError
            If no hypotheses found for the variable.
        """
        columns = [
            "mechanism",
            "extinction_step",
            "last_confidence_share",
            "steps_active_after_extinction",
        ]

        dom = self.dominance_curve(variable)

        extinctions: list[dict[str, str | float | int]] = []

        # sort=False keeps mechanisms in order of first appearance.
        for mechanism, mech_data in dom.groupby("mechanism", sort=False):
            mech_data = mech_data.sort_values("step")

            is_below = mech_data["confidence_share"] < threshold
            if not is_below.any():
                continue

            # A drop starts where the share is below threshold but the
            # previous observation (if any) was not.
            previously_below = is_below.shift(1, fill_value=False).astype(bool)
            drops = mech_data[is_below & ~previously_below]

            for raw_step, share in zip(drops["step"], drops["confidence_share"]):
                drop_step = int(raw_step)

                in_window = (mech_data["step"] > drop_step) & (
                    mech_data["step"] <= drop_step + recovery_window
                )
                recovered = (
                    mech_data.loc[in_window, "confidence_share"] >= threshold
                ).any()
                if recovered:
                    continue

                extinctions.append(
                    {
                        "mechanism": mechanism,
                        "extinction_step": drop_step,
                        "last_confidence_share": float(share),
                        "steps_active_after_extinction": int(
                            (mech_data["step"] > drop_step).sum()
                        ),
                    }
                )
                break

        # Passing the columns keeps the schema stable for an empty result.
        return pd.DataFrame(extinctions, columns=columns)

    def diversity_index(
        self,
        variable: str,
    ) -> pd.DataFrame:
        """
        Compute Simpson's diversity index over time for mechanisms.

        D = 1 - sum(share_i^2) for all mechanisms at each step.
        D = 0 means one mechanism dominates completely.
        D reaches 1 - 1/N when all N mechanisms have equal share.

        Parameters
        ----------
        variable : str
            Variable name to filter on.

        Returns
        -------
        pd.DataFrame
            Columns: ``step``, ``num_active``, ``diversity``.
            ``num_active`` is count of mechanisms with share > 0.
            ``diversity`` is NaN at steps where no mechanism has a
            positive share, since the index is undefined there.

        Raises
        ------
        ValueError
            If no hypotheses found for the variable.
        """
        dom = self.dominance_curve(variable)

        diversity = (
            dom.groupby("step")
            .agg(
                num_active=("confidence_share", lambda x: (x > 0).sum()),
                simpson_sum=("confidence_share", lambda x: (x**2).sum()),
            )
            .reset_index()
        )

        # With all shares at zero the formula would yield 1.0, which
        # would read as maximal diversity; report NaN instead.
        diversity["diversity"] = (1.0 - diversity["simpson_sum"]).where(
            diversity["num_active"] > 0
        )
        diversity = diversity.drop(columns=["simpson_sum"])

        return diversity.sort_values("step").reset_index(drop=True)

    @staticmethod
    def _rank_discordance(prev_ranks: pd.Series, curr_ranks: pd.Series) -> float:
        """Return the fraction of shared mechanism pairs whose order flipped."""
        common = prev_ranks.index.intersection(curr_ranks.index)
        n = len(common)
        if n < 2:
            return float("nan")

        prev_aligned = prev_ranks[common].to_numpy()
        curr_aligned = curr_ranks[common].to_numpy()

        # Pairwise rank differences; a pair is discordant when the sign
        # of its difference flips between the two steps.
        prev_delta = prev_aligned[:, None] - prev_aligned[None, :]
        curr_delta = curr_aligned[:, None] - curr_aligned[None, :]

        # The matrices hold each unordered pair twice, hence the halving.
        discordant = int((prev_delta * curr_delta < 0).sum()) // 2

        return float(discordant / (n * (n - 1) / 2))

    def turnover(
        self,
        variable: str,
    ) -> pd.DataFrame:
        """
        Compute mechanism rank turnover at each step.

        Measures how much the dominance ranking changes between
        consecutive steps using normalized pairwise discordance.
        High turnover indicates the epistemic landscape is shifting
        rapidly.

        At each step after the first, mechanisms are ranked by
        confidence share (descending). The fraction of mechanism
        pairs whose relative ordering changed from the previous
        step is computed over the mechanisms present at both steps.
        Ties are broken by row order in the dominance curve.

        Parameters
        ----------
        variable : str
            Variable name to filter on.

        Returns
        -------
        pd.DataFrame
            Columns: ``step``, ``turnover``.
            ``turnover`` is the fraction of rank pairs that changed
            from the previous step. 0 = identical ranking, 1 = fully
            reversed ranking. NaN for the first step and for steps
            sharing fewer than two mechanisms with the previous step.

        Raises
        ------
        ValueError
            If no hypotheses found for the variable.
        """
        dom = self.dominance_curve(variable)

        turnover_data: list[dict[str, int | float]] = []
        prev_ranks = None

        # Rank each step once and carry it forward for the next comparison.
        for step, at_step in dom.groupby("step", sort=True):
            curr_ranks = at_step.set_index("mechanism")["confidence_share"].rank(
                method="first", ascending=False
            )

            if prev_ranks is None:
                value = float("nan")
            else:
                value = self._rank_discordance(prev_ranks, curr_ranks)

            turnover_data.append({"step": int(step), "turnover": value})
            prev_ranks = curr_ranks

        return pd.DataFrame(turnover_data, columns=["step", "turnover"])
@@ -0,0 +1,209 @@
1
+ """
2
+ Mechanism profiler for per-mechanism performance analysis.
3
+
4
+ Computes accuracy curves, influence windows, redundancy matrices,
5
+ and falsifiability scores from hypothesis and error DataFrames
6
+ produced by MemoryReader.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import pandas as pd
12
+
13
+ from .ecology import MechanismEcology
14
+
15
+
16
class MechanismProfiler:
    """
    Profile the behavior of individual mechanisms from hypothesis memory.

    Every method filters the stored frames down to one variable and
    returns a new DataFrame.

    Parameters
    ----------
    hypotheses : pd.DataFrame
        As produced by ``MemoryReader.hypotheses()``.
    errors : pd.DataFrame
        As produced by ``MemoryReader.errors()``.
    """

    def __init__(
        self,
        hypotheses: pd.DataFrame,
        errors: pd.DataFrame,
    ) -> None:
        """Keep references to the hypothesis and error frames."""
        self._hypotheses = hypotheses
        self._errors = errors

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def rolling_mae(
        self,
        variable: str,
        window: int = 10,
    ) -> pd.DataFrame:
        """
        Compute rolling mean absolute error per mechanism.

        The window counts a mechanism's error rows (ordered by step).
        ``min_periods=1`` is used, so early rows average over however
        many observations are available so far instead of being NaN.

        Parameters
        ----------
        variable : str
            Variable name to filter on.
        window : int
            Rolling window size in rows per mechanism. Default 10.

        Returns
        -------
        pd.DataFrame
            Columns: ``step``, ``mechanism``, ``rolling_mae``.
            Rows are ordered by mechanism, then step.

        Raises
        ------
        ValueError
            If no errors found for the variable.
        """
        matching = self._errors.loc[self._errors["variable"] == variable]
        if matching.empty:
            raise ValueError(f"No errors found for variable '{variable}'.")

        ordered = matching.sort_values(["mechanism", "step"])

        def _trailing_mean(series: pd.Series) -> pd.Series:
            return series.rolling(window=window, min_periods=1).mean()

        smoothed = ordered.groupby("mechanism")["absolute_error"].transform(
            _trailing_mean
        )
        with_mae = ordered.assign(rolling_mae=smoothed)

        return with_mae[["step", "mechanism", "rolling_mae"]]

    def influence(
        self,
        variable: str,
        threshold: float = 0.1,
    ) -> pd.DataFrame:
        """
        Identify steps where each mechanism was epistemically influential.

        A mechanism is flagged as influential at a step when its
        confidence share, as computed by
        ``MechanismEcology.dominance_curve``, is at or above the
        threshold.

        Parameters
        ----------
        variable : str
            Variable name to filter on.
        threshold : float
            Minimum confidence share to be considered influential.
            Default 0.1.

        Returns
        -------
        pd.DataFrame
            Columns: ``step``, ``mechanism``, ``is_influential``.
        """
        ecology = MechanismEcology(self._hypotheses, self._errors)
        shares = ecology.dominance_curve(variable=variable)
        shares["is_influential"] = shares["confidence_share"].ge(threshold)
        return shares[["step", "mechanism", "is_influential"]]

    def redundancy(self, variable: str) -> pd.DataFrame:
        """
        Compute pairwise proposal correlation between mechanisms.

        High correlation suggests mechanisms encode similar theories
        with different noise. Low correlation indicates genuinely
        distinct causal hypotheses.

        Parameters
        ----------
        variable : str
            Variable name to filter on.

        Returns
        -------
        pd.DataFrame
            Columns: ``mechanism_a``, ``mechanism_b``, ``pearson_r``.
            One row per unique mechanism pair. ``pearson_r`` is NaN
            when the pair has fewer than three steps in common.

        Raises
        ------
        ValueError
            If no hypotheses found for the variable, or fewer than
            two mechanisms proposed values for it.
        """
        matching = self._hypotheses.loc[self._hypotheses["variable"] == variable]
        if matching.empty:
            raise ValueError(f"No hypotheses found for variable '{variable}'.")

        # One row per step, one column per mechanism, cells hold proposals.
        by_step = matching.pivot_table(
            values="proposed",
            index="step",
            columns="mechanism",
            aggfunc="first",
        )

        names = list(by_step.columns)
        # Fewer than two mechanisms means there is no pair to correlate.
        if len(names) < 2:
            raise ValueError(
                f"Need at least two mechanisms for redundancy analysis "
                f"on variable '{variable}'."
            )

        records: list[dict[str, str | float]] = []
        for position, first in enumerate(names):
            for second in names[position + 1 :]:
                # Restrict to steps where both mechanisms proposed a value.
                shared = by_step[[first, second]].dropna()
                if len(shared) >= 3:
                    r = float(shared[first].corr(shared[second]))
                else:
                    r = float("nan")
                records.append(
                    {"mechanism_a": first, "mechanism_b": second, "pearson_r": r}
                )

        return pd.DataFrame(records)

    def falsifiability(self, variable: str) -> pd.DataFrame:
        """
        Compute falsifiability scores for each mechanism.

        The score is the mechanism's mean absolute error divided by
        the standard deviation of its absolute error (plus ``1e-8`` to
        avoid division by zero). A high score means the error is
        stable relative to its size; a low score means it is erratic.
        A mechanism with a single error row has NaN volatility and
        therefore a NaN score.

        Parameters
        ----------
        variable : str
            Variable name to filter on.

        Returns
        -------
        pd.DataFrame
            Columns: ``mechanism``, ``mean_error``, ``error_volatility``,
            ``steps_active``, ``falsifiability_score``.

        Raises
        ------
        ValueError
            If no errors found for the variable.
        """
        matching = self._errors.loc[self._errors["variable"] == variable]
        if matching.empty:
            raise ValueError(f"No errors found for variable '{variable}'.")

        aggregations = {
            "mean_error": ("absolute_error", "mean"),
            "error_volatility": ("absolute_error", "std"),
            "steps_active": ("step", "count"),
        }
        summary = matching.groupby("mechanism").agg(**aggregations).reset_index()

        # The epsilon keeps the ratio finite when volatility is exactly zero.
        denominator = summary["error_volatility"] + 1e-8
        summary["falsifiability_score"] = summary["mean_error"] / denominator

        return summary