mergeron 2024.738963.0__py3-none-any.whl → 2025.739265.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mergeron might be problematic. Click here for more details.
- mergeron/__init__.py +26 -6
- mergeron/core/__init__.py +5 -65
- mergeron/core/{damodaran_margin_data.py → empirical_margin_distribution.py} +74 -58
- mergeron/core/ftc_merger_investigations_data.py +142 -93
- mergeron/core/guidelines_boundaries.py +289 -1077
- mergeron/core/guidelines_boundary_functions.py +1128 -0
- mergeron/core/{guidelines_boundaries_specialized_functions.py → guidelines_boundary_functions_extra.py} +76 -42
- mergeron/core/pseudorandom_numbers.py +16 -22
- mergeron/data/__init__.py +3 -0
- mergeron/data/damodaran_margin_data.xls +0 -0
- mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
- mergeron/demo/__init__.py +3 -0
- mergeron/demo/visualize_empirical_margin_distribution.py +86 -0
- mergeron/gen/__init__.py +257 -245
- mergeron/gen/data_generation.py +473 -221
- mergeron/gen/data_generation_functions.py +876 -0
- mergeron/gen/enforcement_stats.py +355 -0
- mergeron/gen/upp_tests.py +159 -259
- mergeron-2025.739265.0.dist-info/METADATA +115 -0
- mergeron-2025.739265.0.dist-info/RECORD +23 -0
- {mergeron-2024.738963.0.dist-info → mergeron-2025.739265.0.dist-info}/WHEEL +1 -1
- mergeron/License.txt +0 -16
- mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
- mergeron/core/excel_helper.py +0 -259
- mergeron/core/proportions_tests.py +0 -520
- mergeron/ext/__init__.py +0 -5
- mergeron/ext/tol_colors.py +0 -851
- mergeron/gen/_data_generation_functions_nonpublic.py +0 -621
- mergeron/gen/investigations_stats.py +0 -709
- mergeron/jinja_LaTex_templates/clrrate_cis_summary_table_template.tex.jinja2 +0 -121
- mergeron/jinja_LaTex_templates/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -82
- mergeron/jinja_LaTex_templates/ftcinvdata_summary_table_template.tex.jinja2 +0 -57
- mergeron/jinja_LaTex_templates/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -104
- mergeron/jinja_LaTex_templates/mergeron.cls +0 -161
- mergeron/jinja_LaTex_templates/mergeron_table_collection_template.tex.jinja2 +0 -90
- mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 +0 -84
- mergeron-2024.738963.0.dist-info/METADATA +0 -108
- mergeron-2024.738963.0.dist-info/RECORD +0 -30
- /mergeron/{core → data}/ftc_invdata.msgpack +0 -0
|
@@ -1,35 +1,71 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Specialized methods for defining and analyzing boundaries for Guidelines standards.
|
|
3
3
|
|
|
4
|
-
These methods provide
|
|
5
|
-
to have
|
|
4
|
+
These methods (functions) rely on scipy or sympy for core computations,
|
|
5
|
+
and may provide better precision than the core functions, but tend to have
|
|
6
|
+
poor performance
|
|
6
7
|
|
|
7
8
|
"""
|
|
8
9
|
|
|
9
|
-
from
|
|
10
|
+
from collections.abc import Callable
|
|
11
|
+
from dataclasses import dataclass
|
|
10
12
|
from typing import Literal
|
|
11
13
|
|
|
12
14
|
import numpy as np
|
|
13
15
|
from mpmath import mp, mpf # type: ignore
|
|
14
16
|
from scipy.spatial.distance import minkowski as distance_function # type: ignore
|
|
15
|
-
from sympy import lambdify, simplify, solve, symbols
|
|
17
|
+
from sympy import lambdify, simplify, solve, symbols # type: ignore
|
|
16
18
|
|
|
17
|
-
from .. import
|
|
18
|
-
from .
|
|
19
|
-
GuidelinesBoundary,
|
|
20
|
-
GuidelinesBoundaryCallable,
|
|
21
|
-
_shrratio_boundary_intcpt,
|
|
22
|
-
lerp,
|
|
23
|
-
)
|
|
19
|
+
from .. import DEFAULT_REC_RATIO, VERSION, ArrayDouble # noqa: TID252
|
|
20
|
+
from . import guidelines_boundary_functions as gbfn
|
|
24
21
|
|
|
25
|
-
__version__ =
|
|
22
|
+
__version__ = VERSION
|
|
26
23
|
|
|
27
24
|
|
|
28
|
-
mp.
|
|
25
|
+
mp.dps = 32
|
|
29
26
|
mp.trap_complex = True
|
|
30
27
|
|
|
31
28
|
|
|
32
|
-
|
|
29
|
+
@dataclass(slots=True, frozen=True)
|
|
30
|
+
class GuidelinesBoundaryCallable:
|
|
31
|
+
boundary_function: Callable[[ArrayDouble], ArrayDouble]
|
|
32
|
+
area: float
|
|
33
|
+
s_naught: float = 0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def dh_area_quad(_dh_val: float = 0.01, /, *, dps: int = 9) -> float:
|
|
37
|
+
"""
|
|
38
|
+
Area under the ΔHHI boundary.
|
|
39
|
+
|
|
40
|
+
When the given ΔHHI bound matches a Guidelines safeharbor,
|
|
41
|
+
the area under the boundary is half the intrinsic clearance rate
|
|
42
|
+
for the ΔHHI safeharbor.
|
|
43
|
+
|
|
44
|
+
Parameters
|
|
45
|
+
----------
|
|
46
|
+
_dh_val
|
|
47
|
+
Merging-firms' ΔHHI bound.
|
|
48
|
+
dps
|
|
49
|
+
Specified precision in decimal places.
|
|
50
|
+
|
|
51
|
+
Returns
|
|
52
|
+
-------
|
|
53
|
+
Area under ΔHHI boundary.
|
|
54
|
+
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
_dh_val = mpf(f"{_dh_val}")
|
|
58
|
+
_s_naught = (1 - mp.sqrt(1 - 2 * _dh_val)) / 2
|
|
59
|
+
|
|
60
|
+
return round(
|
|
61
|
+
float(
|
|
62
|
+
_s_naught + mp.quad(lambda x: _dh_val / (2 * x), [_s_naught, 1 - _s_naught])
|
|
63
|
+
),
|
|
64
|
+
dps,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def hhi_delta_boundary_qdtr(_dh_val: float = 0.01, /) -> GuidelinesBoundaryCallable:
|
|
33
69
|
"""
|
|
34
70
|
Generate the list of share combinations on the ΔHHI boundary.
|
|
35
71
|
|
|
@@ -50,12 +86,12 @@ def delta_hhi_boundary_qdtr(_dh_val: float = 0.01, /) -> GuidelinesBoundaryCalla
|
|
|
50
86
|
|
|
51
87
|
_hhi_eqn = _s_2 - 0.01 / (2 * _s_1)
|
|
52
88
|
|
|
53
|
-
_hhi_bdry = solve(_hhi_eqn, _s_2)[0]
|
|
54
|
-
_s_nought = float(solve(_hhi_eqn.subs({_s_2: 1 - _s_1}), _s_1)[0])
|
|
89
|
+
_hhi_bdry = solve(_hhi_eqn, _s_2)[0]
|
|
90
|
+
_s_nought = float(solve(_hhi_eqn.subs({_s_2: 1 - _s_1}), _s_1)[0])
|
|
55
91
|
|
|
56
92
|
_hhi_bdry_area = 2 * (
|
|
57
93
|
_s_nought
|
|
58
|
-
+ mp.quad(lambdify(_s_1, _hhi_bdry, "mpmath"), (_s_nought, 1 - _s_nought))
|
|
94
|
+
+ mp.quad(lambdify(_s_1, _hhi_bdry, "mpmath"), (_s_nought, 1 - _s_nought)) # pyright: ignore
|
|
59
95
|
)
|
|
60
96
|
|
|
61
97
|
return GuidelinesBoundaryCallable(
|
|
@@ -65,7 +101,7 @@ def delta_hhi_boundary_qdtr(_dh_val: float = 0.01, /) -> GuidelinesBoundaryCalla
|
|
|
65
101
|
|
|
66
102
|
def shrratio_boundary_qdtr_wtd_avg(
|
|
67
103
|
_delta_star: float = 0.075,
|
|
68
|
-
_r_val: float =
|
|
104
|
+
_r_val: float = DEFAULT_REC_RATIO,
|
|
69
105
|
/,
|
|
70
106
|
*,
|
|
71
107
|
weighting: Literal["own-share", "cross-product-share"] | None = "own-share",
|
|
@@ -113,7 +149,7 @@ def shrratio_boundary_qdtr_wtd_avg(
|
|
|
113
149
|
- (_s_1 + _s_2) * _delta_star
|
|
114
150
|
)
|
|
115
151
|
|
|
116
|
-
_bdry_func = solve(_bdry_eqn, _s_2)[0]
|
|
152
|
+
_bdry_func = solve(_bdry_eqn, _s_2)[0]
|
|
117
153
|
_s_naught = (
|
|
118
154
|
float(solve(simplify(_bdry_eqn.subs({_s_2: 1 - _s_1})), _s_1)[0]) # type: ignore
|
|
119
155
|
if recapture_form == "inside-out"
|
|
@@ -123,7 +159,7 @@ def shrratio_boundary_qdtr_wtd_avg(
|
|
|
123
159
|
2
|
|
124
160
|
* (
|
|
125
161
|
_s_naught
|
|
126
|
-
+ mp.quad(lambdify(_s_1, _bdry_func, "mpmath"), (_s_naught, _s_mid))
|
|
162
|
+
+ mp.quad(lambdify(_s_1, _bdry_func, "mpmath"), (_s_naught, _s_mid)) # pyright: ignore
|
|
127
163
|
)
|
|
128
164
|
- (_s_mid**2 + _s_naught**2)
|
|
129
165
|
)
|
|
@@ -143,7 +179,7 @@ def shrratio_boundary_qdtr_wtd_avg(
|
|
|
143
179
|
- (_s_1 + _s_2) * _d_star
|
|
144
180
|
)
|
|
145
181
|
|
|
146
|
-
_bdry_func = solve(_bdry_eqn, _s_2)[1]
|
|
182
|
+
_bdry_func = solve(_bdry_eqn, _s_2)[1]
|
|
147
183
|
_bdry_area = float(
|
|
148
184
|
2
|
|
149
185
|
* (
|
|
@@ -153,7 +189,7 @@ def shrratio_boundary_qdtr_wtd_avg(
|
|
|
153
189
|
),
|
|
154
190
|
(0, _s_mid),
|
|
155
191
|
)
|
|
156
|
-
).real
|
|
192
|
+
).real # pyright: ignore
|
|
157
193
|
- _s_mid**2
|
|
158
194
|
)
|
|
159
195
|
|
|
@@ -171,9 +207,9 @@ def shrratio_boundary_qdtr_wtd_avg(
|
|
|
171
207
|
- _delta_star
|
|
172
208
|
)
|
|
173
209
|
|
|
174
|
-
_bdry_func = solve(_bdry_eqn, _s_2)[0]
|
|
210
|
+
_bdry_func = solve(_bdry_eqn, _s_2)[0]
|
|
175
211
|
_bdry_area = float(
|
|
176
|
-
2 * (mp.quad(lambdify(_s_1, _bdry_func, "mpmath"), (0, _s_mid)))
|
|
212
|
+
2 * (mp.quad(lambdify(_s_1, _bdry_func, "mpmath"), (0, _s_mid))) # pyright: ignore
|
|
177
213
|
- _s_mid**2
|
|
178
214
|
)
|
|
179
215
|
|
|
@@ -184,20 +220,20 @@ def shrratio_boundary_qdtr_wtd_avg(
|
|
|
184
220
|
|
|
185
221
|
def shrratio_boundary_distance(
|
|
186
222
|
_delta_star: float = 0.075,
|
|
187
|
-
_r_val: float =
|
|
223
|
+
_r_val: float = DEFAULT_REC_RATIO,
|
|
188
224
|
/,
|
|
189
225
|
*,
|
|
190
|
-
agg_method: Literal["arithmetic", "distance"] = "arithmetic",
|
|
226
|
+
agg_method: Literal["arithmetic mean", "distance"] = "arithmetic mean",
|
|
191
227
|
weighting: Literal["own-share", "cross-product-share"] | None = "own-share",
|
|
192
228
|
recapture_form: Literal["inside-out", "proportional"] = "inside-out",
|
|
193
|
-
|
|
194
|
-
) -> GuidelinesBoundary:
|
|
229
|
+
dps: int = 5,
|
|
230
|
+
) -> gbfn.GuidelinesBoundary:
|
|
195
231
|
"""
|
|
196
232
|
Share combinations for the GUPPI boundaries using various aggregators with
|
|
197
233
|
symmetric merging-firm margins.
|
|
198
234
|
|
|
199
235
|
Reimplements the arithmetic-averages and distance estimations from the function
|
|
200
|
-
`shrratio_boundary_wtd_avg`but uses the Minkowski-distance function,
|
|
236
|
+
`shrratio_boundary_wtd_avg` but uses the Minkowski-distance function,
|
|
201
237
|
`scipy.spatial.distance.minkowski` for all aggregators. This reimplementation
|
|
202
238
|
is useful for testing the output of `shrratio_boundary_wtd_avg`
|
|
203
239
|
but runs considerably slower.
|
|
@@ -209,13 +245,13 @@ def shrratio_boundary_distance(
|
|
|
209
245
|
_r_val
|
|
210
246
|
recapture ratio
|
|
211
247
|
agg_method
|
|
212
|
-
Whether "arithmetic
|
|
248
|
+
Whether "arithmetic mean" or "distance".
|
|
213
249
|
weighting
|
|
214
250
|
Whether "own-share" or "cross-product-share".
|
|
215
251
|
recapture_form
|
|
216
252
|
Whether recapture-ratio is MNL-consistent ("inside-out") or has fixed
|
|
217
253
|
value for both merging firms ("proportional").
|
|
218
|
-
|
|
254
|
+
dps
|
|
219
255
|
Number of decimal places for rounding returned shares and area.
|
|
220
256
|
|
|
221
257
|
Returns
|
|
@@ -230,11 +266,11 @@ def shrratio_boundary_distance(
|
|
|
230
266
|
# initial conditions
|
|
231
267
|
_gbdry_points = [(_s_mid, _s_mid)]
|
|
232
268
|
_s_1_pre, _s_2_pre = _s_mid, _s_mid
|
|
233
|
-
_s_2_oddval, _s_2_oddsum, _s_2_evnsum = True, 0, 0
|
|
269
|
+
_s_2_oddval, _s_2_oddsum, _s_2_evnsum = True, 0.0, 0.0
|
|
234
270
|
|
|
235
271
|
# parameters for iteration
|
|
236
272
|
_weights_base = (mpf("0.5"),) * 2
|
|
237
|
-
_gbd_step_sz = mp.power(10, -
|
|
273
|
+
_gbd_step_sz = mp.power(10, -dps)
|
|
238
274
|
_theta = _gbd_step_sz * (10 if weighting == "cross-product-share" else 1)
|
|
239
275
|
for _s_1 in mp.arange(_s_mid - _gbd_step_sz, 0, -_gbd_step_sz):
|
|
240
276
|
# The wtd. avg. GUPPI is not always convex to the origin, so we
|
|
@@ -249,7 +285,7 @@ def shrratio_boundary_distance(
|
|
|
249
285
|
while True:
|
|
250
286
|
_de_1 = _s_2 / (1 - _s_1)
|
|
251
287
|
_de_2 = (
|
|
252
|
-
_s_1 / (1 - lerp(_s_1, _s_2, _r_val))
|
|
288
|
+
_s_1 / (1 - gbfn.lerp(_s_1, _s_2, _r_val))
|
|
253
289
|
if recapture_form == "inside-out"
|
|
254
290
|
else _s_1 / (1 - _s_2)
|
|
255
291
|
)
|
|
@@ -267,7 +303,7 @@ def shrratio_boundary_distance(
|
|
|
267
303
|
)
|
|
268
304
|
|
|
269
305
|
match agg_method:
|
|
270
|
-
case "arithmetic":
|
|
306
|
+
case "arithmetic mean":
|
|
271
307
|
_delta_test = distance_function(
|
|
272
308
|
(_de_1, _de_2), (0.0, 0.0), p=1, w=_weights_i
|
|
273
309
|
)
|
|
@@ -304,7 +340,7 @@ def shrratio_boundary_distance(
|
|
|
304
340
|
else:
|
|
305
341
|
_s_2_oddsum -= _s_1_pre
|
|
306
342
|
|
|
307
|
-
_s_intcpt = _shrratio_boundary_intcpt(
|
|
343
|
+
_s_intcpt = gbfn._shrratio_boundary_intcpt(
|
|
308
344
|
_s_1_pre,
|
|
309
345
|
_delta_star,
|
|
310
346
|
_r_val,
|
|
@@ -329,11 +365,9 @@ def shrratio_boundary_distance(
|
|
|
329
365
|
# Area under boundary
|
|
330
366
|
_gbdry_area_total = 2 * _gbd_prtlarea - mp.power(_s_mid, "2")
|
|
331
367
|
|
|
332
|
-
_gbdry_points
|
|
333
|
-
np.float64
|
|
334
|
-
)
|
|
368
|
+
_gbdry_points.append((mpf("0.0"), _s_intcpt))
|
|
335
369
|
# Points defining boundary to point-of-symmetry
|
|
336
|
-
return GuidelinesBoundary(
|
|
337
|
-
np.
|
|
338
|
-
round(float(_gbdry_area_total),
|
|
370
|
+
return gbfn.GuidelinesBoundary(
|
|
371
|
+
np.vstack((_gbdry_points[::-1], np.flip(_gbdry_points[1:], 1))),
|
|
372
|
+
round(float(_gbdry_area_total), dps),
|
|
339
373
|
)
|
|
@@ -8,24 +8,18 @@ https://github.com/numpy/numpy/issues/16313.
|
|
|
8
8
|
|
|
9
9
|
import concurrent.futures
|
|
10
10
|
from collections.abc import Sequence
|
|
11
|
-
from importlib.metadata import version
|
|
12
11
|
from multiprocessing import cpu_count
|
|
13
|
-
from typing import Literal
|
|
12
|
+
from typing import Literal
|
|
14
13
|
|
|
15
14
|
import numpy as np
|
|
16
15
|
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
17
|
-
from numpy.typing import NBitBase, NDArray
|
|
18
16
|
|
|
19
|
-
from .. import
|
|
17
|
+
from .. import VERSION, ArrayDouble # noqa: TID252
|
|
20
18
|
|
|
21
|
-
__version__ =
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
TF = TypeVar("TF", bound=NBitBase)
|
|
25
|
-
TI = TypeVar("TI", bound=NBitBase)
|
|
19
|
+
__version__ = VERSION
|
|
26
20
|
|
|
27
21
|
NTHREADS = 2 * cpu_count()
|
|
28
|
-
|
|
22
|
+
DEFAULT_DIST_PARMS = np.array([0.0, 1.0], np.float64)
|
|
29
23
|
|
|
30
24
|
|
|
31
25
|
def prng(_s: SeedSequence | None = None, /) -> np.random.Generator:
|
|
@@ -123,7 +117,7 @@ class MultithreadedRNG:
|
|
|
123
117
|
|
|
124
118
|
Parameters
|
|
125
119
|
----------
|
|
126
|
-
|
|
120
|
+
__out_array
|
|
127
121
|
The output array to which generated data are written.
|
|
128
122
|
Its dimensions define the size of the sample.
|
|
129
123
|
dist_type
|
|
@@ -139,24 +133,24 @@ class MultithreadedRNG:
|
|
|
139
133
|
|
|
140
134
|
def __init__(
|
|
141
135
|
self,
|
|
142
|
-
|
|
136
|
+
__out_array: ArrayDouble,
|
|
143
137
|
/,
|
|
144
138
|
*,
|
|
145
139
|
dist_type: Literal[
|
|
146
140
|
"Beta", "Dirichlet", "Gaussian", "Normal", "Random", "Uniform"
|
|
147
141
|
] = "Uniform",
|
|
148
|
-
dist_parms:
|
|
142
|
+
dist_parms: ArrayDouble | None = DEFAULT_DIST_PARMS,
|
|
149
143
|
seed_sequence: SeedSequence | None = None,
|
|
150
144
|
nthreads: int = NTHREADS,
|
|
151
145
|
):
|
|
152
146
|
self.thread_count = nthreads
|
|
153
147
|
|
|
154
|
-
|
|
148
|
+
__seed_sequence = seed_sequence or SeedSequence(pool_size=8)
|
|
155
149
|
self._random_generators = [
|
|
156
|
-
prng(_t) for _t in
|
|
150
|
+
prng(_t) for _t in __seed_sequence.spawn(self.thread_count)
|
|
157
151
|
]
|
|
158
152
|
|
|
159
|
-
self.sample_sz = len(
|
|
153
|
+
self.sample_sz = len(__out_array)
|
|
160
154
|
|
|
161
155
|
if dist_type not in (_rdts := ("Beta", "Dirichlet", "Normal", "Uniform")):
|
|
162
156
|
raise ValueError("Specified distribution must be one of {_rdts}")
|
|
@@ -172,7 +166,7 @@ class MultithreadedRNG:
|
|
|
172
166
|
|
|
173
167
|
self.dist_type = dist_type
|
|
174
168
|
|
|
175
|
-
if dist_parms is None or np.array_equal(dist_parms,
|
|
169
|
+
if dist_parms is None or np.array_equal(dist_parms, DEFAULT_DIST_PARMS):
|
|
176
170
|
match dist_type:
|
|
177
171
|
case "Uniform":
|
|
178
172
|
self.dist_type = "Random"
|
|
@@ -189,10 +183,10 @@ class MultithreadedRNG:
|
|
|
189
183
|
)
|
|
190
184
|
|
|
191
185
|
elif dist_type == "Dirichlet":
|
|
192
|
-
if len(dist_parms) !=
|
|
186
|
+
if len(dist_parms) != __out_array.shape[1]:
|
|
193
187
|
raise ValueError(
|
|
194
188
|
f"Insufficient shape parameters for requested Dirichlet sample "
|
|
195
|
-
f"of size, {
|
|
189
|
+
f"of size, {__out_array.shape}"
|
|
196
190
|
)
|
|
197
191
|
|
|
198
192
|
elif (_lrdp := len(dist_parms)) != 2:
|
|
@@ -200,7 +194,7 @@ class MultithreadedRNG:
|
|
|
200
194
|
|
|
201
195
|
self.dist_parms = dist_parms
|
|
202
196
|
|
|
203
|
-
self.values =
|
|
197
|
+
self.values = __out_array
|
|
204
198
|
self.executor = concurrent.futures.ThreadPoolExecutor(self.thread_count)
|
|
205
199
|
|
|
206
200
|
self.step_size = (len(self.values) / self.thread_count).__ceil__()
|
|
@@ -211,8 +205,8 @@ class MultithreadedRNG:
|
|
|
211
205
|
def _fill(
|
|
212
206
|
_rng: np.random.Generator,
|
|
213
207
|
_dist_type: str,
|
|
214
|
-
_dist_parms:
|
|
215
|
-
_out:
|
|
208
|
+
_dist_parms: ArrayDouble,
|
|
209
|
+
_out: ArrayDouble,
|
|
216
210
|
_first: int,
|
|
217
211
|
_last: int,
|
|
218
212
|
/,
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Plot the empirical distribution derived using the Gaussian KDE with
|
|
3
|
+
margin data downloaded from Prof. Damodaran's website at NYU.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import warnings
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from matplotlib.ticker import StrMethodFormatter
|
|
12
|
+
from numpy.random import PCG64DXSM, Generator, SeedSequence
|
|
13
|
+
from scipy import stats # type: ignore
|
|
14
|
+
|
|
15
|
+
import mergeron.core.empirical_margin_distribution as dmgn
|
|
16
|
+
from mergeron import DATA_DIR
|
|
17
|
+
from mergeron.core.guidelines_boundary_functions import boundary_plot
|
|
18
|
+
|
|
19
|
+
SAMPLE_SIZE = 10**6
|
|
20
|
+
BIN_COUNT = 25
|
|
21
|
+
mgn_data_obs, mgn_data_wts, mgn_data_stats = dmgn.mgn_data_builder()
|
|
22
|
+
print(repr(mgn_data_obs))
|
|
23
|
+
print(repr(mgn_data_stats))
|
|
24
|
+
|
|
25
|
+
plt, mgn_fig, mgn_ax, set_axis_def = boundary_plot(mktshares_plot_flag=False)
|
|
26
|
+
mgn_fig.set_figheight(6.5)
|
|
27
|
+
mgn_fig.set_figwidth(9.0)
|
|
28
|
+
|
|
29
|
+
_, mgn_bins, _ = mgn_ax.hist(
|
|
30
|
+
x=mgn_data_obs,
|
|
31
|
+
weights=mgn_data_wts,
|
|
32
|
+
bins=BIN_COUNT,
|
|
33
|
+
alpha=0.4,
|
|
34
|
+
density=True,
|
|
35
|
+
label="Downloaded data",
|
|
36
|
+
color="#004488", # Paul Tol's High Contrast Blue
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
with warnings.catch_warnings():
|
|
40
|
+
warnings.filterwarnings("ignore", category=UserWarning)
|
|
41
|
+
# Don't warn regarding the below; ticklabels have been fixed before this point
|
|
42
|
+
mgn_ax.set_yticklabels([
|
|
43
|
+
f"{float(_g.get_text()) * np.diff(mgn_bins)[-1]:.0%}"
|
|
44
|
+
for _g in mgn_ax.get_yticklabels()
|
|
45
|
+
])
|
|
46
|
+
|
|
47
|
+
mgn_kde = stats.gaussian_kde(mgn_data_obs, weights=mgn_data_wts, bw_method="silverman")
|
|
48
|
+
mgn_kde.set_bandwidth(bw_method=mgn_kde.factor / 3.0) # pyright: ignore
|
|
49
|
+
|
|
50
|
+
mgn_ax.plot(
|
|
51
|
+
(_xv := np.linspace(0, BIN_COUNT, 10**5) / BIN_COUNT),
|
|
52
|
+
mgn_kde(_xv),
|
|
53
|
+
color="#004488",
|
|
54
|
+
rasterized=True,
|
|
55
|
+
label="Estimated Density",
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
mgn_ax.hist(
|
|
59
|
+
x=mgn_kde.resample(
|
|
60
|
+
SAMPLE_SIZE, seed=Generator(PCG64DXSM(SeedSequence(pool_size=8)))
|
|
61
|
+
)[0],
|
|
62
|
+
color="#DDAA33", # Paul Tol's High Contrast Yellow
|
|
63
|
+
alpha=0.6,
|
|
64
|
+
bins=BIN_COUNT,
|
|
65
|
+
density=True,
|
|
66
|
+
label="Generated data",
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
mgn_ax.legend(
|
|
70
|
+
loc="best",
|
|
71
|
+
fancybox=False,
|
|
72
|
+
shadow=False,
|
|
73
|
+
frameon=True,
|
|
74
|
+
facecolor="white",
|
|
75
|
+
edgecolor="white",
|
|
76
|
+
framealpha=1,
|
|
77
|
+
fontsize="small",
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
mgn_ax.set_xlim(0.0, 1.0)
|
|
81
|
+
mgn_ax.xaxis.set_major_formatter(StrMethodFormatter("{x:>3.0%}"))
|
|
82
|
+
mgn_ax.set_xlabel("Price Cost Margin", fontsize=10)
|
|
83
|
+
mgn_ax.set_ylabel("Relative Frequency", fontsize=10)
|
|
84
|
+
|
|
85
|
+
mgn_fig.tight_layout()
|
|
86
|
+
plt.savefig(DATA_DIR / f"{Path(__file__).stem}.pdf")
|