mergeron 2024.738973.0__tar.gz → 2024.739079.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mergeron might be problematic. Click here for more details.

Files changed (42) hide show
  1. mergeron-2024.739079.10/PKG-INFO +109 -0
  2. mergeron-2024.739079.10/README.rst +61 -0
  3. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/pyproject.toml +30 -35
  4. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/__init__.py +28 -3
  5. mergeron-2024.739079.10/src/mergeron/core/__init__.py +3 -0
  6. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/core/damodaran_margin_data.py +66 -52
  7. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/core/excel_helper.py +39 -37
  8. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/core/ftc_merger_investigations_data.py +66 -35
  9. mergeron-2024.739079.10/src/mergeron/core/guidelines_boundaries.py +466 -0
  10. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/core/guidelines_boundary_functions.py +182 -27
  11. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/core/guidelines_boundary_functions_extra.py +17 -14
  12. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/core/proportions_tests.py +2 -4
  13. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/core/pseudorandom_numbers.py +6 -11
  14. mergeron-2024.739079.10/src/mergeron/data/__init__.py +3 -0
  15. mergeron-2024.739079.10/src/mergeron/data/damodaran_margin_data.xls +0 -0
  16. mergeron-2024.739079.10/src/mergeron/data/damodaran_margin_data_dict.msgpack +0 -0
  17. mergeron-2024.738973.0/src/mergeron/jinja_LaTex_templates/setup_tikz_tables.tex.jinja2 → mergeron-2024.739079.10/src/mergeron/data/jinja2_LaTeX_templates/setup_tikz_tables.tex +45 -50
  18. mergeron-2024.739079.10/src/mergeron/demo/__init__.py +3 -0
  19. mergeron-2024.739079.10/src/mergeron/demo/visualize_empirical_margin_distribution.py +88 -0
  20. mergeron-2024.739079.10/src/mergeron/ext/__init__.py +3 -0
  21. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/ext/tol_colors.py +3 -3
  22. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/gen/__init__.py +53 -46
  23. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/gen/_data_generation_functions.py +28 -93
  24. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/gen/data_generation.py +20 -24
  25. mergeron-2024.738973.0/src/mergeron/gen/investigations_stats.py → mergeron-2024.739079.10/src/mergeron/gen/enforcement_stats.py +59 -57
  26. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/gen/market_sample.py +6 -10
  27. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/gen/upp_tests.py +29 -26
  28. mergeron-2024.738973.0/PKG-INFO +0 -108
  29. mergeron-2024.738973.0/README.rst +0 -64
  30. mergeron-2024.738973.0/src/mergeron/core/InCommon RSA Server CA cert chain.pem +0 -68
  31. mergeron-2024.738973.0/src/mergeron/core/__init__.py +0 -78
  32. mergeron-2024.738973.0/src/mergeron/core/guidelines_boundaries.py +0 -439
  33. mergeron-2024.738973.0/src/mergeron/ext/__init__.py +0 -5
  34. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/License.txt +0 -0
  35. {mergeron-2024.738973.0/src/mergeron/core → mergeron-2024.739079.10/src/mergeron/data}/ftc_invdata.msgpack +0 -0
  36. {mergeron-2024.738973.0/src/mergeron/jinja_LaTex_templates → mergeron-2024.739079.10/src/mergeron/data/jinja2_LaTeX_templates}/clrrate_cis_summary_table_template.tex.jinja2 +0 -0
  37. {mergeron-2024.738973.0/src/mergeron/jinja_LaTex_templates → mergeron-2024.739079.10/src/mergeron/data/jinja2_LaTeX_templates}/ftcinvdata_byhhianddelta_table_template.tex.jinja2 +0 -0
  38. {mergeron-2024.738973.0/src/mergeron/jinja_LaTex_templates → mergeron-2024.739079.10/src/mergeron/data/jinja2_LaTeX_templates}/ftcinvdata_summary_table_template.tex.jinja2 +0 -0
  39. {mergeron-2024.738973.0/src/mergeron/jinja_LaTex_templates → mergeron-2024.739079.10/src/mergeron/data/jinja2_LaTeX_templates}/ftcinvdata_summarypaired_table_template.tex.jinja2 +0 -0
  40. {mergeron-2024.738973.0/src/mergeron/jinja_LaTex_templates → mergeron-2024.739079.10/src/mergeron/data/jinja2_LaTeX_templates}/mergeron.cls +0 -0
  41. {mergeron-2024.738973.0/src/mergeron/jinja_LaTex_templates → mergeron-2024.739079.10/src/mergeron/data/jinja2_LaTeX_templates}/mergeron_table_collection_template.tex.jinja2 +0 -0
  42. {mergeron-2024.738973.0 → mergeron-2024.739079.10}/src/mergeron/py.typed +0 -0
@@ -0,0 +1,109 @@
1
+ Metadata-Version: 2.1
2
+ Name: mergeron
3
+ Version: 2024.739079.10
4
+ Summary: Merger Policy Analysis using Python
5
+ License: MIT
6
+ Keywords: merger policy analysis,merger guidelines,merger screening,policy presumptions,concentration standards,upward pricing pressure,GUPPI
7
+ Author: Murthy Kambhampaty
8
+ Author-email: smk@capeconomics.com
9
+ Requires-Python: >=3.12,<4.0
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: End Users/Desktop
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3 :: Only
20
+ Classifier: Programming Language :: Python :: Implementation :: CPython
21
+ Requires-Dist: aenum (>=3.1.15,<4.0.0)
22
+ Requires-Dist: attrs (>=23.2)
23
+ Requires-Dist: bs4 (>=0.0.1)
24
+ Requires-Dist: certifi (>=2023.11.17)
25
+ Requires-Dist: google-re2 (>=1.1)
26
+ Requires-Dist: icecream (>=2.1.0)
27
+ Requires-Dist: jinja2 (>=3.1)
28
+ Requires-Dist: joblib (>=1.3)
29
+ Requires-Dist: lxml (>=5.0)
30
+ Requires-Dist: matplotlib (>=3.8)
31
+ Requires-Dist: mpmath (>=1.3)
32
+ Requires-Dist: msgpack (>=1.0)
33
+ Requires-Dist: msgpack-numpy (>=0.4)
34
+ Requires-Dist: numpy (>=1.26,<2.0)
35
+ Requires-Dist: openpyxl (>=3.1.2)
36
+ Requires-Dist: pendulum (>=3.0.0)
37
+ Requires-Dist: requests (>=2.31)
38
+ Requires-Dist: requests-toolbelt (>=1.0.0)
39
+ Requires-Dist: scipy (>=1.12)
40
+ Requires-Dist: sympy (>=1.12)
41
+ Requires-Dist: tables (>=3.8)
42
+ Requires-Dist: types-beautifulsoup4 (>=4.11.2)
43
+ Requires-Dist: types-requests (>=2.31.0)
44
+ Requires-Dist: xlrd (>=2.0.1,<3.0.0)
45
+ Requires-Dist: xlsxwriter (>=3.1)
46
+ Description-Content-Type: text/x-rst
47
+
48
+ mergeron: Merger Policy Analysis using Python
49
+ =============================================
50
+
51
+ Download and analyze merger investigations data published by the U.S. Federal Trade Commission in various reports on extended merger investigations during 1996 to 2011. Model the sets of mergers conforming to various U.S. Horizontal Merger Guidelines standards. Analyze intrinsic clearance rates and intrinsic enforcement rates under Guidelines standards using generated data with specified distributions of market shares, price-cost margins, firm counts, and prices, optionally imposing restrictions implied by statutory filing thresholds and/or Bertrand-Nash oligopoly with MNL demand.
52
+
53
+ Intrinsic clearance and enforcement rates are distinguished from *observed* clearance and enforcement rates in that the former do not reflect the effects of screening and deterrence as do the latter.
54
+
55
+
56
+ Introduction
57
+ ------------
58
+
59
+ Classes for specifying concentration standards (`mergeron.core.guidelines_boundaries.ConcentrationBoundary`) and diversion-ratio standards (`mergeron.core.guidelines_boundaries.DiversionRatioBoundary`), with automatic generation of boundary (as an array of share-pairs) and area, are provided in `mergeron.core.guidelines_boundaries`. This module also includes a function for generating plots of concentration and diversion-ratio boundaries, and functions for mapping GUPPI standards to concentration (ΔHHI) standards, and vice-versa.
60
+
61
+ Methods for generating industry data under various distributions of shares, margins, and prices are included in, `mergeron.gen.data_generation`. Shares are drawn with uniform distribution with :math:`s_1 + s_2 \leqslant 1` and an unspecified number of firms. Alternatively, shares may be drawn from the Dirichlet distribution. When drawing shares from the Dirichlet distribution, the user can specify a fixed number of firms or provide a vector of weights specifying the frequency distribution over sequential firm counts, e.g., :code:`[133, 184, 134, 52, 32, 10, 12, 4, 3]` to specify shares drawn from Dirichlet distributions with 2 to 10 pre-merger firms distributed as in data for FTC merger investigations during 1996--2003 (See, for example, Table 4.1 of `FTC, Horizontal Merger Investigations Data, Fiscal Years 1996--2003 (Revised: August 31, 2004) <https://www.ftc.gov/sites/default/files/documents/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003/040831horizmergersdata96-03.pdf>`_). The user can specify recapture rates as, "proportional", "inside-out" --- i.e., consistent with merging-firms' in-market shares and a default recapture rate --- or "outside-in" --- i.e., purchase probabilities are drawn at random for :math:`N+1` goods, from which are derived market shares and recapture rates for the :math:`N` goods in the putative market. Documentation on specifying the sampling strategy for market shares is at `mergeron.gen.ShareSpec`. Price-cost-margins may be specified as symmetric, i.i.d., or subject to equilibrium conditions for (profit-maximization in) Bertrand-Nash oligopoly with MNL demand (see, `mergeron.gen.PCMSpec`). Prices may be specified as symmetric or asymmetric, and in the latter case, the direction of correlation between merging firm prices, if any, can also be specified (see, `mergeron.gen.PriceSpec`). 
Two alternative approaches for modeling statutory filing requirements (HSR filing thresholds) are implemented (see, `mergeron.gen.SSZConstants`). The full specification of a market sample is given in a `mergeron.gen.market_sample.MarketSample` object. Data are drawn by invoking `mergeron.gen.market_sample.MarketSample.generate_sample` which adds a `data` property of class, `mergeron.gen.MarketDataSample`. Enforcement or clearance counts are computed by invoking `mergeron.gen.market_sample.MarketSample.estimate_invres_counts`, which adds an `invres_counts` property of class `mergeron.gen.UPPTestsCounts`. For fast, parallel generation of enforcement or clearance counts over large market data samples that ordinarily would exceed available limits on machine memory, the user can invoke the method `estimate_invres_counts` on a `mergeron.gen.market_sample.MarketSample` object without first invoking `generate_sample`. Note, however, that this strategy discards the market sample in the interests of conserving memory and maintaining high performance.
62
+
63
+ Enforcement statistics based on FTC investigations data and test data are printed to screen or rendered to LaTeX files (for processing into publication-quality tables) using methods provided in `mergeron.gen.enforcement_stats`.
64
+
65
+ Programs demonstrating the analysis and reporting facilities are provided in the sub-package, `mergeron.demo`.
66
+
67
+ This package exposes methods employed for generating random numbers with selected continuous distribution over specified parameters, and with CPU multithreading on machines with multiple virtual, logical, or physical CPU cores. To access these directly:
68
+
69
+ .. code-block:: python
70
+
71
+ import mergeron.core.pseudorandom_numbers as prng
72
+
73
+ Also included are methods for estimating confidence intervals for proportions and for contrasts (differences) in proportions. (Although coded from scratch using the source literature, the APIs implemented in the module included here are designed for consistency with the APIs in, `statsmodels.stats.proportion` from the package, `statsmodels` (https://pypi.org/project/statsmodels/).) To access these directly:
74
+
75
+ .. code-block:: python
76
+
77
+ import mergeron.core.proportions_tests as prci
78
+
79
+ A recent version of Paul Tol's python module, `tol_colors.py` is redistributed within this package. Other than re-formatting and type annotation, the `mergeron.ext.tol_colors` module is re-distributed as downloaded from, https://personal.sron.nl/~pault/data/tol_colors.py. The `tol_colors.py` module is distributed under the Standard 3-clause BSD license. To access the `mergeron.ext.tol_colors` module directly:
80
+
81
+ .. code-block:: python
82
+
83
+ import mergeron.ext.tol_colors as ptc
84
+
85
+ Documentation for this package is in the form of the API Reference. Documentation for individual functions and classes is accessible within a python shell. For example:
86
+
87
+ .. code-block:: python
88
+
89
+ import mergeron.gen.market_sample as market_sample
90
+
91
+ help(market_sample.MarketSample)
92
+
93
+
94
+ .. image:: https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json
95
+ :alt: Poetry
96
+ :target: https://python-poetry.org/
97
+
98
+ .. image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
99
+ :alt: Ruff
100
+ :target: https://github.com/astral-sh/ruff
101
+
102
+ .. image:: https://www.mypy-lang.org/static/mypy_badge.svg
103
+ :alt: Checked with mypy
104
+ :target: https://mypy-lang.org/
105
+
106
+ .. image:: https://img.shields.io/badge/License-MIT-yellow.svg
107
+ :alt: License: MIT
108
+ :target: https://opensource.org/licenses/MIT
109
+
@@ -0,0 +1,61 @@
1
+ mergeron: Merger Policy Analysis using Python
2
+ =============================================
3
+
4
+ Download and analyze merger investigations data published by the U.S. Federal Trade Commission in various reports on extended merger investigations during 1996 to 2011. Model the sets of mergers conforming to various U.S. Horizontal Merger Guidelines standards. Analyze intrinsic clearance rates and intrinsic enforcement rates under Guidelines standards using generated data with specified distributions of market shares, price-cost margins, firm counts, and prices, optionally imposing restrictions implied by statutory filing thresholds and/or Bertrand-Nash oligopoly with MNL demand.
5
+
6
+ Intrinsic clearance and enforcement rates are distinguished from *observed* clearance and enforcement rates in that the former do not reflect the effects of screening and deterrence as do the latter.
7
+
8
+
9
+ Introduction
10
+ ------------
11
+
12
+ Classes for specifying concentration standards (`mergeron.core.guidelines_boundaries.ConcentrationBoundary`) and diversion-ratio standards (`mergeron.core.guidelines_boundaries.DiversionRatioBoundary`), with automatic generation of boundary (as an array of share-pairs) and area, are provided in `mergeron.core.guidelines_boundaries`. This module also includes a function for generating plots of concentration and diversion-ratio boundaries, and functions for mapping GUPPI standards to concentration (ΔHHI) standards, and vice-versa.
13
+
14
+ Methods for generating industry data under various distributions of shares, margins, and prices are included in, `mergeron.gen.data_generation`. Shares are drawn with uniform distribution with :math:`s_1 + s_2 \leqslant 1` and an unspecified number of firms. Alternatively, shares may be drawn from the Dirichlet distribution. When drawing shares from the Dirichlet distribution, the user can specify a fixed number of firms or provide a vector of weights specifying the frequency distribution over sequential firm counts, e.g., :code:`[133, 184, 134, 52, 32, 10, 12, 4, 3]` to specify shares drawn from Dirichlet distributions with 2 to 10 pre-merger firms distributed as in data for FTC merger investigations during 1996--2003 (See, for example, Table 4.1 of `FTC, Horizontal Merger Investigations Data, Fiscal Years 1996--2003 (Revised: August 31, 2004) <https://www.ftc.gov/sites/default/files/documents/reports/horizontal-merger-investigation-data-fiscal-years-1996-2003/040831horizmergersdata96-03.pdf>`_). The user can specify recapture rates as, "proportional", "inside-out" --- i.e., consistent with merging-firms' in-market shares and a default recapture rate --- or "outside-in" --- i.e., purchase probabilities are drawn at random for :math:`N+1` goods, from which are derived market shares and recapture rates for the :math:`N` goods in the putative market. Documentation on specifying the sampling strategy for market shares is at `mergeron.gen.ShareSpec`. Price-cost-margins may be specified as symmetric, i.i.d., or subject to equilibrium conditions for (profit-maximization in) Bertrand-Nash oligopoly with MNL demand (see, `mergeron.gen.PCMSpec`). Prices may be specified as symmetric or asymmetric, and in the latter case, the direction of correlation between merging firm prices, if any, can also be specified (see, `mergeron.gen.PriceSpec`). 
Two alternative approaches for modeling statutory filing requirements (HSR filing thresholds) are implemented (see, `mergeron.gen.SSZConstants`). The full specification of a market sample is given in a `mergeron.gen.market_sample.MarketSample` object. Data are drawn by invoking `mergeron.gen.market_sample.MarketSample.generate_sample` which adds a `data` property of class, `mergeron.gen.MarketDataSample`. Enforcement or clearance counts are computed by invoking `mergeron.gen.market_sample.MarketSample.estimate_invres_counts`, which adds an `invres_counts` property of class `mergeron.gen.UPPTestsCounts`. For fast, parallel generation of enforcement or clearance counts over large market data samples that ordinarily would exceed available limits on machine memory, the user can invoke the method `estimate_invres_counts` on a `mergeron.gen.market_sample.MarketSample` object without first invoking `generate_sample`. Note, however, that this strategy discards the market sample in the interests of conserving memory and maintaining high performance.
15
+
16
+ Enforcement statistics based on FTC investigations data and test data are printed to screen or rendered to LaTeX files (for processing into publication-quality tables) using methods provided in `mergeron.gen.enforcement_stats`.
17
+
18
+ Programs demonstrating the analysis and reporting facilities are provided in the sub-package, `mergeron.demo`.
19
+
20
+ This package exposes methods employed for generating random numbers with selected continuous distribution over specified parameters, and with CPU multithreading on machines with multiple virtual, logical, or physical CPU cores. To access these directly:
21
+
22
+ .. code-block:: python
23
+
24
+ import mergeron.core.pseudorandom_numbers as prng
25
+
26
+ Also included are methods for estimating confidence intervals for proportions and for contrasts (differences) in proportions. (Although coded from scratch using the source literature, the APIs implemented in the module included here are designed for consistency with the APIs in, `statsmodels.stats.proportion` from the package, `statsmodels` (https://pypi.org/project/statsmodels/).) To access these directly:
27
+
28
+ .. code-block:: python
29
+
30
+ import mergeron.core.proportions_tests as prci
31
+
32
+ A recent version of Paul Tol's python module, `tol_colors.py` is redistributed within this package. Other than re-formatting and type annotation, the `mergeron.ext.tol_colors` module is re-distributed as downloaded from, https://personal.sron.nl/~pault/data/tol_colors.py. The `tol_colors.py` module is distributed under the Standard 3-clause BSD license. To access the `mergeron.ext.tol_colors` module directly:
33
+
34
+ .. code-block:: python
35
+
36
+ import mergeron.ext.tol_colors as ptc
37
+
38
+ Documentation for this package is in the form of the API Reference. Documentation for individual functions and classes is accessible within a python shell. For example:
39
+
40
+ .. code-block:: python
41
+
42
+ import mergeron.gen.market_sample as market_sample
43
+
44
+ help(market_sample.MarketSample)
45
+
46
+
47
+ .. image:: https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json
48
+ :alt: Poetry
49
+ :target: https://python-poetry.org/
50
+
51
+ .. image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
52
+ :alt: Ruff
53
+ :target: https://github.com/astral-sh/ruff
54
+
55
+ .. image:: https://www.mypy-lang.org/static/mypy_badge.svg
56
+ :alt: Checked with mypy
57
+ :target: https://mypy-lang.org/
58
+
59
+ .. image:: https://img.shields.io/badge/License-MIT-yellow.svg
60
+ :alt: License: MIT
61
+ :target: https://opensource.org/licenses/MIT
@@ -1,8 +1,9 @@
1
1
  [tool.poetry]
2
2
  name = "mergeron"
3
- # See ./get_version_str.py
4
- version = "2024.738973.0"
5
- description = "Analysis of standards defined in Horizontal Merger Guidelines"
3
+ authors = ["Murthy Kambhampaty <smk@capeconomics.com>"]
4
+ description = "Merger Policy Analysis using Python"
5
+ readme = "README.rst"
6
+ license = "MIT"
6
7
  keywords = [
7
8
  "merger policy analysis",
8
9
  "merger guidelines",
@@ -12,9 +13,7 @@ keywords = [
12
13
  "upward pricing pressure",
13
14
  "GUPPI",
14
15
  ]
15
- authors = ["Murthy Kambhampaty <smk@capeconomics.com>"]
16
- license = "MIT"
17
- readme = "README.rst"
16
+ version = "2024.739079.10"
18
17
 
19
18
  # Classifiers list: https://pypi.org/classifiers/
20
19
  classifiers = [
@@ -34,9 +33,11 @@ classifiers = [
34
33
  [tool.poetry.dependencies]
35
34
  # You may need to apply the fixes in, https://github.com/python-poetry/poetry/issues/3365
36
35
  # if poetry dependency resolution appears to hang (read the page at link to the end)
36
+ aenum = "^3.1.15"
37
37
  attrs = ">=23.2"
38
38
  bs4 = ">=0.0.1"
39
39
  google-re2 = ">=1.1"
40
+ icecream = ">=2.1.0"
40
41
  jinja2 = ">=3.1"
41
42
  joblib = ">=1.3"
42
43
  lxml = ">=5.0"
@@ -44,31 +45,33 @@ matplotlib = ">=3.8"
44
45
  mpmath = ">=1.3"
45
46
  msgpack = ">=1.0"
46
47
  msgpack-numpy = ">=0.4"
47
- numpy = ">=1.26"
48
+ numpy = ">=1.26, <2.0"
48
49
  openpyxl = ">=3.1.2"
50
+ pendulum = ">=3.0.0"
49
51
  python = "^3.12"
50
52
  requests = ">=2.31"
51
53
  scipy = ">=1.12"
52
54
  sympy = ">=1.12"
53
55
  tables = ">=3.8"
54
- xlrd = ">=2.0"
55
56
  xlsxwriter = ">=3.1"
56
57
  certifi = ">=2023.11.17"
57
58
  requests-toolbelt = ">=1.0.0"
58
- importlib-metadata = ">=7.0.1"
59
+ types-requests = ">=2.31.0"
60
+ types-beautifulsoup4 = ">=4.11.2"
61
+ xlrd = "^2.0.1" # Needed to read margin data
59
62
 
60
63
 
61
64
  [tool.poetry.group.dev.dependencies]
62
65
  semver = ">=3.0"
63
- pytest = ">=8.0"
64
66
  mypy = ">=1.8"
65
- ruff = ">=0.2"
67
+ ruff = ">=0.5"
68
+ pytest = ">=8.0"
66
69
  sphinx = ">=7.2"
67
70
  sphinx-autodoc-typehints = ">=2.0.0"
68
71
  sphinx-autoapi = ">=3.0"
69
72
  sphinx-immaterial = ">=0.11"
70
73
  pipdeptree = ">=2.15.1"
71
- uv = ">=0.1.11"
74
+ types-openpyxl = ">=3.0.0"
72
75
 
73
76
  [build-system]
74
77
  requires = ["poetry-core"]
@@ -118,19 +121,20 @@ select = [
118
121
  "I", # isort
119
122
  "W", # pycodestyle
120
123
  # plugins:
121
- "B", # flake8-bugbear
122
- "C4", # flake8-comprehensions
123
- "ICN", # flake8-import-conventions
124
- "NPY", # NumPy-specific rules
125
- "PIE", # flake8-pie
126
- "PL", # pylint
127
- "PTH", # flake8-use-pathlib
128
- "S", # flake8-bandit
129
- "SIM", # flake8-simplify
130
- "TID", # flake8-tidy-imports
131
- "TCH", # flake8-type-checking
132
- "UP", # pyupgrade
133
- "RUF", # ruff-specific
124
+ "B", # flake8-bugbear
125
+ "C4", # flake8-comprehensions
126
+ "FURB", # refurb
127
+ "ICN", # flake8-import-conventions
128
+ "NPY", # NumPy-specific rules
129
+ "PIE", # flake8-pie
130
+ "PL", # pylint
131
+ "PTH", # flake8-use-pathlib
132
+ "S", # flake8-bandit
133
+ "SIM", # flake8-simplify
134
+ "TID", # flake8-tidy-imports
135
+ "TCH", # flake8-type-checking
136
+ "UP", # pyupgrade
137
+ "RUF", # ruff-specific
134
138
  ]
135
139
 
136
140
  ignore = [
@@ -144,16 +148,7 @@ ignore = [
144
148
  # flake8-bugbear opinionated (disabled by default in flake8)
145
149
  'B904',
146
150
  'B905',
147
- # flake8-executable
148
- "EXE002", # file executable but no shebang present
149
- # pygrep-hooks
150
- "PGH003",
151
- # flake8-pie
152
- "PIE790", # unnecessary 'pass' statement
153
- # pylint
154
151
  "PLR2004", # avoid magic values
155
- # flake8-simplify
156
- "SIM102", # nested 'if' statements
157
152
  # flake8-type-checking
158
153
  "TCH001", # move application import into a type-checking block
159
154
  "TCH002", # move third-party import into a type-checking block
@@ -198,7 +193,7 @@ filterwarnings = [
198
193
  "ignore::DeprecationWarning:jinja2.lexer",
199
194
  "ignore::DeprecationWarning:joblib._utils",
200
195
  "ignore::DeprecationWarning:openpyxl.packaging.core",
201
- "ignore::RuntimeWarning:mergeron.gen.investigations_stats",
196
+ "ignore::RuntimeWarning:mergeron.gen.enforcement_stats",
202
197
  "ignore::RuntimeWarning:mergeron.core.proportions_tests",
203
198
  ]
204
199
  tmp_path_retention_policy = "failed"
@@ -1,12 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import enum
4
- from importlib.metadata import version
5
4
  from pathlib import Path
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ import pendulum # type: ignore
9
+ from icecream import argumentToString, ic, install # type: ignore
10
+ from numpy.typing import NDArray
6
11
 
7
12
  _PKG_NAME: str = Path(__file__).parent.stem
8
13
 
9
- __version__ = version(_PKG_NAME)
14
+ VERSION = "2024.739079.10"
15
+
16
+ __version__ = VERSION
10
17
 
11
18
  DATA_DIR: Path = Path.home() / _PKG_NAME
12
19
  """
@@ -14,11 +21,26 @@ Defines a subdirectory named for this package in the user's home path.
14
21
 
15
22
  If the subdirectory doesn't exist, it is created on package invocation.
16
23
  """
17
-
18
24
  if not DATA_DIR.is_dir():
19
25
  DATA_DIR.mkdir(parents=False)
20
26
 
21
27
 
28
+ np.set_printoptions(precision=18)
29
+
30
+
31
+ def _timestamper() -> str:
32
+ return f"{pendulum.now().strftime("%F %T.%f")} |> "
33
+
34
+
35
+ @argumentToString.register(np.ndarray) # type: ignore
36
+ def _(_obj: NDArray[Any]) -> str:
37
+ return f"ndarray, shape={_obj.shape}, dtype={_obj.dtype}"
38
+
39
+
40
+ ic.configureOutput(prefix=_timestamper, includeContext=True)
41
+ install()
42
+
43
+
22
44
  @enum.unique
23
45
  class RECConstants(enum.StrEnum):
24
46
  """Recapture rate - derivation methods."""
@@ -38,8 +60,11 @@ class UPPAggrSelector(enum.StrEnum):
38
60
  AVG = "average"
39
61
  CPA = "cross-product-share weighted average"
40
62
  CPD = "cross-product-share weighted distance"
63
+ CPG = "cross-product-share weighted geometric mean"
41
64
  DIS = "symmetrically-weighted distance"
65
+ GMN = "geometric mean"
42
66
  MAX = "max"
43
67
  MIN = "min"
44
68
  OSA = "own-share weighted average"
45
69
  OSD = "own-share weighted distance"
70
+ OSG = "own-share weighted geometric mean"
@@ -0,0 +1,3 @@
1
+ from .. import VERSION # noqa: TID252
2
+
3
+ __version__ = VERSION
@@ -7,7 +7,8 @@ Data are downloaded or reused from a local copy, on demand.
7
7
  For terms of use of Prof. Damodaran's data, please see:
8
8
  https://pages.stern.nyu.edu/~adamodar/New_Home_Page/datahistory.html
9
9
 
10
- Important caveats:
10
+ NOTES
11
+ -----
11
12
 
12
13
  Prof. Damodaran notes that the data construction may not be
13
14
  consistent from iteration to iteration. He also notes that,
@@ -32,29 +33,30 @@ price-cost margins fall in the interval :math:`[0, 1]`.
32
33
 
33
34
  """
34
35
 
36
+ import shutil
35
37
  from collections.abc import Mapping
36
- from importlib.metadata import version
38
+ from importlib import resources
37
39
  from pathlib import Path
38
40
  from types import MappingProxyType
39
41
 
40
42
  import msgpack # type:ignore
41
43
  import numpy as np
42
- import requests
44
+ import urllib3
43
45
  from numpy.random import PCG64DXSM, Generator, SeedSequence
44
46
  from numpy.typing import NDArray
45
- from requests_toolbelt.downloadutils import stream # type: ignore
46
47
  from scipy import stats # type: ignore
47
48
  from xlrd import open_workbook # type: ignore
48
49
 
49
- from .. import _PKG_NAME, DATA_DIR # noqa: TID252
50
-
51
- __version__ = version(_PKG_NAME)
50
+ from .. import _PKG_NAME, DATA_DIR, VERSION # noqa: TID252
52
51
 
52
+ __version__ = VERSION
53
53
 
54
54
  MGNDATA_ARCHIVE_PATH = DATA_DIR / "damodaran_margin_data_dict.msgpack"
55
55
 
56
+ u3pm = urllib3.PoolManager()
57
+
56
58
 
57
- def scrape_data_table(
59
+ def mgn_data_getter(
58
60
  _table_name: str = "margin",
59
61
  *,
60
62
  data_archive_path: Path | None = None,
@@ -68,32 +70,46 @@ def scrape_data_table(
68
70
  _data_archive_path = data_archive_path or MGNDATA_ARCHIVE_PATH
69
71
 
70
72
  _mgn_urlstr = f"https://pages.stern.nyu.edu/~adamodar/pc/datasets/{_table_name}.xls"
71
- _mgn_path = _data_archive_path.parent.joinpath(f"damodaran_{_table_name}_data.xls")
73
+ _mgn_path = _data_archive_path.parent / f"damodaran_{_table_name}_data.xls"
72
74
  if _data_archive_path.is_file() and not data_download_flag:
73
75
  return MappingProxyType(msgpack.unpackb(_data_archive_path.read_bytes()))
74
76
  elif _mgn_path.is_file():
75
77
  _mgn_path.unlink()
76
- _data_archive_path.unlink()
77
-
78
- _REQ_TIMEOUT = (9.05, 27)
79
- # NYU will eventually updates its server certificate, to one signed with
80
- # "InCommon RSA Server CA 2.pem", the step below will be obsolete. In
81
- # the interim, it is necessary to provide the certificate chain to the
82
- # root CA, so that the obsolete CA certificate is validated.
83
- _INCOMMON_2014_CERT_CHAIN_PATH = (
84
- Path(__file__).parent / "InCommon RSA Server CA cert chain.pem"
85
- )
86
- try:
87
- _urlopen_handle = requests.get(_mgn_urlstr, timeout=_REQ_TIMEOUT, stream=True)
88
- except requests.exceptions.SSLError:
89
- _urlopen_handle = requests.get(
90
- _mgn_urlstr,
91
- timeout=_REQ_TIMEOUT,
92
- stream=True,
93
- verify=str(_INCOMMON_2014_CERT_CHAIN_PATH),
94
- )
78
+ if _data_archive_path.is_file():
79
+ _data_archive_path.unlink()
95
80
 
96
- _mgn_filename = stream.stream_response_to_file(_urlopen_handle, path=_mgn_path)
81
+ try:
82
+ _chunk_size = 1024 * 1024
83
+ with (
84
+ u3pm.request("GET", _mgn_urlstr, preload_content=False) as _urlopen_handle,
85
+ _mgn_path.open("wb") as _mgn_file,
86
+ ):
87
+ while True:
88
+ _data = _urlopen_handle.read(_chunk_size)
89
+ if not _data:
90
+ break
91
+ _mgn_file.write(_data)
92
+
93
+ print(f"Downloaded {_mgn_urlstr} to {_mgn_path}.")
94
+
95
+ except urllib3.exceptions.MaxRetryError as _err:
96
+ if isinstance(_err.__cause__, urllib3.exceptions.SSLError):
97
+ # Works fine with other sites secured with certificates
98
+ # from the Internet2 CA, such as,
99
+ # https://snap.stanford.edu/data/web-Stanford.txt.gz
100
+ print(
101
+ f"WARNING: Could not establish secure connection to, {_mgn_urlstr}."
102
+ "Using bundled copy."
103
+ )
104
+ if not _mgn_path.is_file():
105
+ with resources.as_file(
106
+ resources.files(f"{_PKG_NAME}.data").joinpath(
107
+ "damodaran_margin_data.xls"
108
+ )
109
+ ) as _mgn_data_archive_path:
110
+ shutil.copy2(_mgn_data_archive_path, _mgn_path)
111
+ else:
112
+ raise _err
97
113
 
98
114
  _xl_book = open_workbook(_mgn_path, ragged_rows=True, on_demand=True)
99
115
  _xl_sheet = _xl_book.sheet_by_name("Industry Averages")
@@ -123,7 +139,7 @@ def mgn_data_builder(
123
139
  _mgn_tbl_dict: Mapping[str, Mapping[str, float | int]] | None = None, /
124
140
  ) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
125
141
  if _mgn_tbl_dict is None:
126
- _mgn_tbl_dict = scrape_data_table()
142
+ _mgn_tbl_dict = mgn_data_getter()
127
143
 
128
144
  _mgn_data_wts, _mgn_data_obs = (
129
145
  _f.flatten()
@@ -169,17 +185,19 @@ def mgn_data_builder(
169
185
  )
170
186
 
171
187
 
172
- def resample_mgn_data(
188
+ def mgn_data_resampler(
173
189
  _sample_size: int | tuple[int, int] = (10**6, 2),
174
190
  /,
175
191
  *,
176
192
  seed_sequence: SeedSequence | None = None,
177
193
  ) -> NDArray[np.float64]:
178
194
  """
179
- Generate the specified number of draws from the empirical distribution
180
- for Prof. Damodaran's margin data using the estimated Gaussian KDE.
181
- Margins for firms in finance, investment, insurance, reinsurance, and REITs
182
- are excluded from the sample used to estimate the Gaussian KDE.
195
+ Generate draws from the empirical distribution based on Prof. Damodaran's margin data.
196
+
197
+ The empirical distribution is estimated using a Gaussian KDE; the bandwidth
198
+ selected using Silverman's rule is narrowed to reflect that the margin data
199
+ are multimodal. Margins for firms in finance, investment, insurance, reinsurance, and
200
+ REITs are excluded from the sample used to estimate the empirical distribution.
183
201
 
184
202
  Parameters
185
203
  ----------
@@ -198,28 +216,24 @@ def resample_mgn_data(
198
216
 
199
217
  _seed_sequence = seed_sequence or SeedSequence(pool_size=8)
200
218
 
201
- _x, _w, _ = mgn_data_builder(scrape_data_table())
219
+ _x, _w, _ = mgn_data_builder(mgn_data_getter())
202
220
 
203
- _mgn_kde = stats.gaussian_kde(_x, weights=_w)
221
+ _mgn_kde = stats.gaussian_kde(_x, weights=_w, bw_method="silverman")
222
+ _mgn_kde.set_bandwidth(bw_method=_mgn_kde.factor / 3.0)
204
223
 
205
- def _generate_draws(
206
- _mgn_kde: stats.gaussian_kde, _ssz: int, _seed_seq: SeedSequence
207
- ) -> NDArray[np.float64]:
208
- _seed = Generator(PCG64DXSM(_seed_sequence))
209
-
210
- # We enlarge the sample, then truncate to
211
- # the range between [0.0, 1.0)
212
- ssz_up = int(_ssz / (_mgn_kde.integrate_box_1d(0.0, 1.0) ** 2))
213
- sample_1 = _mgn_kde.resample(ssz_up, seed=_seed)[0]
224
+ if isinstance(_sample_size, int):
214
225
  return np.array(
215
- sample_1[(sample_1 >= 0.0) & (sample_1 <= 1)][:_ssz], np.float64
226
+ _mgn_kde.resample(_sample_size, seed=Generator(PCG64DXSM(_seed_sequence)))[
227
+ 0
228
+ ]
216
229
  )
217
-
218
- if isinstance(_sample_size, int):
219
- return _generate_draws(_mgn_kde, _sample_size, _seed_sequence)
220
- else:
230
+ elif isinstance(_sample_size, tuple) and len(_sample_size) == 2:
221
231
  _ssz, _num_cols = _sample_size
222
232
  _ret_array = np.empty(_sample_size, np.float64)
223
233
  for _idx, _seed_seq in enumerate(_seed_sequence.spawn(_num_cols)):
224
- _ret_array[:, _idx] = _generate_draws(_mgn_kde, _ssz, _seed_seq)
234
+ _ret_array[:, _idx] = _mgn_kde.resample(
235
+ _ssz, seed=Generator(PCG64DXSM(_seed_seq))
236
+ )[0]
225
237
  return _ret_array
238
+ else:
239
+ raise ValueError(f"Invalid sample size: {_sample_size!r}")