Pymkp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymkp-0.1.0/LICENSE +21 -0
- pymkp-0.1.0/MANIFEST.in +10 -0
- pymkp-0.1.0/PKG-INFO +402 -0
- pymkp-0.1.0/README.md +348 -0
- pymkp-0.1.0/pyproject.toml +85 -0
- pymkp-0.1.0/setup.cfg +4 -0
- pymkp-0.1.0/src/BMYReplication/analyze_missing.py +252 -0
- pymkp-0.1.0/src/BMYReplication/compute_markups.py +212 -0
- pymkp-0.1.0/src/BMYReplication/create_data.py +164 -0
- pymkp-0.1.0/src/BMYReplication/estimate_coefficients.py +292 -0
- pymkp-0.1.0/src/BMYReplication/main.py +52 -0
- pymkp-0.1.0/src/BMYReplication/markup_figures.py +238 -0
- pymkp-0.1.0/src/BMYReplication/utils.py +59 -0
- pymkp-0.1.0/src/PyMarkup/0.0 Download Compustat.py +68 -0
- pymkp-0.1.0/src/PyMarkup/0.1 Download CPI.py +42 -0
- pymkp-0.1.0/src/PyMarkup/0.2 PPI Data Preparation.py +136 -0
- pymkp-0.1.0/src/PyMarkup/0.3 theta_estimation.py +528 -0
- pymkp-0.1.0/src/PyMarkup/0.4 Create Main Datasets.py +254 -0
- pymkp-0.1.0/src/PyMarkup/0.5 Prepare Data for Figures and Tables.py +209 -0
- pymkp-0.1.0/src/PyMarkup/1. Generate Figure 1 - Aggregate Markup.py +46 -0
- pymkp-0.1.0/src/PyMarkup/2. Generate Figure 2 - CAGR of PPI vs Markup.py +209 -0
- pymkp-0.1.0/src/PyMarkup/3. Generate Summary Statistics.py +821 -0
- pymkp-0.1.0/src/PyMarkup/4. Generate Table 1.py +240 -0
- pymkp-0.1.0/src/PyMarkup/Create_Data.py +169 -0
- pymkp-0.1.0/src/PyMarkup/Estimate_Coefficients.py +368 -0
- pymkp-0.1.0/src/PyMarkup/PyMarkup.py +1 -0
- pymkp-0.1.0/src/PyMarkup/__init__.py +113 -0
- pymkp-0.1.0/src/PyMarkup/__main__.py +4 -0
- pymkp-0.1.0/src/PyMarkup/_version.py +3 -0
- pymkp-0.1.0/src/PyMarkup/cli/__init__.py +5 -0
- pymkp-0.1.0/src/PyMarkup/cli/main.py +253 -0
- pymkp-0.1.0/src/PyMarkup/cli.py +22 -0
- pymkp-0.1.0/src/PyMarkup/core/__init__.py +1 -0
- pymkp-0.1.0/src/PyMarkup/core/data_preparation.py +309 -0
- pymkp-0.1.0/src/PyMarkup/core/figures.py +795 -0
- pymkp-0.1.0/src/PyMarkup/core/markup_calculation.py +203 -0
- pymkp-0.1.0/src/PyMarkup/data/__init__.py +22 -0
- pymkp-0.1.0/src/PyMarkup/data/config.py +75 -0
- pymkp-0.1.0/src/PyMarkup/data/downloaders.py +350 -0
- pymkp-0.1.0/src/PyMarkup/data/loaders.py +149 -0
- pymkp-0.1.0/src/PyMarkup/decomposition/__init__.py +12 -0
- pymkp-0.1.0/src/PyMarkup/decomposition/aggregate.py +169 -0
- pymkp-0.1.0/src/PyMarkup/decomposition/base.py +183 -0
- pymkp-0.1.0/src/PyMarkup/decomposition/olley_pakes.py +245 -0
- pymkp-0.1.0/src/PyMarkup/decomposition/visualization.py +212 -0
- pymkp-0.1.0/src/PyMarkup/estimators/__init__.py +13 -0
- pymkp-0.1.0/src/PyMarkup/estimators/acf.py +347 -0
- pymkp-0.1.0/src/PyMarkup/estimators/base.py +131 -0
- pymkp-0.1.0/src/PyMarkup/estimators/cost_share.py +308 -0
- pymkp-0.1.0/src/PyMarkup/estimators/wooldridge_iv.py +424 -0
- pymkp-0.1.0/src/PyMarkup/io/__init__.py +5 -0
- pymkp-0.1.0/src/PyMarkup/io/schemas.py +297 -0
- pymkp-0.1.0/src/PyMarkup/macro_var_calculation.py +84 -0
- pymkp-0.1.0/src/PyMarkup/path_plot_config.py +101 -0
- pymkp-0.1.0/src/PyMarkup/pipeline/__init__.py +6 -0
- pymkp-0.1.0/src/PyMarkup/pipeline/config.py +288 -0
- pymkp-0.1.0/src/PyMarkup/pipeline/markup_pipeline.py +644 -0
- pymkp-0.1.0/src/PyMarkup/run_all.py +53 -0
- pymkp-0.1.0/src/PyMarkup/utils/__init__.py +1 -0
- pymkp-0.1.0/src/PyMarkup/utils.py +2 -0
- pymkp-0.1.0/src/Pymkp.egg-info/PKG-INFO +402 -0
- pymkp-0.1.0/src/Pymkp.egg-info/SOURCES.txt +70 -0
- pymkp-0.1.0/src/Pymkp.egg-info/dependency_links.txt +1 -0
- pymkp-0.1.0/src/Pymkp.egg-info/entry_points.txt +2 -0
- pymkp-0.1.0/src/Pymkp.egg-info/requires.txt +31 -0
- pymkp-0.1.0/src/Pymkp.egg-info/top_level.txt +2 -0
- pymkp-0.1.0/tests/__init__.py +1 -0
- pymkp-0.1.0/tests/test_data_downloads.py +295 -0
- pymkp-0.1.0/tests/test_decomposition.py +457 -0
- pymkp-0.1.0/tests/test_estimators.py +718 -0
- pymkp-0.1.0/tests/test_figures.py +210 -0
- pymkp-0.1.0/tests/test_pipeline_real.py +236 -0
pymkp-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Yangyang (Claire) Meng
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pymkp-0.1.0/MANIFEST.in
ADDED
pymkp-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: Pymkp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python toolkit for estimating firm-level markups using production function-based marginal cost recovery.
|
|
5
|
+
Author-email: "Yangyang (Claire) Meng" <ym3593@nyu.edu>
|
|
6
|
+
Maintainer-email: "Yangyang (Claire) Meng" <ym3593@nyu.edu>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: homepage, https://github.com/immortalsRDJ/PyMarkup
|
|
9
|
+
Project-URL: repository, https://github.com/immortalsRDJ/PyMarkup
|
|
10
|
+
Project-URL: documentation, https://github.com/immortalsRDJ/PyMarkup#readme
|
|
11
|
+
Project-URL: bugs, https://github.com/immortalsRDJ/PyMarkup/issues
|
|
12
|
+
Project-URL: changelog, https://github.com/immortalsRDJ/PyMarkup/blob/main/CHANGELOG.md
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: pandas>=2.0.0
|
|
26
|
+
Requires-Dist: numpy<2.0,>=1.24.0
|
|
27
|
+
Requires-Dist: scipy<1.14,>=1.10.0
|
|
28
|
+
Requires-Dist: statsmodels>=0.14.0
|
|
29
|
+
Requires-Dist: linearmodels>=5.0
|
|
30
|
+
Requires-Dist: pydantic>=2.0
|
|
31
|
+
Requires-Dist: typer>=0.9.0
|
|
32
|
+
Requires-Dist: rich>=13.0
|
|
33
|
+
Requires-Dist: pyyaml>=6.0
|
|
34
|
+
Requires-Dist: matplotlib>=3.7.0
|
|
35
|
+
Requires-Dist: seaborn>=0.12.0
|
|
36
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
37
|
+
Requires-Dist: requests>=2.32.5
|
|
38
|
+
Requires-Dist: fredapi>=0.5.2
|
|
39
|
+
Requires-Dist: wrds>=3.1.6
|
|
40
|
+
Provides-Extra: test
|
|
41
|
+
Requires-Dist: coverage[toml]>=7.0; extra == "test"
|
|
42
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
43
|
+
Requires-Dist: pytest-cov>=4.0; extra == "test"
|
|
44
|
+
Requires-Dist: hypothesis>=6.0; extra == "test"
|
|
45
|
+
Requires-Dist: ruff>=0.1.0; extra == "test"
|
|
46
|
+
Requires-Dist: ipdb>=0.13; extra == "test"
|
|
47
|
+
Provides-Extra: dev
|
|
48
|
+
Requires-Dist: Pymkp[test]; extra == "dev"
|
|
49
|
+
Requires-Dist: jupyter>=1.0; extra == "dev"
|
|
50
|
+
Requires-Dist: ipython>=8.0; extra == "dev"
|
|
51
|
+
Provides-Extra: wrds
|
|
52
|
+
Requires-Dist: wrds>=3.1.0; extra == "wrds"
|
|
53
|
+
Dynamic: license-file
|
|
54
|
+
|
|
55
|
+
# PyMarkup
|
|
56
|
+
|
|
57
|
+
A Python toolkit for estimating firm-level markups using production function-based marginal cost recovery.
|
|
58
|
+
|
|
59
|
+
## Installation
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
git clone https://github.com/immortalsRDJ/PyMarkup
|
|
63
|
+
cd PyMarkup
|
|
64
|
+
uv sync
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
For WRDS data downloads, add the `wrds` extra:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
uv sync --extra wrds
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quick Start
|
|
74
|
+
|
|
75
|
+
### Option 1: Command Line (Recommended)
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# 1. Set up config file
|
|
79
|
+
cp config.example.yaml config.yaml
|
|
80
|
+
# Edit config.yaml with your API keys and settings
|
|
81
|
+
|
|
82
|
+
# 2. Run the full pipeline
|
|
83
|
+
uv run pymarkup run-all --config config.yaml
|
|
84
|
+
|
|
85
|
+
# Or skip data download if you already have the data
|
|
86
|
+
uv run pymarkup run-all --config config.yaml --skip-download
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Option 2: Python Script
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from PyMarkup import MarkupPipeline, PipelineConfig, EstimatorConfig
|
|
93
|
+
|
|
94
|
+
config = PipelineConfig(
|
|
95
|
+
compustat_path="Input/DLEU/Compustat_annual.csv",
|
|
96
|
+
macro_vars_path="Input/DLEU/macro_vars_new.xlsx",
|
|
97
|
+
estimator=EstimatorConfig(method="wooldridge_iv"),
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
pipeline = MarkupPipeline(config)
|
|
101
|
+
results = pipeline.run()
|
|
102
|
+
results.save(output_dir="Output/", format="csv")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Command Line Reference
|
|
106
|
+
|
|
107
|
+
### Full Pipeline
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Run everything (download + estimate + figures)
|
|
111
|
+
uv run pymarkup run-all --config config.yaml
|
|
112
|
+
|
|
113
|
+
# Skip all downloads (use existing data)
|
|
114
|
+
uv run pymarkup run-all --config config.yaml --skip-download
|
|
115
|
+
|
|
116
|
+
# Skip only Compustat download (no WRDS credentials needed)
|
|
117
|
+
uv run pymarkup run-all --config config.yaml --skip-compustat
|
|
118
|
+
|
|
119
|
+
# Skip figure generation
|
|
120
|
+
uv run pymarkup run-all --config config.yaml --no-figures
|
|
121
|
+
|
|
122
|
+
# Verbose output for debugging
|
|
123
|
+
uv run pymarkup run-all --config config.yaml -v
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Individual Commands
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Download data only
|
|
130
|
+
uv run pymarkup download ppi # PPI (no credentials needed)
|
|
131
|
+
uv run pymarkup download cpi --config config.yaml # CPI (needs FRED API key)
|
|
132
|
+
uv run pymarkup download all --config config.yaml # All datasets
|
|
133
|
+
|
|
134
|
+
# Run estimation only (requires existing data)
|
|
135
|
+
uv run pymarkup estimate --config config.yaml
|
|
136
|
+
|
|
137
|
+
# Validate input data
|
|
138
|
+
uv run pymarkup validate Input/DLEU/Compustat_annual.csv
|
|
139
|
+
|
|
140
|
+
# Check version
|
|
141
|
+
uv run pymarkup version
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Configuration
|
|
145
|
+
|
|
146
|
+
### Setting Up Credentials
|
|
147
|
+
|
|
148
|
+
1. Copy the example config file:
|
|
149
|
+
```bash
|
|
150
|
+
cp config.example.yaml config.yaml
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
2. Edit `config.yaml` with your credentials:
|
|
154
|
+
```yaml
|
|
155
|
+
fred_api_key: "your-fred-api-key"
|
|
156
|
+
wrds_username: "your-wrds-username"
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Alternatively, set the environment variables `FRED_API_KEY` and `WRDS_USERNAME`.
|
|
160
|
+
|
|
161
|
+
### Data Requirements
|
|
162
|
+
|
|
163
|
+
| Data Source | Credentials | How to Get |
|
|
164
|
+
|-------------|-------------|------------|
|
|
165
|
+
| Compustat (WRDS) | WRDS account | Register at [WRDS](https://wrds-www.wharton.upenn.edu/) |
|
|
166
|
+
| CPI (FRED) | FRED API key | Free at [FRED](https://fred.stlouisfed.org/docs/api/api_key.html) |
|
|
167
|
+
| PPI (BLS) | None | Public data from [BLS](https://download.bls.gov/pub/time.series/pc/) |
|
|
168
|
+
| Macro variables | N/A | Included in repo: `Input/DLEU/macro_vars_new.xlsx` |
|
|
169
|
+
| NAICS descriptions | N/A | Included in repo: `Input/Other/NAICS_2D_Description.xlsx` |
|
|
170
|
+
| DEU observations | N/A | Optional: firm-year sample from the original DLEU paper (see "DEU Sample Filtering" below) |
|
|
171
|
+
|
|
172
|
+
## Pipeline Overview
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
Download -> Data Preparation -> Elasticity Estimation -> Markup Calculation -> Figures & Decomposition
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### 1. Data Download
|
|
179
|
+
|
|
180
|
+
Downloads raw data from external sources:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from PyMarkup.data import download_compustat, download_cpi, download_ppi, load_config
|
|
184
|
+
|
|
185
|
+
config = load_config("config.yaml")
|
|
186
|
+
download_ppi(config) # No credentials needed
|
|
187
|
+
download_cpi(config) # Requires FRED API key
|
|
188
|
+
download_compustat(config) # Requires WRDS credentials
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
**Data Sources:**
|
|
192
|
+
- **PPI**: Bureau of Labor Statistics Producer Price Index data from https://download.bls.gov/pub/time.series/pc/
|
|
193
|
+
- **CPI**: Federal Reserve Economic Data (FRED) Consumer Price Index
|
|
194
|
+
- **Compustat**: WRDS Compustat Fundamentals Annual/Quarterly
|
|
195
|
+
|
|
196
|
+
### 2. Data Preparation
|
|
197
|
+
|
|
198
|
+
Cleans and prepares the Compustat panel:
|
|
199
|
+
- Deduplicates firm-year observations
|
|
200
|
+
- Extracts NAICS industry codes
|
|
201
|
+
- Deflates monetary values by the GDP deflator
|
|
202
|
+
- Computes market shares
|
|
203
|
+
- Trims outliers
|
|
204
|
+
|
|
205
|
+
### 3. Elasticity Estimation
|
|
206
|
+
|
|
207
|
+
Estimates the output elasticity of variable inputs (θ) at the industry-year level:
|
|
208
|
+
|
|
209
|
+
| Method | Class | Use Case |
|
|
210
|
+
|--------|-------|----------|
|
|
211
|
+
| Wooldridge IV | `WooldridgeIVEstimator` | Main method, addresses endogeneity via IV/2SLS |
|
|
212
|
+
| Cost Share | `CostShareEstimator` | Fast baseline, no regression needed |
|
|
213
|
+
| ACF | `ACFEstimator` | Robustness, two-stage GMM with control function |
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
from PyMarkup.estimators import WooldridgeIVEstimator
|
|
217
|
+
|
|
218
|
+
estimator = WooldridgeIVEstimator(specification="spec2")
|
|
219
|
+
elasticities = estimator.estimate_elasticities(panel_data)
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
#### SG&A Configuration
|
|
223
|
+
|
|
224
|
+
All three estimators support including SG&A (Selling, General & Administrative expenses) as a third input in the production function:
|
|
225
|
+
|
|
226
|
+
| Estimator | Parameter | Options | Default |
|
|
227
|
+
|-----------|-----------|---------|---------|
|
|
228
|
+
| Wooldridge IV | `specification` | `"spec1"` (COGS+K), `"spec2"` (COGS+K+SG&A) | `"spec2"` |
|
|
229
|
+
| Cost Share | `include_sga` | `True`, `False` | `False` |
|
|
230
|
+
| ACF | `include_sga` | `True`, `False` | `False` |
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
from PyMarkup.estimators import ACFEstimator, CostShareEstimator, WooldridgeIVEstimator
|
|
234
|
+
|
|
235
|
+
# Wooldridge IV: use spec2 for 3-input (COGS + Capital + SG&A)
|
|
236
|
+
iv_est = WooldridgeIVEstimator(specification="spec2")
|
|
237
|
+
|
|
238
|
+
# Cost Share: include SG&A in cost share calculation
|
|
239
|
+
cs_est = CostShareEstimator(include_sga=True)
|
|
240
|
+
|
|
241
|
+
# ACF: include SG&A as third input
|
|
242
|
+
acf_est = ACFEstimator(include_sga=True)
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
Via pipeline config:
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
from PyMarkup import PipelineConfig, EstimatorConfig
|
|
249
|
+
|
|
250
|
+
config = PipelineConfig(
|
|
251
|
+
compustat_path="Input/DLEU/Compustat_annual.csv",
|
|
252
|
+
macro_vars_path="Input/DLEU/macro_vars_new.xlsx",
|
|
253
|
+
estimator=EstimatorConfig(
|
|
254
|
+
method="all",
|
|
255
|
+
iv_specification="spec2", # Wooldridge IV with SG&A
|
|
256
|
+
cs_include_sga=True, # Cost Share with SG&A
|
|
257
|
+
acf_include_sga=True, # ACF with SG&A
|
|
258
|
+
),
|
|
259
|
+
)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
#### Aggregation Weights
|
|
263
|
+
|
|
264
|
+
When aggregating firm-level markups to industry or economy level, you can choose the weighting scheme:
|
|
265
|
+
|
|
266
|
+
| Weight Type | Formula | Use Case |
|
|
267
|
+
|-------------|---------|----------|
|
|
268
|
+
| `"revenue"` (default) | `firm_revenue / total_revenue` | Standard approach, larger firms weighted more |
|
|
269
|
+
| `"cost"` | `firm_cogs / total_cogs` | Weight by production scale |
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
from PyMarkup.core.markup_calculation import aggregate_markups
|
|
273
|
+
|
|
274
|
+
# Revenue-weighted aggregation (default)
|
|
275
|
+
agg = aggregate_markups(
|
|
276
|
+
firm_markups, by="year", method="weighted_mean",
|
|
277
|
+
weight_type="revenue", panel_data=panel_data
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
# Cost-weighted aggregation
|
|
281
|
+
agg = aggregate_markups(
|
|
282
|
+
firm_markups, by="year", method="weighted_mean",
|
|
283
|
+
weight_type="cost", panel_data=panel_data
|
|
284
|
+
)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
Via pipeline config:
|
|
288
|
+
```python
|
|
289
|
+
config = PipelineConfig(
|
|
290
|
+
...
|
|
291
|
+
aggregation_weight="revenue", # or "cost"
|
|
292
|
+
)
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
#### DEU Sample Filtering
|
|
296
|
+
|
|
297
|
+
To replicate the results of the original De Loecker, Eeckhout, and Unger (2020) paper (DLEU), you can filter the Compustat data to include only the firm-year observations used in the original study:
|
|
298
|
+
|
|
299
|
+
```yaml
|
|
300
|
+
# config.yaml
|
|
301
|
+
use_deu_sample: true
|
|
302
|
+
deu_observations_path: "Input/DLEU/DEU_observations.dta"
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
Or via Python:
|
|
306
|
+
|
|
307
|
+
```python
|
|
308
|
+
config = PipelineConfig(
|
|
309
|
+
compustat_path="Input/DLEU/Compustat_annual.csv",
|
|
310
|
+
macro_vars_path="Input/DLEU/macro_vars_new.xlsx",
|
|
311
|
+
use_deu_sample=True,
|
|
312
|
+
deu_observations_path="Input/DLEU/DEU_observations.dta",
|
|
313
|
+
...
|
|
314
|
+
)
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
When enabled, the pipeline performs an inner merge on `gvkey` and `year` to filter to the original DLEU sample (approximately 242,000 firm-year observations from 1955-2016).
|
|
318
|
+
|
|
319
|
+
### 4. Markup Calculation
|
|
320
|
+
|
|
321
|
+
Computes firm-level markups using the De Loecker & Warzynski formula:
|
|
322
|
+
|
|
323
|
+
```
|
|
324
|
+
markup = θ / cost_share
|
|
325
|
+
where cost_share = COGS / Revenue
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### 5. Figures
|
|
329
|
+
|
|
330
|
+
| Figure | Function | Description |
|
|
331
|
+
|--------|----------|-------------|
|
|
332
|
+
| Aggregate Markup | `plot_aggregate_markup()` | Time series of aggregate markups |
|
|
333
|
+
| PPI vs Markup | `plot_markup_vs_ppi()` | Scatter plot with weighted OLS regression |
|
|
334
|
+
|
|
335
|
+
### 6. Decomposition
|
|
336
|
+
|
|
337
|
+
Dynamic Olley-Pakes decomposition of aggregate markup changes (DLEU 2020). The decomposition runs automatically in the pipeline for Wooldridge IV and Cost Share methods.
|
|
338
|
+
|
|
339
|
+
**Decomposes markup growth into three components:**
|
|
340
|
+
|
|
341
|
+
| Component | Description |
|
|
342
|
+
|-----------|-------------|
|
|
343
|
+
| **Within** | Markup changes within continuing firms |
|
|
344
|
+
| **Reallocation** | Market share shifts toward high/low-markup firms |
|
|
345
|
+
| **Net Entry** | Difference between entering and exiting firms |
|
|
346
|
+
|
|
347
|
+
The three components sum to the total change in the aggregate markup: `Within + Reallocation + Net Entry = Markup (benchmark)`
|
|
348
|
+
|
|
349
|
+
**Output files:**
|
|
350
|
+
|
|
351
|
+
| File | Description |
|
|
352
|
+
|------|-------------|
|
|
353
|
+
| `Output/intermediate/decomposition_wooldridge_iv.csv` | IV decomposition results |
|
|
354
|
+
| `Output/intermediate/decomposition_cost_share.csv` | Cost Share decomposition results |
|
|
355
|
+
| `Output/figures/Decomposition - Wooldridge IV (YYYY-YYYY).pdf` | IV decomposition figure |
|
|
356
|
+
| `Output/figures/Decomposition - Cost Share (YYYY-YYYY).pdf` | Cost Share decomposition figure |
|
|
357
|
+
|
|
358
|
+
**Standalone usage:**
|
|
359
|
+
|
|
360
|
+
```python
|
|
361
|
+
from PyMarkup.decomposition import OlleyPakesDecomposition, plot_decomposition
|
|
362
|
+
|
|
363
|
+
op = OlleyPakesDecomposition(
|
|
364
|
+
firm_var="gvkey",
|
|
365
|
+
time_var="year",
|
|
366
|
+
markup_var="markup",
|
|
367
|
+
weight_var="sale_D",
|
|
368
|
+
)
|
|
369
|
+
decomp_results = op.decompose(firm_markups)
|
|
370
|
+
|
|
371
|
+
# Plot with cumulative markup levels (DLEU Figure IV style)
|
|
372
|
+
# All lines start at the same baseline and show counterfactual paths:
|
|
373
|
+
# "What would markup be if only this component operated?"
|
|
374
|
+
plot_decomposition(
|
|
375
|
+
decomp_results,
|
|
376
|
+
cumulative=True,
|
|
377
|
+
base_markup=1.21, # Base period aggregate markup (e.g., 1980 value)
|
|
378
|
+
save_path="Output/decomposition.pdf",
|
|
379
|
+
)
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
## Project Structure
|
|
384
|
+
|
|
385
|
+
```
|
|
386
|
+
src/PyMarkup/
|
|
387
|
+
├── core/ # Data preparation, markup calculation, figures
|
|
388
|
+
├── data/ # Data downloaders and loaders
|
|
389
|
+
├── estimators/ # WooldridgeIV, CostShare, ACF estimators
|
|
390
|
+
├── pipeline/ # MarkupPipeline orchestrator, config
|
|
391
|
+
├── decomposition/ # Dynamic Olley-Pakes decomposition
|
|
392
|
+
├── io/ # I/O schemas (Pydantic)
|
|
393
|
+
└── cli/ # CLI commands
|
|
394
|
+
|
|
395
|
+
Input/ # Raw data (not version controlled)
|
|
396
|
+
Intermediate/ # Generated datasets, theta estimates
|
|
397
|
+
Output/ # Figures and tables
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
## License
|
|
401
|
+
|
|
402
|
+
MIT License
|