@wentorai/research-plugins 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/README.md +22 -22
  2. package/curated/analysis/README.md +71 -56
  3. package/curated/domains/README.md +176 -67
  4. package/curated/literature/README.md +71 -47
  5. package/curated/research/README.md +91 -58
  6. package/curated/tools/README.md +88 -87
  7. package/curated/writing/README.md +80 -45
  8. package/mcp-configs/cloud-docs/confluence-mcp.json +37 -0
  9. package/mcp-configs/cloud-docs/google-drive-mcp.json +35 -0
  10. package/mcp-configs/cloud-docs/notion-mcp.json +29 -0
  11. package/mcp-configs/communication/discord-mcp.json +29 -0
  12. package/mcp-configs/communication/slack-mcp.json +29 -0
  13. package/mcp-configs/communication/telegram-mcp.json +28 -0
  14. package/mcp-configs/database/neo4j-mcp.json +37 -0
  15. package/mcp-configs/database/postgres-mcp.json +28 -0
  16. package/mcp-configs/database/sqlite-mcp.json +29 -0
  17. package/mcp-configs/dev-platform/github-mcp.json +31 -0
  18. package/mcp-configs/dev-platform/gitlab-mcp.json +34 -0
  19. package/mcp-configs/email/email-mcp.json +40 -0
  20. package/mcp-configs/email/gmail-mcp.json +37 -0
  21. package/mcp-configs/registry.json +178 -149
  22. package/mcp-configs/repository/dataverse-mcp.json +33 -0
  23. package/mcp-configs/repository/huggingface-mcp.json +29 -0
  24. package/openclaw.plugin.json +2 -2
  25. package/package.json +2 -2
  26. package/skills/analysis/dataviz/algorithm-visualizer-guide/SKILL.md +259 -0
  27. package/skills/analysis/dataviz/bokeh-visualization-guide/SKILL.md +270 -0
  28. package/skills/analysis/dataviz/chart-image-generator/SKILL.md +229 -0
  29. package/skills/analysis/dataviz/d3-visualization-guide/SKILL.md +281 -0
  30. package/skills/analysis/dataviz/echarts-visualization-guide/SKILL.md +250 -0
  31. package/skills/analysis/dataviz/metabase-analytics-guide/SKILL.md +242 -0
  32. package/skills/analysis/dataviz/plotly-interactive-guide/SKILL.md +266 -0
  33. package/skills/analysis/dataviz/redash-analytics-guide/SKILL.md +284 -0
  34. package/skills/analysis/econometrics/econml-causal-guide/SKILL.md +163 -0
  35. package/skills/analysis/econometrics/mostly-harmless-guide/SKILL.md +139 -0
  36. package/skills/analysis/econometrics/panel-data-analyst/SKILL.md +259 -0
  37. package/skills/analysis/econometrics/python-causality-guide/SKILL.md +134 -0
  38. package/skills/analysis/econometrics/stata-accounting-guide/SKILL.md +269 -0
  39. package/skills/analysis/econometrics/stata-analyst-guide/SKILL.md +245 -0
  40. package/skills/analysis/statistics/data-anomaly-detection/SKILL.md +157 -0
  41. package/skills/analysis/statistics/ml-experiment-tracker/SKILL.md +212 -0
  42. package/skills/analysis/statistics/pywayne-statistics-guide/SKILL.md +192 -0
  43. package/skills/analysis/statistics/quantitative-methods-guide/SKILL.md +193 -0
  44. package/skills/analysis/statistics/senior-data-scientist-guide/SKILL.md +223 -0
  45. package/skills/analysis/wrangling/csv-data-analyzer/SKILL.md +170 -0
  46. package/skills/analysis/wrangling/data-cleaning-pipeline/SKILL.md +266 -0
  47. package/skills/analysis/wrangling/data-cog-guide/SKILL.md +178 -0
  48. package/skills/analysis/wrangling/stata-data-cleaning/SKILL.md +276 -0
  49. package/skills/analysis/wrangling/survey-data-processing/SKILL.md +298 -0
  50. package/skills/domains/ai-ml/ai-model-benchmarking/SKILL.md +209 -0
  51. package/skills/domains/ai-ml/annotated-dl-papers-guide/SKILL.md +159 -0
  52. package/skills/domains/ai-ml/dl-transformer-finetune/SKILL.md +239 -0
  53. package/skills/domains/ai-ml/generative-ai-guide/SKILL.md +146 -0
  54. package/skills/domains/ai-ml/huggingface-inference-guide/SKILL.md +196 -0
  55. package/skills/domains/ai-ml/keras-deep-learning/SKILL.md +210 -0
  56. package/skills/domains/ai-ml/llm-from-scratch-guide/SKILL.md +124 -0
  57. package/skills/domains/ai-ml/ml-pipeline-guide/SKILL.md +295 -0
  58. package/skills/domains/ai-ml/nlp-toolkit-guide/SKILL.md +247 -0
  59. package/skills/domains/ai-ml/pytorch-guide/SKILL.md +281 -0
  60. package/skills/domains/ai-ml/pytorch-lightning-guide/SKILL.md +244 -0
  61. package/skills/domains/ai-ml/tensorflow-guide/SKILL.md +241 -0
  62. package/skills/domains/biomedical/bioagents-guide/SKILL.md +308 -0
  63. package/skills/domains/biomedical/medgeclaw-guide/SKILL.md +345 -0
  64. package/skills/domains/biomedical/medical-imaging-guide/SKILL.md +305 -0
  65. package/skills/domains/business/architecture-design-guide/SKILL.md +279 -0
  66. package/skills/domains/business/innovation-management-guide/SKILL.md +257 -0
  67. package/skills/domains/business/operations-research-guide/SKILL.md +258 -0
  68. package/skills/domains/chemistry/molecular-dynamics-guide/SKILL.md +237 -0
  69. package/skills/domains/chemistry/pubchem-api-guide/SKILL.md +180 -0
  70. package/skills/domains/chemistry/spectroscopy-analysis-guide/SKILL.md +290 -0
  71. package/skills/domains/cs/distributed-systems-guide/SKILL.md +268 -0
  72. package/skills/domains/cs/formal-verification-guide/SKILL.md +298 -0
  73. package/skills/domains/ecology/species-distribution-guide/SKILL.md +343 -0
  74. package/skills/domains/economics/imf-data-api-guide/SKILL.md +174 -0
  75. package/skills/domains/economics/post-labor-economics/SKILL.md +254 -0
  76. package/skills/domains/economics/pricing-psychology-guide/SKILL.md +273 -0
  77. package/skills/domains/economics/world-bank-data-guide/SKILL.md +179 -0
  78. package/skills/domains/education/assessment-design-guide/SKILL.md +213 -0
  79. package/skills/domains/education/educational-research-methods/SKILL.md +179 -0
  80. package/skills/domains/education/mooc-analytics-guide/SKILL.md +206 -0
  81. package/skills/domains/finance/portfolio-optimization-guide/SKILL.md +279 -0
  82. package/skills/domains/finance/risk-modeling-guide/SKILL.md +260 -0
  83. package/skills/domains/finance/stata-accounting-research/SKILL.md +372 -0
  84. package/skills/domains/geoscience/climate-modeling-guide/SKILL.md +215 -0
  85. package/skills/domains/geoscience/satellite-remote-sensing/SKILL.md +193 -0
  86. package/skills/domains/geoscience/seismology-data-guide/SKILL.md +208 -0
  87. package/skills/domains/humanities/ethical-philosophy-guide/SKILL.md +244 -0
  88. package/skills/domains/humanities/history-research-guide/SKILL.md +260 -0
  89. package/skills/domains/humanities/political-history-guide/SKILL.md +241 -0
  90. package/skills/domains/law/legal-nlp-guide/SKILL.md +236 -0
  91. package/skills/domains/law/patent-analysis-guide/SKILL.md +257 -0
  92. package/skills/domains/law/regulatory-compliance-guide/SKILL.md +267 -0
  93. package/skills/domains/math/symbolic-computation-guide/SKILL.md +263 -0
  94. package/skills/domains/math/topology-data-analysis/SKILL.md +305 -0
  95. package/skills/domains/pharma/clinical-trial-design-guide/SKILL.md +271 -0
  96. package/skills/domains/pharma/drug-target-interaction/SKILL.md +242 -0
  97. package/skills/domains/pharma/pharmacovigilance-guide/SKILL.md +216 -0
  98. package/skills/domains/physics/astrophysics-data-guide/SKILL.md +305 -0
  99. package/skills/domains/physics/particle-physics-guide/SKILL.md +287 -0
  100. package/skills/domains/social-science/network-analysis-guide/SKILL.md +310 -0
  101. package/skills/domains/social-science/psychology-research-guide/SKILL.md +270 -0
  102. package/skills/domains/social-science/sociology-research-guide/SKILL.md +238 -0
  103. package/skills/literature/discovery/paper-recommendation-guide/SKILL.md +120 -0
  104. package/skills/literature/discovery/semantic-paper-radar/SKILL.md +144 -0
  105. package/skills/literature/discovery/zotero-arxiv-daily-guide/SKILL.md +94 -0
  106. package/skills/literature/fulltext/core-api-guide/SKILL.md +144 -0
  107. package/skills/literature/fulltext/institutional-repository-guide/SKILL.md +212 -0
  108. package/skills/literature/fulltext/open-access-mining-guide/SKILL.md +341 -0
  109. package/skills/literature/metadata/academic-paper-summarizer/SKILL.md +101 -0
  110. package/skills/literature/metadata/wikidata-api-guide/SKILL.md +156 -0
  111. package/skills/literature/search/arxiv-batch-reporting/SKILL.md +133 -0
  112. package/skills/literature/search/arxiv-paper-processor/SKILL.md +141 -0
  113. package/skills/literature/search/baidu-scholar-guide/SKILL.md +110 -0
  114. package/skills/literature/search/chatpaper-guide/SKILL.md +122 -0
  115. package/skills/literature/search/deep-literature-search/SKILL.md +149 -0
  116. package/skills/literature/search/deepgit-search-guide/SKILL.md +147 -0
  117. package/skills/literature/search/pasa-paper-search-guide/SKILL.md +138 -0
  118. package/skills/research/automation/ai-scientist-v2-guide/SKILL.md +284 -0
  119. package/skills/research/automation/aim-experiment-guide/SKILL.md +234 -0
  120. package/skills/research/automation/datagen-research-guide/SKILL.md +131 -0
  121. package/skills/research/automation/kedro-pipeline-guide/SKILL.md +216 -0
  122. package/skills/research/automation/mle-agent-guide/SKILL.md +139 -0
  123. package/skills/research/automation/paper-to-agent-guide/SKILL.md +116 -0
  124. package/skills/research/automation/rd-agent-guide/SKILL.md +246 -0
  125. package/skills/research/automation/research-paper-orchestrator/SKILL.md +254 -0
  126. package/skills/research/deep-research/academic-deep-research/SKILL.md +190 -0
  127. package/skills/research/deep-research/auto-deep-research-guide/SKILL.md +141 -0
  128. package/skills/research/deep-research/deep-research-pro/SKILL.md +213 -0
  129. package/skills/research/deep-research/deep-research-work/SKILL.md +204 -0
  130. package/skills/research/deep-research/deep-searcher-guide/SKILL.md +253 -0
  131. package/skills/research/deep-research/gpt-researcher-guide/SKILL.md +191 -0
  132. package/skills/research/deep-research/khoj-research-guide/SKILL.md +200 -0
  133. package/skills/research/deep-research/local-deep-research-guide/SKILL.md +253 -0
  134. package/skills/research/deep-research/tongyi-deep-research-guide/SKILL.md +217 -0
  135. package/skills/research/funding/eu-horizon-guide/SKILL.md +244 -0
  136. package/skills/research/funding/grant-budget-guide/SKILL.md +284 -0
  137. package/skills/research/funding/nih-reporter-api-guide/SKILL.md +166 -0
  138. package/skills/research/funding/nsf-award-api-guide/SKILL.md +133 -0
  139. package/skills/research/methodology/academic-mentor-guide/SKILL.md +169 -0
  140. package/skills/research/methodology/claude-scientific-guide/SKILL.md +122 -0
  141. package/skills/research/methodology/deep-innovator-guide/SKILL.md +242 -0
  142. package/skills/research/methodology/osf-api-guide/SKILL.md +165 -0
  143. package/skills/research/methodology/research-paper-kb/SKILL.md +263 -0
  144. package/skills/research/methodology/research-town-guide/SKILL.md +263 -0
  145. package/skills/research/paper-review/automated-review-guide/SKILL.md +281 -0
  146. package/skills/research/paper-review/paper-compare-guide/SKILL.md +238 -0
  147. package/skills/research/paper-review/paper-digest-guide/SKILL.md +240 -0
  148. package/skills/research/paper-review/paper-research-assistant/SKILL.md +231 -0
  149. package/skills/research/paper-review/research-quality-filter/SKILL.md +261 -0
  150. package/skills/research/paper-review/review-response-guide/SKILL.md +275 -0
  151. package/skills/tools/code-exec/google-colab-guide/SKILL.md +276 -0
  152. package/skills/tools/code-exec/kaggle-api-guide/SKILL.md +216 -0
  153. package/skills/tools/code-exec/overleaf-cli-guide/SKILL.md +279 -0
  154. package/skills/tools/diagram/code-flow-visualizer/SKILL.md +197 -0
  155. package/skills/tools/diagram/excalidraw-diagram-guide/SKILL.md +170 -0
  156. package/skills/tools/diagram/json-data-visualizer/SKILL.md +270 -0
  157. package/skills/tools/diagram/mermaid-architect-guide/SKILL.md +219 -0
  158. package/skills/tools/diagram/tldraw-whiteboard-guide/SKILL.md +397 -0
  159. package/skills/tools/document/docsgpt-guide/SKILL.md +130 -0
  160. package/skills/tools/document/large-document-reader/SKILL.md +202 -0
  161. package/skills/tools/document/paper-parse-guide/SKILL.md +243 -0
  162. package/skills/tools/knowledge-graph/citation-network-builder/SKILL.md +244 -0
  163. package/skills/tools/knowledge-graph/concept-map-generator/SKILL.md +284 -0
  164. package/skills/tools/knowledge-graph/graphiti-guide/SKILL.md +219 -0
  165. package/skills/tools/ocr-translate/pdf-math-translate-guide/SKILL.md +141 -0
  166. package/skills/tools/ocr-translate/zotero-pdf-translate-guide/SKILL.md +95 -0
  167. package/skills/tools/ocr-translate/zotero-pdf2zh-guide/SKILL.md +143 -0
  168. package/skills/tools/scraping/dataset-finder-guide/SKILL.md +253 -0
  169. package/skills/tools/scraping/easy-spider-guide/SKILL.md +250 -0
  170. package/skills/tools/scraping/google-scholar-scraper/SKILL.md +255 -0
  171. package/skills/tools/scraping/repository-harvesting-guide/SKILL.md +310 -0
  172. package/skills/writing/citation/academic-citation-manager/SKILL.md +314 -0
  173. package/skills/writing/citation/jabref-reference-guide/SKILL.md +127 -0
  174. package/skills/writing/citation/jasminum-zotero-guide/SKILL.md +103 -0
  175. package/skills/writing/citation/obsidian-citation-guide/SKILL.md +164 -0
  176. package/skills/writing/citation/obsidian-zotero-guide/SKILL.md +137 -0
  177. package/skills/writing/citation/papersgpt-zotero-guide/SKILL.md +132 -0
  178. package/skills/writing/citation/papis-cli-guide/SKILL.md +213 -0
  179. package/skills/writing/citation/zotero-better-bibtex-guide/SKILL.md +107 -0
  180. package/skills/writing/citation/zotero-better-notes-guide/SKILL.md +121 -0
  181. package/skills/writing/citation/zotero-gpt-guide/SKILL.md +111 -0
  182. package/skills/writing/citation/zotero-mcp-guide/SKILL.md +164 -0
  183. package/skills/writing/citation/zotero-mdnotes-guide/SKILL.md +162 -0
  184. package/skills/writing/citation/zotero-reference-guide/SKILL.md +139 -0
  185. package/skills/writing/citation/zotero-scholar-guide/SKILL.md +294 -0
  186. package/skills/writing/citation/zotfile-attachment-guide/SKILL.md +140 -0
  187. package/skills/writing/composition/ml-paper-writing/SKILL.md +163 -0
  188. package/skills/writing/composition/paper-debugger-guide/SKILL.md +143 -0
  189. package/skills/writing/composition/scientific-writing-resources/SKILL.md +151 -0
  190. package/skills/writing/composition/scientific-writing-wrapper/SKILL.md +153 -0
  191. package/skills/writing/latex/latex-drawing-collection/SKILL.md +154 -0
  192. package/skills/writing/latex/latex-templates-collection/SKILL.md +159 -0
  193. package/skills/writing/latex/md-to-pdf-academic/SKILL.md +230 -0
  194. package/skills/writing/latex/tex-render-guide/SKILL.md +243 -0
  195. package/skills/writing/polish/academic-tone-guide/SKILL.md +209 -0
  196. package/skills/writing/polish/conciseness-editing-guide/SKILL.md +225 -0
  197. package/skills/writing/polish/paper-polish-guide/SKILL.md +160 -0
  198. package/skills/writing/templates/graphical-abstract-guide/SKILL.md +183 -0
  199. package/skills/writing/templates/novathesis-guide/SKILL.md +152 -0
  200. package/skills/writing/templates/scientific-article-pdf/SKILL.md +261 -0
  201. package/skills/writing/templates/sjtuthesis-guide/SKILL.md +197 -0
  202. package/skills/writing/templates/thuthesis-guide/SKILL.md +181 -0
  203. package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +0 -207
@@ -0,0 +1,163 @@
1
+ ---
2
+ name: econml-causal-guide
3
+ description: "Apply EconML for causal inference combining machine learning and econometrics"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "🔬"
7
+ category: analysis
8
+ subcategory: econometrics
9
+ keywords: ["causal-inference", "machine-learning", "treatment-effects", "econometrics", "microsoft", "double-ml"]
10
+ source: "https://github.com/py-why/EconML"
11
+ ---
12
+
13
+ # EconML Causal Inference Guide
14
+
15
+ ## Overview
16
+
17
+ EconML is a Python package developed by Microsoft Research as part of the ALICE (Automated Learning and Intelligence for Causation and Economics) project. It provides a comprehensive suite of methods for estimating heterogeneous treatment effects from observational data, bridging the gap between modern machine learning and classical econometric techniques for causal inference.
18
+
19
+ Traditional econometric approaches to causal inference often rely on strong parametric assumptions and struggle with high-dimensional data. Pure machine learning methods excel at prediction but do not inherently distinguish correlation from causation. EconML combines the strengths of both paradigms, offering methods that leverage the flexibility of ML for nuisance parameter estimation while maintaining the rigorous causal identification guarantees of econometric theory.
20
+
21
+ The library implements cutting-edge methods from the academic literature including Double Machine Learning (DML), Causal Forests, Doubly Robust Learners, Orthogonal Random Forests, and Instrumental Variable methods with ML first stages. These tools are essential for researchers across economics, public health, education policy, and any field where understanding causal mechanisms from non-experimental data is critical.
22
+
23
+ ## Installation and Setup
24
+
25
+ Install EconML via pip:
26
+
27
+ ```bash
28
+ pip install econml
29
+ ```
30
+
31
+ For the full feature set including optional dependencies:
32
+
33
+ ```bash
34
+ pip install econml[all]
35
+ ```
36
+
37
+ EconML builds on top of scikit-learn and integrates with the broader Python data science ecosystem. Core dependencies include numpy, scipy, pandas, scikit-learn, and statsmodels. Optional dependencies for specific estimators include LightGBM and PyTorch.
38
+
39
+ Verify installation:
40
+
41
+ ```python
42
+ import econml
43
+ print(econml.__version__)
44
+
45
+ from econml.dml import LinearDML
46
+ from econml.orf import DMLOrthoForest
47
+ print("EconML loaded successfully")
48
+ ```
49
+
50
+ ## Core Estimators and Methods
51
+
52
+ **Double Machine Learning (DML)**: The workhorse method for estimating average and heterogeneous treatment effects while controlling for high-dimensional confounders. DML uses cross-fitting and orthogonalization to eliminate regularization bias:
53
+
54
+ ```python
55
+ from econml.dml import LinearDML, CausalForestDML
56
+ from sklearn.ensemble import GradientBoostingRegressor
57
+
58
+ # Linear DML for parametric treatment effect estimation
59
+ est = LinearDML(
60
+ model_y=GradientBoostingRegressor(),
61
+ model_t=GradientBoostingRegressor(),
62
+ cv=5,
63
+ random_state=42
64
+ )
65
+ est.fit(Y, T, X=X, W=W)
66
+
67
+ # Get treatment effect estimates with confidence intervals
68
+ effect = est.effect(X_test)
69
+ ci = est.effect_interval(X_test, alpha=0.05)
70
+ print(f"ATE: {est.ate():.4f}")
71
+ print(f"ATE 95% CI: {est.ate_interval(alpha=0.05)}")
72
+ ```
73
+
74
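+ Here `Y` is the outcome, `T` is the treatment, `X` contains effect modifiers (features for heterogeneity), and `W` contains additional confounders. Because the estimator is fit with `X`, effect-style methods (`effect`, `ate`, and their `_interval` variants) must also be given an `X` to evaluate on, for example `est.ate(X_test)` and `est.ate_interval(X_test, alpha=0.05)`.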
75
+
76
+ **Causal Forest DML**: Combines DML orthogonalization with Causal Forest estimation for flexible, nonparametric heterogeneous treatment effects:
77
+
78
+ ```python
79
+ from econml.dml import CausalForestDML
80
+
81
+ cf_est = CausalForestDML(
82
+ model_y=GradientBoostingRegressor(),
83
+ model_t=GradientBoostingRegressor(),
84
+ n_estimators=200,
85
+ min_samples_leaf=10,
86
+ cv=5,
87
+ random_state=42
88
+ )
89
+ cf_est.fit(Y, T, X=X, W=W)
90
+
91
+ # Heterogeneous treatment effects
92
+ hte = cf_est.effect(X_test)
93
+ # Feature importance for treatment effect heterogeneity
94
+ importances = cf_est.feature_importances_
95
+ ```
96
+
97
+ **Doubly Robust Learner**: Provides consistent treatment effect estimates when either the outcome model or the propensity score model is correctly specified:
98
+
99
+ ```python
100
+ from econml.dr import DRLearner
101
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
102
+
103
+ dr_est = DRLearner(
104
+ model_propensity=RandomForestClassifier(),
105
+ model_regression=RandomForestRegressor(),
106
+ model_final=RandomForestRegressor(),
107
+ cv=5
108
+ )
109
+ dr_est.fit(Y, T, X=X, W=W)
110
+ ```
111
+
112
+ **Instrumental Variable Methods**: For settings where unobserved confounding is present but valid instruments are available:
113
+
114
+ ```python
115
+ from econml.iv.dml import DMLIV
116
+
117
+ iv_est = DMLIV(
118
+ model_y_xw=GradientBoostingRegressor(),
119
+ model_t_xw=GradientBoostingRegressor(),
120
+ model_t_xwz=GradientBoostingRegressor(),
121
+ cv=5
122
+ )
123
+ iv_est.fit(Y, T, Z=Z, X=X, W=W)
124
+ ```
125
+
126
+ ## Research Workflow Integration
127
+
128
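+ **Experiment Analysis**: When randomized experiments suffer from non-compliance, use random assignment as the instrument with the IV methods in EconML to recover local average treatment effects. The ML-based first stages handle complex relationships between instruments and treatment uptake. Attrition is a separate problem: IV alone does not correct for it, so pair the analysis with bounds or reweighting when attrition is present.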
129
+
130
+ **Policy Evaluation**: Estimate heterogeneous treatment effects to identify which subpopulations benefit most from an intervention. The CATE (Conditional Average Treatment Effect) estimates can directly inform targeted policy design:
131
+
132
+ ```python
133
+ # Identify subgroups with largest treatment effects
134
+ import pandas as pd
135
+
136
+ effects_df = pd.DataFrame({
137
+ "effect": cf_est.effect(X_test).flatten(),
138
+ "ci_lower": cf_est.effect_interval(X_test, alpha=0.05)[0].flatten(),
139
+ "ci_upper": cf_est.effect_interval(X_test, alpha=0.05)[1].flatten()
140
+ }, index=X_test.index)
141
+
142
+ # Top beneficiaries
143
+ top_group = effects_df.nlargest(100, "effect")
144
+ ```
145
+
146
+ **Sensitivity Analysis**: Combine EconML estimates with sensitivity analysis frameworks to assess robustness to potential unobserved confounders. Report how much unmeasured confounding would be required to explain away your findings.
147
+
148
+ **Publication-Ready Results**: EconML provides confidence intervals and hypothesis tests based on asymptotic theory, producing results suitable for peer-reviewed publications. Use the summary methods to generate formatted regression-style output.
149
+
150
+ ## Best Practices for Academic Research
151
+
152
+ 1. **Always validate assumptions**: DML requires conditional ignorability (selection on observables). Document your identification strategy clearly.
153
+ 2. **Cross-fitting is essential**: Never skip the cross-fitting step, as it prevents overfitting bias in the nuisance estimates.
154
+ 3. **Report multiple estimators**: Present results from DML, DR Learner, and Causal Forest side by side to assess robustness.
155
+ 4. **Check overlap**: Verify sufficient overlap in covariate distributions between treated and control groups before estimation.
156
+ 5. **Use honest estimation**: EconML Causal Forests use sample splitting for honesty by default, ensuring valid inference.
157
+
158
+ ## References
159
+
160
+ - EconML repository: https://github.com/py-why/EconML
161
+ - EconML documentation: https://econml.azurewebsites.net/
162
+ - Chernozhukov et al. (2018), Double/Debiased Machine Learning for Treatment and Structural Parameters
163
+ - Athey and Imbens (2019), Machine Learning Methods That Economists Should Know About
@@ -0,0 +1,139 @@
1
+ ---
2
+ name: mostly-harmless-guide
3
+ description: "Replication code and guide for Mostly Harmless Econometrics methods"
4
+ version: 1.0.0
5
+ author: wentor-community
6
+ source: https://github.com/vikjam/mostly-harmless-replication
7
+ metadata:
8
+ openclaw:
9
+ category: analysis
10
+ subcategory: econometrics
11
+ keywords:
12
+ - econometrics
13
+ - causal-inference
14
+ - replication
15
+ - regression
16
+ - instrumental-variables
17
+ - difference-in-differences
18
+ ---
19
+
20
+ # Mostly Harmless Econometrics Guide
21
+
22
+ A skill providing replication code, explanations, and practical guidance for the econometric methods presented in Angrist and Pischke's "Mostly Harmless Econometrics" (MHE). Based on the mostly-harmless-replication repository (642 stars), this skill helps researchers understand and correctly apply core causal inference techniques.
23
+
24
+ ## Overview
25
+
26
+ "Mostly Harmless Econometrics" is one of the most influential applied econometrics textbooks, providing accessible explanations of the methods that dominate modern empirical research in economics and increasingly in other social sciences. This skill translates the book's core methods into practical guidance that the agent can use to help researchers design studies, select appropriate estimators, and interpret results correctly.
27
+
28
+ The skill covers regression, instrumental variables, difference-in-differences, regression discontinuity, and related methods, with emphasis on the practical decisions researchers face when applying these techniques to real data.
29
+
30
+ ## Regression Fundamentals
31
+
32
+ **Ordinary Least Squares (OLS)**
33
+ - OLS provides the best linear approximation to the conditional expectation function
34
+ - The regression anatomy theorem: each coefficient can be obtained from a bivariate regression of the outcome on the residualized regressor
35
+ - Omitted variable bias formula: bias equals the effect of the omitted variable on the outcome times the coefficient from regressing the omitted variable on the included regressor
36
+ - Control variables should be selected based on the conditional independence assumption, not on statistical significance
37
+ - Robust standard errors (Huber-White) should be the default; cluster when observations are not independent
38
+
39
+ **Regression Interpretation**
40
+ - The causal interpretation of regression requires the conditional independence assumption (CIA)
41
+ - CIA states that treatment is as good as randomly assigned after conditioning on controls
42
+ - Saturated models (a full set of dummies and interactions for categorical regressors) fit the conditional expectation function exactly, so functional form is not a concern
43
+ - Linear regression with continuous variables approximates the true conditional expectation
44
+ - Report both statistical and economic significance; a large t-statistic does not mean a large effect
45
+
46
+ **Practical Decisions**
47
+ - Include controls that are correlated with both the treatment and outcome
48
+ - Do not include controls that are consequences of treatment (bad controls)
49
+ - Use the most parsimonious specification that satisfies the CIA
50
+ - Test sensitivity to alternative control sets to assess robustness
51
+ - Report multiple specifications to demonstrate that results are not driven by a particular set of controls
52
+
53
+ ## Instrumental Variables
54
+
55
+ **Core Concepts**
56
+ - IV addresses endogeneity when the treatment is correlated with unobserved factors affecting the outcome
57
+ - A valid instrument must be relevant (correlated with treatment) and excludable (affects outcome only through treatment)
58
+ - Two-stage least squares (2SLS) is the standard IV estimator
59
+ - The Wald estimator (reduced form divided by first stage) gives the IV estimate in the simplest case
60
+ - IV estimates the Local Average Treatment Effect (LATE) for compliers
61
+
62
+ **Implementation Guide**
63
+ - Always report the first-stage F-statistic; values below 10 indicate weak instruments
64
+ - Use the Anderson-Rubin test for inference robust to weak instruments
65
+ - Over-identification tests (Sargan-Hansen) can detect violations of the exclusion restriction with multiple instruments, but cannot validate a just-identified model
66
+ - Report the first stage, reduced form, and IV estimates together
67
+ - Compare OLS and IV estimates; if IV is much larger, consider LATE interpretation or measurement error
68
+
69
+ **Common Applications**
70
+ - Returns to education using quarter of birth as an instrument
71
+ - Effect of institutions on growth using settler mortality as an instrument
72
+ - Peer effects using random assignment to groups
73
+ - Supply and demand estimation using shift variables
74
+ - Policy evaluation using eligibility rules as instruments
75
+
76
+ ## Difference-in-Differences
77
+
78
+ **Design Principles**
79
+ - DID compares changes in outcomes over time between treated and control groups
80
+ - The parallel trends assumption: absent treatment, both groups would have followed the same trend
81
+ - DID removes time-invariant unobserved confounders
82
+ - The standard estimator is a two-way fixed effects regression (unit and time fixed effects plus treatment indicator)
83
+ - Staggered adoption designs require careful attention to treatment timing heterogeneity
84
+
85
+ **Implementation**
86
+ - Always plot pre-treatment trends to assess the parallel trends assumption visually
87
+ - Include leads of the treatment indicator to test for pre-trends formally
88
+ - Cluster standard errors at the group level (state, firm, school)
89
+ - With few clusters (fewer than 50), use wild cluster bootstrap for inference
90
+ - Consider synthetic control methods when the control group is not a natural comparator
91
+
92
+ **Recent Developments**
93
+ - Callaway and Sant'Anna (2021): heterogeneity-robust DID with staggered treatment
94
+ - Sun and Abraham (2021): interaction-weighted estimator for event studies
95
+ - de Chaisemartin and D'Haultfoeuille (2020): decomposition of two-way FE estimator
96
+ - Goodman-Bacon (2021): decomposition of the two-way FE DID estimator when treatment timing varies
97
+ - These methods address bias in standard two-way FE when treatment effects are heterogeneous
98
+
99
+ ## Regression Discontinuity
100
+
101
+ **Sharp RD Design**
102
+ - Treatment is a deterministic function of a running variable at a known cutoff
103
+ - Causal effect is identified at the cutoff by comparing outcomes just above and just below
104
+ - Local linear regression is preferred over global polynomial fitting
105
+ - Bandwidth selection should use data-driven methods (Imbens-Kalyanaraman, Calonico-Cattaneo-Titiunik)
106
+ - Always show the RD plot: binned means of the outcome against the running variable
107
+
108
+ **Fuzzy RD Design**
109
+ - Treatment probability jumps at the cutoff but is not deterministic
110
+ - Fuzzy RD is analogous to IV where the instrument is being above the cutoff
111
+ - Estimates a LATE for units whose treatment status is changed by crossing the cutoff
112
+ - Report both the first stage (jump in treatment probability) and the reduced form (jump in outcome)
113
+ - Validity requires that other covariates do not jump at the cutoff (covariate balance checks) and that units cannot precisely manipulate the running variable (density test)
114
+
115
+ **Practical Guidance**
116
+ - Test for manipulation of the running variable using the McCrary density test
117
+ - Show robustness to alternative bandwidth choices
118
+ - Covariates may be included to improve precision, but the point estimate should not change substantially when they are added
119
+ - Avoid high-order polynomial specifications that can be misleading
120
+ - Report the effective sample size used in the local estimation
121
+
122
+ ## Integration with Research-Claw
123
+
124
+ This skill enhances the Research-Claw econometric analysis workflow:
125
+
126
+ - Guide researchers in selecting the appropriate causal inference method for their question
127
+ - Help implement estimators correctly with proper standard errors and diagnostics
128
+ - Provide code templates for common econometric analyses in R, Stata, and Python
129
+ - Connect with data wrangling skills for cleaning and preparing analysis datasets
130
+ - Support writing skills with correctly formatted regression tables and result descriptions
131
+
132
+ ## Best Practices
133
+
134
+ - Start by clearly stating the causal question and the source of identification
135
+ - Draw a directed acyclic graph (DAG) to clarify assumptions about causal relationships
136
+ - Report all relevant diagnostics (first-stage F, pre-trends, balance tests)
137
+ - Show robustness across specifications rather than selecting a single preferred model
138
+ - Distinguish between statistical significance, economic significance, and policy relevance
139
+ - Be transparent about the limitations of your identification strategy
@@ -0,0 +1,259 @@
1
+ ---
2
+ name: panel-data-analyst
3
+ description: "Expert panel data regression analysis with fixed effects and GMM"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "grid"
7
+ category: "analysis"
8
+ subcategory: "econometrics"
9
+ keywords: ["panel data", "fixed effects", "random effects", "GMM", "dynamic panel", "Hausman test"]
10
+ source: "https://www.stata.com/manuals/xt.pdf"
11
+ ---
12
+
13
+ # Panel Data Analyst
14
+
15
+ Perform expert-level panel data regression analysis including fixed effects, random effects, dynamic panel models (Arellano-Bond/Blundell-Bond GMM), and advanced diagnostic tests. This skill covers the full workflow from panel setup through model selection, estimation, and publication-ready reporting.
16
+
17
+ ## Overview
18
+
19
+ Panel data -- repeated observations on the same cross-sectional units over time -- is the workhorse of modern empirical economics, finance, political science, and management research. Panel methods exploit both cross-sectional and temporal variation, enabling researchers to control for unobserved heterogeneity that would bias ordinary cross-sectional estimates.
20
+
21
+ The choice between fixed effects, random effects, and dynamic panel estimators depends on the data structure, the nature of unobserved heterogeneity, and the identifying assumptions the researcher is willing to make. This skill provides a systematic decision framework and implementation in both Stata and R, with emphasis on the diagnostic tests that justify model selection.
22
+
23
+ Beyond basic FE/RE models, this skill covers the advanced techniques increasingly required by journal reviewers: instrumental variables within panel frameworks, Driscoll-Kraay standard errors for cross-sectional dependence, correlated random effects (Mundlak/Chamberlain), and system GMM for dynamic panels with endogenous regressors.
24
+
25
+ ## Panel Data Setup
26
+
27
+ ### Declaring Panel Structure
28
+
29
+ ```stata
30
+ * Stata panel setup
31
+ xtset firm_id year
32
+ xtset // Verify panel structure
33
+
34
+ * Check panel balance
35
+ xtdescribe
36
+ * Shows: min/max/avg observations per panel, gaps
37
+
38
+ * Summary statistics by panel dimension
39
+ xtsum revenue profit employees rnd_spending
40
+ * Reports overall, between, and within variation
41
+ ```
42
+
43
+ ### Panel Diagnostics
44
+
45
+ ```stata
46
+ * Check for gaps in panel
47
+ xtset firm_id year
48
+ bysort firm_id (year): gen gap = year - year[_n-1] if _n > 1
49
+ tab gap // Should be all 1's for balanced annual panels
50
+
51
+ * Create balanced subsample
52
+ by firm_id: gen T_i = _N
53
+ tab T_i
54
+ keep if T_i == max_T // Keep only units observed in all periods (define max_T first, e.g. egen max_T = max(T_i))
55
+
56
+ * Attrition analysis
57
+ gen in_panel = 1
58
+ xtset firm_id year
59
+ tsfill, full
60
+ replace in_panel = 0 if missing(in_panel)
61
+ reg in_panel l.revenue l.profit l.size, cluster(firm_id)
62
+ ```
63
+
64
+ ## Fixed Effects vs. Random Effects
65
+
66
+ ### Fixed Effects Estimation
67
+
68
+ ```stata
69
+ * Within estimator (entity fixed effects)
70
+ xtreg profit revenue rnd_spending employees i.year, fe robust
71
+ estimates store fe_model
72
+
73
+ * Entity and time fixed effects
74
+ reghdfe profit revenue rnd_spending employees, ///
75
+ absorb(firm_id year) cluster(firm_id)
76
+ estimates store twoway_fe
77
+
78
+ * First-differences (alternative to within estimator)
79
+ reg d.profit d.revenue d.rnd_spending d.employees i.year, ///
80
+ cluster(firm_id)
81
+ estimates store fd_model
82
+ ```
83
+
84
+ ### Random Effects Estimation
85
+
86
+ ```stata
87
+ * GLS random effects
88
+ xtreg profit revenue rnd_spending employees i.year, re robust
89
+ estimates store re_model
90
+ ```
91
+
92
+ ### Hausman Test for Model Selection
93
+
94
+ ```stata
95
+ * Classic Hausman test
96
+ xtreg profit revenue rnd_spending employees, fe
97
+ estimates store fe_haus
98
+ xtreg profit revenue rnd_spending employees, re
99
+ estimates store re_haus
100
+ hausman fe_haus re_haus
101
+
102
+ * Robust Hausman test (preferred with heteroskedasticity)
103
+ * Mundlak (1978) approach: add group means to RE model
104
+ foreach var of varlist revenue rnd_spending employees {
105
+ bysort firm_id: egen m_`var' = mean(`var')
106
+ }
107
+ xtreg profit revenue rnd_spending employees ///
108
+ m_revenue m_rnd_spending m_employees i.year, re cluster(firm_id)
109
+ test m_revenue m_rnd_spending m_employees
110
+ * Rejection => FE preferred; failure to reject => RE acceptable
111
+ ```
112
+
113
+ ## Dynamic Panel Models
114
+
115
+ ### Arellano-Bond GMM (Difference GMM)
116
+
117
+ ```stata
118
+ * When the lagged dependent variable is a regressor:
119
+ * y_it = alpha * y_{i,t-1} + X_it * beta + mu_i + epsilon_it
120
+
121
+ * Difference GMM (Arellano & Bond 1991)
122
+ xtabond profit revenue rnd_spending employees, ///
123
+ lags(1) twostep robust artests(2)
124
+
125
+ * Diagnostics
126
+ * AR(1) should be significant, AR(2) should NOT be significant
127
+ * Overidentification test (p > 0.10 desired): xtabond provides the Sargan test via estat sargan (non-robust fit only); Hansen J is reported by xtabond2
128
+ ```
129
+
130
+ ### System GMM (Blundell-Bond)
131
+
132
+ ```stata
133
+ * System GMM (Blundell & Bond 1998)
134
+ * More efficient than difference GMM, especially with persistent series
135
+
136
+ xtabond2 profit l.profit revenue rnd_spending employees i.year, ///
137
+ gmm(l.profit, lag(2 4) collapse) ///
138
+ gmm(revenue rnd_spending, lag(2 3) collapse) ///
139
+ iv(employees i.year) ///
140
+ twostep robust orthogonal small
141
+
142
+ * Key diagnostics to report:
143
+ * 1. Number of instruments (should not exceed number of groups)
144
+ * 2. Hansen J test p-value (> 0.10, ideally > 0.25; values near 1.00 signal instrument proliferation)
145
+ * 3. AR(2) test p-value (> 0.10 for valid instruments)
146
+ * 4. Difference-in-Hansen test for subset of instruments
147
+ ```
148
+
149
+ ### GMM Diagnostic Checklist
150
+
151
+ | Test | Null Hypothesis | Desired Result | Stata Command |
152
+ |------|----------------|----------------|---------------|
153
+ | AR(1) | No first-order autocorrelation | Reject (p < 0.05) | Reported automatically |
154
+ | AR(2) | No second-order autocorrelation | Fail to reject (p > 0.10) | Reported automatically |
155
+ | Hansen J | Instruments are valid | Fail to reject (p > 0.10) | Reported automatically |
156
+ | Diff-in-Hansen | Level instruments valid | Fail to reject (p > 0.10) | Reported automatically |
157
+ | Instrument count | -- | N_instruments < N_groups | Check output |
158
+
159
+ ## Standard Error Options
160
+
161
+ ### Choosing the Right Standard Errors
162
+
163
+ ```stata
164
+ * Entity-clustered (default choice for firm panels)
165
+ xtreg profit revenue rnd_spending, fe cluster(firm_id)
166
+
167
+ * Two-way clustering (firm and year)
168
+ reghdfe profit revenue rnd_spending, ///
169
+ absorb(firm_id) cluster(firm_id year)
170
+
171
+ * Driscoll-Kraay standard errors (cross-sectional dependence)
172
+ xtscc profit revenue rnd_spending i.year, fe lag(3)
173
+
174
+ * Panel-corrected standard errors with AR(1) errors (Prais-Winsten; autocorrelation + heteroskedasticity)
175
+ xtreg profit revenue rnd_spending, fe
176
+ xtpcse profit revenue rnd_spending i.firm_id, correlation(ar1)
177
+ ```
178
+
179
+ ### Diagnostic Tests for Standard Error Selection
180
+
181
+ ```stata
182
+ * Test for heteroskedasticity in FE model
183
+ xtreg profit revenue rnd_spending, fe
184
+ xttest3 // Modified Wald test (rejects => use robust/cluster SE)
185
+
186
+ * Test for serial correlation
187
+ xtserial profit revenue rnd_spending
188
+ * Wooldridge test (rejects => use cluster SE or Newey-West)
189
+
190
+ * Test for cross-sectional dependence
191
+ xtreg profit revenue rnd_spending, fe
192
+ xtcsd, pesaran abs
193
+ * Pesaran CD test (rejects => consider Driscoll-Kraay SE)
194
+ ```
195
+
196
+ ## Advanced Specifications
197
+
198
+ ### Interaction Effects in Panel Models
199
+
200
+ ```stata
201
+ * Continuous x continuous interaction with FE
202
+ xtreg profit c.rnd_spending##c.market_share i.year, fe cluster(firm_id)
203
+
204
+ * Visualize marginal effect
205
+ margins, dydx(rnd_spending) at(market_share=(0(0.1)1))
206
+ marginsplot, title("Marginal Effect of R&D by Market Share")
207
+ ```
208
+
209
+ ### Instrumental Variables in Panel Data
210
+
211
+ ```stata
212
+ * IV with fixed effects (xtivreg)
213
+ xtivreg profit (rnd_spending = tax_credit regulatory_change) ///
214
+ employees size i.year, fe first
215
+
216
+ * First-stage F-statistic check
217
+ * Report Kleibergen-Paap rk Wald F for weak instruments (reported by xtivreg2/ivreghdfe, not by xtivreg)
218
+ ```
219
+
220
+ ### Correlated Random Effects (Mundlak)
221
+
222
+ ```stata
223
+ * Mundlak (1978) approach: include within-group means
224
+ foreach var of varlist revenue rnd_spending employees {
225
+ bysort firm_id: egen bar_`var' = mean(`var')
226
+ }
227
+
228
+ xtreg profit revenue rnd_spending employees ///
229
+ bar_revenue bar_rnd_spending bar_employees ///
230
+ i.year, re cluster(firm_id)
231
+
232
+ * Coefficients on time-varying vars are equivalent to FE estimates
233
+ * Coefficients on bar_ vars capture between-unit effects
234
+ ```
235
+
236
+ ## Publication Tables
237
+
238
+ ```stata
239
+ * Comparison table: FE vs RE vs GMM (run "estimates store gmm_model" right after the xtabond2 command first)
240
+ esttab fe_model re_model gmm_model using "tables/panel_comparison.tex", ///
241
+ b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) ///
242
+ label title("Panel Regression Results") ///
243
+ mtitles("Fixed Effects" "Random Effects" "System GMM") ///
244
+ stats(N N_g r2_w ar2p hansenp, ///
245
+ labels("Observations" "Firms" "Within R-squared" ///
246
+ "AR(2) p-value" "Hansen p-value") ///
247
+ fmt(0 0 3 3 3)) ///
248
+ addnotes("Clustered standard errors in parentheses." ///
249
+ "All models include year fixed effects.") ///
250
+ replace
251
+ ```
252
+
253
+ ## References
254
+
255
+ - Wooldridge, J.M. (2010), Econometric Analysis of Cross Section and Panel Data, 2nd ed., MIT Press
256
+ - Arellano & Bond (1991), "Some Tests of Specification for Panel Data," RES 58(2)
257
+ - Blundell & Bond (1998), "Initial Conditions and Moment Restrictions in Dynamic Panel Data Models," JoE 87(1)
258
+ - Roodman (2009), "How to Do xtabond2: An Introduction to Difference and System GMM in Stata," SJ 9(1)
259
+ - Cameron & Trivedi (2005), Microeconometrics: Methods and Applications, Cambridge University Press