my-markdown-library 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/F24LS_md/ Lecture 4 - Public.md +347 -0
- data/F24LS_md/Lecture 1 - Introduction and Overview.md +327 -0
- data/F24LS_md/Lecture 10 - Development_.md +631 -0
- data/F24LS_md/Lecture 11 - Econometrics.md +345 -0
- data/F24LS_md/Lecture 12 - Finance.md +692 -0
- data/F24LS_md/Lecture 13 - Environmental Economics.md +299 -0
- data/F24LS_md/Lecture 15 - Conclusion.md +272 -0
- data/F24LS_md/Lecture 2 - Demand.md +349 -0
- data/F24LS_md/Lecture 3 - Supply.md +329 -0
- data/F24LS_md/Lecture 5 - Production C-D.md +291 -0
- data/F24LS_md/Lecture 6 - Utility and Latex.md +440 -0
- data/F24LS_md/Lecture 7 - Inequality.md +607 -0
- data/F24LS_md/Lecture 8 - Macroeconomics.md +704 -0
- data/F24LS_md/Lecture 8 - Macro.md +700 -0
- data/F24LS_md/Lecture 9 - Game Theory_.md +436 -0
- data/F24LS_md/summary.yaml +105 -0
- data/F24Lec_MD/LecNB_summary.yaml +206 -0
- data/F24Lec_MD/lec01/lec01.md +267 -0
- data/F24Lec_MD/lec02/Avocados_demand.md +425 -0
- data/F24Lec_MD/lec02/Demand_Steps_24.md +126 -0
- data/F24Lec_MD/lec02/PriceElasticity.md +83 -0
- data/F24Lec_MD/lec02/ScannerData_Beer.md +171 -0
- data/F24Lec_MD/lec02/demand-curve-Fa24.md +213 -0
- data/F24Lec_MD/lec03/3.0-CubicCostCurve.md +239 -0
- data/F24Lec_MD/lec03/3.1-Supply.md +274 -0
- data/F24Lec_MD/lec03/3.2-sympy.md +332 -0
- data/F24Lec_MD/lec03/3.3a-california-energy.md +120 -0
- data/F24Lec_MD/lec03/3.3b-a-really-hot-tuesday.md +121 -0
- data/F24Lec_MD/lec04/lec04-CSfromSurvey-closed.md +335 -0
- data/F24Lec_MD/lec04/lec04-CSfromSurvey.md +331 -0
- data/F24Lec_MD/lec04/lec04-Supply-Demand-closed.md +519 -0
- data/F24Lec_MD/lec04/lec04-Supply-Demand.md +514 -0
- data/F24Lec_MD/lec04/lec04-four-plot-24.md +34 -0
- data/F24Lec_MD/lec04/lec04-four-plot.md +34 -0
- data/F24Lec_MD/lec05/Lec5-Cobb-Douglas.md +131 -0
- data/F24Lec_MD/lec05/Lec5-CobbD-AER1928.md +283 -0
- data/F24Lec_MD/lec06/6.1-Sympy-Differentiation.md +253 -0
- data/F24Lec_MD/lec06/6.2-3D-utility.md +287 -0
- data/F24Lec_MD/lec06/6.3-QuantEcon-Optimization.md +399 -0
- data/F24Lec_MD/lec06/6.4-latex.md +138 -0
- data/F24Lec_MD/lec06/6.5-Edgeworth.md +269 -0
- data/F24Lec_MD/lec07/7.1-inequality.md +283 -0
- data/F24Lec_MD/lec07/7.2-historical-inequality.md +237 -0
- data/F24Lec_MD/lec08/macro-fred-api.md +313 -0
- data/F24Lec_MD/lec09/lecNB-prisoners-dilemma.md +88 -0
- data/F24Lec_MD/lec10/Lec10.2-waterguard.md +401 -0
- data/F24Lec_MD/lec10/lec10.1-mapping.md +199 -0
- data/F24Lec_MD/lec11/11.1-slr.md +305 -0
- data/F24Lec_MD/lec11/11.2-mlr.md +171 -0
- data/F24Lec_MD/lec12/Lec12-4-PersonalFinance.md +590 -0
- data/F24Lec_MD/lec12/lec12-1_Interest_Payments.md +267 -0
- data/F24Lec_MD/lec12/lec12-2-stocks-options.md +235 -0
- data/F24Lec_MD/lec13/Co2_ClimateChange.md +139 -0
- data/F24Lec_MD/lec13/ConstructingMAC.md +213 -0
- data/F24Lec_MD/lec13/EmissionsTracker.md +170 -0
- data/F24Lec_MD/lec13/KuznetsHypothesis.md +219 -0
- data/F24Lec_MD/lec13/RoslingPlots.md +217 -0
- data/F24Lec_MD/lec15/vibecession.md +485 -0
- data/F24Textbook_MD/00-intro/index.md +292 -0
- data/F24Textbook_MD/01-demand/01-demand.md +152 -0
- data/F24Textbook_MD/01-demand/02-example.md +131 -0
- data/F24Textbook_MD/01-demand/03-log-log.md +284 -0
- data/F24Textbook_MD/01-demand/04-elasticity.md +248 -0
- data/F24Textbook_MD/01-demand/index.md +15 -0
- data/F24Textbook_MD/02-supply/01-supply.md +203 -0
- data/F24Textbook_MD/02-supply/02-eep147-example.md +86 -0
- data/F24Textbook_MD/02-supply/03-sympy.md +138 -0
- data/F24Textbook_MD/02-supply/04-market-equilibria.md +204 -0
- data/F24Textbook_MD/02-supply/index.md +16 -0
- data/F24Textbook_MD/03-public/govt-intervention.md +73 -0
- data/F24Textbook_MD/03-public/index.md +10 -0
- data/F24Textbook_MD/03-public/surplus.md +351 -0
- data/F24Textbook_MD/03-public/taxes-subsidies.md +282 -0
- data/F24Textbook_MD/04-production/index.md +15 -0
- data/F24Textbook_MD/04-production/production.md +178 -0
- data/F24Textbook_MD/04-production/shifts.md +296 -0
- data/F24Textbook_MD/05-utility/budget-constraints.md +166 -0
- data/F24Textbook_MD/05-utility/index.md +15 -0
- data/F24Textbook_MD/05-utility/utility.md +136 -0
- data/F24Textbook_MD/06-inequality/historical-inequality.md +253 -0
- data/F24Textbook_MD/06-inequality/index.md +15 -0
- data/F24Textbook_MD/06-inequality/inequality.md +226 -0
- data/F24Textbook_MD/07-game-theory/bertrand.md +257 -0
- data/F24Textbook_MD/07-game-theory/cournot.md +333 -0
- data/F24Textbook_MD/07-game-theory/equilibria-oligopolies.md +96 -0
- data/F24Textbook_MD/07-game-theory/expected-utility.md +61 -0
- data/F24Textbook_MD/07-game-theory/index.md +19 -0
- data/F24Textbook_MD/07-game-theory/python-classes.md +340 -0
- data/F24Textbook_MD/08-development/index.md +35 -0
- data/F24Textbook_MD/09-macro/CentralBanks.md +101 -0
- data/F24Textbook_MD/09-macro/Indicators.md +77 -0
- data/F24Textbook_MD/09-macro/fiscal_policy.md +36 -0
- data/F24Textbook_MD/09-macro/index.md +14 -0
- data/F24Textbook_MD/09-macro/is_curve.md +76 -0
- data/F24Textbook_MD/09-macro/phillips_curve.md +70 -0
- data/F24Textbook_MD/10-finance/index.md +10 -0
- data/F24Textbook_MD/10-finance/options.md +178 -0
- data/F24Textbook_MD/10-finance/value-interest.md +60 -0
- data/F24Textbook_MD/11-econometrics/index.md +16 -0
- data/F24Textbook_MD/11-econometrics/multivariable.md +218 -0
- data/F24Textbook_MD/11-econometrics/reading-econ-papers.md +25 -0
- data/F24Textbook_MD/11-econometrics/single-variable.md +483 -0
- data/F24Textbook_MD/11-econometrics/statsmodels.md +58 -0
- data/F24Textbook_MD/12-environmental/KuznetsHypothesis-Copy1.md +187 -0
- data/F24Textbook_MD/12-environmental/KuznetsHypothesis.md +187 -0
- data/F24Textbook_MD/12-environmental/MAC.md +254 -0
- data/F24Textbook_MD/12-environmental/index.md +36 -0
- data/F24Textbook_MD/LICENSE.md +11 -0
- data/F24Textbook_MD/intro.md +26 -0
- data/F24Textbook_MD/references.md +25 -0
- data/F24Textbook_MD/summary.yaml +414 -0
- metadata +155 -0
@@ -0,0 +1,401 @@
|
|
1
|
+
---
|
2
|
+
title: "Lec10.2-waterguard"
|
3
|
+
type: lecture-notebook
|
4
|
+
week: 10
|
5
|
+
source_path: "/Users/ericvandusen/Documents/Data88E-ForTraining/F24Lec_NBs/lec10/Lec10.2-waterguard.ipynb"
|
6
|
+
---
|
7
|
+
|
8
|
+
<table style="width: 100%;">
|
9
|
+
<tr style="background-color: transparent;"><td>
|
10
|
+
<img src="https://data-88e.github.io/assets/images/blue_text.png" width="250px" style="margin-left: 0;" />
|
11
|
+
</td><td>
|
12
|
+
<p style="text-align: right; font-size: 10pt;"><strong>Economic Models</strong>, Fall 2024<br>
|
13
|
+
Dr. Eric Van Dusen</p></td></tr>
|
14
|
+
</table>
|
15
|
+
|
16
|
+
# Lec 9 : Water Guard Randomized Controlled Trial
|
17
|
+
|
18
|
+
This Lecture Notebook is an adaptation from a set of notebooks developed for a full semester Data Science Connector Course taught in Fall 2017, entitled "Behind the Curtain in Economic Development". This dataset comes from a randomized controlled trial household survey carried out in Eastern Kenya in 2007-2008.
|
19
|
+
|
20
|
+
The purpose of the study was to understand how to promote the use of WaterGuard, a dilute sodium hypochlorite solution that was promoted for Point-of-use household water disinfection. There were seven arms in the study, which will be more fully described in the following chart:
|
21
|
+
|
22
|
+
<img src="Slide1.png" />
|
23
|
+
|
24
|
+
Within this table you can see the seven treatment arms - control plus three treatments - in the bolded boxes in the middle with the number of springs and households. The study was carried out as a part of a study of households who gather drinking water from springs in a rural area. The three boxes at the bottom describe the three rounds of data collection - a baseline before the treatment, and a short term and long term follow-up.
|
25
|
+
|
26
|
+
<!-- **Notebook Outline**
|
27
|
+
|
28
|
+
1. [Mapping](#Mapping)
|
29
|
+
2. [Balance Check](#Balance)
|
30
|
+
3. [Baseline and a Randomly Selected Compound](#Baseline)
|
31
|
+
4. [Chlorine Usage outcome variables](#Chlorine)
|
32
|
+
5. [Graph of outcomes by Treatment Arm](#Graph) -->
|
33
|
+
|
34
|
+
```python
|
35
|
+
from datascience import *
|
36
|
+
import numpy as np
|
37
|
+
import matplotlib.pyplot as plt
|
38
|
+
%matplotlib inline
|
39
|
+
import pandas as pd
|
40
|
+
from pandas import read_stata
|
41
|
+
```
|
42
|
+
|
43
|
+
<!-- END QUESTION -->
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
<div id="Balance"></div>
|
48
|
+
|
49
|
+
## Balance Check and Variable Names
|
50
|
+
|
51
|
+
### Baseline Survey
|
52
|
+
This is our first look at the survey dataset. These are a limited set of questions/answers from a simple and short baseline survey. However it is a lot bigger and messier than the datasets we have seen so far and in Data 8.
|
53
|
+
|
54
|
+
Data variable names follow along with the survey below, referred to by the section, a,b,c... number, 1,2,3... and a few words about the question.
|
55
|
+
|
56
|
+
The purpose of this section will be:
|
57
|
+
* to get a familiarity with the dataset,
|
58
|
+
* to look at some background descriptor variables of the households,
|
59
|
+
* to start to think about missing values and coding of subsets of the data.
|
60
|
+
* to check the randomization of households by seeing if the different arms of the study are balanced across some of the key baseline variables.
|
61
|
+
|
62
|
+
**The surveys that illustrate the raw data names are in a file linked [here](https://drive.google.com/open?id=1UVoiVn7LJ4rn7WEb-9BJ96jmdJ2FBk60). You have to go and look through this survey to understand the variables.**
|
63
|
+
|
64
|
+
**The code sheet that has the codes for some of the possible answers is in a file linked [here](https://drive.google.com/file/d/1iinJXExeVKV4Dm7tRKOiotoYUDSXMyqc). You have to go and look through this code sheet in a later section.**
|
65
|
+
|
66
|
+
```python
|
67
|
+
WGP_baseline = Table.read_table("WGP_baseline_Data8.csv")
|
68
|
+
WGP_baseline
|
69
|
+
```
|
70
|
+
|
71
|
+
```python
|
72
|
+
baseline = pd.read_csv("WGP_baseline_Data8.csv")
|
73
|
+
baseline
|
74
|
+
```
|
75
|
+
|
76
|
+
```python
|
77
|
+
baseline.dropna(axis = 0)
|
78
|
+
```
|
79
|
+
|
80
|
+
### Missing values
|
81
|
+
|
82
|
+
If you look through the dataset above, and scroll to the right a ways to some of the last variables, you will notice that there are a lot of cells with NaN, which means a missing value. For these cells no data was entered at the time of data entry. In some cases it may be appropriate to enter a zero and carry on with the analysis.
|
83
|
+
|
84
|
+
```python
|
85
|
+
WGP_base_dfna = WGP_baseline.to_df().fillna(0)
|
86
|
+
WGP_table = Table.from_df(WGP_base_dfna)
|
87
|
+
WGP_table
|
88
|
+
```
|
89
|
+
|
90
|
+
Look at the variable names, and then look at the survey form to find the concordance of codes
|
91
|
+
|
92
|
+
```python
|
93
|
+
# Here is a list of all of the possible categories / columns
|
94
|
+
list(WGP_table)
|
95
|
+
```
|
96
|
+
|
97
|
+
### What are some Variables that we want to specifically look at? ###
|
98
|
+
|
99
|
+
There are a lot of variables here and it can be kind of overwhelming, but it is good to see how many columns there can be in a comprehensive survey dataset.
|
100
|
+
|
101
|
+
#### Front Page information - A variables
|
102
|
+
|
103
|
+
- household id
|
104
|
+
- spring id
|
105
|
+
- interviewer id
|
106
|
+
|
107
|
+
#### Information about respondent - B variables
|
108
|
+
|
109
|
+
- tribe
|
110
|
+
- education
|
111
|
+
- age
|
112
|
+
- gender
|
113
|
+
- group membership
|
114
|
+
|
115
|
+
#### Water Guard Use - C variables
|
116
|
+
|
117
|
+
For Waterguard (WG) usage
|
118
|
+
|
119
|
+
- `c1a` - Whether the respondent has ever heard of WG
|
120
|
+
- `c2a` - Whether the respondent has ever used WG
|
121
|
+
- `c3a` - Whether the respondent's water is currently treated with WG
|
122
|
+
- `c4a` - Whether the respondent has used WG in the past month
|
123
|
+
|
124
|
+
#### Durable / Capital Goods - D variables
|
125
|
+
|
126
|
+
- Whether the respondent has electricity / latrine / iron roof
|
127
|
+
- Number of bicycles / radios / hoes / beds owned
|
128
|
+
- Number of animals owned
|
129
|
+
|
130
|
+
#### Child Health - E variables
|
131
|
+
|
132
|
+
- `e1_num_kids_under_5`: Number of kids under 5
|
133
|
+
- `e2_`: This table becomes tricky because it has a different format. Each kid in the table is numbered 01, 02 and so on, and then the subsequent questions are keyed to that child number. e.g. `e2e_01_d_diarrhea`, `e2e_02_d_diarrhea` represent whether child 1 and 2 respectively have diarrhea. In total, four diseases are recorded:
|
134
|
+
- Cough
|
135
|
+
- Diarrhea
|
136
|
+
- Malaria
|
137
|
+
- Vomiting
|
138
|
+
|
139
|
+
### The Treatment Arm
|
140
|
+
|
141
|
+
In the study, arm 1 is control, while Arms 2-7 are different types of treatment interventions:
|
142
|
+
|
143
|
+
- Arm 1 - Control
|
144
|
+
- Arm 2 - Household Script
|
145
|
+
- Arm 3 - Community Script
|
146
|
+
- Arm 4 - HH + Community Script
|
147
|
+
- Arm 5 - Flat-Fee Promoter + Coupons
|
148
|
+
- Arm 6 - Incentivized Promoter + Coupons
|
149
|
+
- Arm 7 - Incentivized Promoter + Dispenser at Spring
|
150
|
+
|
151
|
+
Let's check how many households are in each treatment arm.
|
152
|
+
|
153
|
+
```python
|
154
|
+
WGP_table.group("treatment_arm")
|
155
|
+
```
|
156
|
+
|
157
|
+
### Baseline Check - Exposure to Water Guard Use
|
158
|
+
|
159
|
+
Let's see how many households have ever used Water Guard.
|
160
|
+
|
161
|
+
The data is currently Coded as 1 = Yes and 2 = No, so we can't really make sense of the Mean of the variable in its current form. Instead, we will make a new column/variable with the 1 or 2 answers translated into Yes or No.
|
162
|
+
Notably, we must first filter out respondents that had missing values (with value 0) for this question.
|
163
|
+
|
164
|
+
```python
|
165
|
+
WGP_ever = WGP_table.where('c2a_wg_used_ever', are.above(0))
|
166
|
+
WGP_ever.group("c2a_wg_used_ever")
|
167
|
+
```
|
168
|
+
|
169
|
+
```python
|
170
|
+
#This helper function goes through a column of choice, and spits out yes or no based off each value in the column. It returns an array of these yes and no's
|
171
|
+
def translate_to_yesno(table, col):
    """Translate a 1/2-coded survey column into 'Yes' / 'No' labels.

    Rows whose value in ``col`` is not above 0 are dropped before
    translating (0 is the placeholder this notebook uses for a missing
    answer), so the result has one entry per remaining row, in table order.

    Parameters
    ----------
    table : table object supporting ``.where(label, predicate)`` and
        ``.column(label)`` (the notebook passes ``datascience`` Tables).
    col : label of the coded column to translate.

    Returns
    -------
    list of str
        'Yes' where the code equals 1, 'No' for every other remaining code.
    """
    answered = table.where(col, are.above(0))
    # Fetch the column once instead of looking it up again for every row.
    codes = answered.column(col)
    # Only 1 maps to Yes; any other positive code (expected to be 2) is No.
    return ['Yes' if code == 1 else 'No' for code in codes]
|
180
|
+
```
|
181
|
+
|
182
|
+
```python
|
183
|
+
new = translate_to_yesno(WGP_ever, 'c2a_wg_used_ever')
|
184
|
+
WGP_ever = WGP_ever.with_column('c2a_wg_used_ever',new)
|
185
|
+
WGP_ever.group('c2a_wg_used_ever')
|
186
|
+
```
|
187
|
+
|
188
|
+
### Pivoting and Balance Checks
|
189
|
+
|
190
|
+
Now we will use a command called **Pivot** to create a new table that has the percent of households who have ever used Water Guard within each Treatment Arm.
|
191
|
+
|
192
|
+
We can first use it to do a **balance check** for Water Guard use across Arms.
|
193
|
+
|
194
|
+
```python
|
195
|
+
ever_yesno = WGP_ever.pivot('c2a_wg_used_ever','treatment_arm')
|
196
|
+
ever_yesno
|
197
|
+
```
|
198
|
+
|
199
|
+
Converting to percentages...
|
200
|
+
|
201
|
+
```python
|
202
|
+
total = ever_yesno.column(1) + ever_yesno.column(2)
|
203
|
+
ever_yesno = ever_yesno.with_columns('Percent No',ever_yesno.column(1) / total * 100,
|
204
|
+
'Percent Yes', ever_yesno.column(2) / total * 100)
|
205
|
+
ever_yesno
|
206
|
+
```
|
207
|
+
|
208
|
+
Let's also repeat the process for the variable of whether the households are currently using Water Guard, `c3a_wg_water_currently_treat`.
|
209
|
+
|
210
|
+
```python
|
211
|
+
WGP_current = WGP_table.where('c3a_wg_water_currently_treat',are.not_equal_to(0))
|
212
|
+
new2 = translate_to_yesno(WGP_current,'c3a_wg_water_currently_treat')
|
213
|
+
WGP_current = WGP_current.with_column('c3a_wg_water_currently_treat',new2)
|
214
|
+
WGP_current.group("c3a_wg_water_currently_treat")
|
215
|
+
```
|
216
|
+
|
217
|
+
Do you notice a problem here? Look at the total numbers reported in the output above.
|
218
|
+
|
219
|
+
We can do the same percentage tables for the balance check but maybe there's a problem.
|
220
|
+
Look at the total number of households answering the question and compare that to the total number from the previous section.
|
221
|
+
|
222
|
+
```python
|
223
|
+
current_yesno = WGP_current.pivot('c3a_wg_water_currently_treat','treatment_arm')
|
224
|
+
total = current_yesno.column(1) + current_yesno.column(2)
|
225
|
+
current_yesno = current_yesno.with_columns('Percent No',current_yesno.column(1)/total * 100,
|
226
|
+
'Percent Yes', current_yesno.column(2)/total * 100)
|
227
|
+
current_yesno
|
228
|
+
```
|
229
|
+
|
230
|
+
This seems like a really high usage, but **maybe this is due to missing values**.
|
231
|
+
|
232
|
+
Let's now also include the 0 (missing) values in our analysis.
|
233
|
+
|
234
|
+
```python
|
235
|
+
current_yesnomissing = WGP_table.pivot('c3a_wg_water_currently_treat','treatment_arm')
|
236
|
+
total = current_yesnomissing.column(1) + current_yesnomissing.column(2) + current_yesnomissing.column(3)
|
237
|
+
current_yesnomissing = current_yesnomissing.with_columns(
|
238
|
+
'Percent Missing',current_yesnomissing.column("0.0") / total * 100,
|
239
|
+
'Percent No',current_yesnomissing.column("2.0") / total * 100,
|
240
|
+
'Percent Yes', current_yesnomissing.column("1.0") / total * 100)
|
241
|
+
current_yesnomissing
|
242
|
+
```
|
243
|
+
|
244
|
+
<!-- END QUESTION -->
|
245
|
+
|
246
|
+
|
247
|
+
|
248
|
+
<div id="Baseline"></div>
|
249
|
+
|
250
|
+
## Baseline and a Randomly Selected Compound
|
251
|
+
|
252
|
+
Let's describe a household selected at random.
|
253
|
+
|
254
|
+
First, we will extract the household/compound id into an array.
|
255
|
+
|
256
|
+
```python
|
257
|
+
hhld_array = WGP_table.column('a1_cmpd_id')
|
258
|
+
hhld_array
|
259
|
+
```
|
260
|
+
|
261
|
+
Next, we will draw randomly from this array.
|
262
|
+
|
263
|
+
```python
|
264
|
+
randomhh = np.random.choice(hhld_array)
|
265
|
+
print("My randomly selected household is household number", randomhh)
|
266
|
+
```
|
267
|
+
|
268
|
+
Then, let's look at the data for our randomly selected household:
|
269
|
+
|
270
|
+
```python
|
271
|
+
myfamily = WGP_table.where("a1_cmpd_id",randomhh)
|
272
|
+
myfamily
|
273
|
+
```
|
274
|
+
|
275
|
+
Some of the variables may need some manipulation.
|
276
|
+
Let's start with the age of the respondent:
|
277
|
+
|
278
|
+
```python
|
279
|
+
birthyear = myfamily.column("b3_birth_year").item(0)
|
280
|
+
surveyyear = myfamily.column("a5_date_interview_year").item(0)
|
281
|
+
agecalc = surveyyear-birthyear #
|
282
|
+
agecalc
|
283
|
+
```
|
284
|
+
|
285
|
+
And their tribe:
|
286
|
+
|
287
|
+
```python
|
288
|
+
print("Survey respondent Tribe", myfamily.column("b5_tribe").item(0))
|
289
|
+
print("Respondent Spouse Tribe", myfamily.column("b7_tribe_spouse").item(0))
|
290
|
+
```
|
291
|
+
|
292
|
+
Lastly, whether they have a latrine:
|
293
|
+
|
294
|
+
```python
|
295
|
+
print("Does the household have a latrine?", myfamily.column("d3_latrine").item(0))
|
296
|
+
```
|
297
|
+
|
298
|
+
Remember in the answer above it is coded so that 1=Yes and 2=No.
|
299
|
+
|
300
|
+
<!-- BEGIN QUESTION -->
|
301
|
+
|
302
|
+
**Question 3:** Describe your randomly selected household and the respondent who is answering the survey. Please remember you can find the code sheet under the section of Baseline Survey.
|
303
|
+
|
304
|
+
1. Age
|
305
|
+
2. Tribe
|
306
|
+
3. Education
|
307
|
+
4. Member of any groups b11-b15?
|
308
|
+
5. Occupation
|
309
|
+
6. Religion
|
310
|
+
7. A summary of D variables, iron roof, floor materials, latrine, cattle, and others
|
311
|
+
8. Have they ever used WG?
|
312
|
+
9. Their treatment arm assignment
|
313
|
+
10. How many children do they have
|
314
|
+
11. Gender and Age of children
|
315
|
+
12. Have any of the children been sick?
|
316
|
+
|
317
|
+
<!--
|
318
|
+
BEGIN QUESTION
|
319
|
+
name: q3
|
320
|
+
manual: true
|
321
|
+
-->
|
322
|
+
|
323
|
+
_Type your answer here, replacing this text._
|
324
|
+
|
325
|
+
<!-- END QUESTION -->
|
326
|
+
|
327
|
+
|
328
|
+
|
329
|
+
<div id="Chlorine"></div>
|
330
|
+
|
331
|
+
## Water Guard Usage outcome variables
|
332
|
+
|
333
|
+
### WGP Followup - Variability
|
334
|
+
The purpose of this section will be to continue on with the follow-up rounds of the Water Guard Promotion study. In this section we have both the household reported use, and the use validated by checking the chlorine content of the water using a test kit.
|
335
|
+
|
336
|
+
```python
|
337
|
+
WGP3rds_table = Table.read_table('WGP_3waves_Data8.csv')
|
338
|
+
WGP3rds_table
|
339
|
+
```
|
340
|
+
|
341
|
+
This is a large dataset, basically three datasets merged together, one for baseline, one for short term follow up and one for long term followup. The column `round` describes these 3 time steps:
|
342
|
+
|
343
|
+
- Round = 1 : baseline
|
344
|
+
- Round = 2 : 3 week followup
|
345
|
+
- Round = 3 : 3 month followup
|
346
|
+
|
347
|
+
Notably, many of the variables are only asked in one of the three rounds. For example, the chlorine use variables are:
|
348
|
+
|
349
|
+
- The variable for self reported chlorine use was `c6n` in Round 2, and `c5n` in Round 3.
|
350
|
+
- The variable for validated chlorine use is `c12n21pnk` in Round 2 and `c15npt2or1pnk` in Round 3.
|
351
|
+
|
352
|
+
Instead, the following variables have been combined across rounds for the ease of programming:
|
353
|
+
|
354
|
+
- `Selfrptpct` is self reported chlorine use in both round 2 and round 3
|
355
|
+
- `Vldclpct` is validated chlorine use in both rounds
|
356
|
+
|
357
|
+
```python
|
358
|
+
WGP3rds_table.group("treatment_arm")
|
359
|
+
```
|
360
|
+
|
361
|
+
```python
|
362
|
+
WGP3rds_table.group('round')
|
363
|
+
```
|
364
|
+
|
365
|
+
### Grouping by round + treatment arm
|
366
|
+
|
367
|
+
We want to create a multi-level group: each group should be a unique combination of the survey round and the treatment arm.
|
368
|
+
|
369
|
+
```python
|
370
|
+
WGP_3rds_outcomesonly= WGP3rds_table.select("round", "treatment_arm", "Selfrptpct", "Vldclpct")
|
371
|
+
WGP_3rds_outcomesonly.group(["round","treatment_arm"], np.mean).show(30)
|
372
|
+
```
|
373
|
+
|
374
|
+
### Making a smaller dataset
|
375
|
+
|
376
|
+
Let's break out a smaller dataset of the variables we want to focus on; just for Round 2 and the outcome variables.
|
377
|
+
|
378
|
+
```python
|
379
|
+
WGPRd2 = WGP3rds_table.where("round", 2).select("a1_cmpd_id","treatment_arm",
|
380
|
+
"c6_current_water_treated_wg",
|
381
|
+
'c6_curr_water_treat_other_c',
|
382
|
+
'c12_chlorine_meter_reading',
|
383
|
+
'c11_chlorine_color','c12n21pnk', 'c6n'
|
384
|
+
)
|
385
|
+
WGPRd2
|
386
|
+
```
|
387
|
+
|
388
|
+
A quick examination of the estimated Water Guard usage in Round 2 across all treatment arms:
|
389
|
+
|
390
|
+
```python
|
391
|
+
np.mean(WGPRd2.column('c12n21pnk'))
|
392
|
+
```
|
393
|
+
|
394
|
+
```python
|
395
|
+
|
396
|
+
```
|
397
|
+
|
398
|
+
```python
|
399
|
+
|
400
|
+
```
|
401
|
+
|
@@ -0,0 +1,199 @@
|
|
1
|
+
---
|
2
|
+
title: "lec10.1-mapping"
|
3
|
+
type: lecture-notebook
|
4
|
+
week: 10
|
5
|
+
source_path: "/Users/ericvandusen/Documents/Data88E-ForTraining/F24Lec_NBs/lec10/lec10.1-mapping.ipynb"
|
6
|
+
---
|
7
|
+
|
8
|
+
<table style="width: 100%;">
|
9
|
+
<tr style="background-color: transparent;"><td>
|
10
|
+
<img src="https://data-88e.github.io/assets/images/blue_text.png" width="250px" style="margin-left: 0;" />
|
11
|
+
</td><td>
|
12
|
+
<p style="text-align: right; font-size: 10pt;"><strong>Economic Models</strong>, Fall 2024
|
13
|
+
<br>
|
14
|
+
Dr. Eric Van Dusen</p></td></tr>
|
15
|
+
</table>
|
16
|
+
|
17
|
+
# Lec9: Water Guard Randomized Controlled Trial
|
18
|
+
|
19
|
+
This notebook is an adaptation from a set of notebooks developed for a full semester Data Science Connector Course taught in Fall 2017, entitled "Behind the Curtain in Economic Development". This dataset comes from a randomized controlled trial household survey carried out in Eastern Kenya in 2007-2008.
|
20
|
+
|
21
|
+
The purpose of the study was to understand how to promote the use of WaterGuard, a dilute sodium hypochlorite solution that was promoted for Point-of-use household water disinfection. There were seven arms in the study, which will be more fully described in the following chart:
|
22
|
+
|
23
|
+
<img src="Slide1.png" />
|
24
|
+
|
25
|
+
Within this table you can see the seven treatment arms - control plus three treatments - in the bolded boxes in the middle with the number of springs and households. The study was carried out as a part of a study of households who gather drinking water from springs in a rural area. The three boxes at the bottom describe the three rounds of data collection - a baseline before the treatment, and a short term and long term follow-up.
|
26
|
+
|
27
|
+
<!-- **Notebook Outline**
|
28
|
+
|
29
|
+
1. [Mapping](#Mapping)
|
30
|
+
2. [Balance Check](#Balance)
|
31
|
+
3. [Baseline and a Randomly Selected Compound](#Baseline)
|
32
|
+
4. [Chlorine Usage outcome variables](#Chlorine)
|
33
|
+
5. [Graph of outcomes by Treatment Arm](#Graph) -->
|
34
|
+
|
35
|
+
```python
|
36
|
+
from datascience import *
|
37
|
+
import numpy as np
|
38
|
+
import matplotlib.pyplot as plt
|
39
|
+
%matplotlib inline
|
40
|
+
import pandas as pd
|
41
|
+
from pandas import read_stata
|
42
|
+
from ipyleaflet import Map, basemaps, Marker, AwesomeIcon
|
43
|
+
```
|
44
|
+
|
45
|
+
## Mapping
|
46
|
+
|
47
|
+
<div id="Mapping"></div>
|
48
|
+
|
49
|
+
This first section works with a package in Jupyter called ipyleaflet.
|
50
|
+
|
51
|
+
`ipyleaflet`;
|
52
|
+
the documentation is [here](https://ipyleaflet.readthedocs.io/en/latest/)
|
53
|
+
and it is worth a short read through if you are interested.
|
54
|
+
|
55
|
+
|
56
|
+
We want to use two different base maps - one is a satellite layer and one is the Open Street Map layer.
|
57
|
+
|
58
|
+
We will start by reading in a dataset of the coordinates of the springs that are used in the WaterGuard Promotion (WGP) study. These springs were randomized into seven different treatment arms. The springs are identified by a unique numerical id tag, and the common name in the local language.
|
59
|
+
|
60
|
+
```python
|
61
|
+
springsGPS = Table.read_table('WGPgps_forData8.csv')
|
62
|
+
springsGPS
|
63
|
+
```
|
64
|
+
|
65
|
+
```python
|
66
|
+
# make a table wth just the North and East Gps columns
|
67
|
+
locations = springsGPS.select("gpsn1", "gpse1")
|
68
|
+
locations
|
69
|
+
```
|
70
|
+
|
71
|
+
Where in the world are we?
|
72
|
+
|
73
|
+
First of all, let's look at the mean of the latitude and longitude so that we can center our map there.
|
74
|
+
|
75
|
+
```python
|
76
|
+
|
77
|
+
mean_longitude = springsGPS.column('gpse1').mean()
|
78
|
+
mean_latitude = springsGPS.column('gpsn1').mean()
|
79
|
+
|
80
|
+
print("Mean of 'gpse1':", mean_longitude)
|
81
|
+
print("Mean of 'gpsn1':", mean_latitude)
|
82
|
+
```
|
83
|
+
|
84
|
+
The code cell below should display a map. However, it may not run the first time you click it - if this happens, try running all the cells above this one and then refreshing your browser. After a few refreshes, the maps should load.
|
85
|
+
|
86
|
+
```python
|
87
|
+
|
88
|
+
center = [0.4, 34.4]
|
89
|
+
zoom = 12
|
90
|
+
basemap=basemaps.Esri.WorldImagery
|
91
|
+
layout={'width': '800px', 'height': '600px'}
|
92
|
+
|
93
|
+
Map(basemap=basemap, center=center, zoom=zoom, layout=layout)
|
94
|
+
```
|
95
|
+
|
96
|
+
Let's make a map of our sample sites (springs).
|
97
|
+
|
98
|
+
```python
|
99
|
+
m = Map(basemap=basemap, center=center, zoom=zoom, layout=layout)
|
100
|
+
|
101
|
+
# Iterate through the rows in the dataset
|
102
|
+
for row in springsGPS.rows:
|
103
|
+
latitude = row.item('gpsn1')
|
104
|
+
longitude = row.item('gpse1')
|
105
|
+
marker = Marker(location=(latitude, longitude))
|
106
|
+
m.add_layer(marker)
|
107
|
+
|
108
|
+
m
|
109
|
+
```
|
110
|
+
|
111
|
+
Now the most interesting bit of data is still not being used, the Treatment Arm. Let's assign different colors to the different treatment arms so that when we map it we can see if the arms appear to be randomly distributed.
|
112
|
+
|
113
|
+
The following function assigns the 7 different treatment arms to a set of colors. [Here](https://www.w3.org/TR/css3-color/#html4) is the colors reference if you are interested!
|
114
|
+
|
115
|
+
```python
|
116
|
+
def color(arm):
    """Return the marker color name assigned to a treatment arm.

    Parameters
    ----------
    arm : treatment arm number (1 through 7).

    Returns
    -------
    str or None
        The color name for arms 1-7, or None for any other value.
    """
    arm_colors = {
        1: 'black',
        2: 'red',
        3: 'purple',
        4: 'green',
        5: 'blue',
        6: 'pink',
        7: 'orange',
    }
    # .get makes the fall-through explicit: unknown arms yield None,
    # which is what the if/elif chain returned implicitly.
    return arm_colors.get(arm)
|
131
|
+
```
|
132
|
+
|
133
|
+
```python
|
134
|
+
# Using the .apply method, you can apply any function to a data frame
|
135
|
+
colors = springsGPS.apply(color, "treatment_arm")
|
136
|
+
springsGPS = springsGPS.with_column("color", colors)
|
137
|
+
springsGPS
|
138
|
+
```
|
139
|
+
|
140
|
+
```python
|
141
|
+
|
142
|
+
# Map of the springs on the default basemap, one marker per spring,
# tinted by the color assigned to the spring's treatment arm.
m = Map(center=center, zoom=zoom, layout=layout)

for row in springsGPS.rows:
    latitude = row.item('gpsn1')
    longitude = row.item('gpse1')
    # Deliberately not named `color`: that would overwrite the color()
    # function, and re-running springsGPS.apply(color, ...) would then fail.
    marker_color = row.item('color')

    marker = Marker(
        location=(latitude, longitude),
        draggable=False,  # Set to True if you want to make the markers draggable
        title=marker_color,  # Set the marker title to the color for tooltip
        alt=marker_color,  # Set the alt text to the color
    )

    # Apply the specified color to the marker
    marker.icon = AwesomeIcon(name='circle', marker_color=marker_color)

    m.add_layer(marker)

m
|
162
|
+
```
|
163
|
+
|
164
|
+
```python
|
165
|
+
|
166
|
+
# Same treatment-arm map as above, drawn on the `basemap` chosen earlier.
m = Map(basemap=basemap, center=center, zoom=zoom, layout=layout)

for row in springsGPS.rows:
    latitude = row.item('gpsn1')
    longitude = row.item('gpse1')
    # Deliberately not named `color`: that would overwrite the color()
    # function, and re-running springsGPS.apply(color, ...) would then fail.
    marker_color = row.item('color')

    marker = Marker(
        location=(latitude, longitude),
        draggable=False,  # Set to True if you want to make the markers draggable
        title=marker_color,  # Set the marker title to the color for tooltip
        alt=marker_color,  # Set the alt text to the color
    )

    # Apply the specified color to the marker
    marker.icon = AwesomeIcon(name='circle', marker_color=marker_color)

    m.add_layer(marker)

m
|
185
|
+
```
|
186
|
+
|
187
|
+
Do the colors seem randomly distributed?
|
188
|
+
|
189
|
+
In fact, the randomization was performed on just a list of the springs using a random number generator.
|
190
|
+
It did not take spatial distribution into account.
|
191
|
+
|
192
|
+
```python
|
193
|
+
|
194
|
+
```
|
195
|
+
|
196
|
+
```python
|
197
|
+
|
198
|
+
```
|
199
|
+
|