my-markdown-library 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +7 -0
  2. data/F24LS_md/ Lecture 4 - Public.md +347 -0
  3. data/F24LS_md/Lecture 1 - Introduction and Overview.md +327 -0
  4. data/F24LS_md/Lecture 10 - Development_.md +631 -0
  5. data/F24LS_md/Lecture 11 - Econometrics.md +345 -0
  6. data/F24LS_md/Lecture 12 - Finance.md +692 -0
  7. data/F24LS_md/Lecture 13 - Environmental Economics.md +299 -0
  8. data/F24LS_md/Lecture 15 - Conclusion.md +272 -0
  9. data/F24LS_md/Lecture 2 - Demand.md +349 -0
  10. data/F24LS_md/Lecture 3 - Supply.md +329 -0
  11. data/F24LS_md/Lecture 5 - Production C-D.md +291 -0
  12. data/F24LS_md/Lecture 6 - Utility and Latex.md +440 -0
  13. data/F24LS_md/Lecture 7 - Inequality.md +607 -0
  14. data/F24LS_md/Lecture 8 - Macroeconomics.md +704 -0
  15. data/F24LS_md/Lecture 8 - Macro.md +700 -0
  16. data/F24LS_md/Lecture 9 - Game Theory_.md +436 -0
  17. data/F24LS_md/summary.yaml +105 -0
  18. data/F24Lec_MD/LecNB_summary.yaml +206 -0
  19. data/F24Lec_MD/lec01/lec01.md +267 -0
  20. data/F24Lec_MD/lec02/Avocados_demand.md +425 -0
  21. data/F24Lec_MD/lec02/Demand_Steps_24.md +126 -0
  22. data/F24Lec_MD/lec02/PriceElasticity.md +83 -0
  23. data/F24Lec_MD/lec02/ScannerData_Beer.md +171 -0
  24. data/F24Lec_MD/lec02/demand-curve-Fa24.md +213 -0
  25. data/F24Lec_MD/lec03/3.0-CubicCostCurve.md +239 -0
  26. data/F24Lec_MD/lec03/3.1-Supply.md +274 -0
  27. data/F24Lec_MD/lec03/3.2-sympy.md +332 -0
  28. data/F24Lec_MD/lec03/3.3a-california-energy.md +120 -0
  29. data/F24Lec_MD/lec03/3.3b-a-really-hot-tuesday.md +121 -0
  30. data/F24Lec_MD/lec04/lec04-CSfromSurvey-closed.md +335 -0
  31. data/F24Lec_MD/lec04/lec04-CSfromSurvey.md +331 -0
  32. data/F24Lec_MD/lec04/lec04-Supply-Demand-closed.md +519 -0
  33. data/F24Lec_MD/lec04/lec04-Supply-Demand.md +514 -0
  34. data/F24Lec_MD/lec04/lec04-four-plot-24.md +34 -0
  35. data/F24Lec_MD/lec04/lec04-four-plot.md +34 -0
  36. data/F24Lec_MD/lec05/Lec5-Cobb-Douglas.md +131 -0
  37. data/F24Lec_MD/lec05/Lec5-CobbD-AER1928.md +283 -0
  38. data/F24Lec_MD/lec06/6.1-Sympy-Differentiation.md +253 -0
  39. data/F24Lec_MD/lec06/6.2-3D-utility.md +287 -0
  40. data/F24Lec_MD/lec06/6.3-QuantEcon-Optimization.md +399 -0
  41. data/F24Lec_MD/lec06/6.4-latex.md +138 -0
  42. data/F24Lec_MD/lec06/6.5-Edgeworth.md +269 -0
  43. data/F24Lec_MD/lec07/7.1-inequality.md +283 -0
  44. data/F24Lec_MD/lec07/7.2-historical-inequality.md +237 -0
  45. data/F24Lec_MD/lec08/macro-fred-api.md +313 -0
  46. data/F24Lec_MD/lec09/lecNB-prisoners-dilemma.md +88 -0
  47. data/F24Lec_MD/lec10/Lec10.2-waterguard.md +401 -0
  48. data/F24Lec_MD/lec10/lec10.1-mapping.md +199 -0
  49. data/F24Lec_MD/lec11/11.1-slr.md +305 -0
  50. data/F24Lec_MD/lec11/11.2-mlr.md +171 -0
  51. data/F24Lec_MD/lec12/Lec12-4-PersonalFinance.md +590 -0
  52. data/F24Lec_MD/lec12/lec12-1_Interest_Payments.md +267 -0
  53. data/F24Lec_MD/lec12/lec12-2-stocks-options.md +235 -0
  54. data/F24Lec_MD/lec13/Co2_ClimateChange.md +139 -0
  55. data/F24Lec_MD/lec13/ConstructingMAC.md +213 -0
  56. data/F24Lec_MD/lec13/EmissionsTracker.md +170 -0
  57. data/F24Lec_MD/lec13/KuznetsHypothesis.md +219 -0
  58. data/F24Lec_MD/lec13/RoslingPlots.md +217 -0
  59. data/F24Lec_MD/lec15/vibecession.md +485 -0
  60. data/F24Textbook_MD/00-intro/index.md +292 -0
  61. data/F24Textbook_MD/01-demand/01-demand.md +152 -0
  62. data/F24Textbook_MD/01-demand/02-example.md +131 -0
  63. data/F24Textbook_MD/01-demand/03-log-log.md +284 -0
  64. data/F24Textbook_MD/01-demand/04-elasticity.md +248 -0
  65. data/F24Textbook_MD/01-demand/index.md +15 -0
  66. data/F24Textbook_MD/02-supply/01-supply.md +203 -0
  67. data/F24Textbook_MD/02-supply/02-eep147-example.md +86 -0
  68. data/F24Textbook_MD/02-supply/03-sympy.md +138 -0
  69. data/F24Textbook_MD/02-supply/04-market-equilibria.md +204 -0
  70. data/F24Textbook_MD/02-supply/index.md +16 -0
  71. data/F24Textbook_MD/03-public/govt-intervention.md +73 -0
  72. data/F24Textbook_MD/03-public/index.md +10 -0
  73. data/F24Textbook_MD/03-public/surplus.md +351 -0
  74. data/F24Textbook_MD/03-public/taxes-subsidies.md +282 -0
  75. data/F24Textbook_MD/04-production/index.md +15 -0
  76. data/F24Textbook_MD/04-production/production.md +178 -0
  77. data/F24Textbook_MD/04-production/shifts.md +296 -0
  78. data/F24Textbook_MD/05-utility/budget-constraints.md +166 -0
  79. data/F24Textbook_MD/05-utility/index.md +15 -0
  80. data/F24Textbook_MD/05-utility/utility.md +136 -0
  81. data/F24Textbook_MD/06-inequality/historical-inequality.md +253 -0
  82. data/F24Textbook_MD/06-inequality/index.md +15 -0
  83. data/F24Textbook_MD/06-inequality/inequality.md +226 -0
  84. data/F24Textbook_MD/07-game-theory/bertrand.md +257 -0
  85. data/F24Textbook_MD/07-game-theory/cournot.md +333 -0
  86. data/F24Textbook_MD/07-game-theory/equilibria-oligopolies.md +96 -0
  87. data/F24Textbook_MD/07-game-theory/expected-utility.md +61 -0
  88. data/F24Textbook_MD/07-game-theory/index.md +19 -0
  89. data/F24Textbook_MD/07-game-theory/python-classes.md +340 -0
  90. data/F24Textbook_MD/08-development/index.md +35 -0
  91. data/F24Textbook_MD/09-macro/CentralBanks.md +101 -0
  92. data/F24Textbook_MD/09-macro/Indicators.md +77 -0
  93. data/F24Textbook_MD/09-macro/fiscal_policy.md +36 -0
  94. data/F24Textbook_MD/09-macro/index.md +14 -0
  95. data/F24Textbook_MD/09-macro/is_curve.md +76 -0
  96. data/F24Textbook_MD/09-macro/phillips_curve.md +70 -0
  97. data/F24Textbook_MD/10-finance/index.md +10 -0
  98. data/F24Textbook_MD/10-finance/options.md +178 -0
  99. data/F24Textbook_MD/10-finance/value-interest.md +60 -0
  100. data/F24Textbook_MD/11-econometrics/index.md +16 -0
  101. data/F24Textbook_MD/11-econometrics/multivariable.md +218 -0
  102. data/F24Textbook_MD/11-econometrics/reading-econ-papers.md +25 -0
  103. data/F24Textbook_MD/11-econometrics/single-variable.md +483 -0
  104. data/F24Textbook_MD/11-econometrics/statsmodels.md +58 -0
  105. data/F24Textbook_MD/12-environmental/KuznetsHypothesis-Copy1.md +187 -0
  106. data/F24Textbook_MD/12-environmental/KuznetsHypothesis.md +187 -0
  107. data/F24Textbook_MD/12-environmental/MAC.md +254 -0
  108. data/F24Textbook_MD/12-environmental/index.md +36 -0
  109. data/F24Textbook_MD/LICENSE.md +11 -0
  110. data/F24Textbook_MD/intro.md +26 -0
  111. data/F24Textbook_MD/references.md +25 -0
  112. data/F24Textbook_MD/summary.yaml +414 -0
  113. metadata +155 -0
@@ -0,0 +1,401 @@
1
+ ---
2
+ title: "Lec10.2-waterguard"
3
+ type: lecture-notebook
4
+ week: 10
5
+ source_path: "/Users/ericvandusen/Documents/Data88E-ForTraining/F24Lec_NBs/lec10/Lec10.2-waterguard.ipynb"
6
+ ---
7
+
8
+ <table style="width: 100%;">
9
+ <tr style="background-color: transparent;"><td>
10
+ <img src="https://data-88e.github.io/assets/images/blue_text.png" width="250px" style="margin-left: 0;" />
11
+ </td><td>
12
+ <p style="text-align: right; font-size: 10pt;"><strong>Economic Models</strong>, Fall 2024<br>
13
+ Dr. Eric Van Dusen</p></td></tr>
14
+ </table>
15
+
16
+ # Lec 10 : Water Guard Randomized Controlled Trial
17
+
18
+ This Lecture Notebook is an adaptation from a set of notebooks developed for a full semester Data Science Connector Course taught in Fall 2017, entitled "Behind the Curtain in Economic Development". This dataset comes from a randomized controlled trial household survey carried out in Eastern Kenya in 2007-2008.
19
+
20
+ The purpose of the study was to understand how to promote the use of WaterGuard, a dilute sodium hypochlorite solution that was promoted for Point-of-use household water disinfection. There were seven arms in the study, which will be more fully described in the following chart:
21
+
22
+ <img src="Slide1.png" />
23
+
24
+ Within this table you can see the seven treatment arms - control plus six treatments - in the bolded boxes in the middle with the number of springs and households. The study was carried out as a part of a study of households who gather drinking water from springs in a rural area. The three boxes at the bottom describe the three rounds of data collection - a baseline before the treatment, and a short term and long term follow-up.
25
+
26
+ <!-- **Notebook Outline**
27
+
28
+ 1. [Mapping](#Mapping)
29
+ 2. [Balance Check](#Balance)
30
+ 3. [Baseline and a Randomly Selected Compound](#Baseline)
31
+ 4. [Chlorine Usage outcome variables](#Chlorine)
32
+ 5. [Graph of outcomes by Treatment Arm](#Graph) -->
33
+
34
+ ```python
35
+ from datascience import *
36
+ import numpy as np
37
+ import matplotlib.pyplot as plt
38
+ %matplotlib inline
39
+ import pandas as pd
40
+ from pandas import read_stata
41
+ ```
42
+
43
+ <!-- END QUESTION -->
44
+
45
+
46
+
47
+ <div id="Balance"></div>
48
+
49
+ ## Balance Check and Variable Names
50
+
51
+ ### Baseline Survey
52
+ This is our first look at the survey dataset. These are a limited set of questions/answers from a simple and short baseline survey. However, it is a lot bigger and messier than the datasets we have seen so far in this course and in Data 8.
53
+
54
+ Data variable names follow along with the survey below, referred to by the section, a,b,c... number, 1,2,3... and a few words about the question.
55
+
56
+ The purpose of this section will be:
57
+ * to get a familiarity with the dataset,
58
+ * to look at some background descriptor variables of the households,
59
+ * to start to think about missing values and coding of subsets of the data.
60
+ * to check the randomization of households by seeing if the different arms of the study are balanced across some of the key baseline variables.
61
+
62
+ **The surveys that illustrate the raw data names are in a file linked [here](https://drive.google.com/open?id=1UVoiVn7LJ4rn7WEb-9BJ96jmdJ2FBk60). You have to go and look through this survey to understand the variables.**
63
+
64
+ **The code sheet that has the codes for some of the possible answers is in a file linked [here](https://drive.google.com/file/d/1iinJXExeVKV4Dm7tRKOiotoYUDSXMyqc). You have to go and look through this code sheet in a later section.**
65
+
66
+ ```python
67
+ WGP_baseline = Table.read_table("WGP_baseline_Data8.csv")
68
+ WGP_baseline
69
+ ```
70
+
71
+ ```python
72
+ baseline = pd.read_csv("WGP_baseline_Data8.csv")
73
+ baseline
74
+ ```
75
+
76
+ ```python
77
+ baseline.dropna(axis = 0)
78
+ ```
79
+
80
+ ### Missing values
81
+
82
+ If you look through the dataset above, and scroll to the right a ways to some of the last variables, you will notice that there are a lot of cells with NaN, which means a missing value. For these cells no data was entered at the time of data entry. In some cases it may be appropriate to enter a zero and carry on with the analysis.
83
+
84
+ ```python
85
+ WGP_base_dfna = WGP_baseline.to_df().fillna(0)
86
+ WGP_table = Table.from_df(WGP_base_dfna)
87
+ WGP_table
88
+ ```
89
+
90
+ Look at the variable names, and then look at the survey form to find the concordance of codes
91
+
92
+ ```python
93
+ # Here is a list of all of the possible categories / columns
94
+ list(WGP_table)
95
+ ```
96
+
97
+ ### What are some Variables that we want to specifically look at? ###
98
+
99
+ There are a lot of variables here and it can be kind of overwhelming, but it is good to see how many columns there can be in a comprehensive survey dataset.
100
+
101
+ #### Front Page information - A variables
102
+
103
+ - household id
104
+ - spring id
105
+ - interviewer id
106
+
107
+ #### Information about respondent - B variables
108
+
109
+ - tribe
110
+ - education
111
+ - age
112
+ - gender
113
+ - group membership
114
+
115
+ #### Water Guard Use - C variables
116
+
117
+ For Waterguard (WG) usage
118
+
119
+ - `c1a` - Whether the respondent has ever heard of WG
120
+ - `c2a` - Whether the respondent has ever used WG
121
+ - `c3a` - Whether the respondent's water is currently treated with WG
122
+ - `c4a` - Whether the respondent has used WG in the past month
123
+
124
+ #### Durable / Capital Goods - D variables
125
+
126
+ - Whether the respondent has electricity / latrine / iron roof
127
+ - Number of bicycles / radios / hoes / beds owned
128
+ - Number of animals owned
129
+
130
+ #### Child Health - E variables
131
+
132
+ - `e1_num_kids_under_5`: Number of kids under 5
133
+ - `e2_`: This table becomes tricky because it has a different format. Each kid in the table is numbered 01, 02 and so on, and then the subsequent questions are keyed to that child number. e.g. `e2e_01_d_diarrhea`, `e2e_02_d_diarrhea` represent whether child 1 and 2 respectively have diarrhea. In total, four diseases are recorded:
134
+ - Cough
135
+ - Diarrhea
136
+ - Malaria
137
+ - Vomiting
138
+
139
+ ### The Treatment Arm
140
+
141
+ In the study, arm 1 is control, while Arms 2-7 are different types of treatment interventions:
142
+
143
+ - Arm 1 - Control
144
+ - Arm 2 - Household Script
145
+ - Arm 3 - Community Script
146
+ - Arm 4 - HH + Community Script
147
+ - Arm 5 - Flat-Fee Promoter + Coupons
148
+ - Arm 6 - Incentivized Promoter + Coupons
149
+ - Arm 7 - Incentivized Promoter + Dispenser at Spring
150
+
151
+ Let's check how many households are in each treatment arm.
152
+
153
+ ```python
154
+ WGP_table.group("treatment_arm")
155
+ ```
156
+
157
+ ### Baseline Check - Exposure to Water Guard Use
158
+
159
+ Let's see how many households have ever used Water Guard.
160
+
161
+ The data is currently Coded as 1 = Yes and 2 = No, so we can't really make sense of the Mean of the variable in its current form. Instead, we will make a new column/variable with the 1 or 2 answers translated into Yes or No.
162
+ Notably, we must first filter out respondents that had missing values (with value 0) for this question.
163
+
164
+ ```python
165
+ WGP_ever = WGP_table.where('c2a_wg_used_ever', are.above(0))
166
+ WGP_ever.group("c2a_wg_used_ever")
167
+ ```
168
+
169
+ ```python
170
def translate_to_yesno(table, col):
    """Translate a 1/2-coded survey column into 'Yes'/'No' labels.

    Rows whose value in ``col`` is not above 0 are dropped before
    translating (the notebook fills missing answers with 0), so the result
    lines up with a table that has been filtered the same way.

    Parameters
    ----------
    table : datascience Table that contains the column ``col``.
    col : label of the coded column to translate.

    Returns
    -------
    list of str
        'Yes' where the code equals 1 and 'No' otherwise (the survey codes
        "no" as 2), in row order.
    """
    # Filter once and read the column once, rather than re-fetching the
    # column on every loop iteration.
    codes = table.where(col, are.above(0)).column(col)
    return ['Yes' if code == 1 else 'No' for code in codes]
180
+ ```
181
+
182
+ ```python
183
+ new = translate_to_yesno(WGP_ever, 'c2a_wg_used_ever')
184
+ WGP_ever = WGP_ever.with_column('c2a_wg_used_ever',new)
185
+ WGP_ever.group('c2a_wg_used_ever')
186
+ ```
187
+
188
+ ### Pivoting and Balance Checks
189
+
190
+ Now we will use a command called **Pivot** to create a new table that has the percent of households who have ever used Water Guard within each Treatment Arm.
191
+
192
+ We can first use it to do a **balance check** for Water Guard use across Arms.
193
+
194
+ ```python
195
+ ever_yesno = WGP_ever.pivot('c2a_wg_used_ever','treatment_arm')
196
+ ever_yesno
197
+ ```
198
+
199
+ Converting to percentages...
200
+
201
+ ```python
202
# Look the pivoted counts up by label instead of by position, so the
# percentages stay correct even if the column order changes.
no_count = ever_yesno.column('No')
yes_count = ever_yesno.column('Yes')
total = no_count + yes_count
ever_yesno = ever_yesno.with_columns(
    'Percent No', no_count / total * 100,
    'Percent Yes', yes_count / total * 100,
)
ever_yesno
206
+ ```
207
+
208
+ Let's also repeat the process for the variable of whether the households are currently using Water Guard, `c3a_wg_water_currently_treat`.
209
+
210
+ ```python
211
# Keep only households that answered the question (0 marks a missing value).
# The predicate matches the one translate_to_yesno applies internally, so the
# filtered table and the translated labels always have the same length.
WGP_current = WGP_table.where('c3a_wg_water_currently_treat', are.above(0))
new2 = translate_to_yesno(WGP_current, 'c3a_wg_water_currently_treat')
WGP_current = WGP_current.with_column('c3a_wg_water_currently_treat', new2)
WGP_current.group("c3a_wg_water_currently_treat")
215
+ ```
216
+
217
+ Do you notice a problem here? Look at the total numbers reported in the output above.
218
+
219
+ We can do the same percentage tables for the balance check but maybe there's a problem.
220
+ Look at the total number of households answering the question and compare that to the total number from the previous section.
221
+
222
+ ```python
223
current_yesno = WGP_current.pivot('c3a_wg_water_currently_treat', 'treatment_arm')

# Address the pivoted counts by label rather than by position so the
# 'No' and 'Yes' percentages cannot be swapped by a change in column order.
no_count = current_yesno.column('No')
yes_count = current_yesno.column('Yes')
total = no_count + yes_count
current_yesno = current_yesno.with_columns(
    'Percent No', no_count / total * 100,
    'Percent Yes', yes_count / total * 100,
)
current_yesno
228
+ ```
229
+
230
+ This seems like a really high usage, but **maybe this is due to missing values**.
231
+
232
+ Let's now also include the 0 (missing) values in our analysis.
233
+
234
+ ```python
235
current_yesnomissing = WGP_table.pivot('c3a_wg_water_currently_treat', 'treatment_arm')

# The pivot labels are the string forms of the survey codes:
# "0.0" = missing (filled in earlier), "1.0" = yes, "2.0" = no.
# Build the total from the same labelled columns used for the percentages
# instead of mixing positional and label-based lookups.
missing_count = current_yesnomissing.column("0.0")
yes_count = current_yesnomissing.column("1.0")
no_count = current_yesnomissing.column("2.0")
total = missing_count + yes_count + no_count
current_yesnomissing = current_yesnomissing.with_columns(
    'Percent Missing', missing_count / total * 100,
    'Percent No', no_count / total * 100,
    'Percent Yes', yes_count / total * 100,
)
current_yesnomissing
242
+ ```
243
+
244
+ <!-- END QUESTION -->
245
+
246
+
247
+
248
+ <div id="Baseline"></div>
249
+
250
+ ## Baseline and a Randomly Selected Compound
251
+
252
+ Let's describe a household selected at random.
253
+
254
+ First, we will extract the household/compound id into an array.
255
+
256
+ ```python
257
+ hhld_array = WGP_table.column('a1_cmpd_id')
258
+ hhld_array
259
+ ```
260
+
261
+ Next, we will draw randomly from this array.
262
+
263
+ ```python
264
+ randomhh = np.random.choice(hhld_array)
265
+ print("My randomly selected household is household number", randomhh)
266
+ ```
267
+
268
+ Then, let's look at the data for our randomly selected household:
269
+
270
+ ```python
271
+ myfamily = WGP_table.where("a1_cmpd_id",randomhh)
272
+ myfamily
273
+ ```
274
+
275
+ Some of the variables may need some manipulation.
276
+ Let's start with the age of the respondent:
277
+
278
+ ```python
279
+ birthyear = myfamily.column("b3_birth_year").item(0)
280
+ surveyyear = myfamily.column("a5_date_interview_year").item(0)
281
+ agecalc = surveyyear-birthyear #
282
+ agecalc
283
+ ```
284
+
285
+ And their tribe:
286
+
287
+ ```python
288
+ print("Survey respondent Tribe", myfamily.column("b5_tribe").item(0))
289
+ print("Respondent Spouse Tribe", myfamily.column("b7_tribe_spouse").item(0))
290
+ ```
291
+
292
+ Lastly, whether they have a latrine:
293
+
294
+ ```python
295
+ print("Does the household have a latrine?", myfamily.column("d3_latrine").item(0))
296
+ ```
297
+
298
+ Remember in the answer above it is coded so that 1=Yes and 2=No.
299
+
300
+ <!-- BEGIN QUESTION -->
301
+
302
+ **Question 3:** Describe your randomly selected household and the respondent who is answering the survey. Please remember you can find the code sheet under the section of Baseline Survey.
303
+
304
+ 1. Age
305
+ 2. Tribe
306
+ 3. Education
307
+ 4. Member of any groups b11-b15?
308
+ 5. Occupation
309
+ 6. Religion
310
+ 7. A summary of D variables, iron roof, floor materials, latrine, cattle, and others
311
+ 8. Have they ever used WG?
312
+ 9. Their treatment arm assignment
313
+ 10. How many children do they have
314
+ 11. Gender and Age of children
315
+ 12. Have any of the children been sick?
316
+
317
+ <!--
318
+ BEGIN QUESTION
319
+ name: q3
320
+ manual: true
321
+ -->
322
+
323
+ _Type your answer here, replacing this text._
324
+
325
+ <!-- END QUESTION -->
326
+
327
+
328
+
329
+ <div id="Chlorine"></div>
330
+
331
+ ## Water Guard Usage outcome variables
332
+
333
+ ### WGP Followup - Variability
334
+ The purpose of this section will be to continue on with the follow-up rounds of the Water Guard Promotion study. In this section we have both the household reported use, and the use validated by checking the chlorine content of the water using a test kit.
335
+
336
+ ```python
337
+ WGP3rds_table = Table.read_table('WGP_3waves_Data8.csv')
338
+ WGP3rds_table
339
+ ```
340
+
341
+ This is a large dataset, basically three datasets merged together, one for baseline, one for short term follow up and one for long term followup. The column `round` describes these 3 time steps:
342
+
343
+ - Round = 1 : baseline
344
+ - Round = 2 : 3 week followup
345
+ - Round = 3 : 3 month followup
346
+
347
+ Notably, many of the variables are only asked in one of the three rounds. For example, the chlorine use variables are:
348
+
349
+ - The variable for self reported chlorine use was `c6n` in Round 2, and `c5n` in Round 3.
350
+ - The variable for validated chlorine use is `c12n21pnk` in Round 2 and `c15npt2or1pnk` in Round 3.
351
+
352
+ Instead, the following variables have been combined across rounds for the ease of programming:
353
+
354
+ - `Selfrptpct` is self reported chlorine use in both round 2 and round 3
355
+ - `Vldclpct` is validated chlorine use in both rounds
356
+
357
+ ```python
358
+ WGP3rds_table.group("treatment_arm")
359
+ ```
360
+
361
+ ```python
362
+ WGP3rds_table.group('round')
363
+ ```
364
+
365
+ ### Grouping by round + treatment arm
366
+
367
+ We want to create a multi-level group: each group should be a unique combination of the survey round and the treatment arm.
368
+
369
+ ```python
370
+ WGP_3rds_outcomesonly= WGP3rds_table.select("round", "treatment_arm", "Selfrptpct", "Vldclpct")
371
+ WGP_3rds_outcomesonly.group(["round","treatment_arm"], np.mean).show(30)
372
+ ```
373
+
374
+ ### Making a smaller dataset
375
+
376
+ Let's break out a smaller dataset of the variables we want to focus on; just for Round 2 and the outcome variables.
377
+
378
+ ```python
379
+ WGPRd2 = WGP3rds_table.where("round", 2).select("a1_cmpd_id","treatment_arm",
380
+ "c6_current_water_treated_wg",
381
+ 'c6_curr_water_treat_other_c',
382
+ 'c12_chlorine_meter_reading',
383
+ 'c11_chlorine_color','c12n21pnk', 'c6n'
384
+ )
385
+ WGPRd2
386
+ ```
387
+
388
+ A quick examination of the estimated Water Guard usage in Round 2 across all treatment arms:
389
+
390
+ ```python
391
+ np.mean(WGPRd2.column('c12n21pnk'))
392
+ ```
393
+
394
+ ```python
395
+
396
+ ```
397
+
398
+ ```python
399
+
400
+ ```
401
+
@@ -0,0 +1,199 @@
1
+ ---
2
+ title: "lec10.1-mapping"
3
+ type: lecture-notebook
4
+ week: 10
5
+ source_path: "/Users/ericvandusen/Documents/Data88E-ForTraining/F24Lec_NBs/lec10/lec10.1-mapping.ipynb"
6
+ ---
7
+
8
+ <table style="width: 100%;">
9
+ <tr style="background-color: transparent;"><td>
10
+ <img src="https://data-88e.github.io/assets/images/blue_text.png" width="250px" style="margin-left: 0;" />
11
+ </td><td>
12
+ <p style="text-align: right; font-size: 10pt;"><strong>Economic Models</strong>, Fall 2024
13
+ <br>
14
+ Dr. Eric Van Dusen</p></td></tr>
15
+ </table>
16
+
17
+ # Lec10: Water Guard Randomized Controlled Trial
18
+
19
+ This notebook is an adaptation from a set of notebooks developed for a full semester Data Science Connector Course taught in Fall 2017, entitled "Behind the Curtain in Economic Development". This dataset comes from a randomized controlled trial household survey carried out in Eastern Kenya in 2007-2008.
20
+
21
+ The purpose of the study was to understand how to promote the use of WaterGuard, a dilute sodium hypochlorite solution that was promoted for Point-of-use household water disinfection. There were seven arms in the study, which will be more fully described in the following chart:
22
+
23
+ <img src="Slide1.png" />
24
+
25
+ Within this table you can see the seven treatment arms - control plus six treatments - in the bolded boxes in the middle with the number of springs and households. The study was carried out as a part of a study of households who gather drinking water from springs in a rural area. The three boxes at the bottom describe the three rounds of data collection - a baseline before the treatment, and a short term and long term follow-up.
26
+
27
+ <!-- **Notebook Outline**
28
+
29
+ 1. [Mapping](#Mapping)
30
+ 2. [Balance Check](#Balance)
31
+ 3. [Baseline and a Randomly Selected Compound](#Baseline)
32
+ 4. [Chlorine Usage outcome variables](#Chlorine)
33
+ 5. [Graph of outcomes by Treatment Arm](#Graph) -->
34
+
35
+ ```python
36
+ from datascience import *
37
+ import numpy as np
38
+ import matplotlib.pyplot as plt
39
+ %matplotlib inline
40
+ import pandas as pd
41
+ from pandas import read_stata
42
+ from ipyleaflet import Map, basemaps, Marker, AwesomeIcon
43
+ ```
44
+
45
+ ## Mapping
46
+
47
+ <div id="Mapping"></div>
48
+
49
+ This first section works with a package in Jupyter called ipyleaflet.
50
+
51
+ `ipyleaflet`;
52
+ the documentation is [here](https://ipyleaflet.readthedocs.io/en/latest/)
53
+ and it is worth a short read through if you are interested.
54
+
55
+
56
+ We want to use two different base maps - one is a satellite layer and one is the Open Street Map layer.
57
+
58
+ We will start by reading in a dataset of the coordinates of the springs that are used in the WaterGuard Promotion (WGP) study. These springs were randomized into seven different treatment arms. The springs are identified by a unique numerical id tag, and the common name in the local language.
59
+
60
+ ```python
61
+ springsGPS = Table.read_table('WGPgps_forData8.csv')
62
+ springsGPS
63
+ ```
64
+
65
+ ```python
66
+ # make a table with just the North and East GPS columns
67
+ locations = springsGPS.select("gpsn1", "gpse1")
68
+ locations
69
+ ```
70
+
71
+ Where in the world are we?
72
+
73
+ First of all, let's look at the mean of the latitude and longitude so that we can center our map there
74
+
75
+ ```python
76
+
77
+ mean_longitude = springsGPS.column('gpse1').mean()
78
+ mean_latitude = springsGPS.column('gpsn1').mean()
79
+
80
+ print("Mean of 'gpse1':", mean_longitude)
81
+ print("Mean of 'gpsn1':", mean_latitude)
82
+ ```
83
+
84
+ The code cell below should display a map. However, it may not run the first time you click it - if this happens, try running all the cells above this one and then refreshing your browser. After a few refreshes, the maps should load.
85
+
86
+ ```python
87
+
88
+ center = [0.4, 34.4]
89
+ zoom = 12
90
+ basemap=basemaps.Esri.WorldImagery
91
+ layout={'width': '800px', 'height': '600px'}
92
+
93
+ Map(basemap=basemap, center=center, zoom=zoom, layout=layout)
94
+ ```
95
+
96
+ Let's make a map of our sample sites (springs)
97
+
98
+ ```python
99
+ m = Map(basemap=basemap, center=center, zoom=zoom, layout=layout)
100
+
101
+ # Iterate through the rows in the dataset
102
+ for row in springsGPS.rows:
103
+ latitude = row.item('gpsn1')
104
+ longitude = row.item('gpse1')
105
+ marker = Marker(location=(latitude, longitude))
106
+ m.add_layer(marker)
107
+
108
+ m
109
+ ```
110
+
111
+ Now the most interesting bit of data is still not being used, the Treatment Arm. Let's assign different colors to the different treatment arms so that when we map it we can see if the arms appear to be randomly distributed.
112
+
113
+ The following function assigns the 7 different treatment arms to a set of colors. [Here](https://www.w3.org/TR/css3-color/#html4) is the colors reference if you are interested!
114
+
115
+ ```python
116
def color(arm):
    """Return the marker color name for a treatment arm.

    Arms 1 through 7 map, in order, to black, red, purple, green, blue,
    pink and orange. Any other value yields None.
    """
    # Position in this tuple (starting at 1) is the treatment-arm number.
    arm_colors = ('black', 'red', 'purple', 'green', 'blue', 'pink', 'orange')
    for arm_number, color_name in enumerate(arm_colors, start=1):
        if arm == arm_number:
            return color_name
    return None
+ ```
132
+
133
+ ```python
134
+ # Using the .apply method, you can apply any function to each value in a column of a Table
135
+ colors = springsGPS.apply(color, "treatment_arm")
136
+ springsGPS = springsGPS.with_column("color", colors)
137
+ springsGPS
138
+ ```
139
+
140
+ ```python
141
+
142
# Map on the default basemap with one marker per spring, colored by treatment arm.
m = Map(center=center, zoom=zoom, layout=layout)

for row in springsGPS.rows:
    latitude = row.item('gpsn1')
    longitude = row.item('gpse1')
    # Use a distinct local name: assigning to `color` here would overwrite the
    # color() helper and break re-running the earlier .apply(color, ...) cell.
    marker_color = row.item('color')

    marker = Marker(
        location=(latitude, longitude),
        draggable=False,  # Set to True if you want to make the markers draggable
        title=marker_color,  # Set the marker title to the color for tooltip
        alt=marker_color,  # Set the alt text to the color
    )

    # Apply the specified color to the marker
    marker.icon = AwesomeIcon(name='circle', marker_color=marker_color)

    m.add_layer(marker)

m
162
+ ```
163
+
164
+ ```python
165
+
166
# Same colored markers, drawn on the basemap chosen earlier in the notebook.
m = Map(basemap=basemap, center=center, zoom=zoom, layout=layout)

for row in springsGPS.rows:
    latitude = row.item('gpsn1')
    longitude = row.item('gpse1')
    # Use a distinct local name: assigning to `color` here would overwrite the
    # color() helper and break re-running the earlier .apply(color, ...) cell.
    marker_color = row.item('color')

    marker = Marker(
        location=(latitude, longitude),
        draggable=False,  # Set to True if you want to make the markers draggable
        title=marker_color,  # Set the marker title to the color for tooltip
        alt=marker_color,  # Set the alt text to the color
    )

    # Apply the specified color to the marker
    marker.icon = AwesomeIcon(name='circle', marker_color=marker_color)

    m.add_layer(marker)

m
185
+ ```
186
+
187
+ Do the colors seem randomly distributed?
188
+
189
+ In fact, the randomization was performed on just a list of the springs using a random number generator.
190
+ It did not take spatial distribution into account.
191
+
192
+ ```python
193
+
194
+ ```
195
+
196
+ ```python
197
+
198
+ ```
199
+