easylink 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/cli.py +24 -3
- easylink/configuration.py +43 -36
- easylink/devtools/implementation_creator.py +71 -22
- easylink/implementation.py +88 -11
- easylink/implementation_metadata.yaml +177 -29
- easylink/pipeline.py +15 -6
- easylink/pipeline_schema.py +12 -13
- easylink/pipeline_schema_constants/__init__.py +4 -5
- easylink/pipeline_schema_constants/main.py +489 -0
- easylink/runner.py +11 -7
- easylink/step.py +89 -0
- easylink/steps/cascading/exclude_clustered.def +22 -0
- easylink/steps/cascading/exclude_clustered.py +76 -0
- easylink/steps/cascading/exclude_none.def +22 -0
- easylink/steps/cascading/exclude_none.py +76 -0
- easylink/steps/cascading/update_clusters_by_connected_components.def +22 -0
- easylink/steps/cascading/update_clusters_by_connected_components.py +101 -0
- easylink/steps/default/default_clusters_to_links.def +22 -0
- easylink/steps/default/default_clusters_to_links.py +91 -0
- easylink/steps/default/default_determining_exclusions.def +22 -0
- easylink/steps/default/default_determining_exclusions.py +81 -0
- easylink/steps/default/default_removing_records.def +22 -0
- easylink/steps/default/default_removing_records.py +59 -0
- easylink/steps/default/default_schema_alignment.def +22 -0
- easylink/steps/default/default_schema_alignment.py +53 -0
- easylink/steps/default/default_updating_clusters.def +22 -0
- easylink/steps/default/default_updating_clusters.py +67 -0
- easylink/steps/fastLink/fastLink_evaluating_pairs.R +136 -0
- easylink/steps/fastLink/fastLink_evaluating_pairs.def +21 -0
- easylink/steps/fastLink/fastLink_links_to_clusters.R +128 -0
- easylink/steps/fastLink/fastLink_links_to_clusters.def +21 -0
- easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +22 -0
- easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +42 -0
- easylink/steps/rl-dummy/input_data/create_input_files.ipynb +1433 -0
- easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
- easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
- easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
- easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +22 -0
- easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +59 -0
- easylink/steps/splink/splink_blocking_and_filtering.def +22 -0
- easylink/steps/splink/splink_blocking_and_filtering.py +130 -0
- easylink/steps/splink/splink_evaluating_pairs.def +22 -0
- easylink/steps/splink/splink_evaluating_pairs.py +164 -0
- easylink/steps/splink/splink_links_to_clusters.def +22 -0
- easylink/steps/splink/splink_links_to_clusters.py +63 -0
- easylink/utilities/data_utils.py +72 -0
- easylink/utilities/paths.py +4 -3
- easylink/utilities/validation_utils.py +509 -11
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/METADATA +5 -1
- easylink-0.1.19.dist-info/RECORD +91 -0
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/WHEEL +1 -1
- easylink-0.1.19.dist-info/licenses/LICENSE +28 -0
- easylink-0.1.17.dist-info/RECORD +0 -55
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1433 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": 1,
|
6
|
+
"id": "fd9b10f5",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import os\n",
|
11
|
+
"from pathlib import Path\n",
|
12
|
+
"import shutil\n",
|
13
|
+
"\n",
|
14
|
+
"import numpy as np\n",
|
15
|
+
"import pandas as pd\n",
|
16
|
+
"import pseudopeople as psp"
|
17
|
+
]
|
18
|
+
},
|
19
|
+
{
|
20
|
+
"cell_type": "code",
|
21
|
+
"execution_count": 2,
|
22
|
+
"id": "f369432b",
|
23
|
+
"metadata": {},
|
24
|
+
"outputs": [
|
25
|
+
{
|
26
|
+
"name": "stderr",
|
27
|
+
"output_type": "stream",
|
28
|
+
"text": [
|
29
|
+
"/tmp/ipykernel_1669812/1173994881.py:8: SettingWithCopyWarning: \n",
|
30
|
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
31
|
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
32
|
+
"\n",
|
33
|
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
34
|
+
" df2[\"Record ID\"] = df2.index\n"
|
35
|
+
]
|
36
|
+
},
|
37
|
+
{
|
38
|
+
"data": {
|
39
|
+
"text/html": [
|
40
|
+
"<div>\n",
|
41
|
+
"<style scoped>\n",
|
42
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
43
|
+
" vertical-align: middle;\n",
|
44
|
+
" }\n",
|
45
|
+
"\n",
|
46
|
+
" .dataframe tbody tr th {\n",
|
47
|
+
" vertical-align: top;\n",
|
48
|
+
" }\n",
|
49
|
+
"\n",
|
50
|
+
" .dataframe thead th {\n",
|
51
|
+
" text-align: right;\n",
|
52
|
+
" }\n",
|
53
|
+
"</style>\n",
|
54
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
55
|
+
" <thead>\n",
|
56
|
+
" <tr style=\"text-align: right;\">\n",
|
57
|
+
" <th></th>\n",
|
58
|
+
" <th>simulant_id</th>\n",
|
59
|
+
" <th>household_id</th>\n",
|
60
|
+
" <th>first_name</th>\n",
|
61
|
+
" <th>middle_initial</th>\n",
|
62
|
+
" <th>last_name</th>\n",
|
63
|
+
" <th>age</th>\n",
|
64
|
+
" <th>date_of_birth</th>\n",
|
65
|
+
" <th>street_number</th>\n",
|
66
|
+
" <th>street_name</th>\n",
|
67
|
+
" <th>unit_number</th>\n",
|
68
|
+
" <th>city</th>\n",
|
69
|
+
" <th>state</th>\n",
|
70
|
+
" <th>zipcode</th>\n",
|
71
|
+
" <th>housing_type</th>\n",
|
72
|
+
" <th>relationship_to_reference_person</th>\n",
|
73
|
+
" <th>sex</th>\n",
|
74
|
+
" <th>race_ethnicity</th>\n",
|
75
|
+
" <th>year</th>\n",
|
76
|
+
" <th>Record ID</th>\n",
|
77
|
+
" </tr>\n",
|
78
|
+
" </thead>\n",
|
79
|
+
" <tbody>\n",
|
80
|
+
" <tr>\n",
|
81
|
+
" <th>0</th>\n",
|
82
|
+
" <td>0_2</td>\n",
|
83
|
+
" <td>0_7</td>\n",
|
84
|
+
" <td>Diana</td>\n",
|
85
|
+
" <td>P</td>\n",
|
86
|
+
" <td>Kofron</td>\n",
|
87
|
+
" <td>25</td>\n",
|
88
|
+
" <td>05/06/1994</td>\n",
|
89
|
+
" <td>5112</td>\n",
|
90
|
+
" <td>145th st</td>\n",
|
91
|
+
" <td>NaN</td>\n",
|
92
|
+
" <td>Anytown</td>\n",
|
93
|
+
" <td>WA</td>\n",
|
94
|
+
" <td>00000</td>\n",
|
95
|
+
" <td>Household</td>\n",
|
96
|
+
" <td>Reference person</td>\n",
|
97
|
+
" <td>Female</td>\n",
|
98
|
+
" <td>White</td>\n",
|
99
|
+
" <td>2020</td>\n",
|
100
|
+
" <td>0</td>\n",
|
101
|
+
" </tr>\n",
|
102
|
+
" <tr>\n",
|
103
|
+
" <th>1</th>\n",
|
104
|
+
" <td>0_3</td>\n",
|
105
|
+
" <td>0_7</td>\n",
|
106
|
+
" <td>Anna</td>\n",
|
107
|
+
" <td>A</td>\n",
|
108
|
+
" <td>Kofron</td>\n",
|
109
|
+
" <td>25</td>\n",
|
110
|
+
" <td>09/29/1994</td>\n",
|
111
|
+
" <td>5112</td>\n",
|
112
|
+
" <td>145th st</td>\n",
|
113
|
+
" <td>NaN</td>\n",
|
114
|
+
" <td>Anytown</td>\n",
|
115
|
+
" <td>WA</td>\n",
|
116
|
+
" <td>00000</td>\n",
|
117
|
+
" <td>Household</td>\n",
|
118
|
+
" <td>Other relative</td>\n",
|
119
|
+
" <td>Female</td>\n",
|
120
|
+
" <td>White</td>\n",
|
121
|
+
" <td>2020</td>\n",
|
122
|
+
" <td>1</td>\n",
|
123
|
+
" </tr>\n",
|
124
|
+
" <tr>\n",
|
125
|
+
" <th>2</th>\n",
|
126
|
+
" <td>0_923</td>\n",
|
127
|
+
" <td>0_8033</td>\n",
|
128
|
+
" <td>Gerald</td>\n",
|
129
|
+
" <td>R</td>\n",
|
130
|
+
" <td>Butler</td>\n",
|
131
|
+
" <td>76</td>\n",
|
132
|
+
" <td>11/03/1943</td>\n",
|
133
|
+
" <td>1130</td>\n",
|
134
|
+
" <td>mallory ln</td>\n",
|
135
|
+
" <td>NaN</td>\n",
|
136
|
+
" <td>Anytown</td>\n",
|
137
|
+
" <td>WA</td>\n",
|
138
|
+
" <td>00000</td>\n",
|
139
|
+
" <td>Household</td>\n",
|
140
|
+
" <td>Reference person</td>\n",
|
141
|
+
" <td>Male</td>\n",
|
142
|
+
" <td>Black</td>\n",
|
143
|
+
" <td>2020</td>\n",
|
144
|
+
" <td>2</td>\n",
|
145
|
+
" </tr>\n",
|
146
|
+
" <tr>\n",
|
147
|
+
" <th>3</th>\n",
|
148
|
+
" <td>0_2641</td>\n",
|
149
|
+
" <td>0_1066</td>\n",
|
150
|
+
" <td>Loretta</td>\n",
|
151
|
+
" <td>T</td>\n",
|
152
|
+
" <td>Carley</td>\n",
|
153
|
+
" <td>61</td>\n",
|
154
|
+
" <td>07/71/1958</td>\n",
|
155
|
+
" <td>NaN</td>\n",
|
156
|
+
" <td>delacorte dr</td>\n",
|
157
|
+
" <td>NaN</td>\n",
|
158
|
+
" <td>Anytown</td>\n",
|
159
|
+
" <td>WA</td>\n",
|
160
|
+
" <td>00000</td>\n",
|
161
|
+
" <td>Household</td>\n",
|
162
|
+
" <td>Reference person</td>\n",
|
163
|
+
" <td>Female</td>\n",
|
164
|
+
" <td>White</td>\n",
|
165
|
+
" <td>2020</td>\n",
|
166
|
+
" <td>3</td>\n",
|
167
|
+
" </tr>\n",
|
168
|
+
" <tr>\n",
|
169
|
+
" <th>4</th>\n",
|
170
|
+
" <td>0_2801</td>\n",
|
171
|
+
" <td>0_1138</td>\n",
|
172
|
+
" <td>Richard</td>\n",
|
173
|
+
" <td>R</td>\n",
|
174
|
+
" <td>Jones</td>\n",
|
175
|
+
" <td>73</td>\n",
|
176
|
+
" <td>03/03/1947</td>\n",
|
177
|
+
" <td>950</td>\n",
|
178
|
+
" <td>caribou lane</td>\n",
|
179
|
+
" <td>NaN</td>\n",
|
180
|
+
" <td>Anytown</td>\n",
|
181
|
+
" <td>WA</td>\n",
|
182
|
+
" <td>00000</td>\n",
|
183
|
+
" <td>Household</td>\n",
|
184
|
+
" <td>Reference person</td>\n",
|
185
|
+
" <td>Male</td>\n",
|
186
|
+
" <td>White</td>\n",
|
187
|
+
" <td>2020</td>\n",
|
188
|
+
" <td>4</td>\n",
|
189
|
+
" </tr>\n",
|
190
|
+
" <tr>\n",
|
191
|
+
" <th>5</th>\n",
|
192
|
+
" <td>0_6176</td>\n",
|
193
|
+
" <td>0_2514</td>\n",
|
194
|
+
" <td>Sandra</td>\n",
|
195
|
+
" <td>S</td>\n",
|
196
|
+
" <td>Runnalls</td>\n",
|
197
|
+
" <td>66</td>\n",
|
198
|
+
" <td>03/18/1954</td>\n",
|
199
|
+
" <td>4458</td>\n",
|
200
|
+
" <td>windsor pl</td>\n",
|
201
|
+
" <td>NaN</td>\n",
|
202
|
+
" <td>Anytown</td>\n",
|
203
|
+
" <td>WA</td>\n",
|
204
|
+
" <td>00000</td>\n",
|
205
|
+
" <td>Household</td>\n",
|
206
|
+
" <td>Reference person</td>\n",
|
207
|
+
" <td>Female</td>\n",
|
208
|
+
" <td>Multiracial or Other</td>\n",
|
209
|
+
" <td>2020</td>\n",
|
210
|
+
" <td>5</td>\n",
|
211
|
+
" </tr>\n",
|
212
|
+
" <tr>\n",
|
213
|
+
" <th>6</th>\n",
|
214
|
+
" <td>0_13972</td>\n",
|
215
|
+
" <td>0_5627</td>\n",
|
216
|
+
" <td>Jerry</td>\n",
|
217
|
+
" <td>E</td>\n",
|
218
|
+
" <td>Murray</td>\n",
|
219
|
+
" <td>70</td>\n",
|
220
|
+
" <td>01/03/1950</td>\n",
|
221
|
+
" <td>17868</td>\n",
|
222
|
+
" <td>winding trail rd</td>\n",
|
223
|
+
" <td>NaN</td>\n",
|
224
|
+
" <td>Anytown</td>\n",
|
225
|
+
" <td>WA</td>\n",
|
226
|
+
" <td>00000</td>\n",
|
227
|
+
" <td>Household</td>\n",
|
228
|
+
" <td>Reference person</td>\n",
|
229
|
+
" <td>Male</td>\n",
|
230
|
+
" <td>White</td>\n",
|
231
|
+
" <td>2020</td>\n",
|
232
|
+
" <td>6</td>\n",
|
233
|
+
" </tr>\n",
|
234
|
+
" <tr>\n",
|
235
|
+
" <th>7</th>\n",
|
236
|
+
" <td>0_13973</td>\n",
|
237
|
+
" <td>0_5627</td>\n",
|
238
|
+
" <td>Anita</td>\n",
|
239
|
+
" <td>R</td>\n",
|
240
|
+
" <td>Murray</td>\n",
|
241
|
+
" <td>70</td>\n",
|
242
|
+
" <td>11/06/1949</td>\n",
|
243
|
+
" <td>17868</td>\n",
|
244
|
+
" <td>winding trail rd</td>\n",
|
245
|
+
" <td>NaN</td>\n",
|
246
|
+
" <td>Anytown</td>\n",
|
247
|
+
" <td>WA</td>\n",
|
248
|
+
" <td>00000</td>\n",
|
249
|
+
" <td>Household</td>\n",
|
250
|
+
" <td>Opposite-sex spouse</td>\n",
|
251
|
+
" <td>Female</td>\n",
|
252
|
+
" <td>White</td>\n",
|
253
|
+
" <td>2020</td>\n",
|
254
|
+
" <td>7</td>\n",
|
255
|
+
" </tr>\n",
|
256
|
+
" <tr>\n",
|
257
|
+
" <th>8</th>\n",
|
258
|
+
" <td>0_13974</td>\n",
|
259
|
+
" <td>0_5627</td>\n",
|
260
|
+
" <td>Jada</td>\n",
|
261
|
+
" <td>S</td>\n",
|
262
|
+
" <td>Murray</td>\n",
|
263
|
+
" <td>45</td>\n",
|
264
|
+
" <td>04/11/1974</td>\n",
|
265
|
+
" <td>17868</td>\n",
|
266
|
+
" <td>winding trail rd</td>\n",
|
267
|
+
" <td>NaN</td>\n",
|
268
|
+
" <td>Anytown</td>\n",
|
269
|
+
" <td>WA</td>\n",
|
270
|
+
" <td>00000</td>\n",
|
271
|
+
" <td>Household</td>\n",
|
272
|
+
" <td>Biological child</td>\n",
|
273
|
+
" <td>Female</td>\n",
|
274
|
+
" <td>White</td>\n",
|
275
|
+
" <td>2020</td>\n",
|
276
|
+
" <td>8</td>\n",
|
277
|
+
" </tr>\n",
|
278
|
+
" <tr>\n",
|
279
|
+
" <th>9</th>\n",
|
280
|
+
" <td>0_13975</td>\n",
|
281
|
+
" <td>0_5627</td>\n",
|
282
|
+
" <td>Toni</td>\n",
|
283
|
+
" <td>K</td>\n",
|
284
|
+
" <td>Murray</td>\n",
|
285
|
+
" <td>44</td>\n",
|
286
|
+
" <td>02/12/1976</td>\n",
|
287
|
+
" <td>17868</td>\n",
|
288
|
+
" <td>winding trail rd</td>\n",
|
289
|
+
" <td>NaN</td>\n",
|
290
|
+
" <td>Anytown</td>\n",
|
291
|
+
" <td>WA</td>\n",
|
292
|
+
" <td>00000</td>\n",
|
293
|
+
" <td>Household</td>\n",
|
294
|
+
" <td>Biological child</td>\n",
|
295
|
+
" <td>Female</td>\n",
|
296
|
+
" <td>White</td>\n",
|
297
|
+
" <td>2020</td>\n",
|
298
|
+
" <td>9</td>\n",
|
299
|
+
" </tr>\n",
|
300
|
+
" <tr>\n",
|
301
|
+
" <th>10</th>\n",
|
302
|
+
" <td>0_4</td>\n",
|
303
|
+
" <td>0_8</td>\n",
|
304
|
+
" <td>Eric</td>\n",
|
305
|
+
" <td>R</td>\n",
|
306
|
+
" <td>Alonso Tellez</td>\n",
|
307
|
+
" <td>38</td>\n",
|
308
|
+
" <td>05/29/1981</td>\n",
|
309
|
+
" <td>1501</td>\n",
|
310
|
+
" <td>interlake ave n</td>\n",
|
311
|
+
" <td>NaN</td>\n",
|
312
|
+
" <td>Anytown</td>\n",
|
313
|
+
" <td>WA</td>\n",
|
314
|
+
" <td>00000</td>\n",
|
315
|
+
" <td>Household</td>\n",
|
316
|
+
" <td>Reference person</td>\n",
|
317
|
+
" <td>Male</td>\n",
|
318
|
+
" <td>Latino</td>\n",
|
319
|
+
" <td>2020</td>\n",
|
320
|
+
" <td>10</td>\n",
|
321
|
+
" </tr>\n",
|
322
|
+
" <tr>\n",
|
323
|
+
" <th>11</th>\n",
|
324
|
+
" <td>0_5</td>\n",
|
325
|
+
" <td>0_8</td>\n",
|
326
|
+
" <td>NaN</td>\n",
|
327
|
+
" <td>R</td>\n",
|
328
|
+
" <td>Alonso Tellez</td>\n",
|
329
|
+
" <td>40</td>\n",
|
330
|
+
" <td>10/27/1979</td>\n",
|
331
|
+
" <td>1501</td>\n",
|
332
|
+
" <td>interlake ave n</td>\n",
|
333
|
+
" <td>NaN</td>\n",
|
334
|
+
" <td>Anytown</td>\n",
|
335
|
+
" <td>WA</td>\n",
|
336
|
+
" <td>00000</td>\n",
|
337
|
+
" <td>Household</td>\n",
|
338
|
+
" <td>Opposite-sex spouse</td>\n",
|
339
|
+
" <td>Female</td>\n",
|
340
|
+
" <td>Latino</td>\n",
|
341
|
+
" <td>2020</td>\n",
|
342
|
+
" <td>11</td>\n",
|
343
|
+
" </tr>\n",
|
344
|
+
" <tr>\n",
|
345
|
+
" <th>12</th>\n",
|
346
|
+
" <td>0_6</td>\n",
|
347
|
+
" <td>0_8</td>\n",
|
348
|
+
" <td>Elan</td>\n",
|
349
|
+
" <td>N</td>\n",
|
350
|
+
" <td>Alonso Tellez</td>\n",
|
351
|
+
" <td>6</td>\n",
|
352
|
+
" <td>07/30/2013</td>\n",
|
353
|
+
" <td>1501</td>\n",
|
354
|
+
" <td>interlake ave n</td>\n",
|
355
|
+
" <td>NaN</td>\n",
|
356
|
+
" <td>Anytown</td>\n",
|
357
|
+
" <td>WA</td>\n",
|
358
|
+
" <td>00000</td>\n",
|
359
|
+
" <td>Household</td>\n",
|
360
|
+
" <td>Biological child</td>\n",
|
361
|
+
" <td>Male</td>\n",
|
362
|
+
" <td>Latino</td>\n",
|
363
|
+
" <td>2020</td>\n",
|
364
|
+
" <td>12</td>\n",
|
365
|
+
" </tr>\n",
|
366
|
+
" <tr>\n",
|
367
|
+
" <th>13</th>\n",
|
368
|
+
" <td>0_5621</td>\n",
|
369
|
+
" <td>0_2289</td>\n",
|
370
|
+
" <td>Derick</td>\n",
|
371
|
+
" <td>W</td>\n",
|
372
|
+
" <td>Castillo</td>\n",
|
373
|
+
" <td>49</td>\n",
|
374
|
+
" <td>07/27/1970</td>\n",
|
375
|
+
" <td>3</td>\n",
|
376
|
+
" <td>stoke ct</td>\n",
|
377
|
+
" <td>NaN</td>\n",
|
378
|
+
" <td>Anytown</td>\n",
|
379
|
+
" <td>WA</td>\n",
|
380
|
+
" <td>00000</td>\n",
|
381
|
+
" <td>Household</td>\n",
|
382
|
+
" <td>Reference person</td>\n",
|
383
|
+
" <td>Male</td>\n",
|
384
|
+
" <td>Latino</td>\n",
|
385
|
+
" <td>2020</td>\n",
|
386
|
+
" <td>13</td>\n",
|
387
|
+
" </tr>\n",
|
388
|
+
" <tr>\n",
|
389
|
+
" <th>14</th>\n",
|
390
|
+
" <td>0_5622</td>\n",
|
391
|
+
" <td>0_2289</td>\n",
|
392
|
+
" <td>Kaylee</td>\n",
|
393
|
+
" <td>A</td>\n",
|
394
|
+
" <td>Castillo</td>\n",
|
395
|
+
" <td>16</td>\n",
|
396
|
+
" <td>06/10/2003</td>\n",
|
397
|
+
" <td>3</td>\n",
|
398
|
+
" <td>stoke ct</td>\n",
|
399
|
+
" <td>NaN</td>\n",
|
400
|
+
" <td>Anytown</td>\n",
|
401
|
+
" <td>WA</td>\n",
|
402
|
+
" <td>00000</td>\n",
|
403
|
+
" <td>Household</td>\n",
|
404
|
+
" <td>Biological child</td>\n",
|
405
|
+
" <td>Female</td>\n",
|
406
|
+
" <td>Latino</td>\n",
|
407
|
+
" <td>2020</td>\n",
|
408
|
+
" <td>14</td>\n",
|
409
|
+
" </tr>\n",
|
410
|
+
" <tr>\n",
|
411
|
+
" <th>15</th>\n",
|
412
|
+
" <td>0_5623</td>\n",
|
413
|
+
" <td>0_2289</td>\n",
|
414
|
+
" <td>Heather</td>\n",
|
415
|
+
" <td>J</td>\n",
|
416
|
+
" <td>Castillo</td>\n",
|
417
|
+
" <td>46</td>\n",
|
418
|
+
" <td>07/24/1973</td>\n",
|
419
|
+
" <td>3</td>\n",
|
420
|
+
" <td>stoke ct</td>\n",
|
421
|
+
" <td>NaN</td>\n",
|
422
|
+
" <td>Anytown</td>\n",
|
423
|
+
" <td>WA</td>\n",
|
424
|
+
" <td>00000</td>\n",
|
425
|
+
" <td>Household</td>\n",
|
426
|
+
" <td>Opposite-sex unmarried partner</td>\n",
|
427
|
+
" <td>Female</td>\n",
|
428
|
+
" <td>Latino</td>\n",
|
429
|
+
" <td>2020</td>\n",
|
430
|
+
" <td>15</td>\n",
|
431
|
+
" </tr>\n",
|
432
|
+
" <tr>\n",
|
433
|
+
" <th>16</th>\n",
|
434
|
+
" <td>0_7251</td>\n",
|
435
|
+
" <td>0_2957</td>\n",
|
436
|
+
" <td>Patrick</td>\n",
|
437
|
+
" <td>E</td>\n",
|
438
|
+
" <td>Rodriguez</td>\n",
|
439
|
+
" <td>53</td>\n",
|
440
|
+
" <td>03/26/1966</td>\n",
|
441
|
+
" <td>927</td>\n",
|
442
|
+
" <td>broomfield ln</td>\n",
|
443
|
+
" <td>NaN</td>\n",
|
444
|
+
" <td>Anytown</td>\n",
|
445
|
+
" <td>WA</td>\n",
|
446
|
+
" <td>00000</td>\n",
|
447
|
+
" <td>Household</td>\n",
|
448
|
+
" <td>Reference person</td>\n",
|
449
|
+
" <td>Male</td>\n",
|
450
|
+
" <td>Latino</td>\n",
|
451
|
+
" <td>2020</td>\n",
|
452
|
+
" <td>16</td>\n",
|
453
|
+
" </tr>\n",
|
454
|
+
" <tr>\n",
|
455
|
+
" <th>17</th>\n",
|
456
|
+
" <td>0_7252</td>\n",
|
457
|
+
" <td>0_2957</td>\n",
|
458
|
+
" <td>Melisa</td>\n",
|
459
|
+
" <td>D</td>\n",
|
460
|
+
" <td>Rodriguez</td>\n",
|
461
|
+
" <td>41</td>\n",
|
462
|
+
" <td>05/26/1978</td>\n",
|
463
|
+
" <td>927</td>\n",
|
464
|
+
" <td>broomfield ln</td>\n",
|
465
|
+
" <td>NaN</td>\n",
|
466
|
+
" <td>Anytown</td>\n",
|
467
|
+
" <td>WA</td>\n",
|
468
|
+
" <td>00000</td>\n",
|
469
|
+
" <td>Household</td>\n",
|
470
|
+
" <td>Opposite-sex spouse</td>\n",
|
471
|
+
" <td>Female</td>\n",
|
472
|
+
" <td>Latino</td>\n",
|
473
|
+
" <td>2020</td>\n",
|
474
|
+
" <td>17</td>\n",
|
475
|
+
" </tr>\n",
|
476
|
+
" <tr>\n",
|
477
|
+
" <th>18</th>\n",
|
478
|
+
" <td>0_7254</td>\n",
|
479
|
+
" <td>0_2957</td>\n",
|
480
|
+
" <td>Caleb</td>\n",
|
481
|
+
" <td>G</td>\n",
|
482
|
+
" <td>Rodriguez</td>\n",
|
483
|
+
" <td>13</td>\n",
|
484
|
+
" <td>01/10/2007</td>\n",
|
485
|
+
" <td>927</td>\n",
|
486
|
+
" <td>broomfield ln</td>\n",
|
487
|
+
" <td>NaN</td>\n",
|
488
|
+
" <td>Anytown</td>\n",
|
489
|
+
" <td>WA</td>\n",
|
490
|
+
" <td>00000</td>\n",
|
491
|
+
" <td>Household</td>\n",
|
492
|
+
" <td>Biological child</td>\n",
|
493
|
+
" <td>Male</td>\n",
|
494
|
+
" <td>White</td>\n",
|
495
|
+
" <td>2020</td>\n",
|
496
|
+
" <td>18</td>\n",
|
497
|
+
" </tr>\n",
|
498
|
+
" <tr>\n",
|
499
|
+
" <th>19</th>\n",
|
500
|
+
" <td>0_7255</td>\n",
|
501
|
+
" <td>0_2957</td>\n",
|
502
|
+
" <td>Henry</td>\n",
|
503
|
+
" <td>C</td>\n",
|
504
|
+
" <td>Rodriguez</td>\n",
|
505
|
+
" <td>12</td>\n",
|
506
|
+
" <td>07/09/2007</td>\n",
|
507
|
+
" <td>927</td>\n",
|
508
|
+
" <td>broomfield ln</td>\n",
|
509
|
+
" <td>NaN</td>\n",
|
510
|
+
" <td>Anytown</td>\n",
|
511
|
+
" <td>WA</td>\n",
|
512
|
+
" <td>00000</td>\n",
|
513
|
+
" <td>Household</td>\n",
|
514
|
+
" <td>Biological child</td>\n",
|
515
|
+
" <td>Male</td>\n",
|
516
|
+
" <td>White</td>\n",
|
517
|
+
" <td>2020</td>\n",
|
518
|
+
" <td>19</td>\n",
|
519
|
+
" </tr>\n",
|
520
|
+
" <tr>\n",
|
521
|
+
" <th>20</th>\n",
|
522
|
+
" <td>0_7256</td>\n",
|
523
|
+
" <td>0_2957</td>\n",
|
524
|
+
" <td>Kynleigh</td>\n",
|
525
|
+
" <td>J</td>\n",
|
526
|
+
" <td>Rodriguez</td>\n",
|
527
|
+
" <td>2</td>\n",
|
528
|
+
" <td>05/06/2017</td>\n",
|
529
|
+
" <td>927</td>\n",
|
530
|
+
" <td>broomfield ln</td>\n",
|
531
|
+
" <td>NaN</td>\n",
|
532
|
+
" <td>Anytown</td>\n",
|
533
|
+
" <td>WA</td>\n",
|
534
|
+
" <td>00000</td>\n",
|
535
|
+
" <td>Household</td>\n",
|
536
|
+
" <td>Biological child</td>\n",
|
537
|
+
" <td>Male</td>\n",
|
538
|
+
" <td>White</td>\n",
|
539
|
+
" <td>2020</td>\n",
|
540
|
+
" <td>20</td>\n",
|
541
|
+
" </tr>\n",
|
542
|
+
" <tr>\n",
|
543
|
+
" <th>21</th>\n",
|
544
|
+
" <td>0_9</td>\n",
|
545
|
+
" <td>0_6357</td>\n",
|
546
|
+
" <td>Elijah</td>\n",
|
547
|
+
" <td>N</td>\n",
|
548
|
+
" <td>Esquivel</td>\n",
|
549
|
+
" <td>4</td>\n",
|
550
|
+
" <td>10/31/2015</td>\n",
|
551
|
+
" <td>1648</td>\n",
|
552
|
+
" <td>eagle heights</td>\n",
|
553
|
+
" <td>NaN</td>\n",
|
554
|
+
" <td>Anytown</td>\n",
|
555
|
+
" <td>WA</td>\n",
|
556
|
+
" <td>00000</td>\n",
|
557
|
+
" <td>Household</td>\n",
|
558
|
+
" <td>Other nonrelative</td>\n",
|
559
|
+
" <td>Male</td>\n",
|
560
|
+
" <td>White</td>\n",
|
561
|
+
" <td>2020</td>\n",
|
562
|
+
" <td>21</td>\n",
|
563
|
+
" </tr>\n",
|
564
|
+
" <tr>\n",
|
565
|
+
" <th>22</th>\n",
|
566
|
+
" <td>0_5189</td>\n",
|
567
|
+
" <td>0_2124</td>\n",
|
568
|
+
" <td>David</td>\n",
|
569
|
+
" <td>R</td>\n",
|
570
|
+
" <td>Schletzbaum</td>\n",
|
571
|
+
" <td>50</td>\n",
|
572
|
+
" <td>09/13/1969</td>\n",
|
573
|
+
" <td>NaN</td>\n",
|
574
|
+
" <td>stromberg ct</td>\n",
|
575
|
+
" <td>NaN</td>\n",
|
576
|
+
" <td>Anytown</td>\n",
|
577
|
+
" <td>WA</td>\n",
|
578
|
+
" <td>00000</td>\n",
|
579
|
+
" <td>Household</td>\n",
|
580
|
+
" <td>Reference person</td>\n",
|
581
|
+
" <td>Male</td>\n",
|
582
|
+
" <td>White</td>\n",
|
583
|
+
" <td>2020</td>\n",
|
584
|
+
" <td>22</td>\n",
|
585
|
+
" </tr>\n",
|
586
|
+
" <tr>\n",
|
587
|
+
" <th>23</th>\n",
|
588
|
+
" <td>0_5190</td>\n",
|
589
|
+
" <td>0_2124</td>\n",
|
590
|
+
" <td>Sara</td>\n",
|
591
|
+
" <td>H</td>\n",
|
592
|
+
" <td>Schletzbaum</td>\n",
|
593
|
+
" <td>40</td>\n",
|
594
|
+
" <td>01/30/1980</td>\n",
|
595
|
+
" <td>626</td>\n",
|
596
|
+
" <td>stromberg ct</td>\n",
|
597
|
+
" <td>NaN</td>\n",
|
598
|
+
" <td>Anytown</td>\n",
|
599
|
+
" <td>WA</td>\n",
|
600
|
+
" <td>00000</td>\n",
|
601
|
+
" <td>Household</td>\n",
|
602
|
+
" <td>Opposite-sex spouse</td>\n",
|
603
|
+
" <td>Female</td>\n",
|
604
|
+
" <td>White</td>\n",
|
605
|
+
" <td>2020</td>\n",
|
606
|
+
" <td>23</td>\n",
|
607
|
+
" </tr>\n",
|
608
|
+
" <tr>\n",
|
609
|
+
" <th>24</th>\n",
|
610
|
+
" <td>0_5191</td>\n",
|
611
|
+
" <td>0_2124</td>\n",
|
612
|
+
" <td>Brooklyn</td>\n",
|
613
|
+
" <td>N</td>\n",
|
614
|
+
" <td>Schletzbaum</td>\n",
|
615
|
+
" <td>12</td>\n",
|
616
|
+
" <td>10/20/2007</td>\n",
|
617
|
+
" <td>626</td>\n",
|
618
|
+
" <td>NaN</td>\n",
|
619
|
+
" <td>NaN</td>\n",
|
620
|
+
" <td>Anytown</td>\n",
|
621
|
+
" <td>WA</td>\n",
|
622
|
+
" <td>00000</td>\n",
|
623
|
+
" <td>Household</td>\n",
|
624
|
+
" <td>Biological child</td>\n",
|
625
|
+
" <td>Female</td>\n",
|
626
|
+
" <td>White</td>\n",
|
627
|
+
" <td>2020</td>\n",
|
628
|
+
" <td>24</td>\n",
|
629
|
+
" </tr>\n",
|
630
|
+
" </tbody>\n",
|
631
|
+
"</table>\n",
|
632
|
+
"</div>"
|
633
|
+
],
|
634
|
+
"text/plain": [
|
635
|
+
" simulant_id household_id first_name middle_initial last_name age \\\n",
|
636
|
+
"0 0_2 0_7 Diana P Kofron 25 \n",
|
637
|
+
"1 0_3 0_7 Anna A Kofron 25 \n",
|
638
|
+
"2 0_923 0_8033 Gerald R Butler 76 \n",
|
639
|
+
"3 0_2641 0_1066 Loretta T Carley 61 \n",
|
640
|
+
"4 0_2801 0_1138 Richard R Jones 73 \n",
|
641
|
+
"5 0_6176 0_2514 Sandra S Runnalls 66 \n",
|
642
|
+
"6 0_13972 0_5627 Jerry E Murray 70 \n",
|
643
|
+
"7 0_13973 0_5627 Anita R Murray 70 \n",
|
644
|
+
"8 0_13974 0_5627 Jada S Murray 45 \n",
|
645
|
+
"9 0_13975 0_5627 Toni K Murray 44 \n",
|
646
|
+
"10 0_4 0_8 Eric R Alonso Tellez 38 \n",
|
647
|
+
"11 0_5 0_8 NaN R Alonso Tellez 40 \n",
|
648
|
+
"12 0_6 0_8 Elan N Alonso Tellez 6 \n",
|
649
|
+
"13 0_5621 0_2289 Derick W Castillo 49 \n",
|
650
|
+
"14 0_5622 0_2289 Kaylee A Castillo 16 \n",
|
651
|
+
"15 0_5623 0_2289 Heather J Castillo 46 \n",
|
652
|
+
"16 0_7251 0_2957 Patrick E Rodriguez 53 \n",
|
653
|
+
"17 0_7252 0_2957 Melisa D Rodriguez 41 \n",
|
654
|
+
"18 0_7254 0_2957 Caleb G Rodriguez 13 \n",
|
655
|
+
"19 0_7255 0_2957 Henry C Rodriguez 12 \n",
|
656
|
+
"20 0_7256 0_2957 Kynleigh J Rodriguez 2 \n",
|
657
|
+
"21 0_9 0_6357 Elijah N Esquivel 4 \n",
|
658
|
+
"22 0_5189 0_2124 David R Schletzbaum 50 \n",
|
659
|
+
"23 0_5190 0_2124 Sara H Schletzbaum 40 \n",
|
660
|
+
"24 0_5191 0_2124 Brooklyn N Schletzbaum 12 \n",
|
661
|
+
"\n",
|
662
|
+
" date_of_birth street_number street_name unit_number city state \\\n",
|
663
|
+
"0 05/06/1994 5112 145th st NaN Anytown WA \n",
|
664
|
+
"1 09/29/1994 5112 145th st NaN Anytown WA \n",
|
665
|
+
"2 11/03/1943 1130 mallory ln NaN Anytown WA \n",
|
666
|
+
"3 07/71/1958 NaN delacorte dr NaN Anytown WA \n",
|
667
|
+
"4 03/03/1947 950 caribou lane NaN Anytown WA \n",
|
668
|
+
"5 03/18/1954 4458 windsor pl NaN Anytown WA \n",
|
669
|
+
"6 01/03/1950 17868 winding trail rd NaN Anytown WA \n",
|
670
|
+
"7 11/06/1949 17868 winding trail rd NaN Anytown WA \n",
|
671
|
+
"8 04/11/1974 17868 winding trail rd NaN Anytown WA \n",
|
672
|
+
"9 02/12/1976 17868 winding trail rd NaN Anytown WA \n",
|
673
|
+
"10 05/29/1981 1501 interlake ave n NaN Anytown WA \n",
|
674
|
+
"11 10/27/1979 1501 interlake ave n NaN Anytown WA \n",
|
675
|
+
"12 07/30/2013 1501 interlake ave n NaN Anytown WA \n",
|
676
|
+
"13 07/27/1970 3 stoke ct NaN Anytown WA \n",
|
677
|
+
"14 06/10/2003 3 stoke ct NaN Anytown WA \n",
|
678
|
+
"15 07/24/1973 3 stoke ct NaN Anytown WA \n",
|
679
|
+
"16 03/26/1966 927 broomfield ln NaN Anytown WA \n",
|
680
|
+
"17 05/26/1978 927 broomfield ln NaN Anytown WA \n",
|
681
|
+
"18 01/10/2007 927 broomfield ln NaN Anytown WA \n",
|
682
|
+
"19 07/09/2007 927 broomfield ln NaN Anytown WA \n",
|
683
|
+
"20 05/06/2017 927 broomfield ln NaN Anytown WA \n",
|
684
|
+
"21 10/31/2015 1648 eagle heights NaN Anytown WA \n",
|
685
|
+
"22 09/13/1969 NaN stromberg ct NaN Anytown WA \n",
|
686
|
+
"23 01/30/1980 626 stromberg ct NaN Anytown WA \n",
|
687
|
+
"24 10/20/2007 626 NaN NaN Anytown WA \n",
|
688
|
+
"\n",
|
689
|
+
" zipcode housing_type relationship_to_reference_person sex \\\n",
|
690
|
+
"0 00000 Household Reference person Female \n",
|
691
|
+
"1 00000 Household Other relative Female \n",
|
692
|
+
"2 00000 Household Reference person Male \n",
|
693
|
+
"3 00000 Household Reference person Female \n",
|
694
|
+
"4 00000 Household Reference person Male \n",
|
695
|
+
"5 00000 Household Reference person Female \n",
|
696
|
+
"6 00000 Household Reference person Male \n",
|
697
|
+
"7 00000 Household Opposite-sex spouse Female \n",
|
698
|
+
"8 00000 Household Biological child Female \n",
|
699
|
+
"9 00000 Household Biological child Female \n",
|
700
|
+
"10 00000 Household Reference person Male \n",
|
701
|
+
"11 00000 Household Opposite-sex spouse Female \n",
|
702
|
+
"12 00000 Household Biological child Male \n",
|
703
|
+
"13 00000 Household Reference person Male \n",
|
704
|
+
"14 00000 Household Biological child Female \n",
|
705
|
+
"15 00000 Household Opposite-sex unmarried partner Female \n",
|
706
|
+
"16 00000 Household Reference person Male \n",
|
707
|
+
"17 00000 Household Opposite-sex spouse Female \n",
|
708
|
+
"18 00000 Household Biological child Male \n",
|
709
|
+
"19 00000 Household Biological child Male \n",
|
710
|
+
"20 00000 Household Biological child Male \n",
|
711
|
+
"21 00000 Household Other nonrelative Male \n",
|
712
|
+
"22 00000 Household Reference person Male \n",
|
713
|
+
"23 00000 Household Opposite-sex spouse Female \n",
|
714
|
+
"24 00000 Household Biological child Female \n",
|
715
|
+
"\n",
|
716
|
+
" race_ethnicity year Record ID \n",
|
717
|
+
"0 White 2020 0 \n",
|
718
|
+
"1 White 2020 1 \n",
|
719
|
+
"2 Black 2020 2 \n",
|
720
|
+
"3 White 2020 3 \n",
|
721
|
+
"4 White 2020 4 \n",
|
722
|
+
"5 Multiracial or Other 2020 5 \n",
|
723
|
+
"6 White 2020 6 \n",
|
724
|
+
"7 White 2020 7 \n",
|
725
|
+
"8 White 2020 8 \n",
|
726
|
+
"9 White 2020 9 \n",
|
727
|
+
"10 Latino 2020 10 \n",
|
728
|
+
"11 Latino 2020 11 \n",
|
729
|
+
"12 Latino 2020 12 \n",
|
730
|
+
"13 Latino 2020 13 \n",
|
731
|
+
"14 Latino 2020 14 \n",
|
732
|
+
"15 Latino 2020 15 \n",
|
733
|
+
"16 Latino 2020 16 \n",
|
734
|
+
"17 Latino 2020 17 \n",
|
735
|
+
"18 White 2020 18 \n",
|
736
|
+
"19 White 2020 19 \n",
|
737
|
+
"20 White 2020 20 \n",
|
738
|
+
"21 White 2020 21 \n",
|
739
|
+
"22 White 2020 22 \n",
|
740
|
+
"23 White 2020 23 \n",
|
741
|
+
"24 White 2020 24 "
|
742
|
+
]
|
743
|
+
},
|
744
|
+
"execution_count": 2,
|
745
|
+
"metadata": {},
|
746
|
+
"output_type": "execute_result"
|
747
|
+
}
|
748
|
+
],
|
749
|
+
"source": [
|
750
|
+
"file_stem_1 = \"input_file_1\"\n",
|
751
|
+
"file_stem_2 = \"input_file_2\"\n",
|
752
|
+
"\n",
|
753
|
+
"psp_census = psp.generate_decennial_census().head(25)\n",
|
754
|
+
"df1 = psp_census\n",
|
755
|
+
"df2 = psp_census[0:20] # df2 has most but not all of the records in df1\n",
|
756
|
+
"df1[\"Record ID\"] = df1.index\n",
|
757
|
+
"df2[\"Record ID\"] = df2.index\n",
|
758
|
+
"\n",
|
759
|
+
"df1"
|
760
|
+
]
|
761
|
+
},
|
762
|
+
{
|
763
|
+
"cell_type": "code",
|
764
|
+
"execution_count": 3,
|
765
|
+
"id": "eb14b165",
|
766
|
+
"metadata": {},
|
767
|
+
"outputs": [
|
768
|
+
{
|
769
|
+
"data": {
|
770
|
+
"text/html": [
|
771
|
+
"<div>\n",
|
772
|
+
"<style scoped>\n",
|
773
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
774
|
+
" vertical-align: middle;\n",
|
775
|
+
" }\n",
|
776
|
+
"\n",
|
777
|
+
" .dataframe tbody tr th {\n",
|
778
|
+
" vertical-align: top;\n",
|
779
|
+
" }\n",
|
780
|
+
"\n",
|
781
|
+
" .dataframe thead th {\n",
|
782
|
+
" text-align: right;\n",
|
783
|
+
" }\n",
|
784
|
+
"</style>\n",
|
785
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
786
|
+
" <thead>\n",
|
787
|
+
" <tr style=\"text-align: right;\">\n",
|
788
|
+
" <th></th>\n",
|
789
|
+
" <th>simulant_id</th>\n",
|
790
|
+
" <th>household_id</th>\n",
|
791
|
+
" <th>first_name</th>\n",
|
792
|
+
" <th>middle_initial</th>\n",
|
793
|
+
" <th>last_name</th>\n",
|
794
|
+
" <th>age</th>\n",
|
795
|
+
" <th>date_of_birth</th>\n",
|
796
|
+
" <th>street_number</th>\n",
|
797
|
+
" <th>street_name</th>\n",
|
798
|
+
" <th>unit_number</th>\n",
|
799
|
+
" <th>city</th>\n",
|
800
|
+
" <th>state</th>\n",
|
801
|
+
" <th>zipcode</th>\n",
|
802
|
+
" <th>housing_type</th>\n",
|
803
|
+
" <th>relationship_to_reference_person</th>\n",
|
804
|
+
" <th>sex</th>\n",
|
805
|
+
" <th>race_ethnicity</th>\n",
|
806
|
+
" <th>year</th>\n",
|
807
|
+
" <th>Record ID</th>\n",
|
808
|
+
" </tr>\n",
|
809
|
+
" </thead>\n",
|
810
|
+
" <tbody>\n",
|
811
|
+
" <tr>\n",
|
812
|
+
" <th>0</th>\n",
|
813
|
+
" <td>0_2</td>\n",
|
814
|
+
" <td>0_7</td>\n",
|
815
|
+
" <td>Diana</td>\n",
|
816
|
+
" <td>P</td>\n",
|
817
|
+
" <td>Kofron</td>\n",
|
818
|
+
" <td>25</td>\n",
|
819
|
+
" <td>05/06/1994</td>\n",
|
820
|
+
" <td>5112</td>\n",
|
821
|
+
" <td>145th st</td>\n",
|
822
|
+
" <td>NaN</td>\n",
|
823
|
+
" <td>Anytown</td>\n",
|
824
|
+
" <td>WA</td>\n",
|
825
|
+
" <td>00000</td>\n",
|
826
|
+
" <td>Household</td>\n",
|
827
|
+
" <td>Reference person</td>\n",
|
828
|
+
" <td>Female</td>\n",
|
829
|
+
" <td>White</td>\n",
|
830
|
+
" <td>2020</td>\n",
|
831
|
+
" <td>0</td>\n",
|
832
|
+
" </tr>\n",
|
833
|
+
" <tr>\n",
|
834
|
+
" <th>1</th>\n",
|
835
|
+
" <td>0_3</td>\n",
|
836
|
+
" <td>0_7</td>\n",
|
837
|
+
" <td>Anna</td>\n",
|
838
|
+
" <td>A</td>\n",
|
839
|
+
" <td>Kofron</td>\n",
|
840
|
+
" <td>25</td>\n",
|
841
|
+
" <td>09/29/1994</td>\n",
|
842
|
+
" <td>5112</td>\n",
|
843
|
+
" <td>145th st</td>\n",
|
844
|
+
" <td>NaN</td>\n",
|
845
|
+
" <td>Anytown</td>\n",
|
846
|
+
" <td>WA</td>\n",
|
847
|
+
" <td>00000</td>\n",
|
848
|
+
" <td>Household</td>\n",
|
849
|
+
" <td>Other relative</td>\n",
|
850
|
+
" <td>Female</td>\n",
|
851
|
+
" <td>White</td>\n",
|
852
|
+
" <td>2020</td>\n",
|
853
|
+
" <td>1</td>\n",
|
854
|
+
" </tr>\n",
|
855
|
+
" <tr>\n",
|
856
|
+
" <th>2</th>\n",
|
857
|
+
" <td>0_923</td>\n",
|
858
|
+
" <td>0_8033</td>\n",
|
859
|
+
" <td>Gerald</td>\n",
|
860
|
+
" <td>R</td>\n",
|
861
|
+
" <td>Butler</td>\n",
|
862
|
+
" <td>76</td>\n",
|
863
|
+
" <td>11/03/1943</td>\n",
|
864
|
+
" <td>1130</td>\n",
|
865
|
+
" <td>mallory ln</td>\n",
|
866
|
+
" <td>NaN</td>\n",
|
867
|
+
" <td>Anytown</td>\n",
|
868
|
+
" <td>WA</td>\n",
|
869
|
+
" <td>00000</td>\n",
|
870
|
+
" <td>Household</td>\n",
|
871
|
+
" <td>Reference person</td>\n",
|
872
|
+
" <td>Male</td>\n",
|
873
|
+
" <td>Black</td>\n",
|
874
|
+
" <td>2020</td>\n",
|
875
|
+
" <td>2</td>\n",
|
876
|
+
" </tr>\n",
|
877
|
+
" <tr>\n",
|
878
|
+
" <th>3</th>\n",
|
879
|
+
" <td>0_2641</td>\n",
|
880
|
+
" <td>0_1066</td>\n",
|
881
|
+
" <td>Loretta</td>\n",
|
882
|
+
" <td>T</td>\n",
|
883
|
+
" <td>Carley</td>\n",
|
884
|
+
" <td>61</td>\n",
|
885
|
+
" <td>07/71/1958</td>\n",
|
886
|
+
" <td>NaN</td>\n",
|
887
|
+
" <td>delacorte dr</td>\n",
|
888
|
+
" <td>NaN</td>\n",
|
889
|
+
" <td>Anytown</td>\n",
|
890
|
+
" <td>WA</td>\n",
|
891
|
+
" <td>00000</td>\n",
|
892
|
+
" <td>Household</td>\n",
|
893
|
+
" <td>Reference person</td>\n",
|
894
|
+
" <td>Female</td>\n",
|
895
|
+
" <td>White</td>\n",
|
896
|
+
" <td>2020</td>\n",
|
897
|
+
" <td>3</td>\n",
|
898
|
+
" </tr>\n",
|
899
|
+
" <tr>\n",
|
900
|
+
" <th>4</th>\n",
|
901
|
+
" <td>0_2801</td>\n",
|
902
|
+
" <td>0_1138</td>\n",
|
903
|
+
" <td>Richard</td>\n",
|
904
|
+
" <td>R</td>\n",
|
905
|
+
" <td>Jones</td>\n",
|
906
|
+
" <td>73</td>\n",
|
907
|
+
" <td>03/03/1947</td>\n",
|
908
|
+
" <td>950</td>\n",
|
909
|
+
" <td>caribou lane</td>\n",
|
910
|
+
" <td>NaN</td>\n",
|
911
|
+
" <td>Anytown</td>\n",
|
912
|
+
" <td>WA</td>\n",
|
913
|
+
" <td>00000</td>\n",
|
914
|
+
" <td>Household</td>\n",
|
915
|
+
" <td>Reference person</td>\n",
|
916
|
+
" <td>Male</td>\n",
|
917
|
+
" <td>White</td>\n",
|
918
|
+
" <td>2020</td>\n",
|
919
|
+
" <td>4</td>\n",
|
920
|
+
" </tr>\n",
|
921
|
+
" <tr>\n",
|
922
|
+
" <th>5</th>\n",
|
923
|
+
" <td>0_6176</td>\n",
|
924
|
+
" <td>0_2514</td>\n",
|
925
|
+
" <td>Sandra</td>\n",
|
926
|
+
" <td>S</td>\n",
|
927
|
+
" <td>Runnalls</td>\n",
|
928
|
+
" <td>66</td>\n",
|
929
|
+
" <td>03/18/1954</td>\n",
|
930
|
+
" <td>4458</td>\n",
|
931
|
+
" <td>windsor pl</td>\n",
|
932
|
+
" <td>NaN</td>\n",
|
933
|
+
" <td>Anytown</td>\n",
|
934
|
+
" <td>WA</td>\n",
|
935
|
+
" <td>00000</td>\n",
|
936
|
+
" <td>Household</td>\n",
|
937
|
+
" <td>Reference person</td>\n",
|
938
|
+
" <td>Female</td>\n",
|
939
|
+
" <td>Multiracial or Other</td>\n",
|
940
|
+
" <td>2020</td>\n",
|
941
|
+
" <td>5</td>\n",
|
942
|
+
" </tr>\n",
|
943
|
+
" <tr>\n",
|
944
|
+
" <th>6</th>\n",
|
945
|
+
" <td>0_13972</td>\n",
|
946
|
+
" <td>0_5627</td>\n",
|
947
|
+
" <td>Jerry</td>\n",
|
948
|
+
" <td>E</td>\n",
|
949
|
+
" <td>Murray</td>\n",
|
950
|
+
" <td>70</td>\n",
|
951
|
+
" <td>01/03/1950</td>\n",
|
952
|
+
" <td>17868</td>\n",
|
953
|
+
" <td>winding trail rd</td>\n",
|
954
|
+
" <td>NaN</td>\n",
|
955
|
+
" <td>Anytown</td>\n",
|
956
|
+
" <td>WA</td>\n",
|
957
|
+
" <td>00000</td>\n",
|
958
|
+
" <td>Household</td>\n",
|
959
|
+
" <td>Reference person</td>\n",
|
960
|
+
" <td>Male</td>\n",
|
961
|
+
" <td>White</td>\n",
|
962
|
+
" <td>2020</td>\n",
|
963
|
+
" <td>6</td>\n",
|
964
|
+
" </tr>\n",
|
965
|
+
" <tr>\n",
|
966
|
+
" <th>7</th>\n",
|
967
|
+
" <td>0_13973</td>\n",
|
968
|
+
" <td>0_5627</td>\n",
|
969
|
+
" <td>Anita</td>\n",
|
970
|
+
" <td>R</td>\n",
|
971
|
+
" <td>Murray</td>\n",
|
972
|
+
" <td>70</td>\n",
|
973
|
+
" <td>11/06/1949</td>\n",
|
974
|
+
" <td>17868</td>\n",
|
975
|
+
" <td>winding trail rd</td>\n",
|
976
|
+
" <td>NaN</td>\n",
|
977
|
+
" <td>Anytown</td>\n",
|
978
|
+
" <td>WA</td>\n",
|
979
|
+
" <td>00000</td>\n",
|
980
|
+
" <td>Household</td>\n",
|
981
|
+
" <td>Opposite-sex spouse</td>\n",
|
982
|
+
" <td>Female</td>\n",
|
983
|
+
" <td>White</td>\n",
|
984
|
+
" <td>2020</td>\n",
|
985
|
+
" <td>7</td>\n",
|
986
|
+
" </tr>\n",
|
987
|
+
" <tr>\n",
|
988
|
+
" <th>8</th>\n",
|
989
|
+
" <td>0_13974</td>\n",
|
990
|
+
" <td>0_5627</td>\n",
|
991
|
+
" <td>Jada</td>\n",
|
992
|
+
" <td>S</td>\n",
|
993
|
+
" <td>Murray</td>\n",
|
994
|
+
" <td>45</td>\n",
|
995
|
+
" <td>04/11/1974</td>\n",
|
996
|
+
" <td>17868</td>\n",
|
997
|
+
" <td>winding trail rd</td>\n",
|
998
|
+
" <td>NaN</td>\n",
|
999
|
+
" <td>Anytown</td>\n",
|
1000
|
+
" <td>WA</td>\n",
|
1001
|
+
" <td>00000</td>\n",
|
1002
|
+
" <td>Household</td>\n",
|
1003
|
+
" <td>Biological child</td>\n",
|
1004
|
+
" <td>Female</td>\n",
|
1005
|
+
" <td>White</td>\n",
|
1006
|
+
" <td>2020</td>\n",
|
1007
|
+
" <td>8</td>\n",
|
1008
|
+
" </tr>\n",
|
1009
|
+
" <tr>\n",
|
1010
|
+
" <th>9</th>\n",
|
1011
|
+
" <td>0_13975</td>\n",
|
1012
|
+
" <td>0_5627</td>\n",
|
1013
|
+
" <td>Toni</td>\n",
|
1014
|
+
" <td>K</td>\n",
|
1015
|
+
" <td>Murray</td>\n",
|
1016
|
+
" <td>44</td>\n",
|
1017
|
+
" <td>02/12/1976</td>\n",
|
1018
|
+
" <td>17868</td>\n",
|
1019
|
+
" <td>winding trail rd</td>\n",
|
1020
|
+
" <td>NaN</td>\n",
|
1021
|
+
" <td>Anytown</td>\n",
|
1022
|
+
" <td>WA</td>\n",
|
1023
|
+
" <td>00000</td>\n",
|
1024
|
+
" <td>Household</td>\n",
|
1025
|
+
" <td>Biological child</td>\n",
|
1026
|
+
" <td>Female</td>\n",
|
1027
|
+
" <td>White</td>\n",
|
1028
|
+
" <td>2020</td>\n",
|
1029
|
+
" <td>9</td>\n",
|
1030
|
+
" </tr>\n",
|
1031
|
+
" <tr>\n",
|
1032
|
+
" <th>10</th>\n",
|
1033
|
+
" <td>0_4</td>\n",
|
1034
|
+
" <td>0_8</td>\n",
|
1035
|
+
" <td>Eric</td>\n",
|
1036
|
+
" <td>R</td>\n",
|
1037
|
+
" <td>Alonso Tellez</td>\n",
|
1038
|
+
" <td>38</td>\n",
|
1039
|
+
" <td>05/29/1981</td>\n",
|
1040
|
+
" <td>1501</td>\n",
|
1041
|
+
" <td>interlake ave n</td>\n",
|
1042
|
+
" <td>NaN</td>\n",
|
1043
|
+
" <td>Anytown</td>\n",
|
1044
|
+
" <td>WA</td>\n",
|
1045
|
+
" <td>00000</td>\n",
|
1046
|
+
" <td>Household</td>\n",
|
1047
|
+
" <td>Reference person</td>\n",
|
1048
|
+
" <td>Male</td>\n",
|
1049
|
+
" <td>Latino</td>\n",
|
1050
|
+
" <td>2020</td>\n",
|
1051
|
+
" <td>10</td>\n",
|
1052
|
+
" </tr>\n",
|
1053
|
+
" <tr>\n",
|
1054
|
+
" <th>11</th>\n",
|
1055
|
+
" <td>0_5</td>\n",
|
1056
|
+
" <td>0_8</td>\n",
|
1057
|
+
" <td>NaN</td>\n",
|
1058
|
+
" <td>R</td>\n",
|
1059
|
+
" <td>Alonso Tellez</td>\n",
|
1060
|
+
" <td>40</td>\n",
|
1061
|
+
" <td>10/27/1979</td>\n",
|
1062
|
+
" <td>1501</td>\n",
|
1063
|
+
" <td>interlake ave n</td>\n",
|
1064
|
+
" <td>NaN</td>\n",
|
1065
|
+
" <td>Anytown</td>\n",
|
1066
|
+
" <td>WA</td>\n",
|
1067
|
+
" <td>00000</td>\n",
|
1068
|
+
" <td>Household</td>\n",
|
1069
|
+
" <td>Opposite-sex spouse</td>\n",
|
1070
|
+
" <td>Female</td>\n",
|
1071
|
+
" <td>Latino</td>\n",
|
1072
|
+
" <td>2020</td>\n",
|
1073
|
+
" <td>11</td>\n",
|
1074
|
+
" </tr>\n",
|
1075
|
+
" <tr>\n",
|
1076
|
+
" <th>12</th>\n",
|
1077
|
+
" <td>0_6</td>\n",
|
1078
|
+
" <td>0_8</td>\n",
|
1079
|
+
" <td>Elan</td>\n",
|
1080
|
+
" <td>N</td>\n",
|
1081
|
+
" <td>Alonso Tellez</td>\n",
|
1082
|
+
" <td>6</td>\n",
|
1083
|
+
" <td>07/30/2013</td>\n",
|
1084
|
+
" <td>1501</td>\n",
|
1085
|
+
" <td>interlake ave n</td>\n",
|
1086
|
+
" <td>NaN</td>\n",
|
1087
|
+
" <td>Anytown</td>\n",
|
1088
|
+
" <td>WA</td>\n",
|
1089
|
+
" <td>00000</td>\n",
|
1090
|
+
" <td>Household</td>\n",
|
1091
|
+
" <td>Biological child</td>\n",
|
1092
|
+
" <td>Male</td>\n",
|
1093
|
+
" <td>Latino</td>\n",
|
1094
|
+
" <td>2020</td>\n",
|
1095
|
+
" <td>12</td>\n",
|
1096
|
+
" </tr>\n",
|
1097
|
+
" <tr>\n",
|
1098
|
+
" <th>13</th>\n",
|
1099
|
+
" <td>0_5621</td>\n",
|
1100
|
+
" <td>0_2289</td>\n",
|
1101
|
+
" <td>Derick</td>\n",
|
1102
|
+
" <td>W</td>\n",
|
1103
|
+
" <td>Castillo</td>\n",
|
1104
|
+
" <td>49</td>\n",
|
1105
|
+
" <td>07/27/1970</td>\n",
|
1106
|
+
" <td>3</td>\n",
|
1107
|
+
" <td>stoke ct</td>\n",
|
1108
|
+
" <td>NaN</td>\n",
|
1109
|
+
" <td>Anytown</td>\n",
|
1110
|
+
" <td>WA</td>\n",
|
1111
|
+
" <td>00000</td>\n",
|
1112
|
+
" <td>Household</td>\n",
|
1113
|
+
" <td>Reference person</td>\n",
|
1114
|
+
" <td>Male</td>\n",
|
1115
|
+
" <td>Latino</td>\n",
|
1116
|
+
" <td>2020</td>\n",
|
1117
|
+
" <td>13</td>\n",
|
1118
|
+
" </tr>\n",
|
1119
|
+
" <tr>\n",
|
1120
|
+
" <th>14</th>\n",
|
1121
|
+
" <td>0_5622</td>\n",
|
1122
|
+
" <td>0_2289</td>\n",
|
1123
|
+
" <td>Kaylee</td>\n",
|
1124
|
+
" <td>A</td>\n",
|
1125
|
+
" <td>Castillo</td>\n",
|
1126
|
+
" <td>16</td>\n",
|
1127
|
+
" <td>06/10/2003</td>\n",
|
1128
|
+
" <td>3</td>\n",
|
1129
|
+
" <td>stoke ct</td>\n",
|
1130
|
+
" <td>NaN</td>\n",
|
1131
|
+
" <td>Anytown</td>\n",
|
1132
|
+
" <td>WA</td>\n",
|
1133
|
+
" <td>00000</td>\n",
|
1134
|
+
" <td>Household</td>\n",
|
1135
|
+
" <td>Biological child</td>\n",
|
1136
|
+
" <td>Female</td>\n",
|
1137
|
+
" <td>Latino</td>\n",
|
1138
|
+
" <td>2020</td>\n",
|
1139
|
+
" <td>14</td>\n",
|
1140
|
+
" </tr>\n",
|
1141
|
+
" <tr>\n",
|
1142
|
+
" <th>15</th>\n",
|
1143
|
+
" <td>0_5623</td>\n",
|
1144
|
+
" <td>0_2289</td>\n",
|
1145
|
+
" <td>Heather</td>\n",
|
1146
|
+
" <td>J</td>\n",
|
1147
|
+
" <td>Castillo</td>\n",
|
1148
|
+
" <td>46</td>\n",
|
1149
|
+
" <td>07/24/1973</td>\n",
|
1150
|
+
" <td>3</td>\n",
|
1151
|
+
" <td>stoke ct</td>\n",
|
1152
|
+
" <td>NaN</td>\n",
|
1153
|
+
" <td>Anytown</td>\n",
|
1154
|
+
" <td>WA</td>\n",
|
1155
|
+
" <td>00000</td>\n",
|
1156
|
+
" <td>Household</td>\n",
|
1157
|
+
" <td>Opposite-sex unmarried partner</td>\n",
|
1158
|
+
" <td>Female</td>\n",
|
1159
|
+
" <td>Latino</td>\n",
|
1160
|
+
" <td>2020</td>\n",
|
1161
|
+
" <td>15</td>\n",
|
1162
|
+
" </tr>\n",
|
1163
|
+
" <tr>\n",
|
1164
|
+
" <th>16</th>\n",
|
1165
|
+
" <td>0_7251</td>\n",
|
1166
|
+
" <td>0_2957</td>\n",
|
1167
|
+
" <td>Patrick</td>\n",
|
1168
|
+
" <td>E</td>\n",
|
1169
|
+
" <td>Rodriguez</td>\n",
|
1170
|
+
" <td>53</td>\n",
|
1171
|
+
" <td>03/26/1966</td>\n",
|
1172
|
+
" <td>927</td>\n",
|
1173
|
+
" <td>broomfield ln</td>\n",
|
1174
|
+
" <td>NaN</td>\n",
|
1175
|
+
" <td>Anytown</td>\n",
|
1176
|
+
" <td>WA</td>\n",
|
1177
|
+
" <td>00000</td>\n",
|
1178
|
+
" <td>Household</td>\n",
|
1179
|
+
" <td>Reference person</td>\n",
|
1180
|
+
" <td>Male</td>\n",
|
1181
|
+
" <td>Latino</td>\n",
|
1182
|
+
" <td>2020</td>\n",
|
1183
|
+
" <td>16</td>\n",
|
1184
|
+
" </tr>\n",
|
1185
|
+
" <tr>\n",
|
1186
|
+
" <th>17</th>\n",
|
1187
|
+
" <td>0_7252</td>\n",
|
1188
|
+
" <td>0_2957</td>\n",
|
1189
|
+
" <td>Melisa</td>\n",
|
1190
|
+
" <td>D</td>\n",
|
1191
|
+
" <td>Rodriguez</td>\n",
|
1192
|
+
" <td>41</td>\n",
|
1193
|
+
" <td>05/26/1978</td>\n",
|
1194
|
+
" <td>927</td>\n",
|
1195
|
+
" <td>broomfield ln</td>\n",
|
1196
|
+
" <td>NaN</td>\n",
|
1197
|
+
" <td>Anytown</td>\n",
|
1198
|
+
" <td>WA</td>\n",
|
1199
|
+
" <td>00000</td>\n",
|
1200
|
+
" <td>Household</td>\n",
|
1201
|
+
" <td>Opposite-sex spouse</td>\n",
|
1202
|
+
" <td>Female</td>\n",
|
1203
|
+
" <td>Latino</td>\n",
|
1204
|
+
" <td>2020</td>\n",
|
1205
|
+
" <td>17</td>\n",
|
1206
|
+
" </tr>\n",
|
1207
|
+
" <tr>\n",
|
1208
|
+
" <th>18</th>\n",
|
1209
|
+
" <td>0_7254</td>\n",
|
1210
|
+
" <td>0_2957</td>\n",
|
1211
|
+
" <td>Caleb</td>\n",
|
1212
|
+
" <td>G</td>\n",
|
1213
|
+
" <td>Rodriguez</td>\n",
|
1214
|
+
" <td>13</td>\n",
|
1215
|
+
" <td>01/10/2007</td>\n",
|
1216
|
+
" <td>927</td>\n",
|
1217
|
+
" <td>broomfield ln</td>\n",
|
1218
|
+
" <td>NaN</td>\n",
|
1219
|
+
" <td>Anytown</td>\n",
|
1220
|
+
" <td>WA</td>\n",
|
1221
|
+
" <td>00000</td>\n",
|
1222
|
+
" <td>Household</td>\n",
|
1223
|
+
" <td>Biological child</td>\n",
|
1224
|
+
" <td>Male</td>\n",
|
1225
|
+
" <td>White</td>\n",
|
1226
|
+
" <td>2020</td>\n",
|
1227
|
+
" <td>18</td>\n",
|
1228
|
+
" </tr>\n",
|
1229
|
+
" <tr>\n",
|
1230
|
+
" <th>19</th>\n",
|
1231
|
+
" <td>0_7255</td>\n",
|
1232
|
+
" <td>0_2957</td>\n",
|
1233
|
+
" <td>Henry</td>\n",
|
1234
|
+
" <td>C</td>\n",
|
1235
|
+
" <td>Rodriguez</td>\n",
|
1236
|
+
" <td>12</td>\n",
|
1237
|
+
" <td>07/09/2007</td>\n",
|
1238
|
+
" <td>927</td>\n",
|
1239
|
+
" <td>broomfield ln</td>\n",
|
1240
|
+
" <td>NaN</td>\n",
|
1241
|
+
" <td>Anytown</td>\n",
|
1242
|
+
" <td>WA</td>\n",
|
1243
|
+
" <td>00000</td>\n",
|
1244
|
+
" <td>Household</td>\n",
|
1245
|
+
" <td>Biological child</td>\n",
|
1246
|
+
" <td>Male</td>\n",
|
1247
|
+
" <td>White</td>\n",
|
1248
|
+
" <td>2020</td>\n",
|
1249
|
+
" <td>19</td>\n",
|
1250
|
+
" </tr>\n",
|
1251
|
+
" </tbody>\n",
|
1252
|
+
"</table>\n",
|
1253
|
+
"</div>"
|
1254
|
+
],
|
1255
|
+
"text/plain": [
|
1256
|
+
" simulant_id household_id first_name middle_initial last_name age \\\n",
|
1257
|
+
"0 0_2 0_7 Diana P Kofron 25 \n",
|
1258
|
+
"1 0_3 0_7 Anna A Kofron 25 \n",
|
1259
|
+
"2 0_923 0_8033 Gerald R Butler 76 \n",
|
1260
|
+
"3 0_2641 0_1066 Loretta T Carley 61 \n",
|
1261
|
+
"4 0_2801 0_1138 Richard R Jones 73 \n",
|
1262
|
+
"5 0_6176 0_2514 Sandra S Runnalls 66 \n",
|
1263
|
+
"6 0_13972 0_5627 Jerry E Murray 70 \n",
|
1264
|
+
"7 0_13973 0_5627 Anita R Murray 70 \n",
|
1265
|
+
"8 0_13974 0_5627 Jada S Murray 45 \n",
|
1266
|
+
"9 0_13975 0_5627 Toni K Murray 44 \n",
|
1267
|
+
"10 0_4 0_8 Eric R Alonso Tellez 38 \n",
|
1268
|
+
"11 0_5 0_8 NaN R Alonso Tellez 40 \n",
|
1269
|
+
"12 0_6 0_8 Elan N Alonso Tellez 6 \n",
|
1270
|
+
"13 0_5621 0_2289 Derick W Castillo 49 \n",
|
1271
|
+
"14 0_5622 0_2289 Kaylee A Castillo 16 \n",
|
1272
|
+
"15 0_5623 0_2289 Heather J Castillo 46 \n",
|
1273
|
+
"16 0_7251 0_2957 Patrick E Rodriguez 53 \n",
|
1274
|
+
"17 0_7252 0_2957 Melisa D Rodriguez 41 \n",
|
1275
|
+
"18 0_7254 0_2957 Caleb G Rodriguez 13 \n",
|
1276
|
+
"19 0_7255 0_2957 Henry C Rodriguez 12 \n",
|
1277
|
+
"\n",
|
1278
|
+
" date_of_birth street_number street_name unit_number city state \\\n",
|
1279
|
+
"0 05/06/1994 5112 145th st NaN Anytown WA \n",
|
1280
|
+
"1 09/29/1994 5112 145th st NaN Anytown WA \n",
|
1281
|
+
"2 11/03/1943 1130 mallory ln NaN Anytown WA \n",
|
1282
|
+
"3 07/71/1958 NaN delacorte dr NaN Anytown WA \n",
|
1283
|
+
"4 03/03/1947 950 caribou lane NaN Anytown WA \n",
|
1284
|
+
"5 03/18/1954 4458 windsor pl NaN Anytown WA \n",
|
1285
|
+
"6 01/03/1950 17868 winding trail rd NaN Anytown WA \n",
|
1286
|
+
"7 11/06/1949 17868 winding trail rd NaN Anytown WA \n",
|
1287
|
+
"8 04/11/1974 17868 winding trail rd NaN Anytown WA \n",
|
1288
|
+
"9 02/12/1976 17868 winding trail rd NaN Anytown WA \n",
|
1289
|
+
"10 05/29/1981 1501 interlake ave n NaN Anytown WA \n",
|
1290
|
+
"11 10/27/1979 1501 interlake ave n NaN Anytown WA \n",
|
1291
|
+
"12 07/30/2013 1501 interlake ave n NaN Anytown WA \n",
|
1292
|
+
"13 07/27/1970 3 stoke ct NaN Anytown WA \n",
|
1293
|
+
"14 06/10/2003 3 stoke ct NaN Anytown WA \n",
|
1294
|
+
"15 07/24/1973 3 stoke ct NaN Anytown WA \n",
|
1295
|
+
"16 03/26/1966 927 broomfield ln NaN Anytown WA \n",
|
1296
|
+
"17 05/26/1978 927 broomfield ln NaN Anytown WA \n",
|
1297
|
+
"18 01/10/2007 927 broomfield ln NaN Anytown WA \n",
|
1298
|
+
"19 07/09/2007 927 broomfield ln NaN Anytown WA \n",
|
1299
|
+
"\n",
|
1300
|
+
" zipcode housing_type relationship_to_reference_person sex \\\n",
|
1301
|
+
"0 00000 Household Reference person Female \n",
|
1302
|
+
"1 00000 Household Other relative Female \n",
|
1303
|
+
"2 00000 Household Reference person Male \n",
|
1304
|
+
"3 00000 Household Reference person Female \n",
|
1305
|
+
"4 00000 Household Reference person Male \n",
|
1306
|
+
"5 00000 Household Reference person Female \n",
|
1307
|
+
"6 00000 Household Reference person Male \n",
|
1308
|
+
"7 00000 Household Opposite-sex spouse Female \n",
|
1309
|
+
"8 00000 Household Biological child Female \n",
|
1310
|
+
"9 00000 Household Biological child Female \n",
|
1311
|
+
"10 00000 Household Reference person Male \n",
|
1312
|
+
"11 00000 Household Opposite-sex spouse Female \n",
|
1313
|
+
"12 00000 Household Biological child Male \n",
|
1314
|
+
"13 00000 Household Reference person Male \n",
|
1315
|
+
"14 00000 Household Biological child Female \n",
|
1316
|
+
"15 00000 Household Opposite-sex unmarried partner Female \n",
|
1317
|
+
"16 00000 Household Reference person Male \n",
|
1318
|
+
"17 00000 Household Opposite-sex spouse Female \n",
|
1319
|
+
"18 00000 Household Biological child Male \n",
|
1320
|
+
"19 00000 Household Biological child Male \n",
|
1321
|
+
"\n",
|
1322
|
+
" race_ethnicity year Record ID \n",
|
1323
|
+
"0 White 2020 0 \n",
|
1324
|
+
"1 White 2020 1 \n",
|
1325
|
+
"2 Black 2020 2 \n",
|
1326
|
+
"3 White 2020 3 \n",
|
1327
|
+
"4 White 2020 4 \n",
|
1328
|
+
"5 Multiracial or Other 2020 5 \n",
|
1329
|
+
"6 White 2020 6 \n",
|
1330
|
+
"7 White 2020 7 \n",
|
1331
|
+
"8 White 2020 8 \n",
|
1332
|
+
"9 White 2020 9 \n",
|
1333
|
+
"10 Latino 2020 10 \n",
|
1334
|
+
"11 Latino 2020 11 \n",
|
1335
|
+
"12 Latino 2020 12 \n",
|
1336
|
+
"13 Latino 2020 13 \n",
|
1337
|
+
"14 Latino 2020 14 \n",
|
1338
|
+
"15 Latino 2020 15 \n",
|
1339
|
+
"16 Latino 2020 16 \n",
|
1340
|
+
"17 Latino 2020 17 \n",
|
1341
|
+
"18 White 2020 18 \n",
|
1342
|
+
"19 White 2020 19 "
|
1343
|
+
]
|
1344
|
+
},
|
1345
|
+
"execution_count": 3,
|
1346
|
+
"metadata": {},
|
1347
|
+
"output_type": "execute_result"
|
1348
|
+
}
|
1349
|
+
],
|
1350
|
+
"source": [
|
1351
|
+
"df2"
|
1352
|
+
]
|
1353
|
+
},
|
1354
|
+
{
|
1355
|
+
"cell_type": "code",
|
1356
|
+
"execution_count": 4,
|
1357
|
+
"id": "6eeba4af",
|
1358
|
+
"metadata": {},
|
1359
|
+
"outputs": [],
|
1360
|
+
"source": [
|
1361
|
+
"df1.to_parquet(f\"{file_stem_1}.parquet\")\n",
|
1362
|
+
"df2.to_parquet(f\"{file_stem_2}.parquet\")\n"
|
1363
|
+
]
|
1364
|
+
},
|
1365
|
+
{
|
1366
|
+
"cell_type": "code",
|
1367
|
+
"execution_count": 5,
|
1368
|
+
"id": "ee07c281",
|
1369
|
+
"metadata": {},
|
1370
|
+
"outputs": [],
|
1371
|
+
"source": [
|
1372
|
+
"known_clusters = pd.DataFrame(columns=[\"Input Record Dataset\", \"Input Record ID\", \"Cluster ID\"])\n",
|
1373
|
+
"known_clusters.to_parquet(\"known_clusters.parquet\")"
|
1374
|
+
]
|
1375
|
+
},
|
1376
|
+
{
|
1377
|
+
"cell_type": "code",
|
1378
|
+
"execution_count": 6,
|
1379
|
+
"id": "9aec4fac",
|
1380
|
+
"metadata": {},
|
1381
|
+
"outputs": [
|
1382
|
+
{
|
1383
|
+
"name": "stdout",
|
1384
|
+
"output_type": "stream",
|
1385
|
+
"text": [
|
1386
|
+
"Overwriting existing file: /mnt/team/simulation_science/priv/engineering/er_ecosystem/input_data/zmbc/input_file_1.parquet\n",
|
1387
|
+
"File copied to: /mnt/team/simulation_science/priv/engineering/er_ecosystem/input_data/zmbc/input_file_1.parquet\n",
|
1388
|
+
"Overwriting existing file: /mnt/team/simulation_science/priv/engineering/er_ecosystem/input_data/zmbc/input_file_2.parquet\n",
|
1389
|
+
"File copied to: /mnt/team/simulation_science/priv/engineering/er_ecosystem/input_data/zmbc/input_file_2.parquet\n",
|
1390
|
+
"Overwriting existing file: /mnt/team/simulation_science/priv/engineering/er_ecosystem/input_data/zmbc/known_clusters.parquet\n",
|
1391
|
+
"File copied to: /mnt/team/simulation_science/priv/engineering/er_ecosystem/input_data/zmbc/known_clusters.parquet\n"
|
1392
|
+
]
|
1393
|
+
}
|
1394
|
+
],
|
1395
|
+
"source": [
|
1396
|
+
"# copy files to shared filesystem\n",
|
1397
|
+
"output_dir = Path(f\"/mnt/team/simulation_science/priv/engineering/er_ecosystem/input_data/{os.getlogin()}\")\n",
|
1398
|
+
"if not output_dir.exists():\n",
|
1399
|
+
" # make the directory with rwxrwxr-x permissions\n",
|
1400
|
+
" output_dir.mkdir(parents=True, mode=0o775)\n",
|
1401
|
+
"\n",
|
1402
|
+
"for stem in [file_stem_1, file_stem_2, \"known_clusters\"]:\n",
|
1403
|
+
" filename = f\"{stem}.parquet\"\n",
|
1404
|
+
" filepath = output_dir / filename\n",
|
1405
|
+
" if filepath.exists():\n",
|
1406
|
+
" print(f\"Overwriting existing file: {filepath}\")\n",
|
1407
|
+
" shutil.copy(filename, filepath)\n",
|
1408
|
+
" print(f\"File copied to: {filepath}\")"
|
1409
|
+
]
|
1410
|
+
}
|
1411
|
+
],
|
1412
|
+
"metadata": {
|
1413
|
+
"kernelspec": {
|
1414
|
+
"display_name": "person_linkage_case_study",
|
1415
|
+
"language": "python",
|
1416
|
+
"name": "python3"
|
1417
|
+
},
|
1418
|
+
"language_info": {
|
1419
|
+
"codemirror_mode": {
|
1420
|
+
"name": "ipython",
|
1421
|
+
"version": 3
|
1422
|
+
},
|
1423
|
+
"file_extension": ".py",
|
1424
|
+
"mimetype": "text/x-python",
|
1425
|
+
"name": "python",
|
1426
|
+
"nbconvert_exporter": "python",
|
1427
|
+
"pygments_lexer": "ipython3",
|
1428
|
+
"version": "3.10.14"
|
1429
|
+
}
|
1430
|
+
},
|
1431
|
+
"nbformat": 4,
|
1432
|
+
"nbformat_minor": 5
|
1433
|
+
}
|