morpc 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {morpc-0.3.3 → morpc-0.3.4}/PKG-INFO +1 -1
- {morpc-0.3.3 → morpc-0.3.4}/morpc/__init__.py +1 -1
- {morpc-0.3.3 → morpc-0.3.4}/morpc/census/census.py +41 -2
- {morpc-0.3.3 → morpc-0.3.4}/morpc/frictionless/frictionless.py +19 -5
- {morpc-0.3.3 → morpc-0.3.4}/morpc/morpc.py +208 -40
- {morpc-0.3.3 → morpc-0.3.4}/morpc.egg-info/PKG-INFO +1 -1
- {morpc-0.3.3 → morpc-0.3.4}/.gitattributes +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/.github/workflows/deploy.yml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/.github/workflows/python-publish.yml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/.gitignore +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/README.md +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/.gitignore +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/.ipynb_checkpoints/index-checkpoint.md +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/.ipynb_checkpoints/morpc-color-demo-checkpoint.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/.ipynb_checkpoints/myst-checkpoint.yml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/01-morpc-py-demos.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/02-morpc-countylookup-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/03-morpc-varlookup-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/04-morpc-restapi-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/05-morpc-geos-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/06-morpc-frictionless-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/07-morpc-census-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/08-morpc-plot-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/09-morpc-color-demo.ipynb +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/assets/HORIZONTAL_LOGOS_PRIMARY_COLOR_V2.png +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/index.md +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/myst.yml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/MORPC MPO Boundary.gpkg +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/Screenshot 2025-06-03 080403.png +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/dataChartToExcelOutput.xlsx +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/plot_df.csv +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/plot_df.resource.yaml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/plot_df.schema.yaml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/rest_resource.json +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/temp_df.csv +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/temp_df.resource.yaml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/docs/temp_data/temp_df.schema.yaml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/census/__init__.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/color/.ipynb_checkpoints/color-checkpoint.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/color/.ipynb_checkpoints/morpc_colors-checkpoint.json +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/color/__init__.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/color/color.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/color/morpc_colors.json +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/color/palette.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/frictionless/__init__.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/plot/__init__.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/plot/plot.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/rest_api/__init__.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc/rest_api/rest_api.py +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc.egg-info/SOURCES.txt +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc.egg-info/dependency_links.txt +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc.egg-info/requires.txt +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/morpc.egg-info/top_level.txt +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/pyproject.toml +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/release_new_package.md +0 -0
- {morpc-0.3.3 → morpc-0.3.4}/setup.cfg +0 -0
|
@@ -10,14 +10,20 @@ ACS_ID_FIELDS = {
|
|
|
10
10
|
{"name":"SUMLEVEL", "type":"string", "description":"Code representing the geographic summary level for the data"},
|
|
11
11
|
{"name":"STATE","type":"string","description":"Unique identifier for state in which geography is located"},
|
|
12
12
|
{"name":"COUNTY","type":"string","description":"Unique identifier for county in which geography is located"},
|
|
13
|
-
{"name":"TRACT","type":"string","description":"Unique identifier for tract in which geography is located"}
|
|
13
|
+
{"name":"TRACT","type":"string","description":"Unique identifier for tract in which geography is located"}
|
|
14
14
|
],
|
|
15
15
|
"tract": [
|
|
16
16
|
{"name":"GEO_ID", "type":"string", "description":"Unique identifier for geography"},
|
|
17
17
|
{"name":"SUMLEVEL", "type":"string", "description":"Code representing the geographic summary level for the data"},
|
|
18
18
|
{"name":"STATE","type":"string","description":"Unique identifier for state in which geography is located"},
|
|
19
19
|
{"name":"COUNTY","type":"string","description":"Unique identifier for county in which geography is located"}
|
|
20
|
-
],
|
|
20
|
+
],
|
|
21
|
+
"county subdivision": [
|
|
22
|
+
{"name":"GEO_ID", "type":"string", "description":"Unique identifier for geography"},
|
|
23
|
+
{"name":"SUMLEVEL", "type":"string", "description":"Code representing the geographic summary level for the data"},
|
|
24
|
+
{"name":"STATE","type":"string","description":"Unique identifier for state in which geography is located"},
|
|
25
|
+
{"name":"COUNTY","type":"string","description":"Unique identifier for county in which geography is located"}
|
|
26
|
+
],
|
|
21
27
|
"county": [
|
|
22
28
|
{"name":"GEO_ID", "type":"string", "description":"Unique identifier for geography"},
|
|
23
29
|
{"name":"SUMLEVEL", "type":"string", "description":"Code representing the geographic summary level for the data"},
|
|
@@ -278,6 +284,39 @@ def api_get(url, params, varBatchSize=20, verbose=True):
|
|
|
278
284
|
# |--------------|----------------|---------------------|-------------------------|
|
|
279
285
|
# | B25127_004E | Owner occupied | Built 2020 or later | 1, detached or attached |
|
|
280
286
|
#
|
|
287
|
+
|
|
288
|
+
def acs_variables_by_group(groupNumber, acsYear, acsSurvey):
    """
    Get all of the variables that belong to a Census ACS variable group.

    Downloads the variables.json listing for the requested ACS year and survey
    from api.census.gov and keeps only the entries whose "group" attribute
    matches groupNumber.

    Parameters
    ----------
    groupNumber : str
        The group number to search for within the variables table. ie. B11001

    acsYear : str
        The year of the survey. ie. 2023

    acsSurvey : str
        The acs survey to get variables for. ie. 1 or 5

    Returns
    -------
    dict
        A dict keyed by variable name. Each value is the entry for that variable
        exactly as it appears in the API response. The dict is empty if no
        variable belongs to the group.

    Raises
    ------
    requests.exceptions.HTTPError
        If the Census API responds with an error status (for example when the
        year/survey combination does not exist).
    """
    import requests

    url = f'https://api.census.gov/data/{acsYear}/acs/acs{acsSurvey}/variables.json'
    response = requests.get(url)
    # Surface a clear HTTP error instead of letting an error page fail later
    # with a confusing JSON decoding error.
    response.raise_for_status()

    allVariables = response.json()['variables']

    # Use .get() so that an entry without a "group" attribute is simply skipped
    # rather than aborting the whole lookup with a KeyError.
    return {
        name: attributes
        for name, attributes in allVariables.items()
        if attributes.get('group') == groupNumber
    }
|
|
319
|
+
|
|
281
320
|
def acs_label_to_dimensions(labelSeries, dimensionNames=None):
|
|
282
321
|
"""
|
|
283
322
|
acs_label_to_dimensions(labelSeries, dimensionNames=None)
|
|
@@ -36,7 +36,7 @@ def name_to_desc_map(schema):
|
|
|
36
36
|
|
|
37
37
|
# Given a dataframe and the Frictionless Schema object (see load_schema), recast each of the fields in the
|
|
38
38
|
# dataframe to the data type specified in the schema.
|
|
39
|
-
def cast_field_types(df, schema, forceInteger=False, handleMissingFields="error", verbose=True):
|
|
39
|
+
def cast_field_types(df, schema, forceInteger=False, forceInt64=False, handleMissingFields="error", verbose=True):
|
|
40
40
|
import frictionless
|
|
41
41
|
import pandas as pd
|
|
42
42
|
import shapely
|
|
@@ -64,8 +64,13 @@ def cast_field_types(df, schema, forceInteger=False, handleMissingFields="error"
|
|
|
64
64
|
# the field must be cast as "Int64" instead.
|
|
65
65
|
if((fieldType == "int") or (fieldType == "integer")):
|
|
66
66
|
try:
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
if(forceInt64 == True):
|
|
68
|
+
# Cast all integer fields as Int64 whether this is necessary or not. This is useful when trying to merge
|
|
69
|
+
# dataframes with mixed int32 and Int64 values.
|
|
70
|
+
outDF[fieldName] = outDF[fieldName].astype("Int64")
|
|
71
|
+
else:
|
|
72
|
+
# Try to cast the field as an "int". This will fail if nulls are present.
|
|
73
|
+
outDF[fieldName] = outDF[fieldName].astype("int")
|
|
69
74
|
except:
|
|
70
75
|
try:
|
|
71
76
|
# Try to cast as "Int64", which supports nulls. This will fail if the fractional part is non-zero.
|
|
@@ -472,7 +477,7 @@ def validate_resource(resourcePath, verbose=True):
|
|
|
472
477
|
print(results)
|
|
473
478
|
return False
|
|
474
479
|
|
|
475
|
-
def load_data(resourcePath, archiveDir=None, validate=False, verbose=True):
|
|
480
|
+
def load_data(resourcePath, archiveDir=None, validate=False, forceInteger=False, forceInt64=False, verbose=True):
|
|
476
481
|
"""Often we want to make a copy of some input data and work with the copy, for example to protect
|
|
477
482
|
the original data or to create an archival copy of it so that we can replicate the process later.
|
|
478
483
|
The `load_data()` function simplifies the process of reading the data and
|
|
@@ -488,6 +493,15 @@ def load_data(resourcePath, archiveDir=None, validate=False, verbose=True):
|
|
|
488
493
|
validate : bool
|
|
489
494
|
Optional. If True, the resource file, schema file, and data file will be validated. If archiveDir is
|
|
490
495
|
specified, the copies of the files will be validated. If not, the original files will be validated.
|
|
496
|
+
Defaults to False.
|
|
497
|
+
forceInteger : bool
|
|
498
|
+
Optional. If True, then try harder to cast integer fields. This may involve rounding the values to the ones places.
|
|
499
|
+
Defaults to False.
|
|
500
|
+
forceInt64 : bool
|
|
501
|
+
Optional. If True, then cast all integer fields as Int64 regardless of whether this is necessary. This is useful
|
|
502
|
+
when trying to merge dataframes which would otherwise have mixed int32 and Int64 fields. Defaults to False.
|
|
503
|
+
verbose : bool
|
|
504
|
+
Optional. If False, then most output will be suppressed. Defaults to True.
|
|
491
505
|
|
|
492
506
|
Returns
|
|
493
507
|
-------
|
|
@@ -559,7 +573,7 @@ def load_data(resourcePath, archiveDir=None, validate=False, verbose=True):
|
|
|
559
573
|
print("morpc.load_data | ERROR | Unknown data file extension: {}".format(dataFileExtension))
|
|
560
574
|
raise RuntimeError
|
|
561
575
|
|
|
562
|
-
df = cast_field_types(df, resource.schema, verbose=verbose)
|
|
576
|
+
df = cast_field_types(df, resource.schema, forceInteger=forceInteger, forceInt64=forceInt64, verbose=verbose)
|
|
563
577
|
|
|
564
578
|
return df, resource, resource.schema
|
|
565
579
|
|
|
@@ -196,7 +196,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
196
196
|
"plural":"states",
|
|
197
197
|
"hierarchy_string":"STATE",
|
|
198
198
|
"authority":"census",
|
|
199
|
-
"idField":"
|
|
199
|
+
"idField":"STATEFP",
|
|
200
200
|
"nameField":"STATE"
|
|
201
201
|
},
|
|
202
202
|
'050': {
|
|
@@ -204,7 +204,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
204
204
|
"plural":"counties",
|
|
205
205
|
"hierarchy_string":"COUNTY",
|
|
206
206
|
"authority":"census",
|
|
207
|
-
"idField":"
|
|
207
|
+
"idField":"COUNTYFP",
|
|
208
208
|
"nameField":"COUNTY"
|
|
209
209
|
},
|
|
210
210
|
'060': {
|
|
@@ -212,7 +212,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
212
212
|
"plural":"county subdivisions",
|
|
213
213
|
"hierarchy_string":"COUNTY-COUSUB",
|
|
214
214
|
"authority":"census",
|
|
215
|
-
"idField":"
|
|
215
|
+
"idField":"COUSUBFP",
|
|
216
216
|
"nameField":"COUSUB"
|
|
217
217
|
},
|
|
218
218
|
'070': {
|
|
@@ -230,24 +230,24 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
230
230
|
"plural":"census blocks",
|
|
231
231
|
"hierarchy_string":"COUNTY-TRACT-BG-BLOCK",
|
|
232
232
|
"authority":"census",
|
|
233
|
-
"idField":"
|
|
234
|
-
"nameField":
|
|
235
|
-
},
|
|
233
|
+
"idField":"BLOCKCE",
|
|
234
|
+
"nameField":None
|
|
235
|
+
},
|
|
236
236
|
'140': {
|
|
237
237
|
"singular":"tract",
|
|
238
238
|
"plural":"tracts",
|
|
239
239
|
"hierarchy_string":"COUNTY-TRACT",
|
|
240
240
|
"authority":"census",
|
|
241
|
-
"idField":"
|
|
242
|
-
"nameField":
|
|
241
|
+
"idField":"TRACTCE",
|
|
242
|
+
"nameField":None
|
|
243
243
|
},
|
|
244
244
|
'150': {
|
|
245
245
|
"singular":"block group",
|
|
246
246
|
"plural":"block groups",
|
|
247
247
|
"hierarchy_string":"COUNTY-TRACT-BG",
|
|
248
248
|
"authority":"census",
|
|
249
|
-
"idField":"
|
|
250
|
-
"nameField":
|
|
249
|
+
"idField":"BLKGRPCE",
|
|
250
|
+
"nameField":None
|
|
251
251
|
},
|
|
252
252
|
'155': {
|
|
253
253
|
"singular":"place county part",
|
|
@@ -262,7 +262,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
262
262
|
"plural":"places",
|
|
263
263
|
"hierarchy_string":"PLACE",
|
|
264
264
|
"authority":"census",
|
|
265
|
-
"idField":"
|
|
265
|
+
"idField":"PLACEFP",
|
|
266
266
|
"nameField":"PLACE"
|
|
267
267
|
},
|
|
268
268
|
'310': {
|
|
@@ -270,7 +270,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
270
270
|
"plural":"metro areas",
|
|
271
271
|
"hierarchy_string":"CBSA",
|
|
272
272
|
"authority":"census",
|
|
273
|
-
"idField":"
|
|
273
|
+
"idField":"CBSAFP",
|
|
274
274
|
"nameField":"CBSA"
|
|
275
275
|
},
|
|
276
276
|
'400': {
|
|
@@ -278,7 +278,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
278
278
|
"plural":"urban areas",
|
|
279
279
|
"hierarchy_string":"URBANAREA",
|
|
280
280
|
"authority":"census",
|
|
281
|
-
"idField":"
|
|
281
|
+
"idField":"UACE",
|
|
282
282
|
"nameField":"URBANAREA"
|
|
283
283
|
},
|
|
284
284
|
'500': {
|
|
@@ -286,7 +286,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
286
286
|
"plural":"congressional districts",
|
|
287
287
|
"hierarchy_string":"CONGRESS",
|
|
288
288
|
"authority":"census",
|
|
289
|
-
"idField":"
|
|
289
|
+
"idField":"CDFP", # Census uses CDNNNFP where NNN is the congressional session number
|
|
290
290
|
"nameField":"CONGRESS"
|
|
291
291
|
},
|
|
292
292
|
'610': {
|
|
@@ -294,23 +294,23 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
294
294
|
"plural":"state senate districts",
|
|
295
295
|
"hierarchy_string":"STATESENATE",
|
|
296
296
|
"authority":"census",
|
|
297
|
-
"idField":"
|
|
298
|
-
"nameField":
|
|
297
|
+
"idField":"SLDUST",
|
|
298
|
+
"nameField":None
|
|
299
299
|
},
|
|
300
300
|
'620': {
|
|
301
301
|
"singular":"state house district",
|
|
302
302
|
"plural":"state house districts",
|
|
303
303
|
"hierarchy_string":"STATEHOUSE",
|
|
304
304
|
"authority":"census",
|
|
305
|
-
"idField":"
|
|
306
|
-
"nameField":
|
|
305
|
+
"idField":"SLDLST",
|
|
306
|
+
"nameField":None
|
|
307
307
|
},
|
|
308
308
|
'795': {
|
|
309
309
|
"singular":"public use microdata area",
|
|
310
310
|
"plural":"public use microdata areas",
|
|
311
311
|
"hierarchy_string":"PUMA",
|
|
312
312
|
"authority":"census",
|
|
313
|
-
"idField":"
|
|
313
|
+
"idField":"PUMACE",
|
|
314
314
|
"nameField":"PUMA"
|
|
315
315
|
},
|
|
316
316
|
'850': {
|
|
@@ -318,7 +318,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
318
318
|
"plural":"zip code tabulation areas",
|
|
319
319
|
"hierarchy_string":"ZCTA3",
|
|
320
320
|
"authority":"census",
|
|
321
|
-
"idField":"
|
|
321
|
+
"idField":"ZCTA3CE",
|
|
322
322
|
"nameField":None
|
|
323
323
|
},
|
|
324
324
|
'860': {
|
|
@@ -326,7 +326,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
326
326
|
"plural":"zip code tabulation area",
|
|
327
327
|
"hierarchy_string":"ZCTA5",
|
|
328
328
|
"authority":"census",
|
|
329
|
-
"idField":"
|
|
329
|
+
"idField":"ZCTA5CE",
|
|
330
330
|
"nameField":None
|
|
331
331
|
},
|
|
332
332
|
'930': {
|
|
@@ -340,25 +340,25 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
340
340
|
'950': {
|
|
341
341
|
"singular":"elementary school district",
|
|
342
342
|
"plural":"elementary school districts",
|
|
343
|
-
"hierarchy_string":"
|
|
343
|
+
"hierarchy_string":"ELSD",
|
|
344
344
|
"authority":"census",
|
|
345
|
-
"idField":"
|
|
345
|
+
"idField":"ELSDLEA",
|
|
346
346
|
"nameField":"SCHOOLDELEM"
|
|
347
347
|
},
|
|
348
348
|
'960': {
|
|
349
349
|
"singular":"high school district",
|
|
350
350
|
"plural":"high school districts",
|
|
351
|
-
"hierarchy_string":"
|
|
351
|
+
"hierarchy_string":"SCSD",
|
|
352
352
|
"authority":"census",
|
|
353
|
-
"idField":"
|
|
353
|
+
"idField":"SCSDLEA",
|
|
354
354
|
"nameField":"SCHOOLDHIGH"
|
|
355
355
|
},
|
|
356
356
|
'970': {
|
|
357
357
|
"singular":"unified school district",
|
|
358
358
|
"plural":"unified school districts",
|
|
359
|
-
"hierarchy_string":"
|
|
359
|
+
"hierarchy_string":"UNSD",
|
|
360
360
|
"authority":"census",
|
|
361
|
-
"idField":"
|
|
361
|
+
"idField":"UNSDLEA",
|
|
362
362
|
"nameField":"SCHOOLD"
|
|
363
363
|
},
|
|
364
364
|
'M01': {
|
|
@@ -446,7 +446,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
446
446
|
"plural":"Traffic analysis zones",
|
|
447
447
|
"hierarchy_string":"COUNTY-TAZ",
|
|
448
448
|
"authority":"morpc",
|
|
449
|
-
"idField":"
|
|
449
|
+
"idField":"TAZ2020",
|
|
450
450
|
"nameField":None
|
|
451
451
|
},
|
|
452
452
|
'M21': {
|
|
@@ -454,7 +454,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
454
454
|
"plural":"Micro analysis zones",
|
|
455
455
|
"hierarchy_string":"COUNTY-TAZ-MAZ",
|
|
456
456
|
"authority":"morpc",
|
|
457
|
-
"idField":"
|
|
457
|
+
"idField":"MAZ2020",
|
|
458
458
|
"nameField":None
|
|
459
459
|
},
|
|
460
460
|
'M22': {
|
|
@@ -462,7 +462,7 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
462
462
|
"plural":"GridMAZ zones",
|
|
463
463
|
"hierarchy_string":"COUNTY-TAZ-MAZ-GRIDMAZ",
|
|
464
464
|
"authority":"morpc",
|
|
465
|
-
"idField":"
|
|
465
|
+
"idField":"GridMAZ20",
|
|
466
466
|
"nameField":None
|
|
467
467
|
},
|
|
468
468
|
}
|
|
@@ -473,7 +473,6 @@ SUMLEVEL_DESCRIPTIONS = {
|
|
|
473
473
|
# GRID1MILE
|
|
474
474
|
# GRIDQUARTERMILE
|
|
475
475
|
# COUNTY-COUSUB-SCD
|
|
476
|
-
# COUNTY-TRACT-BG-BLOCK
|
|
477
476
|
# RESBLOB
|
|
478
477
|
# EMPBLOB
|
|
479
478
|
# GQBLOB
|
|
@@ -817,7 +816,7 @@ def avro_map_from_first_alias(schema):
|
|
|
817
816
|
return fieldMap
|
|
818
817
|
|
|
819
818
|
# Wrapper for backward compatibility
|
|
820
|
-
def cast_field_types(df, schema, forceInteger=False, handleMissingFields='error', verbose=True):
|
|
819
|
+
def cast_field_types(df, schema, forceInteger=False, forceInt64=False, handleMissingFields='error', verbose=True):
|
|
821
820
|
"""
|
|
822
821
|
Wrapper for backward compatibility with AVRO Schema
|
|
823
822
|
|
|
@@ -825,15 +824,15 @@ def cast_field_types(df, schema, forceInteger=False, handleMissingFields='error'
|
|
|
825
824
|
import morpc
|
|
826
825
|
# If schema is a dict object, assume it is in Avro format
|
|
827
826
|
if(type(schema) == dict):
|
|
828
|
-
outDF = avro_cast_field_types(df, schema, forceInteger=forceInteger, verbose=verbose)
|
|
827
|
+
outDF = avro_cast_field_types(df, schema, forceInteger=forceInteger, forceInt64=forceInt64, verbose=verbose)
|
|
829
828
|
# Otherwise, assume it is in Frictionless format
|
|
830
829
|
else:
|
|
831
|
-
outDF = morpc.frictionless.cast_field_types(df, schema, forceInteger=forceInteger, handleMissingFields=handleMissingFields, verbose=verbose)
|
|
830
|
+
outDF = morpc.frictionless.cast_field_types(df, schema, forceInteger=forceInteger, forceInt64=forceInt64, handleMissingFields=handleMissingFields, verbose=verbose)
|
|
832
831
|
return outDF
|
|
833
832
|
|
|
834
833
|
# Given a dataframe and the Avro dictionary object that describes its schema (see load_avro_schema), recast each of the fields in the dataframe
|
|
835
834
|
# to the data type specified in the schema.
|
|
836
|
-
def avro_cast_field_types(df, schema, forceInteger=False, verbose=True):
|
|
835
|
+
def avro_cast_field_types(df, schema, forceInteger=False, forceInt64=False, verbose=True):
|
|
837
836
|
outDF = df.copy()
|
|
838
837
|
for field in schema["fields"]:
|
|
839
838
|
fieldName = field["name"]
|
|
@@ -844,8 +843,13 @@ def avro_cast_field_types(df, schema, forceInteger=False, verbose=True):
|
|
|
844
843
|
# the field must be cast as "Int64" instead.
|
|
845
844
|
if((fieldType == "int") or (fieldType == "integer")):
|
|
846
845
|
try:
|
|
847
|
-
|
|
848
|
-
|
|
846
|
+
if(forceInt64 == True):
|
|
847
|
+
# Cast all integer fields as Int64 whether this is necessary or not. This is useful when trying to merge
|
|
848
|
+
# dataframes with mixed int32 and Int64 values.
|
|
849
|
+
outDF[fieldName] = outDF[fieldName].astype("Int64")
|
|
850
|
+
else:
|
|
851
|
+
# Try to cast the field as an "int". This will fail if nulls are present.
|
|
852
|
+
outDF[fieldName] = outDF[fieldName].astype("int")
|
|
849
853
|
except:
|
|
850
854
|
try:
|
|
851
855
|
# Try to cast as "Int64", which supports nulls. This will fail if the fractional part is non-zero.
|
|
@@ -1038,6 +1042,139 @@ def load_spatial_data(sourcePath, layerName=None, driverName=None, archiveDir=No
|
|
|
1038
1042
|
|
|
1039
1043
|
return gdf
|
|
1040
1044
|
|
|
1045
|
+
# Load tabular data
|
|
1046
|
+
def load_tabular_data(sourcePath, sheetName=None, fileType=None, archiveDir=None, archiveFileName=None, verbose=True, sep=None, encoding=None):
    """Often we want to make a copy of some input data and work with the copy, for example to protect
    the original data or to create an archival copy of it so that we can replicate the process later.
    The `load_tabular_data()` function simplifies the process of reading the data and (optionally) making
    an archival copy.

    Example usage: df = morpc.load_tabular_data("somefile.xlsx", sheetName="Sheet1", archiveDir="./input_data")

    Parameters
    ----------
    sourcePath : str
        The path to the tabular data. It may be a file path or URL.
    sheetName : str
        Optional. The name of the sheet that you wish to extract from an Excel workbook. If unspecified, the
        function will read the first sheet in the workbook.
    fileType : str
        Optional. One of "csv" or "xlsx" or "xls". If unspecified, the function will attempt to infer from sourcePath.
    archiveDir : str
        Optional. The path to the directory where a copy of a data should be archived. If this is specified,
        the data will be copied to this location.
    archiveFileName : str
        Optional. If `archiveDir` is specified, you may use this to specify the name of the archived file.
        If this is unspecified, the function will preserve the original filename as-is. If the source path
        looks like an API query (contains "?"), the sheet name or a generic name is used instead.
    verbose : bool
        Set verbose to False to reduce the text output from the function.
    sep : str
        Optional. Delimiter to use for delimited text files. Tabs ("\\t") and pipes ("|") are common alternatives
        to ",". If unspecified, None is passed to pandas when reading (pandas then tries to detect the
        delimiter) and "," is used when writing the archival copy.
    encoding : str
        Optional. Character encoding to use for delimited text files. If unspecified, the pandas default is used.
        Sometimes other encodings are required. Notably, Census PEP tables require the "ISO-8859-1" encoding.

    Returns
    -------
    df : pandas.core.frame.DataFrame
        A Pandas DataFrame constructed from the data at the location specified by sourcePath and sheetName

    Raises
    ------
    RuntimeError
        If the file type cannot be inferred from the file extension or is not one of the supported types.
    """

    import pandas as pd
    import os

    if(verbose):
        print("morpc.load_tabular_data | INFO | Loading tabular data from location: {}".format(sourcePath))

    # Only set when the data has to be staged locally (Census FTP case below). It must be initialized here
    # so that the check at the end of the function is valid for every other source.
    tempDir = None

    # Due to changes at the Census pd.read_csv(), pd.read_excel(), and requests.get() are blocked. Using wget as work around.
    if sourcePath.find('www2.census.gov') > -1:
        if(verbose):
            print("morpc.load_tabular_data | INFO | Attempting to load data from Census FTP site. Using wget to retrieve file.")
            print("morpc.load_tabular_data | WARNING | Data from Census FTP must be temp saved. Using ./temp_data.")
        tempDir = os.path.normpath('./temp_data')
        if not os.path.exists(tempDir):
            os.makedirs(tempDir)
        wget(url = sourcePath, archive_dir = tempDir)
        # From here on, read from the local copy rather than the URL
        sourcePath = os.path.join(tempDir, os.path.split(sourcePath)[-1])

    if(fileType is None):
        if(verbose):
            print("morpc.load_tabular_data | INFO | File type is unspecified.  Will attempt to infer file type from file extension in source path.")
        fileExt = os.path.splitext(sourcePath)[1]
        if(fileExt == ".csv"):
            fileType = "csv"
        elif(fileExt == ".xlsx"):
            fileType = "xlsx"
        elif(fileExt == ".xls"):
            fileType = "xls"
        else:
            print("morpc.load_tabular_data | ERROR | File extension is unsupported: {}.".format(fileExt))
            raise RuntimeError("File extension is unsupported: {}".format(fileExt))
        if(verbose):
            print("morpc.load_tabular_data | INFO | Selecting file type {} based on file extension {}".format(fileType, fileExt))
    else:
        if(verbose):
            print("morpc.load_tabular_data | INFO | Using file type {} as specified by user.".format(fileType))

    isExcel = (fileType == "xlsx" or fileType == "xls")

    if(sheetName is None):
        if(isExcel):
            print("morpc.load_tabular_data | WARNING | Sheet name was not specified.  Will load first sheet in workbook.")

    if(verbose):
        print("morpc.load_tabular_data | INFO | Reading tabular data...")

    if(fileType == "csv"):
        df = pd.read_csv(sourcePath, sep=sep, encoding=encoding)
    elif(isExcel):
        # sheet_name=None would make pandas return a dict of every sheet, so ask for the first sheet (index 0)
        # explicitly when the user did not name one.
        df = pd.read_excel(sourcePath, sheet_name=(0 if sheetName is None else sheetName))
    else:
        print("morpc.load_tabular_data | ERROR | File type {} is not handled. Troubleshoot function.".format(fileType))
        raise RuntimeError("File type {} is not handled".format(fileType))

    # If the user has specified an archive directory, create an archival copy of the data
    if(archiveDir is not None):
        # If no file name was specified, we need to assign one
        if(archiveFileName is None):
            # First try to determine whether we are retrieving data from an API. In this case we may not be able to extract
            # a file name from the source path.  Specifically, look for a "?" character in the path.  This is forbidden in
            # Windows file paths and suggests that a query string is present.
            if(sourcePath.find("?") > -1):
                if(verbose):
                    print("morpc.load_tabular_data | INFO | File name is unspecified and source path appears to be an API query.  Will assign an alternate file name.")
                # If the sheet name is specified, use that as the file name.  Otherwise use a generic file name.
                if(sheetName is not None):
                    archiveFileName = "{0}.{1}".format(sheetName, fileType)
                else:
                    archiveFileName = "tabularData.{}".format(fileType)

            # If the source path doesn't look like an API query, then attempt to extract the file name from the path
            else:
                if(verbose):
                    print("morpc.load_tabular_data | INFO | File name is unspecified.  Will infer file name from source path.")
                archiveFileName = os.path.split(sourcePath)[-1]
            if(verbose):
                print("morpc.load_tabular_data | INFO | Using automatically-selected file name: {}".format(archiveFileName))

        archivePath = os.path.join(archiveDir, archiveFileName)

        if(verbose):
            print("morpc.load_tabular_data | INFO | Creating archival copy of tabular data at {}".format(archivePath))
        if(fileType == "csv"):
            # The writer needs an actual delimiter, so fall back to a comma when none was given
            df.to_csv(archivePath, sep=("," if sep is None else sep), encoding=encoding, index=False)
        elif(isExcel):
            # The writer needs an actual sheet name, so fall back to the pandas default when none was given
            df.to_excel(archivePath, sheet_name=("Sheet1" if sheetName is None else sheetName), index=False)
        else:
            print("morpc.load_tabular_data | ERROR | File type {} is not handled. Troubleshoot function.".format(fileType))
            raise RuntimeError("File type {} is not handled".format(fileType))

    # The temporary directory is deliberately left in place: it may be shared with other files, so removing the
    # whole tree here would be unsafe.
    if(tempDir is not None and verbose):
        print("morpc.load_tabular_data | INFO | Temporary directory for Census file was not removed: {}".format(tempDir))

    return df
|
|
1177
|
+
|
|
1041
1178
|
# Assign geographic identifiers
|
|
1042
1179
|
# Sometimes we have a set of locations and we would like to know what geography (county, zipcode, etc.) they fall in. The
|
|
1043
1180
|
# `assign_geo_identifiers()` function takes a set of georeference points and a list of geography levels and determines for each
|
|
@@ -1063,7 +1200,36 @@ def load_spatial_data(sourcePath, layerName=None, driverName=None, archiveDir=No
|
|
|
1063
1200
|
# that has not yet been implemented, please contact Adam Porr (or implement it yourself).
|
|
1064
1201
|
def assign_geo_identifiers(points, geographies):
|
|
1065
1202
|
"""
|
|
1066
|
-
|
|
1203
|
+
Assign geographic identifiers
|
|
1204
|
+
Sometimes we have a set of locations and we would like to know what geography (county, zipcode, etc.) they fall in. The
|
|
1205
|
+
`assign_geo_identifiers()` function takes a set of georeference points and a list of geography levels and determines for each
|
|
1206
|
+
level which area each point falls in
|
|
1207
|
+
|
|
1208
|
+
Parameters
|
|
1209
|
+
----------
|
|
1210
|
+
points : geopandas.GeoDataFrame
|
|
1211
|
+
a GeoPandas GeoDataFrame consisting of the points of interest
|
|
1212
|
+
geographies : list of str
|
|
1213
|
+
A Python list of one or more strings in which each element corresponds to a geography level. You can specify as
|
|
1214
|
+
many levels as you want from the following list, however note that the function must download the polygons and perform the analysis
|
|
1215
|
+
for each level so if you specify many levels it may take a long time.
|
|
1216
|
+
- "county" - County (Census TIGER)
|
|
1217
|
+
- "tract" - *Not currently implemented*
|
|
1218
|
+
- "blockgroup" - *Not currently implemented*
|
|
1219
|
+
- "block" - *Not currently implemented*
|
|
1220
|
+
- "zcta" - Census ZCTA (tl_2024_us_zcta520)
|
|
1221
|
+
- "place" - Census place (Census TIGER)
|
|
1222
|
+
- "placecombo" - *Not currently implemented*
|
|
1223
|
+
- "juris" - *Not currently implemented*
|
|
1224
|
+
- "region15County" - *Not currently implemented*
|
|
1225
|
+
- "region10County" - *Not currently implemented*
|
|
1226
|
+
- "regionCORPO" - *Not currently implemented*
|
|
1227
|
+
- "regionMPO" - *Not currently implemented*
|
|
1228
|
+
|
|
1229
|
+
Returns
|
|
1230
|
+
-------
|
|
1231
|
+
geopandas.GeoDataFrame
|
|
1232
|
+
A geodataframe with column name id_{geographies} representing the id from the geographies passed
|
|
1067
1233
|
"""
|
|
1068
1234
|
import geopandas as gpd
|
|
1069
1235
|
import pyogrio
|
|
@@ -1098,8 +1264,10 @@ def assign_geo_identifiers(points, geographies):
|
|
|
1098
1264
|
print("ERROR: Geography is currently unsupported: {}".format(geography))
|
|
1099
1265
|
raise RuntimeError
|
|
1100
1266
|
elif(geography == "zcta"):
|
|
1101
|
-
|
|
1102
|
-
|
|
1267
|
+
filePath = "https://www2.census.gov/geo/tiger/TIGER2024/ZCTA520/tl_2024_us_zcta520.zip"
|
|
1268
|
+
layerName = None
|
|
1269
|
+
driverName = "Census Shapefile"
|
|
1270
|
+
polyIdField = ""
|
|
1103
1271
|
elif(geography == "place"):
|
|
1104
1272
|
filePath = "https://www2.census.gov/geo/tiger/TIGER2020/PLACE/tl_2020_39_place.zip"
|
|
1105
1273
|
layerName = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|