matplotlib-map-utils 1.0.0__py3-none-any.whl
- matplotlib_map_utils/__init__.py +4 -0
- matplotlib_map_utils/defaults.py +416 -0
- matplotlib_map_utils/north_arrow.py +458 -0
- matplotlib_map_utils/scratch/map_utils.py +412 -0
- matplotlib_map_utils/scratch/north_arrow_old_classes.py +1185 -0
- matplotlib_map_utils/validation.py +332 -0
- matplotlib_map_utils-1.0.0.dist-info/LICENSE +674 -0
- matplotlib_map_utils-1.0.0.dist-info/METADATA +131 -0
- matplotlib_map_utils-1.0.0.dist-info/RECORD +11 -0
- matplotlib_map_utils-1.0.0.dist-info/WHEEL +5 -0
- matplotlib_map_utils-1.0.0.dist-info/top_level.txt +1 -0
matplotlib_map_utils/scratch/map_utils.py
@@ -0,0 +1,412 @@
# Importing other packages
import json
import re
import math
import os
os.environ["USE_PYGEOS"] = "0" # opt out of the PyGEOS backend; must be set before geopandas is imported
import requests
import geopandas
import shapely
import pandas
import numpy
import matplotlib
import matplotlib.pyplot
import matplotlib.patches
import matplotlib.patheffects
import matplotlib.lines
import matplotlib.cm
import matplotlib.colors
import matplotlib_scalebar.scalebar
import mpl_toolkits.axes_grid1.axes_divider
import adjustText
import rasterio
import rasterio.mask
import rasterio.enums
import rasterio.warp # needed by reproject_raster below
import rasterio.features # needed by rasterize_vector below
import osgeo.gdal

# Start a normal map with a single plot
def init_map(subplots=(1,1), figsize=(10,15), dpi=300, ticks=False, bg="white"):
    # Starting the fig and ax
    fig, ax = matplotlib.pyplot.subplots(subplots[0], subplots[1], figsize=figsize, dpi=dpi)
    # Hiding ticks if we don't want them
    if ticks == False:
        ax.set_xticks([])
        ax.set_yticks([])
    if bg:
        fig.patch.set_facecolor(bg)
    # Returning the fig and the ax
    return fig, ax

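# --- Illustrative usage sketch (not part of the original module) ---
# A minimal example of init_map feeding a GeoDataFrame plot; the file path
# "tracts.gpkg" is a hypothetical placeholder.
def _example_init_map():
    fig, ax = init_map(figsize=(8, 8), dpi=150)
    gdf = geopandas.read_file("tracts.gpkg")
    gdf.plot(ax=ax, color="tab:blue", edgecolor="white", linewidth=0.2)
    fig.savefig("map.png", bbox_inches="tight")
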
# Start a normal map with multiple plots
# Subplots is in (rows, columns) format, but figsize is in (width, height) :(
def init_maps(subplots=(2,2), figsize=(10,15), dpi=300, ticks=False, bg="white", sharex=False, sharey=False):
    # Starting the fig and axs
    fig, axs = matplotlib.pyplot.subplots(subplots[0], subplots[1], figsize=figsize, dpi=dpi, sharex=sharex, sharey=sharey)
    # Hiding ticks if we don't want them
    if ticks == False:
        for ax in axs.flatten():
            ax.set_xticks([])
            ax.set_yticks([])
    if bg:
        fig.patch.set_facecolor(bg)
    # Returning the fig and the axs
    return fig, axs

# Add a north arrow
# I THINK because we use ax.transAxes, loc is expressed in fraction of the axes (bottom left is 0,0 and top right is 1,1)
# AND radius is a fraction of the axis as well: so 0.05 would mean it is 5% of the axes "long" and "wide", leading to a given shape
# def north_arrow(ax, r, loc=(0.94,0.94), color="black", fontcolor="white", fontsize=8, zorder=99):
#     north_arrow = matplotlib.patches.RegularPolygon(loc, 3, radius=r, color="black", transform=ax.transAxes, zorder=zorder-1)
#     ax.add_patch(north_arrow)
#     ax.text(x=north_arrow.xy[0], y=north_arrow.xy[1], s='N', ha="center", va="center", fontsize=fontsize, color=fontcolor, transform=ax.transAxes, zorder=zorder)

# Add a scale bar
def scale_bar(ax, location="upper left", scale_format=None, **kwargs):
    if scale_format:
        ax.add_artist(matplotlib_scalebar.scalebar.ScaleBar(1, location=location, scale_formatter=lambda v,l: f"{v}{scale_format}", **kwargs))
    else:
        ax.add_artist(matplotlib_scalebar.scalebar.ScaleBar(1, location=location, **kwargs))

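# --- Illustrative usage sketch (not part of the original module) ---
# scale_bar passes dx=1 to matplotlib_scalebar's ScaleBar, so the axes are
# assumed to be in a projected CRS measured in meters; the file path and
# EPSG:26918 are hypothetical choices.
def _example_scale_bar():
    fig, ax = init_map()
    gdf = geopandas.read_file("tracts.gpkg").to_crs(epsg=26918)
    gdf.plot(ax=ax)
    scale_bar(ax, location="lower right", box_alpha=0.5)
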
# Add an arbitrary colorbar
# TODO: work for arbitrary colormaps, not just named ones
# TODO: figure out how to do this for "left" or "right"?
# Seems that the main issue is when the figure still has room to grow in a given dimension
# Could also do it like: https://matplotlib.org/stable/users/explain/axes/colorbar_placement.html#colorbar-placement
def color_bar(ax, fig, cmap, vmin, vmax, label, cax_kwargs):
    divider = mpl_toolkits.axes_grid1.axes_divider.make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cax.set_axis_off()
    cmap = matplotlib.cm.get_cmap(cmap)
    norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
    fig.colorbar(matplotlib.cm.ScalarMappable(norm, cmap), ax=cax, location="bottom", label=label)

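# --- Illustrative usage sketch (not part of the original module) ---
# Draws a horizontal colorbar beneath a choropleth using the same colormap
# and value range as the plot; the "median_income" column is hypothetical.
def _example_color_bar():
    fig, ax = init_map()
    gdf = geopandas.read_file("tracts.gpkg")
    gdf.plot(ax=ax, column="median_income", cmap="viridis", vmin=0, vmax=100000)
    color_bar(ax, fig, "viridis", 0, 100000, "Median income (USD)", cax_kwargs={})
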
# Create legend elements
# Expects a LIST of dictionaries with 3 values: label, type (patch or point or line), and kwargs to format it
# example: [{"type":"s", "label":"Example Label", "kwargs":{"color":"tab:blue"}}]
def legend(ax, elements):
    for_legend = []
    for e in elements:
        if e["type"] == "patch" or e["type"] == "s":
            ele = matplotlib.patches.Patch(label=e["label"], **e["kwargs"])
        elif e["type"] == "point" or e["type"] == "p":
            ele = matplotlib.lines.Line2D([0], [0], color="none", label=e["label"], **e["kwargs"])
        elif e["type"] == "line" or e["type"] == "l":
            ele = matplotlib.lines.Line2D([0], [0], label=e["label"], **e["kwargs"])
        else:
            print("Error: invalid type")
            ele = matplotlib.patches.Patch(label=e["label"])
        for_legend.append(ele)
    return for_legend

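# --- Illustrative usage sketch (not part of the original module) ---
# legend() only builds the handle objects; they still need to be passed to
# ax.legend(). For point entries, marker styling goes in "kwargs".
def _example_legend():
    fig, ax = init_map()
    elements = [
        {"type": "s", "label": "Parks", "kwargs": {"color": "tab:green"}},
        {"type": "p", "label": "Schools", "kwargs": {"marker": "o", "markerfacecolor": "tab:blue", "markeredgecolor": "none"}},
        {"type": "l", "label": "Rail lines", "kwargs": {"color": "tab:red", "linewidth": 2}},
    ]
    ax.legend(handles=legend(ax, elements), loc="lower left")
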
# Add text labels
# format can be a str.format template (e.g. "{:.0f}"); the default {} renders the label unchanged
# size must be a number, since the stroke width is derived from it
def label_points(ax, gdf, col, wrap=None, size=None, format={}, color="white", stroke="black", alignment=("center","center"), shift=(0,0), override=None, mask=False, adjust=False, adjust_kwargs={}):
    # List to hold all of our eventual text objects
    texts = []
    # If masking, only label features that intersect the current view, clipped to it
    if mask:
        xmin,xmax = ax.get_xlim()
        ymin,ymax = ax.get_ylim()
        bbox = shapely.box(xmin, ymin, xmax, ymax)
        gdf_to_label = gdf.cx[xmin:xmax, ymin:ymax].copy()
        gdf_to_label["geometry"] = gdf_to_label.intersection(bbox).copy()
    else:
        gdf_to_label = gdf.copy()
    # Iterating through each provided point
    for i,r in gdf_to_label.iterrows():
        x = r["geometry"].centroid.x + shift[0]
        y = r["geometry"].centroid.y + shift[1]
        label = r[col]
        # If we want to wrap our labels (only a certain # of words per line)
        if wrap:
            words = label.split(" ")
            label = ""
            for j,w in enumerate(words):
                label += w
                if j+1 == len(words):
                    pass
                elif ((j+1) % wrap == 0):
                    label += "\n"
                else:
                    label += " "
        # If we need to manually override the text of a label (mapping it to an empty string hides it)
        if override and r[col] in override.keys():
            label = override.get(r[col])

        # Making the actual text
        texts.append(ax.text(x, y, f"{format}".format(label), fontsize=size,
                             color=color, path_effects=[matplotlib.patheffects.withStroke(linewidth=math.floor(size/2), foreground=stroke)],
                             ha=alignment[0], va=alignment[1]))

    # Final adjustment with adjustText, if requested
    if adjust:
        adjustText.adjust_text(texts, **adjust_kwargs)

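# --- Illustrative usage sketch (not part of the original module) ---
# Labels every feature in view by a "NAME" column (hypothetical), wrapping
# after two words and letting adjustText nudge overlapping labels apart.
def _example_label_points():
    fig, ax = init_map()
    gdf = geopandas.read_file("tracts.gpkg")
    gdf.plot(ax=ax)
    label_points(ax, gdf, col="NAME", wrap=2, size=8, mask=True, adjust=True)
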
# Rename legend labels with a list of custom text
def replace_legend_items(ax, labels):
    for t,l in zip(ax.get_legend().texts, labels):
        t.set_text(l)

# Adding a patch to a currently-existing legend
# STILL BEING WORKED ON!
def add_patch(ax, patch, label):
    legend = ax.get_legend()

    handles = ax.get_legend().legendHandles
    # This interestingly returns a Text object or array of some sort?
    labels = ax.get_legend().texts
    print(handles, labels)
    handles.append(patch)
    labels.append(label)

    legend._legend_box = None
    legend._init_legend_box(handles, labels)
    legend._set_loc(legend._loc)
    # This doesn't work, but this does: ax.get_legend().get_title().get_text()
    legend.set_title(legend.get_title().get_text())

# Centering the map on a given object
def center_map(ax, geo=None, bounds=None, incr=(0.1, 0.1), square=False):
    if geo is not None:
        # Get the bounds of the geo we want to center on
        minx, miny, maxx, maxy = geo.total_bounds
    elif bounds is not None:
        minx, miny, maxx, maxy = bounds
    else:
        minx, maxx = ax.get_xlim()
        miny, maxy = ax.get_ylim()
    # Get the range of each boundary
    rangex = maxx-minx
    rangey = maxy-miny
    # Will we increment by the same amount in each direction?
    if square==True:
        rangemax = max(rangex, rangey)
        incrementx = rangemax * incr[0]
        incrementy = rangemax * incr[1]
        midx = (maxx+minx)/2
        midy = (maxy+miny)/2
        ax.set_xlim(midx-incrementx, midx+incrementx)
        ax.set_ylim(midy-incrementy, midy+incrementy)
    else:
        # Find the amount we want to increment on
        incrementx = rangex * incr[0]
        incrementy = rangey * incr[1]
        # Set new x and y limits for the axis
        ax.set_xlim(minx-incrementx, maxx+incrementx)
        ax.set_ylim(miny-incrementy, maxy+incrementy)

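# --- Illustrative usage sketch (not part of the original module) ---
# Zooms the axes to a single feature plus a 5% margin on each side; the
# filter column and value are hypothetical.
def _example_center_map():
    fig, ax = init_map()
    gdf = geopandas.read_file("tracts.gpkg")
    gdf.plot(ax=ax)
    center_map(ax, geo=gdf[gdf["NAME"] == "Downtown"], incr=(0.05, 0.05))
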
### RASTER UTILS ###
# Stolen from: https://rasterio.readthedocs.io/en/stable/topics/reproject.html
def reproject_raster(raster, crs, output, return_open=False):
    # Normalizing the CRS: an integer is treated as an EPSG code
    if type(crs) == int:
        new_crs = f"EPSG:{str(crs)}"
    else:
        new_crs = crs
    # Calculating how the new projection is warped from the base
    transform, width, height = rasterio.warp.calculate_default_transform(raster.crs, new_crs,
                                                                         raster.width, raster.height,
                                                                         *raster.bounds)
    # Updating the metadata of the source raster
    kwargs = raster.meta.copy()
    kwargs.update({
        'crs': new_crs,
        'transform': transform,
        'width': width,
        'height': height})
    # Saving the file
    with rasterio.open(output, "w", **kwargs) as reproj:
        # Reprojecting each band
        for i in range(1, raster.count + 1):
            rasterio.warp.reproject(
                source=rasterio.band(raster, i),
                destination=rasterio.band(reproj, i),
                src_transform=raster.transform,
                src_crs=raster.crs,
                dst_transform=transform,
                dst_crs=new_crs,
                resampling=rasterio.warp.Resampling.nearest)
    if return_open:
        # Returning the reprojected raster
        return rasterio.open(output)
    else:
        return output

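# --- Illustrative usage sketch (not part of the original module) ---
# Reprojects an open rasterio dataset to EPSG:4326 and gets back a freshly
# opened handle on the written result; both file paths are hypothetical.
def _example_reproject_raster():
    with rasterio.open("elevation.tif") as src:
        reprojected = reproject_raster(src, 4326, "elevation_4326.tif", return_open=True)
    print(reprojected.crs)
    reprojected.close()
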
# TODO: allow this to read an already-existing raster to use as input
# TODO: allow this to work for non-categorical data
# TODO: allow this to work for non-integer data
# vector should be dissolved beforehand
# ref_path is a string path for the empty reference raster that gets created
# ras_path is a string path for the output raster
# res is a tuple of (x,y) resolution values
# col is the numeric column you want to retain as an attribute value
# coltype is a tuple of (numpy dtype, rasterio dtype)
# all_touched=False sets cell assignment based on which shape contains the centroid
def rasterize_vector(vector, ref_path, ras_path, res, col=None, coltype=None, all_touched=False):
    ### FIRST: SETTING UP A REFERENCE RASTER ###
    # Setting bounds of the vector
    xmin, ymin, xmax, ymax = vector.total_bounds
    # These should be in whatever units your crs is in (meters, feet, etc.)
    xres = res[0]
    yres = -1*res[1] # we make this negative because GDAL expects (0,0) to be the TOP LEFT, not the BOTTOM LEFT
    # Setting the spatial reference to be the same as the file we want to rasterize
    spatial_ref = vector.crs.to_wkt()
    # Calculating the size of the raster in pixels
    xsize = abs(int(((xmax-xmin)/xres)))
    ysize = abs(int(((ymax-ymin)/yres)))

    # Initializing the gdal driver for geotiffs
    driver = osgeo.gdal.GetDriverByName("GTiff")

    # Creating the raster
    ds = driver.Create(ref_path, xsize, ysize, 1, osgeo.gdal.GDT_Int16, options=["COMPRESS=LZW", "TILED=YES"]) # 1 is the number of bands we made
    # Setting the projection
    ds.SetProjection(spatial_ref)
    # Setting the geotransform (do not understand this fully)
    # I do know that the 3rd and 5th parameters (being 0) mean that the map is oriented "up"
    # See: https://stackoverflow.com/questions/27166739/description-of-parameters-of-gdal-setgeotransform
    ds.SetGeoTransform([xmin, xres, 0, ymax, 0, yres])
    # Filling in the raster band with 0
    ds.GetRasterBand(1).Fill(0)
    ds.GetRasterBand(1).SetNoDataValue(-1)
    # Cleaning up the memory
    ds.FlushCache()
    ds = None

    ### NEXT: RASTERIZING ###
    # Creating tuples that pair the geometry with the code
    if col:
        geom_value = ((geom,value) for geom,value in zip(vector["geometry"], vector[col]))
        nd = coltype[0]
        rd = coltype[1]
    else:
        geom_value = ((geom,value) for geom,value in zip(vector["geometry"], range(0,len(vector))))
        # No coltype provided: fall back to int16, matching the reference raster created above
        nd = numpy.int16
        rd = rasterio.int16
    # Now actually rasterizing the vector using the properties of the empty raster we made above^
    with rasterio.open(ref_path) as template_raster:
        rasterized_vector = rasterio.features.rasterize(geom_value,
                                                        out_shape = template_raster.shape,
                                                        transform = template_raster.transform,
                                                        all_touched = all_touched,
                                                        fill = -1, # background value
                                                        merge_alg = rasterio.enums.MergeAlg.replace, # .add is also an option
                                                        dtype = nd)
        # Saving the raster
        with rasterio.open(ras_path, "w", driver="GTiff",
                           crs=template_raster.crs, transform=template_raster.transform, count=1, # count refers to the number of bands
                           dtype=rd, width=template_raster.width, height=template_raster.height) as raster_save:
            raster_save.write(rasterized_vector, indexes=1)

    return ras_path

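# --- Illustrative usage sketch (not part of the original module) ---
# Burns a dissolved polygon layer into a 30x30 (CRS-unit) grid, keeping an
# integer "zone_id" column as the cell value; paths and column are hypothetical.
def _example_rasterize_vector():
    zones = geopandas.read_file("zones.gpkg").dissolve(by="zone_id", as_index=False)
    rasterize_vector(zones, "zones_ref.tif", "zones.tif", res=(30, 30),
                     col="zone_id", coltype=(numpy.int16, rasterio.int16))
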
# Masking a raster with a vector
# TODO: rewrite this with numpy arrays?
def mask_raster(original, new, gdf, crop=True, nodata=-1, all_touched=False, filled=True):
    with rasterio.open(original) as og_raster:
        mask, transform = rasterio.mask.mask(og_raster, gdf["geometry"], crop=crop, nodata=nodata, all_touched=all_touched, filled=filled)
        # return mask
        with rasterio.open(new, "w", driver="GTiff",
                           crs=og_raster.crs, transform=transform, count=1,
                           dtype=og_raster.dtypes[0], width=mask.shape[2], height=mask.shape[1]) as raster_save:
            raster_save.write(mask)

    return new

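# --- Illustrative usage sketch (not part of the original module) ---
# Clips a raster to the outline of a vector layer, writing the nodata value
# (-1) outside it; both paths are hypothetical.
def _example_mask_raster():
    boundary = geopandas.read_file("city_boundary.gpkg")
    mask_raster("elevation.tif", "elevation_city.tif", boundary, crop=True)
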
# Changing the resolution of a raster
# new_res needs to be a tuple of (x,y) resolutions
def resample_raster(original, new, new_res, resample=rasterio.enums.Resampling.nearest):
    # Opening the original raster
    with rasterio.open(original) as og:
        xscale = og.res[0]/new_res[0]
        yscale = og.res[1]/new_res[1]
        # Storing the transformation and other data
        new_profile = og.profile.copy()
        # Resampling the data
        new_data = og.read(out_shape=(og.count, int(og.height * yscale), int(og.width * xscale)), resampling=resample)
        # Scaling the transformation
        new_transform = og.transform * og.transform.scale((1/xscale), (1/yscale))
    # Updating the output profile
    new_profile.update({"height":new_data.shape[-2],"width":new_data.shape[-1],"transform":new_transform})
    # Writing the new information
    with rasterio.open(new, "w", **new_profile) as nr:
        nr.write(new_data)
    # Returning the string path for the new raster
    return new

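# --- Illustrative usage sketch (not part of the original module) ---
# Coarsens a raster to 100x100 CRS units per cell, averaging source cells
# instead of taking the nearest one; paths are hypothetical.
def _example_resample_raster():
    resample_raster("elevation.tif", "elevation_100m.tif", (100, 100),
                    resample=rasterio.enums.Resampling.average)
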
### DATA UTILS ###
# Querying the ACS API for data
# geo should be a dictionary along the lines of {"for":"RG:*", "in":["FG1:*","FG2:*"]}
# Where RG is the geography you want data returned at the level of
# And FG# are the filtering geographies you want to filter by
# See here for examples and detail: https://api.census.gov/data/2022/acs/acs5/profile/examples.html
# Also here: https://api.census.gov/data/2022/acs/acs5/geography.html
# NOTE: probably best if "in" is always a list of values, even if you only have one
# NOTE: can filter by multiple geos if comma separated, like 1,2,3
def get_acs(table, geo, year=2022, acs="acs5", pivot=True, drop_margin=True):
    table = table.upper()
    # Adding the necessary code for DP tables
    if table[:2] == "DP":
        acs = acs + "/profile"
    elif table[:1] == "S":
        acs = acs + "/subject"
    # elif table[:1] == "C":
    #     acs = acs + "/cprofile"
    # Constructing the geo filter
    geo_filter = f"for={geo['for']}"
    if "in" in geo:
        if type(geo['in']) == list:
            geo_filter += "&in="
            for i,f in enumerate(geo['in']):
                geo_filter += f
                if i+1 < len(geo['in']):
                    geo_filter += "+"
        else:
            geo_filter += f"&in={geo['in']}"
    # Setting up the census url
    key = "d1244ae231dc81cf92fefca3ae8467caf62b0dfa" # NOTE: MY PERSONAL API KEY
    acs_url = f"https://api.census.gov/data/{year}/acs/{acs}?get=group({table})&{geo_filter}&key={key}"
    # Retrieving the data and doing some light cleaning
    df_acs = pandas.DataFrame(requests.get(acs_url).json())
    df_acs = df_acs.rename(columns=df_acs.iloc[0]).drop(df_acs.index[0]).reset_index(drop=True)

    # If pivot is true, then we want to relabel the data with useful column descriptions instead
    if pivot:
        json_vars = requests.get(f"https://api.census.gov/data/{year}/acs/{acs}/variables.json").json()
        df_vars = pandas.DataFrame.from_dict(json_vars["variables"], orient="index").reset_index()
        ## Cleaning up the Label column
        ### Our ultimate vision is to split this into groups to make for a flat hierarchy
        #### TODO: Need to create a "type" column that splits out Estimate vs Percent!
        df_vars["label_clean"] = df_vars["label"].str.replace("Estimate!!","")
        df_vars["label_clean"] = df_vars["label_clean"].str.replace(r"[^\w\s!]","",regex=True)
        ## Cleaning up the Concept column
        df_vars["concept_clean"] = df_vars["concept"].str.replace(r" \(.+\)", "",regex=True)
        df_vars["concept_clean"] = df_vars["group"] + " " + df_vars["concept_clean"]
        ## Renaming the index column to variable
        df_vars = df_vars.rename(columns={"index":"variable"})

        # Now applying these variable names to our original dataframe
        # First getting the full list of geos we need to preserve
        if "in" in geo:
            geo_melt = [re.search(r"(.+)\:",g).group(1) for g in geo["in"]]
        else:
            geo_melt = []
        geo_melt.append(re.search(r"(.+)\:",geo["for"]).group(1))
        # Now melting on those variables (pivoting wide -> long)
        df_acs = df_acs.melt(id_vars=["GEO_ID","NAME"] + geo_melt)
        ## Dropping the annotation (and optionally margin-of-error) variables
        if drop_margin:
            df_acs = df_acs.drop(df_acs.loc[df_acs["variable"].str.contains("EA$|M$|MA$")].index)
        else:
            df_acs = df_acs.drop(df_acs.loc[df_acs["variable"].str.contains("EA$|MA$")].index)
        # Merging clean variable info
        # TODO: this could be made more efficient: need to unpivot on the "attributes" column for each estimated variable, and then append a new label_l# column for ESTIMATE vs MARGIN OF ERROR
        df_acs = df_acs.merge(df_vars.loc[:,["variable","label_clean"]], how="left", on="variable")
        # Splitting the label_clean column by the double exclamation point
        names = df_acs["label_clean"].str.split("!!", expand=True)
        # Renaming the columns to something useable
        names.columns = ["label_l"+str(i) for i in range(0, names.shape[1])]
        # Rejoining our new column info on to the data
        df_acs = df_acs.merge(names, how="inner", left_index=True, right_index=True)
        # Dropping the label_clean column
        df_acs = df_acs.drop(columns="label_clean")
        # Moving the value column to the end
        df_acs = df_acs[[c for c in df_acs if c not in ["value"]] + ["value"]]
        # Table is now cleaned!

    return df_acs
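# --- Illustrative usage sketch (not part of the original module) ---
# Pulls table DP05 for every census tract in New York County, NY (state 36,
# county 061) in long format; note that this makes live requests to the
# Census API using the key hardcoded above.
def _example_get_acs():
    df = get_acs("DP05", {"for": "tract:*", "in": ["state:36", "county:061"]}, year=2022)
    print(df.head())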