matplotlib-map-utils 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,412 @@
1
+ # Importing other packages
2
+ import json
3
+ import re
4
+ import math
5
+ import os
6
# Must be set before geopandas is imported: tells geopandas not to use the PyGEOS backend.
# (The variable geopandas reads is USE_PYGEOS; the previous spelling "USE_PYGOES" had no effect.)
os.environ["USE_PYGEOS"] = "0"
7
+ import requests
8
+ import geopandas
9
+ import shapely
10
+ import pandas
11
+ import numpy
12
+ import matplotlib
13
+ import matplotlib.pyplot
14
+ import matplotlib.patches
15
+ import matplotlib.patheffects
16
+ import matplotlib_scalebar.scalebar
17
+ import mpl_toolkits.axes_grid1.axes_divider
18
+ import adjustText
19
+ import rasterio
20
+ import rasterio.mask
21
+ import rasterio.enums
22
+ import osgeo.gdal
23
+
24
+ # Start a normal map with a single plot
25
def init_map(subplots=(1,1), figsize=(10,15), dpi=300, ticks=False, bg="white"):
    """Create a figure and its axes, ready to draw a map on.

    Args:
        subplots: (rows, columns) of the subplot grid.
        figsize: (width, height) of the figure, passed to matplotlib.
        dpi: resolution of the figure.
        ticks: when falsy, x and y ticks are removed from every axes.
        bg: figure background colour; a falsy value leaves it untouched.

    Returns:
        (fig, ax) exactly as returned by ``matplotlib.pyplot.subplots``:
        ``ax`` is a single Axes for a 1x1 grid and an array of Axes otherwise.
    """
    fig, ax = matplotlib.pyplot.subplots(subplots[0], subplots[1], figsize=figsize, dpi=dpi)
    if not ticks:
        # subplots() hands back an ndarray for any grid larger than 1x1,
        # so normalise to an iterable before touching the ticks
        all_axes = ax.flat if isinstance(ax, numpy.ndarray) else [ax]
        for single_ax in all_axes:
            single_ax.set_xticks([])
            single_ax.set_yticks([])
    if bg:
        fig.patch.set_facecolor(bg)
    return fig, ax
36
+
37
+ # Start a normal map with multiple plots
38
+ # Subplots is in (rows, column) format, but figsize is in (width, length) :(
39
def init_maps(subplots=(2,2), figsize=(10,15), dpi=300, ticks=False, bg="white", sharex=False, sharey=False):
    """Create a figure holding a grid of axes, ready to draw several maps on.

    Args:
        subplots: (rows, columns) of the subplot grid.
        figsize: (width, height) of the figure, passed to matplotlib.
        dpi: resolution of the figure.
        ticks: when falsy, x and y ticks are removed from every axes.
        bg: figure background colour; a falsy value leaves it untouched.
        sharex: passed through to ``matplotlib.pyplot.subplots``.
        sharey: passed through to ``matplotlib.pyplot.subplots``.

    Returns:
        (fig, axs) exactly as returned by ``matplotlib.pyplot.subplots``.
    """
    fig, axs = matplotlib.pyplot.subplots(subplots[0], subplots[1], figsize=figsize, dpi=dpi, sharex=sharex, sharey=sharey)
    if not ticks:
        # A 1x1 grid comes back as a bare Axes (no .flatten()), so handle both shapes
        all_axes = axs.flat if isinstance(axs, numpy.ndarray) else [axs]
        for single_ax in all_axes:
            single_ax.set_xticks([])
            single_ax.set_yticks([])
    if bg:
        fig.patch.set_facecolor(bg)
    return fig, axs
51
+
52
+ # Add a north arrow
53
+ # I THINK because we use ax.transAxes, loc is expressed in fraction of the axes (bottom left is 0,0 and top right is 1,1)
54
+ # AND radius is a fraction of the axis as well: so 0.05 would mean it is 5% of the axes "long" and "wide", leading to a given shape
55
+ # def north_arrow(ax, r, loc=(0.94,0.94), color="black", fontcolor="white", fontsize=8, zorder=99):
56
+ # north_arrow = matplotlib.patches.RegularPolygon(loc, 3, radius=r, color="black", transform=ax.transAxes, zorder=zorder-1)
57
+ # ax.add_patch(north_arrow)
58
+ # ax.text(x=north_arrow.xy[0], y=north_arrow.xy[1], s='N', ha="center", va="center", fontsize=fontsize, color=fontcolor, transform=ax.transAxes, zorder=zorder)
59
+
60
+ # Add a scale bar
61
def scale_bar(ax, location="upper left", scale_format=None, **kwargs):
    """Attach a matplotlib-scalebar ``ScaleBar`` (built with dx=1) to ``ax``.

    Args:
        ax: axes the scale bar is added to.
        location: passed to ``ScaleBar`` as ``location``.
        scale_format: when truthy, the bar label becomes the value immediately
            followed by this text, replacing the default formatter.
        **kwargs: forwarded unchanged to ``ScaleBar``.
    """
    formatter_args = {}
    if scale_format:
        # Custom label: drop the unit supplied by ScaleBar and append our own suffix
        formatter_args["scale_formatter"] = lambda value, unit: f"{value}{scale_format}"
    bar = matplotlib_scalebar.scalebar.ScaleBar(1, location=location, **formatter_args, **kwargs)
    ax.add_artist(bar)
66
+
67
+ # Add an arbitrary colorbar
68
+ # TODO: work for arbitrary colormaps, not just named ones
69
+ # TODO: figure out how to do this for "left" or "right"?
70
+ # Seems that the main issue is when the figure still has room to grow in a given dimension
71
+ # Could also do like: https://matplotlib.org/stable/users/explain/axes/colorbar_placement.html#colorbar-placement
72
def color_bar(ax, fig, cmap, vmin, vmax, label, cax_kwargs=None):
    """Add a stand-alone horizontal colorbar underneath ``ax``.

    Args:
        ax: axes the colorbar is placed below.
        fig: figure that owns ``ax``.
        cmap: registered colormap name, a ``Colormap`` instance, or None.
        vmin: data value mapped to the start of the colormap.
        vmax: data value mapped to the end of the colormap.
        label: text shown along the colorbar.
        cax_kwargs: accepted for backward compatibility; currently unused.

    Returns:
        The colorbar returned by ``fig.colorbar``.
    """
    # Imported here because the file-level imports only bring these
    # submodules in indirectly (through matplotlib.pyplot)
    import matplotlib.cm
    import matplotlib.colors

    divider = mpl_toolkits.axes_grid1.axes_divider.make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cax.set_axis_off()

    if cmap is None or isinstance(cmap, matplotlib.colors.Colormap):
        # Colormap objects (and None) can be handed to ScalarMappable as they are
        colormap = cmap
    elif hasattr(matplotlib, "colormaps"):
        # Registry lookup; matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7
        colormap = matplotlib.colormaps[cmap]
    else:
        # Older Matplotlib without the colormap registry
        colormap = matplotlib.cm.get_cmap(cmap)

    norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
    mappable = matplotlib.cm.ScalarMappable(norm=norm, cmap=colormap)
    return fig.colorbar(mappable, ax=cax, location="bottom", label=label)
79
+
80
+ # Create legend elements
81
+ # Expects a LIST of dictionaries with 3 values: label, type (patch or point or line), and kwargs to format it
82
+ # example: [{"type":"s", "label":"Example Label", "kwargs":{"color":"tab:blue"}}]
83
def legend(ax, elements):
    """Build legend handles from a list of element descriptions.

    Args:
        ax: unused; kept so existing callers keep working.
        elements: list of dicts with the keys
            "type"   - "patch"/"s", "point"/"p" or "line"/"l",
            "label"  - text shown in the legend,
            "kwargs" - optional styling passed to the Matplotlib artist.
            Example: [{"type": "s", "label": "Example Label", "kwargs": {"color": "tab:blue"}}]

    Returns:
        List of artists suitable for ``ax.legend(handles=...)``. An element
        with an unknown type is reported on stdout and replaced by a default
        patch so the remaining entries are still produced.
    """
    # Imported here because the file-level imports only bring this
    # submodule in indirectly (through matplotlib.pyplot)
    import matplotlib.lines

    handles = []
    for element in elements:
        kind = element["type"]
        text = element["label"]
        style = element.get("kwargs") or {}
        if kind in ("patch", "s"):
            handle = matplotlib.patches.Patch(**{**style, "label": text})
        elif kind in ("point", "p"):
            # The connecting line is hidden by default; merging (instead of passing
            # color twice) lets kwargs carry their own "color" without a TypeError
            handle = matplotlib.lines.Line2D([0], [0], **{"color": "none", **style, "label": text})
        elif kind in ("line", "l"):
            handle = matplotlib.lines.Line2D([0], [0], **{**style, "label": text})
        else:
            print("Error: invalid type")
            handle = matplotlib.patches.Patch(label=text)
        handles.append(handle)
    return handles
97
+
98
+ # Add text labels
99
def _wrap_words(text, words_per_line):
    """Re-join the space-separated words of ``text`` with a newline after every ``words_per_line`` words."""
    words = str(text).split(" ")
    lines = [" ".join(words[start:start + words_per_line]) for start in range(0, len(words), words_per_line)]
    return "\n".join(lines)


def label_points(ax, gdf, col, wrap=None, size=None, format="{}", color="white", stroke="black", alignment=("center","center"), shift=(0,0), override=None, mask=False, adjust=False, adjust_kwargs=None):
    """Write the values of one column as text labels at each geometry's centroid.

    Args:
        ax: axes to draw the labels on.
        gdf: GeoDataFrame with a "geometry" column and the column ``col``.
        col: name of the column holding the label values.
        wrap: number of words per line; falsy disables wrapping.
        size: font size; None uses Matplotlib's default font size.
        format: ``str.format`` template applied to each label, e.g. "{:,}".
        color: text colour.
        stroke: colour of the outline drawn around the text.
        alignment: (horizontal, vertical) text alignment.
        shift: (x, y) offset added to every centroid, in data units.
        override: optional dict mapping raw column values to replacement labels.
        mask: when true, only features inside the current axes limits are
            labelled, and each is clipped to those limits before its centroid
            is taken.
        adjust: when true, ``adjustText.adjust_text`` repositions the labels.
        adjust_kwargs: extra keyword arguments for ``adjust_text``.

    Returns:
        List of the Matplotlib text objects that were created.
    """
    adjust_kwargs = {} if adjust_kwargs is None else adjust_kwargs
    # str() keeps the legacy default working: an empty dict renders as "{}"
    template = f"{format}"
    # The outline is half the font size; fall back to the rc default when no size is given
    size_in_points = matplotlib.rcParams["font.size"] if size is None else size
    outline = [matplotlib.patheffects.withStroke(linewidth=math.floor(size_in_points / 2), foreground=stroke)]

    if mask:
        xmin, xmax = ax.get_xlim()
        ymin, ymax = ax.get_ylim()
        bbox = shapely.box(xmin, ymin, xmax, ymax)
        gdf_to_label = gdf.cx[xmin:xmax, ymin:ymax].copy()
        gdf_to_label["geometry"] = gdf_to_label.intersection(bbox).copy()
    else:
        gdf_to_label = gdf

    texts = []
    for _, row in gdf_to_label.iterrows():
        centroid = row["geometry"].centroid
        raw_value = row[col]
        label = _wrap_words(raw_value, wrap) if wrap else raw_value
        # A manual override replaces the (possibly wrapped) label entirely
        if override and raw_value in override:
            label = override[raw_value]
        texts.append(ax.text(centroid.x + shift[0], centroid.y + shift[1], template.format(label),
                             fontsize=size, color=color, path_effects=outline,
                             ha=alignment[0], va=alignment[1]))

    if adjust:
        adjustText.adjust_text(texts, **adjust_kwargs)
    return texts
139
+
140
+ # Rename legend labels with a list of custom text
141
def replace_legend_items(ax, labels):
    """Overwrite the entries of the legend on ``ax`` with ``labels``, in order.

    Pairing stops at the shorter of the two sequences, so surplus legend
    entries keep their text and surplus labels are ignored.
    """
    legend_texts = ax.get_legend().texts
    for text_artist, new_label in zip(legend_texts, labels):
        text_artist.set_text(new_label)
144
+
145
+ # Adding a patch to a currently-existing legend
146
+ # STILL BEING WORKED ON!
147
def add_patch(ax, patch, label):
    """Append one handle and its label to the legend already attached to ``ax``.

    NOTE: still experimental. The legend is rebuilt in place through private
    ``Legend`` methods (``_init_legend_box``, ``_set_loc``), which may change
    between Matplotlib releases.

    Args:
        ax: axes whose existing legend is extended.
        patch: artist to use as the new legend handle.
        label: text for the new entry.
    """
    legend = ax.get_legend()

    # `legendHandles` was renamed to `legend_handles` in Matplotlib 3.7; support both
    handles = getattr(legend, "legend_handles", None)
    if handles is None:
        handles = legend.legendHandles
    handles = list(handles) + [patch]
    # The rebuild needs plain strings, not the Text artists stored on the legend
    labels = [text.get_text() for text in legend.get_texts()] + [label]
    # Read the title before rebuilding; reading it afterwards was noted not to work
    title = legend.get_title().get_text()

    legend._legend_box = None
    legend._init_legend_box(handles, labels)
    legend._set_loc(legend._loc)
    legend.set_title(title)
162
+
163
+ # Centering the map on a given object
164
def center_map(ax, geo=None, bounds=None, incr=(0.1, 0.1), square=False):
    """Set the x/y limits of ``ax`` around a target extent.

    The extent comes from ``geo.total_bounds`` when ``geo`` is given, else from
    ``bounds`` as (minx, miny, maxx, maxy), else from the current axes limits.

    Default mode pads the extent on every side by ``incr[0]`` of its width
    (x) and ``incr[1]`` of its height (y).

    Square mode (``square == True``) centres the view on the extent's midpoint
    and uses ``incr`` times the larger of width/height as the *half*-size of
    the view in each direction.
    NOTE(review): with the default incr of 0.1 the square view therefore covers
    only the central 20% of the extent - confirm this is intended.
    """
    if geo is not None:
        left, bottom, right, top = geo.total_bounds
    elif bounds is not None:
        left, bottom, right, top = bounds
    else:
        left, right = ax.get_xlim()
        bottom, top = ax.get_ylim()

    width = right - left
    height = top - bottom

    # Equality rather than truthiness is kept on purpose to preserve behaviour
    if square == True:
        longest = max(width, height)
        half_x = longest * incr[0]
        half_y = longest * incr[1]
        centre_x = (right + left) / 2
        centre_y = (top + bottom) / 2
        ax.set_xlim(centre_x - half_x, centre_x + half_x)
        ax.set_ylim(centre_y - half_y, centre_y + half_y)
        return

    pad_x = width * incr[0]
    pad_y = height * incr[1]
    ax.set_xlim(left - pad_x, right + pad_x)
    ax.set_ylim(bottom - pad_y, top + pad_y)
192
+
193
+ ### RASTER UTILS ###
194
+ # Stolen from: https://rasterio.readthedocs.io/en/stable/topics/reproject.html
195
def reproject_raster(raster, crs, output, return_open=False):
    """Reproject every band of an open raster into a new CRS and save it.

    Args:
        raster: open rasterio dataset to reproject.
        crs: target CRS; an integer (including numpy integers) is treated as
            an EPSG code, anything else is passed to rasterio unchanged.
        output: path of the file to write.
        return_open: when true, return the new file opened for reading
            (the caller is responsible for closing it); otherwise return
            ``output``.

    Returns:
        An open rasterio dataset, or the ``output`` path.
    """
    # rasterio.warp is not among the file-level imports, so import it explicitly
    import numbers
    import rasterio.warp

    if isinstance(crs, numbers.Integral) and not isinstance(crs, bool):
        new_crs = f"EPSG:{crs}"
    else:
        new_crs = crs

    # Work out the grid (transform and size) of the raster in the target CRS
    transform, width, height = rasterio.warp.calculate_default_transform(
        raster.crs, new_crs, raster.width, raster.height, *raster.bounds)

    # Start from the source metadata and swap in the new georeferencing
    kwargs = raster.meta.copy()
    kwargs.update({
        'crs': new_crs,
        'transform': transform,
        'width': width,
        'height': height})

    with rasterio.open(output, "w", **kwargs) as reproj:
        # Bands are 1-indexed in rasterio
        for band in range(1, raster.count + 1):
            rasterio.warp.reproject(
                source=rasterio.band(raster, band),
                destination=rasterio.band(reproj, band),
                src_transform=raster.transform,
                src_crs=raster.crs,
                dst_transform=transform,
                dst_crs=new_crs,
                resampling=rasterio.warp.Resampling.nearest)

    if return_open:
        return rasterio.open(output)
    return output
229
+
230
+ # TODO: allow this to read an already-existing raster to use as input
231
+ # TODO: allow this to work for non-categorical data
232
+ # TODO: allow this to work for non-integer data
233
+ # vector should be dissolved beforehand
234
+ # ref_path is a string
235
+ # ras_path is a string
236
+ # res is a tuple of (x,y) values
237
+ # col is the numeric column you want to retain as an attribute value
238
+ # coltype is a tuple of (numpy.dtype, rasterio.dtype)
239
+ # all_touched=False sets cell assignment based on which shape contains the centroid
240
def rasterize_vector(vector, ref_path, ras_path, res, col=None, coltype=None, all_touched=False):
    """Burn a vector layer into a single-band GeoTIFF.

    An empty reference raster covering ``vector.total_bounds`` is first written
    to ``ref_path``; its shape and transform are then used to rasterize the
    geometries into ``ras_path``. Cells not covered by any geometry get -1.

    Args:
        vector: GeoDataFrame to rasterize (dissolve it beforehand if needed).
        ref_path: path where the intermediate reference raster is written.
        ras_path: path where the rasterized output is written.
        res: (x, y) cell size, in the units of the vector's CRS.
        col: column whose values are burned in; when omitted, each feature is
            burned with its positional index (0, 1, 2, ...).
        coltype: (dtype used while rasterizing, dtype of the written file);
            defaults to ("int32", "int32") when omitted.
        all_touched: burn every cell a geometry touches rather than only
            cells whose centre falls inside it.

    Returns:
        ``ras_path``.
    """
    # rasterio.features is not among the file-level imports, so import it explicitly
    import rasterio.features

    ### FIRST: SETTING UP A REFERENCE RASTER ###
    xmin, ymin, xmax, ymax = vector.total_bounds
    xres = res[0]
    yres = -1*res[1]  # negative because the geotransform origin is the TOP left corner
    spatial_ref = vector.crs.to_wkt()
    # Raster size in pixels (truncated, as in the original implementation)
    xsize = abs(int(((xmax-xmin)/xres)))
    ysize = abs(int(((ymax-ymin)/yres)))

    driver = osgeo.gdal.GetDriverByName("GTiff")
    ds = driver.Create(ref_path, xsize, ysize, 1, osgeo.gdal.GDT_Int16, options=["COMPRESS=LZW", "TILED=YES"])  # 1 band
    ds.SetProjection(spatial_ref)
    # The two zero terms are the rotation terms of the geotransform, i.e. a north-up raster
    ds.SetGeoTransform([xmin, xres, 0, ymax, 0, yres])
    ds.GetRasterBand(1).Fill(0)
    ds.GetRasterBand(1).SetNoDataValue(-1)
    ds.FlushCache()
    # Dropping the reference closes the GDAL dataset
    ds = None

    ### NEXT: RASTERIZING ###
    values = vector[col] if col else range(0, len(vector))
    # Previously the dtypes were only defined when `col` was given, so the
    # index-based mode failed with a NameError; int32 is the fallback
    if coltype is not None:
        nd, rd = coltype[0], coltype[1]
    else:
        nd, rd = "int32", "int32"
    geom_value = zip(vector["geometry"], values)

    with rasterio.open(ref_path) as template_raster:
        rasterized_vector = rasterio.features.rasterize(geom_value,
                                                        out_shape=template_raster.shape,
                                                        transform=template_raster.transform,
                                                        all_touched=all_touched,
                                                        fill=-1,  # background value
                                                        merge_alg=rasterio.enums.MergeAlg.replace,  # .add is also an option
                                                        dtype=nd)
        with rasterio.open(ras_path, "w", driver="GTiff",
                           crs=template_raster.crs, transform=template_raster.transform, count=1,
                           dtype=rd, width=template_raster.width, height=template_raster.height) as raster_save:
            raster_save.write(rasterized_vector, indexes=1)

    return ras_path
295
+
296
+ # Masking a raster with a vector
297
+ # TODO: rewrite this with numpy arrays?
298
def mask_raster(original, new, gdf, crop=True, nodata=-1, all_touched=False, filled=True):
    """Mask a raster with the geometries of a GeoDataFrame and save the result.

    Args:
        original: path of the raster to mask.
        new: path of the GeoTIFF to write.
        gdf: GeoDataFrame whose "geometry" column defines the area to keep.
        crop: passed to ``rasterio.mask.mask``.
        nodata: passed to ``rasterio.mask.mask``.
        all_touched: passed to ``rasterio.mask.mask``.
        filled: passed to ``rasterio.mask.mask``.

    Returns:
        ``new``.
    """
    with rasterio.open(original) as og_raster:
        mask, transform = rasterio.mask.mask(og_raster, gdf["geometry"], crop=crop, nodata=nodata, all_touched=all_touched, filled=filled)
        # mask is (bands, rows, cols); the band count is taken from the data
        # (it used to be hard-coded to 1, which broke multi-band rasters)
        with rasterio.open(new, "w", driver="GTiff",
                           crs=og_raster.crs, transform=transform, count=mask.shape[0],
                           dtype=og_raster.dtypes[0], width=mask.shape[2], height=mask.shape[1]) as raster_save:
            raster_save.write(mask)

    return new
309
+
310
+ # Changing the resolution of a raster
311
+ # New res needs to be a tuple of (x,y) resolutions
312
def resample_raster(original, new, new_res, resample=rasterio.enums.Resampling.nearest):
    """Write a copy of a raster resampled to a different cell size.

    Args:
        original: path of the raster to read.
        new: path of the raster to write.
        new_res: (x, y) target resolution.
        resample: resampling method passed to rasterio when reading.

    Returns:
        ``new``.
    """
    with rasterio.open(original) as src:
        # Ratio of old to new cell size: >1 means more (smaller) cells
        ratio_x = src.res[0] / new_res[0]
        ratio_y = src.res[1] / new_res[1]
        out_profile = src.profile.copy()
        # Reading into a differently-sized array is what performs the resampling
        target_shape = (src.count, int(src.height * ratio_y), int(src.width * ratio_x))
        resampled = src.read(out_shape=target_shape, resampling=resample)
        # Cells shrink/grow by the inverse of the ratio
        scaled_transform = src.transform * src.transform.scale((1 / ratio_x), (1 / ratio_y))
        out_profile.update({
            "height": resampled.shape[-2],
            "width": resampled.shape[-1],
            "transform": scaled_transform,
        })
    with rasterio.open(new, "w", **out_profile) as dst:
        dst.write(resampled)
    return new
330
+
331
+ ### DATA UTILS ###
332
+ # Querying the ACS API for data
333
+ # geo should be a dictionary along the lines of {"for":"RG:*", "in":["FG1:*","FG2:*"]}
334
+ # Where RG is the geo you want data returned at the level of
335
+ # And FG# are the filtering geos you want to filter by
336
+ # See here for examples and detail: https://api.census.gov/data/2022/acs/acs5/profile/examples.html
337
+ # Also here: https://api.census.gov/data/2022/acs/acs5/geography.html
338
+ # NOTE: probably best if "in" is always a list of values, even if you only have one
339
+ # NOTE: can filter by multiple geos if comma separated, like 1,2,3
340
def _acs_dataset_path(table, acs):
    """Return the API dataset path for ``table``: profile (DP*), subject (S*) or the plain dataset."""
    if table.startswith("DP"):
        return acs + "/profile"
    if table.startswith("S"):
        return acs + "/subject"
    return acs


def _acs_in_filters(geo):
    """Return the "in" geography filters of ``geo`` as a list of "name:value" strings."""
    value = geo.get("in")
    if not value:
        return []
    if isinstance(value, str):
        # A single string may hold several filters joined by "+" or spaces
        return [part for part in re.split(r"[+\s]+", value) if part]
    return list(value)


def _acs_fetch_json(url):
    """GET ``url`` and return its decoded JSON body, raising on an HTTP error status."""
    response = requests.get(url, timeout=(10, 300))
    response.raise_for_status()
    return response.json()


def _acs_label_variables(df_acs, variables, id_columns, drop_margin):
    """Melt the wide ACS table to long form and attach the split variable labels."""
    df_vars = pandas.DataFrame.from_dict(variables, orient="index").reset_index()
    df_vars = df_vars.rename(columns={"index": "variable"})
    # TODO: create a "type" column that splits out Estimate vs Percent
    label_clean = df_vars["label"].str.replace("Estimate!!", "", regex=False)
    df_vars["label_clean"] = label_clean.str.replace(r"[^\w\s!]", "", regex=True)

    # Pivot wide -> long, keeping the identifying geography columns
    df_long = df_acs.melt(id_vars=["GEO_ID", "NAME"] + id_columns)
    # Variables ending in EA/MA are always dropped; those ending in M only when drop_margin is set
    dropped_suffixes = "EA$|M$|MA$" if drop_margin else "EA$|MA$"
    df_long = df_long.loc[~df_long["variable"].str.contains(dropped_suffixes)]
    df_long = df_long.merge(df_vars.loc[:, ["variable", "label_clean"]], how="left", on="variable")

    # One label_l# column per "!!"-separated level of the label
    names = df_long["label_clean"].str.split("!!", expand=True)
    names.columns = [f"label_l{i}" for i in range(names.shape[1])]
    df_long = df_long.merge(names, how="inner", left_index=True, right_index=True)
    df_long = df_long.drop(columns="label_clean")
    # Move the value column to the end
    return df_long[[c for c in df_long if c != "value"] + ["value"]]


def get_acs(table, geo, year=2022, acs="acs5", pivot=True, drop_margin=True, key=None):
    """Download one ACS table group from the Census Data API.

    Args:
        table: table/group id, e.g. "B01001", "DP05" or "S0101" (case-insensitive).
        geo: dict such as {"for": "RG:*", "in": ["FG1:*", "FG2:*"]}, where "for"
            is the geography the data is returned at and the optional "in"
            entries filter it. "in" may be a list or a single string.
        year: data year used in the API path.
        acs: dataset name used in the API path, e.g. "acs5".
        pivot: when true, return a long table with one row per geography and
            variable, plus label_l# columns holding the split variable label;
            when false, return the wide table as delivered by the API.
        drop_margin: with ``pivot``, also drop the variables ending in "M".
        key: Census API key. Defaults to the CENSUS_API_KEY environment
            variable; when neither is set the request is sent without a key.

    Returns:
        pandas.DataFrame with the requested data.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    table = table.upper()
    dataset = _acs_dataset_path(table, acs)
    in_filters = _acs_in_filters(geo)

    query = f"get=group({table})&for={geo['for']}"
    if in_filters:
        query += "&in=" + "+".join(in_filters)
    # The key is never hard-coded: it comes from the caller or the environment
    api_key = key if key is not None else os.environ.get("CENSUS_API_KEY")
    if api_key:
        query += f"&key={api_key}"

    base_url = f"https://api.census.gov/data/{year}/acs/{dataset}"
    rows = _acs_fetch_json(f"{base_url}?{query}")
    # The first row of the response holds the column names
    df_acs = pandas.DataFrame(rows[1:], columns=rows[0])
    if not pivot:
        return df_acs

    variables = _acs_fetch_json(f"{base_url}/variables.json")["variables"]
    # Geography columns in the response are named after the part before the colon
    id_columns = [item.partition(":")[0] for item in in_filters]
    id_columns.append(geo["for"].partition(":")[0])
    return _acs_label_variables(df_acs, variables, id_columns, drop_margin)