bedrock-ge 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ import geopandas as gpd
2
+ import numpy as np
3
+ import pandas as pd
4
+ from pandera.typing import DataFrame
5
+ from pyproj import CRS, Transformer
6
+ from pyproj.crs.crs import CompoundCRS
7
+ from shapely.geometry import LineString, Point
8
+
9
+ from bedrock_ge.gi.schemas import (
10
+ BedrockGIDatabase,
11
+ BedrockGIGeospatialDatabase,
12
+ InSituTestSchema,
13
+ LocationSchema,
14
+ SampleSchema,
15
+ )
16
+
17
+
18
def create_brgi_geodb(
    brgi_db: BedrockGIDatabase,
) -> BedrockGIGeospatialDatabase:
    """Build a Bedrock GI geospatial database from a Bedrock GI database.

    The conversion is done in the following order:
        1. The Location table is turned into a GeoDataFrame with
           `create_location_geodf`.
        2. The LonLatHeight table is created with `create_lon_lat_height_geodf`.
        3. Every In-Situ test table is given a geometry with
           `interpolate_gi_geometry`, using the Location GeoDataFrame.
        4. The Sample table, when present, is given a geometry with
           `interpolate_gi_geometry` as well.
        5. Everything is bundled into a `BedrockGIGeospatialDatabase`. The
           Project, LabTests and Other tables are passed through unchanged.

    Args:
        brgi_db (BedrockGIDatabase): The Bedrock GI database to be converted.

    Returns:
        BedrockGIGeospatialDatabase: The resulting Bedrock GI geospatial database.
    """
    location_geodf = create_location_geodf(brgi_db)
    lon_lat_height_geodf = create_lon_lat_height_geodf(brgi_db)

    # One GeoDataFrame per In-Situ test table, keyed by the table name.
    insitu_test_geodfs = {
        test_name: interpolate_gi_geometry(test_df, location_geodf)  # type: ignore
        for test_name, test_df in brgi_db.InSituTests.items()
    }

    # The Sample table is optional; keep `None` when it is absent.
    sample_geodf = (
        None
        if brgi_db.Sample is None
        else interpolate_gi_geometry(brgi_db.Sample, location_geodf)  # type: ignore
    )

    return BedrockGIGeospatialDatabase(
        Project=brgi_db.Project,
        Location=location_geodf,
        LonLatHeight=lon_lat_height_geodf,
        InSituTests=insitu_test_geodfs,
        Sample=sample_geodf,
        LabTests=brgi_db.LabTests,
        Other=brgi_db.Other,
    )
63
+
64
+
65
def create_location_geodf(brgi_db: BedrockGIDatabase) -> gpd.GeoDataFrame:
    """Create the geospatial Location table of a Bedrock GI database.

    Every location is represented by a two-vertex 3D LineString that runs from
    (easting, northing, ground_level_elevation) straight down to
    (easting, northing, ground_level_elevation - depth_to_base), i.e. boreholes
    are assumed to be vertical (for now). The base elevation is only used to
    build the geometry; it is not added as a column to the returned table.

    Args:
        brgi_db (BedrockGIDatabase): The Bedrock GI database containing location
            data and project CRS information.

    Returns:
        gpd.GeoDataFrame: A copy of the Location table with LineString
            geometries, using the compound CRS built from the project's
            horizontal and vertical CRS.

    Raises:
        ValueError: If the Project table contains more than one distinct
            horizontal CRS or more than one distinct vertical CRS.
    """
    # TODO: Implement logic to handle multiple CRS'es in the input GI data:
    #   1. Create WKT geometry for each location in original CRS
    #   2. Convert to WGS84 + EGM2008 orthometric height EPSG:9518
    #   3. Interpolate InSituTest and Sample geospatial vector geometry from active geometry column
    hor_crs_wkts = brgi_db.Project["horizontal_crs_wkt"]
    vert_crs_wkts = brgi_db.Project["vertical_crs_wkt"]
    single_crs = hor_crs_wkts.nunique() <= 1 and vert_crs_wkts.nunique() <= 1
    if not single_crs:
        raise ValueError(
            "All projects must have the same horizontal and vertical CRS (Coordinate Reference System).\n"
            "Raise an issue on GitHub in case you need to be able to combine GI data that was acquired in multiple different CRSes."
        )

    # All projects share one CRS pair, so the first row is representative.
    horizontal_crs = CRS.from_wkt(hor_crs_wkts.iat[0])
    vertical_crs = CRS.from_wkt(vert_crs_wkts.iat[0])
    compound_crs = CompoundCRS(
        name=f"{horizontal_crs.name} + {vertical_crs.name}",
        components=[horizontal_crs, vertical_crs],
    )

    # TODO: Implement logic such that inclined boreholes are handled correctly.
    # All boreholes are now assumed to be vertical.
    boreholes = brgi_db.Location.copy()
    boreholes["elevation_at_base"] = (
        boreholes["ground_level_elevation"] - boreholes["depth_to_base"]
    )

    def _vertical_borehole(row: pd.Series) -> LineString:
        """Return the vertical line from ground level to the base of the hole."""
        easting, northing = row["easting"], row["northing"]
        return LineString(
            [
                (easting, northing, row["ground_level_elevation"]),
                (easting, northing, row["elevation_at_base"]),
            ]
        )

    borehole_lines = boreholes.apply(_vertical_borehole, axis=1)
    return gpd.GeoDataFrame(
        brgi_db.Location.copy(),
        geometry=borehole_lines,
        crs=compound_crs,
    )
121
+
122
+
123
def create_lon_lat_height_geodf(brgi_db: BedrockGIDatabase) -> gpd.GeoDataFrame:
    """Create a (lon, lat, height) point GeoDataFrame for all GI locations.

    The locations are processed per project. For each project the
    (easting, northing, ground level elevation) coordinates are transformed from
    the project's compound CRS (horizontal + vertical) to EPSG:9518, i.e.
    WGS84 (lon, lat) + EGM2008 orthometric height.

    Args:
        brgi_db (BedrockGIDatabase): The source Bedrock Ground Investigation database
            containing location and project information.

    Returns:
        gpd.GeoDataFrame: A GeoDataFrame with the columns `project_uid`,
            `location_uid`, `longitude`, `latitude` and
            `egm2008_ground_level_height`, and 3D point geometries in EPSG:9518.
    """
    wgs84_egm2008_crs = CRS("EPSG:9518")
    project_crs_table = brgi_db.Project.set_index("project_uid")

    def _transform_project_locations(project_uid, project_locations) -> pd.DataFrame:
        """Transform the locations of a single project to EPSG:9518."""
        horizontal_crs = CRS.from_wkt(
            project_crs_table.at[project_uid, "horizontal_crs_wkt"]
        )
        vertical_crs = CRS.from_wkt(
            project_crs_table.at[project_uid, "vertical_crs_wkt"]
        )
        compound_crs = CompoundCRS(
            name=f"{horizontal_crs.name} + {vertical_crs.name}",
            components=[horizontal_crs, vertical_crs],
        )
        # always_xy=True: input is (easting, northing), output is (lon, lat).
        to_wgs84_egm2008 = Transformer.from_crs(
            compound_crs, wgs84_egm2008_crs, always_xy=True
        )
        lon, lat, egm2008_height = to_wgs84_egm2008.transform(
            project_locations["easting"],
            project_locations["northing"],
            project_locations["ground_level_elevation"],
        )
        return pd.DataFrame(
            {
                "project_uid": project_uid,
                "location_uid": project_locations["location_uid"],
                "longitude": lon,
                "latitude": lat,
                "egm2008_ground_level_height": egm2008_height,
            }
        )

    per_project_frames = [
        _transform_project_locations(project_uid, project_locations)
        for project_uid, project_locations in brgi_db.Location.groupby("project_uid")
    ]
    lon_lat_height_df = pd.concat(per_project_frames, ignore_index=True)

    points = gpd.points_from_xy(
        lon_lat_height_df["longitude"],
        lon_lat_height_df["latitude"],
        lon_lat_height_df["egm2008_ground_level_height"],
    )
    return gpd.GeoDataFrame(
        lon_lat_height_df,
        geometry=points,
        crs=wgs84_egm2008_crs,
    )
181
+
182
+
183
def interpolate_gi_geometry(
    insitu_test_df: DataFrame[InSituTestSchema] | DataFrame[SampleSchema],
    location_geodf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """Interpolates the geospatial geometry for a given In-Situ test DataFrame using the corresponding GI Location GeoDataFrame.

    This function takes an In-Situ test or Sample DataFrame and a GI Location GeoDataFrame and
    returns a GeoDataFrame with its geometry interpolated from the Location GeoDataFrame.
    The In-Situ test geometry is always a LineString or Point, depending on whether the
    In-Situ test is performed at a specific depth or over a depth interval inside a borehole.
    The geometry is calculated by linearly interpolating the depth values for each row
    in a In-Situ test DataFrame along the corresponding location's LineString geometry.

    Args:
        insitu_test_df: The In-Situ test or Sample DataFrame containing the depth values to be interpolated.
        location_geodf: The location GeoDataFrame containing the location LineStrings to be used for interpolation.

    Returns:
        gpd.GeoDataFrame: A copy of `insitu_test_df` (same rows, same index) with the
            interpolated geospatial geometry, in the CRS of `location_geodf`.

    Raises:
        ValueError: If joining on `location_uid` changes the number of rows, which
            happens when `location_uid` is not unique in `location_geodf`.
    """
    # TODO: implement a warning when interpolating GI geospatial geometry when
    # TODO: a single GI location has waaay too many rows in a certain In-Situ test.
    geodf = location_geodf[["location_uid", "geometry"]].merge(
        insitu_test_df,
        how="right",
        on="location_uid",
    )
    if len(geodf) != len(insitu_test_df):
        raise ValueError(
            "Joining the GI locations onto the In-Situ test / Sample rows changed the "
            "number of rows; 'location_uid' must be unique in the Location table."
        )

    geometry = geodf.apply(
        _interpolate_gi_geometry_row,
        axis=1,
    )
    # A column-on-column merge discards the index of `insitu_test_df` and returns
    # a fresh 0..n-1 index. The geometry is attached by index label, so it must
    # carry the index of the table it is attached to; otherwise any table
    # without a default RangeIndex ends up with misaligned or missing geometries.
    geometry.index = insitu_test_df.index

    return gpd.GeoDataFrame(
        insitu_test_df.copy(),
        geometry=geometry,
        crs=str(geodf.crs),
    )
219
+
220
+
221
+ def _interpolate_gi_geometry_row(row: pd.Series) -> LineString | Point:
222
+ """Process geometry based on available depth values for each row."""
223
+ has_top = pd.notna(row.get("depth_to_top"))
224
+ has_base = pd.notna(row.get("depth_to_base"))
225
+
226
+ if has_top and has_base:
227
+ return substring_3d(
228
+ row["geometry"],
229
+ start_dist=row["depth_to_top"],
230
+ end_dist=row["depth_to_base"],
231
+ )
232
+ elif has_top:
233
+ return interpolate_3d(
234
+ row["geometry"],
235
+ distance=row["depth_to_top"],
236
+ )
237
+ elif has_base:
238
+ return interpolate_3d(
239
+ row["geometry"],
240
+ distance=row["depth_to_base"],
241
+ )
242
+ else:
243
+ raise KeyError(
244
+ "An In-Situ test must either have a 'depth_to_top' or a 'depth_to_base', or both."
245
+ )
246
+
247
+
248
def calc_distances_along_3d_linestring(linestring: LineString) -> np.ndarray:
    """Return the cumulative 3D distance from the first vertex to every vertex.

    Args:
        linestring: A LineString whose coordinates have at least three
            components (x, y, z).

    Returns:
        np.ndarray: One value per vertex; the first value is 0 and the last one
            is the total 3D length of the line.

    Raises:
        ValueError: If the coordinates have fewer than three components.
    """
    vertices = np.array(linestring.coords)
    if vertices.shape[1] < 3:
        raise ValueError("Coordinates must be 3D (x, y, z)")

    # Euclidean length of every segment between consecutive vertices.
    steps = vertices[1:] - vertices[:-1]
    segment_lengths = np.sqrt((steps**2).sum(axis=1))

    # Prepend 0 so that the result lines up with the vertices.
    return np.insert(np.cumsum(segment_lengths), 0, 0)
260
+
261
+
262
def interpolate_3d(linestring: LineString, distance: float) -> Point:
    """Return the point at a given true 3D distance along a 3D LineString.

    Distances outside the line are clamped: a distance of 0 or less gives the
    first vertex, a distance equal to or beyond the total 3D length gives the
    last vertex. This behavior is different than the
    shapely.LineString.interpolate method.

    Args:
        linestring: A 3D LineString geometry
        distance: Distance along the line in 3D space

    Returns:
        Point: The interpolated 3D point
    """
    vertices = linestring.coords
    if distance <= 0:
        return Point(vertices[0])

    cumulative = calc_distances_along_3d_linestring(linestring)
    if distance >= cumulative[-1]:
        return Point(vertices[-1])

    # First vertex at or beyond `distance` closes the segment of interest.
    end_idx = int(np.searchsorted(cumulative, distance))
    start_idx = max(0, end_idx - 1)  # Ensure non-negative
    dist_at_start = cumulative[start_idx]
    span = cumulative[end_idx] - dist_at_start

    segment_start = np.array(vertices[start_idx])
    if span == 0:
        # Degenerate (zero-length) segment: nothing to interpolate.
        return Point(segment_start)
    segment_end = np.array(vertices[end_idx])

    # Fraction of the segment that lies before the distance of interest.
    fraction = (distance - dist_at_start) / span
    return Point(segment_start + fraction * (segment_end - segment_start))
302
+
303
+
304
def substring_3d(
    linestring: LineString, start_dist: float, end_dist: float
) -> LineString | Point:
    """Extract the part of a 3D LineString between two true 3D distances.

    The two distances are swapped when given in descending order and are
    clamped to the range [0, total 3D length].

    Args:
        linestring: A 3D LineString geometry
        start_dist: Start distance along the line in 3D space
        end_dist: End distance along the line in 3D space

    Returns:
        LineString | Point: The extracted 3D LineString segment, or a Point
            when both distances are equal after clamping.
    """
    # Ensure start_dist <= end_dist
    if start_dist > end_dist:
        start_dist, end_dist = end_dist, start_dist

    cumulative = calc_distances_along_3d_linestring(linestring)
    line_length = cumulative[-1]

    # Clamp both distances onto the line.
    start_dist = max(0, min(start_dist, line_length))
    end_dist = max(0, min(end_dist, line_length))

    if start_dist == end_dist:
        return interpolate_3d(linestring, start_dist)

    # Start point, then every original vertex strictly inside the range.
    first_coord = interpolate_3d(linestring, start_dist).coords[0]
    interior_coords = [
        linestring.coords[idx]
        for idx, vertex_dist in enumerate(cumulative)
        if start_dist < vertex_dist < end_dist
    ]
    path = [first_coord, *interior_coords]

    # End point, unless it repeats the last coordinate already collected.
    last_coord = interpolate_3d(linestring, end_dist).coords[0]
    if last_coord != path[-1]:  # Avoid duplicate points
        path.append(last_coord)

    return LineString(path)
@@ -0,0 +1,271 @@
1
+ """Utility functions for reading, parsing and writing data."""
2
+
3
+ import codecs
4
+ import io
5
+ from contextlib import contextmanager, nullcontext
6
+ from pathlib import Path
7
+ from typing import IO, ContextManager
8
+
9
+ import chardet
10
+ import geopandas as gpd
11
+ import pandas as pd
12
+
13
+ from bedrock_ge.gi.schemas import BedrockGIDatabase, BedrockGIGeospatialDatabase
14
+
15
+ DEFAULT_ENCODING = "utf-8"
16
+
17
+
18
+ def detect_encoding(source: str | Path | IO[str] | IO[bytes] | bytes) -> str:
19
+ """Detect the character encoding of various input types.
20
+
21
+ Args:
22
+ source (str | Path | IO[str] | IO[bytes] | bytes): The source to detect encoding from.
23
+ - str or Path: File path.
24
+ - IO[str]: Already decoded text stream (returns `DEFAULT_ENCODING`)
25
+ - IO[bytes]: Binary stream to detect encoding from
26
+ - bytes: Binary data to detect encoding from
27
+
28
+ Returns:
29
+ str: The detected encoding name (e.g., 'utf-8', 'iso-8859-1', 'ascii', etc.)
30
+
31
+ Raises:
32
+ TypeError: If the source type is unsupported
33
+ FileNotFoundError: If a file path doesn't exist
34
+ """
35
+ # Set number of bytes to read for detection and required confidence
36
+ SAMPLE_SIZE = 1_000_000
37
+ REQUIRED_CONFIDENCE = 0.7
38
+
39
+ def _detect_from_bytes(data: bytes) -> str:
40
+ """Detect encoding from bytes data."""
41
+ sample = data[: min(len(data), SAMPLE_SIZE)]
42
+ result = chardet.detect(sample)
43
+ encoding = result.get("encoding", DEFAULT_ENCODING)
44
+ confidence = result.get("confidence", 0.0)
45
+
46
+ if not encoding or confidence < REQUIRED_CONFIDENCE:
47
+ return DEFAULT_ENCODING
48
+
49
+ if encoding.lower() == "ascii":
50
+ return "utf-8"
51
+
52
+ return encoding
53
+
54
+ def _read_from_path(path: Path):
55
+ """Read contents from path."""
56
+ if path.exists() and path.is_file():
57
+ with open(path, "rb") as file:
58
+ sample = file.read(SAMPLE_SIZE)
59
+ return _detect_from_bytes(sample)
60
+ else:
61
+ raise FileNotFoundError(
62
+ f"Path does not exist or is not a file: {path.__str__()[0:40]}"
63
+ )
64
+
65
+ # bytes
66
+ if isinstance(source, bytes):
67
+ return _detect_from_bytes(source)
68
+
69
+ # String, if not a path, still returns DEFAULT_ENCODING
70
+ if isinstance(source, str):
71
+ path = Path(source)
72
+ try:
73
+ return _read_from_path(path)
74
+ except FileNotFoundError:
75
+ return DEFAULT_ENCODING
76
+
77
+ # Path object
78
+ if isinstance(source, Path):
79
+ return _read_from_path(source)
80
+
81
+ # IO[str] object
82
+ if hasattr(source, "encoding"):
83
+ if source.encoding:
84
+ # Could be `None`, e.g. io.StringIO has an encoding attribute which is None.
85
+ return source.encoding
86
+ else:
87
+ return DEFAULT_ENCODING
88
+
89
+ # IO[bytes]
90
+ if isinstance(source, io.BufferedIOBase):
91
+ try:
92
+ if not source.seekable():
93
+ # For non-seekable streams, read what we can without seeking
94
+ sample = source.read(SAMPLE_SIZE)
95
+ if isinstance(sample, bytes):
96
+ return _detect_from_bytes(sample)
97
+ else:
98
+ return DEFAULT_ENCODING
99
+
100
+ # For seekable streams, preserve position
101
+ original_position = source.tell()
102
+ try:
103
+ source.seek(0)
104
+ sample = source.read(SAMPLE_SIZE)
105
+ if isinstance(sample, bytes):
106
+ encoding = _detect_from_bytes(sample)
107
+ else:
108
+ # if not bytes, then its a custom string-like type that was not caught
109
+ encoding = DEFAULT_ENCODING
110
+ return encoding
111
+ finally:
112
+ source.seek(original_position)
113
+ except (AttributeError, IOError, OSError):
114
+ return DEFAULT_ENCODING
115
+
116
+ raise TypeError(f"Unsupported input type for encoding detection: {type(source)}")
117
+
118
+
119
+ def open_text_data_source(
120
+ source: str | Path | IO[str] | IO[bytes] | bytes, encoding: str | None = None
121
+ ) -> ContextManager[io.TextIOBase]:
122
+ """Opens or wraps a given source for reading AGS (text-based) data.
123
+
124
+ Args:
125
+ source (str | Path | IO[str] | IO[bytes] | bytes): The source to read from.
126
+ - str or Path: File path or direct string content.
127
+ - IO[str]: A file-like text stream.
128
+ - IO[bytes]: Byte stream
129
+ - bytes: Binary content or stream (will be decoded).
130
+ encoding (str | None): Encoding to use for decoding bytes. Default is None.
131
+
132
+ Returns:
133
+ ContextManager[TextIOBase]: A context manager yielding a text stream.
134
+
135
+ Raises:
136
+ TypeError: If the source type is unsupported or binary streams are not decoded.
137
+ """
138
+ try:
139
+ codecs.lookup(encoding)
140
+ except LookupError:
141
+ raise ValueError(f"Unsupported encoding: {encoding}")
142
+
143
+ @contextmanager
144
+ def _bytes_source(bytes_content: bytes):
145
+ string_io = io.StringIO(bytes_content.decode(encoding))
146
+ try:
147
+ yield string_io
148
+ finally:
149
+ string_io.close()
150
+
151
+ if isinstance(source, (str, Path)):
152
+ path = Path(source)
153
+ if path.exists() and path.is_file():
154
+ return open(path, "r", encoding=encoding)
155
+ raise FileNotFoundError(f"Path does not exist or is not a file: {source}")
156
+
157
+ elif isinstance(source, io.TextIOBase):
158
+ source.seek(0)
159
+ return nullcontext(source)
160
+
161
+ elif isinstance(source, io.BufferedIOBase):
162
+ text_stream = io.TextIOWrapper(source, encoding=encoding)
163
+ text_stream.seek(0)
164
+ return nullcontext(text_stream)
165
+
166
+ elif isinstance(source, bytes):
167
+ return _bytes_source(source)
168
+
169
+ else:
170
+ raise TypeError(
171
+ f"Unsupported source type: {type(source)}. "
172
+ "Expected str, Path, IO[str], IO[bytes], or bytes."
173
+ )
174
+
175
+
176
+ def coerce_string(string: str) -> None | bool | float | str:
177
+ """Converts a string to an appropriate Python data type.
178
+
179
+ Args:
180
+ string (str): The input string to be converted.
181
+
182
+ Returns:
183
+ None: If the string is 'none', 'null', or empty.
184
+ bool: If the string is 'true' or 'false' (case insensitive).
185
+ int: If the string can be converted to a float and has no decimal part.
186
+ float: If the string can be converted to a float with a decimal part.
187
+ str: If the string cannot be converted to any of the above types.
188
+
189
+ """
190
+ if string.lower() in {"none", "null", ""}:
191
+ return None
192
+ elif string.lower() == "true":
193
+ return True
194
+ elif string.lower() == "false":
195
+ return False
196
+ else:
197
+ try:
198
+ value = float(string)
199
+ if value.is_integer():
200
+ return int(value)
201
+ else:
202
+ return value
203
+ except ValueError:
204
+ return string
205
+
206
+
207
def brgi_db_to_dfs(
    brgi_db: BedrockGIDatabase | BedrockGIGeospatialDatabase,
) -> dict[str, pd.DataFrame | gpd.GeoDataFrame]:
    """Flatten a Bedrock GI (geospatial) database into a dictionary of DataFrames.

    The tables are added in this order: Project, Location, LonLatHeight (only
    when the database has that attribute), Sample (only when it is not None),
    followed by the In-Situ test, Lab test and Other tables. When table names
    collide, the table that is added last wins.

    Args:
        brgi_db (BedrockGIDatabase | BedrockGIGeospatialDatabase): The Bedrock GI (geospatial) database.

    Returns:
        dict[str, pd.DataFrame | gpd.GeoDataFrame]: A dictionary where the keys are
            the Bedrock GI table names and the values are the DataFrames that contain
            the data for each table.
    """
    tables = {
        "Project": brgi_db.Project,
        "Location": brgi_db.Location,
    }

    # Only the geospatial database has a LonLatHeight table.
    if hasattr(brgi_db, "LonLatHeight"):
        tables["LonLatHeight"] = brgi_db.LonLatHeight

    if brgi_db.Sample is not None:
        tables["Sample"] = brgi_db.Sample

    table_groups = (brgi_db.InSituTests, brgi_db.LabTests, brgi_db.Other)
    for table_group in table_groups:
        tables.update(table_group.items())

    return tables
236
+
237
+
238
def convert_object_col_content_to_string(
    df: pd.DataFrame, in_place: bool = True
) -> pd.DataFrame:
    """Turn the values in all object-dtype columns into strings.

    The real reason that this is necessary is that pandas and marimo are a little finicky about strings:
    1. The built-in pd.Dataframe.convert_dtypes() method doesn't convert the dtype of
       columns that contain multiple types in that same column to string.
    2. marimo cannot handle pd.DataFrames with nullable strings (and other nullable pandas dtypes)
       very well, see https://github.com/marimo-team/marimo/issues/5445.

    Therefore the object-dtype columns are first cast to the "string" dtype and
    then cast back to the object dtype, so the columns keep the object dtype
    while their values have become strings.

    Args:
        df: The DataFrame to modify.
        in_place: Whether to modify the DataFrame in-place (default) or return a new DataFrame.

    Returns:
        pd.DataFrame: `df` itself when `in_place` is True, otherwise a modified
            copy. Columns with other dtypes are left untouched.

    """
    target = df if in_place else df.copy()
    object_cols = target.select_dtypes(include=["object"]).columns
    # object -> string converts the values, string -> object restores the dtype.
    as_strings = target[object_cols].astype("string")
    target[object_cols] = as_strings.astype("object")
    return target
266
+
267
+
268
def geodf_to_df(geodf: gpd.GeoDataFrame) -> pd.DataFrame:
    """Convert a GeoDataFrame to a plain DataFrame for nicer display in notebooks like marimo.

    The input is copied, and the `geometry` column of the copy is replaced by
    its string representation.
    """
    plain_df = pd.DataFrame(geodf.copy())
    return plain_df.assign(geometry=lambda frame: frame.geometry.astype(str))