bedrock-ge 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ import base64
2
+ import hashlib
3
+ import json
4
+
5
+ import pandas as pd
6
+
7
+ from bedrock_ge.gi.mapping_models import BedrockGIMapping
8
+ from bedrock_ge.gi.schemas import (
9
+ BedrockGIDatabase,
10
+ InSituTestSchema,
11
+ LabTestSchema,
12
+ LocationSchema,
13
+ ProjectSchema,
14
+ SampleSchema,
15
+ )
16
+
17
+
18
def map_to_brgi_db(brgi_db_mapping: BedrockGIMapping) -> BedrockGIDatabase:
    """Creates a Bedrock GI Database for a single project from a BedrockGIMapping.

    This function takes a BedrockGIMapping, which contains various table mappings
    for project, location, in-situ tests, samples, lab tests, and other tables, and
    converts it into a BedrockGIDatabase object. It creates pandas DataFrames for each
    table, validates them against their respective schemas, and constructs the final
    BedrockGIDatabase object.

    Examples:
        ```python
        from pyproj import CRS
        from bedrock_ge.gi.mapping_models import BedrockGIMapping

        brgi_db_mapping = BedrockGIMapping(
            Project={
                "data": {
                    "project_name": "Test Project",
                    "project_description": "Project description. Add more data about the project here if you please.",
                },
                "project_id": "project-1",
                "horizontal_crs": CRS("EPSG:2193"),
                "vertical_crs": CRS("EPSG:7839"),
            },
            Location={
                "data": location_df,
                "location_id_column": "LocationID",
                "easting_column": "Easting",
                "northing_column": "Northing",
                "ground_level_elevation_column": "GroundLevel",
                "depth_to_base_column": "FinalDepth",
            },
            InSitu=[
                {
                    "table_name": "Geol",
                    "data": geology_df,
                    "location_id_column": "LocationID",
                    "depth_to_top_column": "from",
                    "depth_to_base_column": "to",
                },
                {
                    "table_name": "SPT",
                    "data": spt_df,
                    "location_id_column": "LocationID",
                    "depth_to_top_column": "from",
                },
            ],
            Sample=None,
            Lab=[],
            Other=[],
        )
        ```

    Args:
        brgi_db_mapping (BedrockGIMapping): The mapping object containing GI
            data and metadata for mapping to Bedrock's schema.

    Returns:
        BedrockGIDatabase: The transformed Bedrock GI database containing validated
            DataFrames for each table type.
    """
    # Create a base64 hash from the project data, such that a project Unique ID
    # can be created from the project_id and the hash of the project data.
    project_data_json = json.dumps(brgi_db_mapping.Project.data, sort_keys=True)
    project_data_bytes_hash = hashlib.blake2b(
        project_data_json.encode("utf-8"), digest_size=9
    ).digest()
    project_data_b64_hash = base64.b64encode(project_data_bytes_hash).decode()
    project_uid = brgi_db_mapping.Project.project_id + "-" + project_data_b64_hash

    # Create the project table
    project_df = pd.DataFrame(
        {
            "project_uid": project_uid,
            "project_source_id": brgi_db_mapping.Project.project_id,
            "horizontal_crs": brgi_db_mapping.Project.horizontal_crs.to_string(),
            "horizontal_crs_wkt": brgi_db_mapping.Project.horizontal_crs.to_wkt(),
            "vertical_crs": brgi_db_mapping.Project.vertical_crs.to_string(),
            "vertical_crs_wkt": brgi_db_mapping.Project.vertical_crs.to_wkt(),
            **brgi_db_mapping.Project.data,
        },
        index=[0],
    )
    project_df = project_df.loc[:, ~project_df.columns.duplicated()]
    # Keep the validated (possibly coerced) DataFrame, consistent with the
    # other tables below. Previously the validation result was discarded.
    project_df = ProjectSchema.validate(project_df)

    # Create the location table
    location_mapping = brgi_db_mapping.Location
    location_df = pd.DataFrame(
        {
            "location_uid": location_mapping.data[location_mapping.location_id_column]
            + f"_{project_uid}",
            "location_source_id": location_mapping.data[
                location_mapping.location_id_column
            ],
            "project_uid": project_uid,
            "easting": location_mapping.data[location_mapping.easting_column],
            "northing": location_mapping.data[location_mapping.northing_column],
            "ground_level_elevation": location_mapping.data[
                location_mapping.ground_level_elevation_column
            ],
            "depth_to_base": location_mapping.data[
                location_mapping.depth_to_base_column
            ],
        }
    )
    location_df = pd.concat([location_df, location_mapping.data], axis=1)
    location_df = location_df.loc[:, ~location_df.columns.duplicated()]
    location_df = LocationSchema.validate(location_df)

    # Create the in-situ test tables
    insitu_tests = {}
    for insitu_mapping in brgi_db_mapping.InSitu:
        insitu_df = pd.DataFrame(
            {
                "project_uid": project_uid,
                "location_uid": insitu_mapping.data[insitu_mapping.location_id_column]
                + f"_{project_uid}",
            }
        )
        if insitu_mapping.depth_to_top_column:
            insitu_df["depth_to_top"] = insitu_mapping.data[
                insitu_mapping.depth_to_top_column
            ]
        if insitu_mapping.depth_to_base_column:
            insitu_df["depth_to_base"] = insitu_mapping.data[
                insitu_mapping.depth_to_base_column
            ]
        insitu_df = pd.concat([insitu_df, insitu_mapping.data], axis=1)
        insitu_df = insitu_df.loc[:, ~insitu_df.columns.duplicated()]
        insitu_df = InSituTestSchema.validate(insitu_df)
        insitu_tests[insitu_mapping.table_name] = insitu_df.copy()

    # Create the sample table
    sample_df = None
    if brgi_db_mapping.Sample:
        sample_df = pd.DataFrame(
            {
                "sample_uid": brgi_db_mapping.Sample.data[
                    brgi_db_mapping.Sample.sample_id_column
                ]
                + f"_{project_uid}",
                "sample_source_id": brgi_db_mapping.Sample.data[
                    brgi_db_mapping.Sample.sample_id_column
                ],
                "project_uid": project_uid,
                "location_uid": brgi_db_mapping.Sample.data[
                    brgi_db_mapping.Sample.location_id_column
                ]
                + f"_{project_uid}",
                "depth_to_top": brgi_db_mapping.Sample.data[
                    brgi_db_mapping.Sample.depth_to_top_column
                ],
            }
        )
        if brgi_db_mapping.Sample.depth_to_base_column:
            # BUG FIX: previously read depth_to_top_column here, which silently
            # duplicated the top depth into depth_to_base.
            sample_df["depth_to_base"] = brgi_db_mapping.Sample.data[
                brgi_db_mapping.Sample.depth_to_base_column
            ]
        sample_df = pd.concat([sample_df, brgi_db_mapping.Sample.data], axis=1)
        sample_df = sample_df.loc[:, ~sample_df.columns.duplicated()]
        sample_df = SampleSchema.validate(sample_df)

    # Create the lab test tables
    lab_tests = {}
    if brgi_db_mapping.Lab:
        for lab_mapping in brgi_db_mapping.Lab:
            lab_df = pd.DataFrame(
                {
                    "project_uid": project_uid,
                    "sample_uid": lab_mapping.data[lab_mapping.sample_id_column]
                    + f"_{project_uid}",
                }
            )
            if lab_mapping.location_id_column:
                # Suffix with the project UID, consistent with how location_uid
                # is constructed for the Location, In-Situ and Sample tables.
                # Previously the raw source ID was used, breaking the implicit
                # foreign key to location.location_uid.
                lab_df["location_uid"] = (
                    lab_mapping.data[lab_mapping.location_id_column]
                    + f"_{project_uid}"
                )
            lab_df = pd.concat([lab_df, lab_mapping.data.copy()], axis=1)
            # Drop duplicated columns before validation, like the other tables.
            lab_df = lab_df.loc[:, ~lab_df.columns.duplicated()]
            lab_df = LabTestSchema.validate(lab_df)
            lab_tests[lab_mapping.table_name] = lab_df.copy()

    # Create the other tables
    other_tables = {}
    if brgi_db_mapping.Other:
        for other_table_mapping in brgi_db_mapping.Other:
            # Copy so the caller's DataFrame isn't mutated by the insert below.
            other_table_df = other_table_mapping.data.copy()
            other_table_df.insert(0, "project_uid", project_uid)
            other_tables[other_table_mapping.table_name] = other_table_df

    # Create and return the Bedrock GI database
    return BedrockGIDatabase(
        Project=project_df,
        Location=location_df,
        InSituTests=insitu_tests,
        Sample=sample_df,
        LabTests=lab_tests,
        Other=other_tables,
    )
@@ -0,0 +1,69 @@
1
+ from typing import Optional, Union
2
+
3
+ import pandas as pd
4
+ import pyproj
5
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
6
+
7
+
8
class ProjectTableMapping(BaseModel):
    """Mapping of source GI project data to Bedrock's Project table.

    Note:
        Pydantic deep-copies mutable defaults, so ``data: dict = {}`` is safe.
    """

    # Arbitrary extra project attributes; hashed by map_to_brgi_db to build
    # the project's unique ID.
    data: dict = {}
    # Source identifier of the project, e.g. "project-1".
    project_id: str
    # Horizontal Coordinate Reference System, e.g. pyproj.CRS("EPSG:2193").
    horizontal_crs: pyproj.CRS
    # Vertical CRS; defaults to EPSG:3855 (EGM2008 height).
    vertical_crs: pyproj.CRS = Field(default=pyproj.CRS(3855))
    # "compound_crs": Optional[CRS] = None

    # pyproj.CRS is not a pydantic-native type, hence arbitrary_types_allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)
16
+
17
+
18
class LocationTableMapping(BaseModel):
    """Mapping of source GI location data to Bedrock's Location table.

    The ``*_column`` attributes name the columns of ``data`` that hold the
    corresponding Bedrock location attribute.
    """

    # Source location table.
    data: pd.DataFrame
    location_id_column: str
    easting_column: str
    northing_column: str
    ground_level_elevation_column: str
    depth_to_base_column: str

    # pd.DataFrame is not a pydantic-native type, hence arbitrary_types_allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)
27
+
28
+
29
class SampleTableMapping(BaseModel):
    """Mapping of source GI sample data to Bedrock's Sample table.

    The ``*_column`` attributes name the columns of ``data`` that hold the
    corresponding Bedrock sample attribute.
    """

    # Source sample table.
    data: pd.DataFrame
    sample_id_column: str
    location_id_column: str
    depth_to_top_column: str
    # Optional: not every sample has a distinct base depth.
    depth_to_base_column: Optional[str] = None

    # pd.DataFrame is not a pydantic-native type, hence arbitrary_types_allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)
37
+
38
+
39
class OtherTable(BaseModel):
    """A named GI data table that needs no column mapping.

    Also serves as the base class for the in-situ and lab test table mappings.
    """

    table_name: str
    data: pd.DataFrame

    # pd.DataFrame is not a pydantic-native type, hence arbitrary_types_allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)
44
+
45
+
46
class InSituTestTableMapping(OtherTable):
    """Mapping of a source in-situ test table (e.g. geology, SPT) to Bedrock's schema."""

    # Column of `data` holding the GI location ID each test row belongs to.
    location_id_column: str
    # At least one of the two depth columns must be provided (see validator).
    depth_to_top_column: Optional[str] = None
    depth_to_base_column: Optional[str] = None

    @model_validator(mode="after")
    def validate_at_least_one_depth_column(self):
        """Reject mappings that specify neither a top nor a base depth column."""
        if not self.depth_to_top_column and not self.depth_to_base_column:
            raise ValueError("At least one depth column must be specified.")
        return self
56
+
57
+
58
class LabTestTableMapping(OtherTable):
    """Mapping of a source lab test table to Bedrock's schema."""

    # Column of `data` holding the ID of the sample the lab test was performed on.
    sample_id_column: str
    # Optional column holding the GI location ID.
    location_id_column: Optional[str] = None
61
+
62
+
63
class BedrockGIMapping(BaseModel):
    """Bundle of all table mappings needed to map one GI project to Bedrock's schema.

    Note:
        Pydantic deep-copies mutable defaults, so the ``[]`` defaults are safe.
    """

    Project: ProjectTableMapping
    Location: LocationTableMapping
    # One mapping per in-situ test table, e.g. geology and SPT.
    InSitu: list[InSituTestTableMapping]
    Sample: Union[SampleTableMapping, None] = None
    Lab: list[LabTestTableMapping] = []
    Other: list[OtherTable] = []
bedrock_ge/gi/schemas.py CHANGED
@@ -2,21 +2,33 @@
2
2
 
3
3
  from typing import Optional
4
4
 
5
- import pandera as pa
5
+ import geopandas as gpd
6
+ import pandas as pd
7
+ import pandera.pandas as pa
6
8
  from pandera.typing import Series
7
- from pandera.typing.geopandas import GeoSeries
9
+ from pydantic import BaseModel, ConfigDict
8
10
 
9
11
 
10
class ProjectSchema(pa.DataFrameModel):
    """Pandera schema for the Bedrock GI Project table."""

    # Unique project ID; built from the source project_id plus a hash of the
    # project data (see map_to_brgi_db).
    project_uid: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
    )
    horizontal_crs: Series[str] = pa.Field(
        description="Horizontal Coordinate Reference System (CRS)."
    )
    horizontal_crs_wkt: Series[str] = pa.Field(
        description="Horizontal CRS in Well-known Text (WKT) format."
    )
    vertical_crs: Series[str] = pa.Field(
        description="Vertical Coordinate Reference System (CRS)."
    )
    vertical_crs_wkt: Series[str] = pa.Field(
        description="Vertical CRS in Well-known Text (WKT) format."
    )
17
29
 
18
30
 
19
class LocationSchema(pa.DataFrameModel):
    """Pandera schema for the Bedrock GI Location table."""

    location_uid: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
    )
    project_uid: Series[str] = pa.Field(
        # foreign_key="project.project_uid"
    )
    # The location's ID in the source GI data.
    location_source_id: Series[str]
    easting: Series[float] = pa.Field(coerce=True)
    northing: Series[float] = pa.Field(coerce=True)
    ground_level_elevation: Series[float] = pa.Field(
        coerce=True,
        description="Elevation w.r.t. a local datum. Usually the orthometric height from the geoid, i.e. mean sea level, to the ground level.",
    )
    # Final depth of the GI location; must be strictly positive.
    depth_to_base: Series[float] = pa.Field(coerce=True, gt=0)
36
47
 
37
48
 
38
class LonLatHeightSchema(pa.DataFrameModel):
    """Pandera schema for GI locations expressed as WGS84 lon/lat plus EGM2008 height."""

    project_uid: Series[str] = pa.Field(
        # foreign_key="project.project_uid"
    )
    location_uid: Series[str] = pa.Field(
        # foreign_key="location.location_uid",
        unique=True,
    )
    longitude: Series[float]
    latitude: Series[float]
    egm2008_ground_level_height: Series[float] = pa.Field(
        description="Ground level orthometric height w.r.t. the EGM2008 (Earth Gravitational Model 2008).",
        nullable=True,
    )
47
63
 
48
64
 
49
class InSituTestSchema(pa.DataFrameModel):
    """Pandera schema for Bedrock GI in-situ test tables.

    Besides the per-column constraints, two DataFrame-wide checks run:
    depth_column_completeness and top_above_base.
    """

    project_uid: Series[str] = pa.Field(
        # foreign_key="project.project_uid"
    )
    location_uid: Series[str] = pa.Field(
        # foreign_key="location.location_uid"
    )
    # Depths measured down from ground level: top >= 0, base > 0.
    depth_to_top: Optional[Series[float]] = pa.Field(nullable=True, coerce=True, ge=0)
    depth_to_base: Optional[Series[float]] = pa.Field(nullable=True, coerce=True, gt=0)

    # https://pandera.readthedocs.io/en/stable/dataframe_models.html#dataframe-checks
    # Check depth column completeness such that either shapely.Point's or
    # shapely.LineString's can be created.
    @pa.dataframe_check
    def depth_column_completeness(cls, df: pd.DataFrame) -> pd.Series:
        """Check that each row carries enough depth data to build a geometry.

        Returns:
            pd.Series: Boolean Series; True where the row passes the check.
        """
        has_top = "depth_to_top" in df.columns
        has_base = "depth_to_base" in df.columns

        # If neither column exists, this check should fail for every row.
        if not has_top and not has_base:
            return pd.Series([False] * len(df), index=df.index)

        # If only one column exists, check that it's all non-null.
        if has_top and not has_base:
            return df["depth_to_top"].notna()
        if has_base and not has_top:
            return df["depth_to_base"].notna()

        # If both columns exist:
        # Either depth_to_top or depth_to_base must be non-null => Point
        # OR
        # Both depth_to_top and depth_to_base must be non-null => LineString
        # ! Commented out, because some In-Situ tests have a mix of
        # ! Point's and LineString's, such as IPRM
        # top_has_value = df["depth_to_top"].notna()
        # base_has_value = df["depth_to_base"].notna()
        # either_has_value = top_has_value ^ base_has_value
        # both_have_values = top_has_value & base_has_value

        # if either_has_value.all():
        #     return either_has_value
        # elif both_have_values.all():
        #     return both_have_values
        # else:
        #     if either_has_value.sum() < both_have_values.sum():
        #         return either_has_value
        #     else:
        #         return both_have_values

        # NOTE(review): lenient fallback, flagged "Incorrect check" by the
        # author — when both columns exist, any row with at least one non-null
        # depth passes. Revisit together with the commented-out logic above.
        return ~(df["depth_to_top"].isna() & df["depth_to_base"].isna())

    @pa.dataframe_check
    def top_above_base(cls, df: pd.DataFrame) -> pd.Series:
        """Check that depth_to_top <= depth_to_base when both columns are present.

        If either column is missing, this check passes (nothing to compare).
        If both columns are present, the check fails if any row has
        depth_to_top > depth_to_base.

        Returns:
            pd.Series: pandas.Series of bools indicating successful checks.
        """
        has_top = "depth_to_top" in df.columns
        has_base = "depth_to_base" in df.columns

        # If either column is missing, this check passes (nothing to compare)
        if not has_top or not has_base:
            return pd.Series([True] * len(df), index=df.index)

        # Only compare when both values are non-null
        mask = df["depth_to_top"].notna() & df["depth_to_base"].notna()
        # Rows outside the mask pass automatically.
        result = (~mask) | (df["depth_to_top"] <= df["depth_to_base"])

        # Debug aid: print the failing rows before pandera reports the error.
        # df.columns[5] shows one extra source column for context — assumes at
        # least 6 columns; TODO confirm for narrow tables.
        failing_mask = mask & ~result
        if failing_mask.any():
            print("🚨 ERROR: depth_to_top > depth_to_base:")
            print(
                df.loc[
                    failing_mask,
                    ["location_uid", "depth_to_top", "depth_to_base", df.columns[5]],
                ]
            )

        return result
153
+
154
+
155
class SampleSchema(InSituTestSchema):
    """Pandera schema for the Bedrock GI Sample table.

    Inherits the depth columns and DataFrame-wide checks from InSituTestSchema.
    """

    sample_uid: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
    )
    # The sample's ID in the source GI data.
    sample_source_id: Series[str]
66
161
 
67
162
 
68
- class Sample(BaseSample):
69
- elevation_at_top: Series[float]
70
- elevation_at_base: Optional[Series[float]] = pa.Field(nullable=True)
71
- geometry: GeoSeries
72
-
73
-
74
- class InSitu(BaseInSitu):
75
- elevation_at_top: Series[float]
76
- elevation_at_base: Optional[Series[float]] = pa.Field(nullable=True)
77
- geometry: GeoSeries
78
-
79
-
80
- class BaseLab(pa.DataFrameModel):
163
+ class LabTestSchema(pa.DataFrameModel):
81
164
  project_uid: Series[str] = pa.Field(
82
165
  # foreign_key="project.project_uid"
83
166
  )
@@ -89,7 +172,24 @@ class BaseLab(pa.DataFrameModel):
89
172
  )
90
173
 
91
174
 
92
- class Lab(BaseLab):
93
- geometry: GeoSeries = pa.Field(
94
- description="GIS geometry of the sample on which this lab test was performed."
95
- )
175
class BedrockGIDatabase(BaseModel):
    """Validated Bedrock GI database for one or more projects, as pandas DataFrames."""

    Project: pd.DataFrame
    Location: pd.DataFrame
    # One DataFrame per in-situ test type, keyed by table name (e.g. "Geol", "SPT").
    InSituTests: dict[str, pd.DataFrame]
    Sample: pd.DataFrame | None = None
    # One DataFrame per lab test type, keyed by table name.
    LabTests: dict[str, pd.DataFrame] = {}
    Other: dict[str, pd.DataFrame] = {}

    # pd.DataFrame is not a pydantic-native type, hence arbitrary_types_allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)
184
+
185
+
186
class BedrockGIGeospatialDatabase(BaseModel):
    """Bedrock GI database with geometry: GeoDataFrames for the spatial tables."""

    Project: pd.DataFrame
    Location: gpd.GeoDataFrame
    # WGS84 lon/lat + EGM2008 height representation of the locations.
    LonLatHeight: gpd.GeoDataFrame
    # One GeoDataFrame per in-situ test type, keyed by table name.
    InSituTests: dict[str, gpd.GeoDataFrame]
    Sample: gpd.GeoDataFrame | None = None
    # Lab tests and other tables stay non-spatial.
    LabTests: dict[str, pd.DataFrame] = {}
    Other: dict[str, pd.DataFrame] = {}

    # (Geo)DataFrames are not pydantic-native types, hence arbitrary_types_allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)