bedrock-ge 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bedrock_ge/__init__.py +3 -0
- bedrock_ge/gi/__init__.py +0 -0
- bedrock_ge/gi/ags/__init__.py +0 -0
- bedrock_ge/gi/ags/ags3_data_dictionary.json +7445 -0
- bedrock_ge/gi/ags/ags4_data_dictionary.json +24074 -0
- bedrock_ge/gi/ags/read.py +192 -0
- bedrock_ge/gi/ags/schemas.py +282 -0
- bedrock_ge/gi/ags/transform.py +230 -0
- bedrock_ge/gi/ags/validate.py +26 -0
- bedrock_ge/gi/bedrock-gi-schema.json +36 -0
- bedrock_ge/gi/concatenate.py +38 -0
- bedrock_ge/gi/gis_geometry.py +235 -0
- bedrock_ge/gi/schemas.py +95 -0
- bedrock_ge/gi/sqlmodels.py +74 -0
- bedrock_ge/gi/validate.py +116 -0
- bedrock_ge/gi/write.py +105 -0
- bedrock_ge/plot.py +2 -0
- {bedrock_ge-0.2.0.dist-info → bedrock_ge-0.2.2.dist-info}/METADATA +16 -8
- bedrock_ge-0.2.2.dist-info/RECORD +21 -0
- bedrock_ge-0.2.0.dist-info/RECORD +0 -4
- {bedrock_ge-0.2.0.dist-info → bedrock_ge-0.2.2.dist-info}/WHEEL +0 -0
- {bedrock_ge-0.2.0.dist-info → bedrock_ge-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,192 @@
|
|
1
|
+
import io
|
2
|
+
from typing import Any, Dict, List, Union
|
3
|
+
|
4
|
+
import pandas as pd
|
5
|
+
from python_ags4 import AGS4
|
6
|
+
|
7
|
+
from bedrock_ge.gi.ags.validate import check_ags_proj_group
|
8
|
+
|
9
|
+
|
10
|
+
def ags_to_dfs(ags_data: str) -> Dict[str, pd.DataFrame]:
    """
    Convert AGS 3 or AGS 4 data to a dictionary of pandas DataFrames.

    The AGS version is detected from the first non-empty line: AGS 3 group
    names start with '"**', AGS 4 files start with '"GROUP"'.

    Args:
        ags_data (str): The AGS data as a string.

    Raises:
        ValueError: If the data is empty or does not match AGS 3 or AGS 4 format.

    Returns:
        Dict[str, pd.DataFrame]: A dictionary where keys represent AGS group
            names with corresponding DataFrames for the corresponding group data.
    """
    ags_version = None
    # Process each line to find the AGS version and delegate parsing
    for line in ags_data.splitlines():
        stripped_line = line.strip()  # Remove leading/trailing whitespace
        if stripped_line:  # Skip empty lines at the start of the file
            if stripped_line.startswith('"**'):
                ags_version = 3
                ags_dfs = ags3_to_dfs(ags_data)
                break
            elif stripped_line.startswith('"GROUP"'):
                ags_version = 4
                ags_dfs = ags4_to_dfs(ags_data)
                break
            else:
                # If first non-empty line doesn't match AGS 3 or AGS 4 format
                raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")

    # Fix: an empty (or all-whitespace) input used to fall through the loop and
    # crash below with an UnboundLocalError on `ags_dfs`; fail loudly instead.
    if ags_version is None:
        raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")

    is_proj_group_correct = check_ags_proj_group(ags_dfs["PROJ"])
    if is_proj_group_correct:
        project_id = ags_dfs["PROJ"]["PROJ_ID"].iloc[0]
        print(
            f"AGS {ags_version} data was read for Project {project_id}",
            "This Ground Investigation data contains groups:",
            list(ags_dfs.keys()),
            sep="\n",
            end="\n\n",
        )

    return ags_dfs
|
52
|
+
|
53
|
+
|
54
|
+
def ags3_to_dfs(ags3_data: str) -> Dict[str, pd.DataFrame]:
    """Convert AGS 3 data to a dictionary of pandas DataFrames.

    Args:
        ags3_data (str): The AGS 3 data as a string.

    Returns:
        Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key
            represents a group name from AGS 3 data, and the corresponding value is
            a pandas DataFrame containing the data for that group.
    """
    # Initialize dictionary and variables used in the AGS 3 read loop
    ags3_dfs: Dict[str, pd.DataFrame] = {}
    line_type = "line_0"
    group = ""
    headers: List[str] = []
    group_data: List[List[Any]] = []

    def _save_group() -> None:
        """Store the collected group, dropping columns that contain no data.

        Fix: the original applied `.dropna(axis=1, how="all")` only to the very
        last group in the file; all groups are now treated consistently.
        """
        ags3_dfs[group] = pd.DataFrame(group_data, columns=headers).dropna(
            axis=1, how="all"
        )

    for i, line in enumerate(ags3_data.splitlines()):
        last_line_type = line_type

        # In AGS 3.1 group names are prefixed with **
        if line.startswith('"**'):
            line_type = "group_name"
            if group:
                _save_group()

            group = line.strip(' ,"*')
            group_data = []

        # In AGS 3 header names are prefixed with "*
        elif line.startswith('"*'):
            line_type = "headers"
            new_headers = [h.strip(' ,"*') for h in line.split('","')]

            # Some groups have so many headers that they span multiple lines.
            # Therefore we need to check whether the new headers are
            # a continuation of the previous headers from the last line.
            if line_type == last_line_type:
                headers = headers + new_headers
            else:
                headers = new_headers

        # Skip lines where group units are defined, these are defined in the AGS 3 data dictionary.
        elif line.startswith('"<UNITS>"'):
            line_type = "units"
            continue

        # The rest of the lines contain:
        # 1. GI data
        # 2. a continuation of the previous line. These lines contain "<CONT>" in the first column.
        # 3. are empty or contain worthless data
        else:
            line_type = "data_row"
            data_row = line.split('","')
            if len("".join(data_row)) == 0:
                # Empty line: nothing to record.
                continue
            elif len(data_row) != len(headers):
                print(
                    f"\n🚨 CAUTION: The number of columns on line {i + 1} ({len(data_row)}) doesn't match the number of columns of group {group} ({len(headers)})!",
                    f"{group} headers: {headers}",
                    f"Line {i + 1}: {data_row}",
                    sep="\n",
                    end="\n\n",
                )
                continue
            # Append continued lines (<CONT>) to the last data_row
            elif data_row[0] == '"<CONT>':
                if not group_data:
                    # Fix: a <CONT> row without a preceding data row used to
                    # crash with an IndexError on group_data[-1].
                    print(
                        f"\n🚨 CAUTION: Line {i + 1} is a <CONT> row without a preceding data row. Skipping it.\n"
                    )
                    continue
                last_data_row = group_data[-1]
                for j, data in enumerate(data_row):
                    data = data.strip(' "')
                    if data and data != "<CONT>":
                        if last_data_row[j] is None:
                            # Last data row didn't contain data for this column
                            last_data_row[j] = coerce_string(data)
                        else:
                            # Last data row already contains data for this column
                            last_data_row[j] = str(last_data_row[j]) + data
            # Lines that are assumed to contain valid data are added to the group data
            else:
                group_data.append([coerce_string(d.strip(' "')) for d in data_row])

    # Also add the last group's df to the dictionary of AGS dfs
    if group:
        _save_group()
    else:
        # Fix: with no groups at all, the original tried to build a DataFrame
        # from the loop's initializer values, raising a confusing ValueError.
        print(
            '🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
        )

    return ags3_dfs
|
152
|
+
|
153
|
+
|
154
|
+
def ags4_to_dfs(ags4_data: str) -> Dict[str, pd.DataFrame]:
    """Convert AGS 4 data to a dictionary of pandas DataFrames.

    Args:
        ags4_data (str): The AGS 4 data as a string.

    Returns:
        Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key
            represents a group name from AGS 4 data, and the corresponding value is
            a pandas DataFrame containing the data for that group.
    """
    # AGS4.AGS4_to_dataframe accepts a file(-like) object, not the data string.
    ags4_tups = AGS4.AGS4_to_dataframe(io.StringIO(ags4_data))

    # Keep only the rows from index label 2 onward (the data rows) and drop
    # the HEADING column from every group's DataFrame.
    return {
        group_name: raw_df.loc[2:].drop(columns=["HEADING"]).reset_index(drop=True)
        for group_name, raw_df in ags4_tups[0].items()
    }
|
175
|
+
|
176
|
+
|
177
|
+
def coerce_string(string: str) -> Union[None, bool, float, str]:
    """Best-effort conversion of an AGS string value to a Python value.

    Maps "none"/"null"/"" (case-insensitive) to None and "true"/"false" to
    booleans; numeric strings become int (when the value is whole) or float;
    anything else is returned unchanged.
    """
    lowered = string.lower()
    special_values: Dict[str, Union[None, bool]] = {
        "none": None,
        "null": None,
        "": None,
        "true": True,
        "false": False,
    }
    if lowered in special_values:
        return special_values[lowered]

    try:
        number = float(string)
    except ValueError:
        # Not numeric: keep the original string.
        return string
    return int(number) if number.is_integer() else number
|
@@ -0,0 +1,282 @@
|
|
1
|
+
import pandera as pa
|
2
|
+
from pandera.typing import Series
|
3
|
+
|
4
|
+
|
5
|
+
class Ags3HOLE(pa.DataFrameModel):
    """Pandera schema for the AGS 3 'HOLE' group: one row per exploratory hole."""

    HOLE_ID: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="327/16A",
    )
    HOLE_TYPE: Series[str] = pa.Field(
        coerce=True,
        # isin=["CP", "TP", "TPS", "TPS2", "TPS3", "TPS4", "TPS5", "TPS6", "TPS7", "TPS8"],
        description="Type of exploratory hole",
        # example="CP (See Appendix 1)",
    )
    # Collar coordinates and ground level of the hole.
    # NOTE(review): no description/unit metadata in source — presumably easting,
    # northing and ground-level elevation in metres; confirm against the AGS 3
    # data dictionary.
    HOLE_NATE: Series[float] = pa.Field(coerce=True)
    HOLE_NATN: Series[float] = pa.Field(coerce=True)
    HOLE_GL: Series[float] = pa.Field(coerce=True)
    HOLE_FDEP: Series[float] = pa.Field(
        coerce=True,
        description="Final depth of hole",
        # example=32.60,
        metadata={"unit": "m"},
    )
|
28
|
+
|
29
|
+
|
30
|
+
class BaseSAMP(pa.DataFrameModel):
    """Fields shared by the AGS 3 and AGS 4 'SAMP' (sample) group schemas."""

    SAMP_REF: Series[str] = pa.Field(
        coerce=True,
        nullable=True,
        description="Sample reference number",
        # example="24",
    )
    SAMP_TYPE: Series[str] = pa.Field(
        coerce=True,
        nullable=True,
        description="Sample type",
        # example="U (See Appendix 1)",
    )
    SAMP_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to TOP of sample",
        # example=24.55,
        metadata={"unit": "m"},
    )
    # Nullable: point samples have no base depth.
    SAMP_BASE: Series[float] = pa.Field(
        coerce=True,
        nullable=True,
        description="Depth to BASE of sample",
        # example=24.55,
        metadata={"unit": "m"},
    )
|
56
|
+
|
57
|
+
|
58
|
+
class Ags3SAMP(BaseSAMP):
    """AGS 3 'SAMP' group schema: BaseSAMP plus a generated sample_id and HOLE_ID."""

    # `sample_id` is not an AGS 3 heading; it is generated from
    # SAMP_REF, SAMP_TYPE, SAMP_TOP and HOLE_ID elsewhere in this package.
    sample_id: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
        coerce=True,
        description="Sample unique identifier",
        # example="REF_TYPE_TOP_HOLE_ID",
    )
    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        description="Exploratory hole or location equivalent",
        # example="327/16A",
    )
|
71
|
+
|
72
|
+
|
73
|
+
class Ags4SAMP(BaseSAMP):
    """AGS 4 'SAMP' group schema: BaseSAMP plus native SAMP_ID and LOCA_ID."""

    SAMP_ID: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
        coerce=True,
        description="Sample unique identifier",
        # example="ABC121415010",
    )
    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
|
87
|
+
|
88
|
+
|
89
|
+
class BaseGEOL(pa.DataFrameModel):
    """Fields shared by the AGS 3 and AGS 4 'GEOL' (geology / stratum) group schemas."""

    GEOL_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to the top of stratum",
        # example=16.21,
        metadata={"unit": "m"},
    )
    GEOL_BASE: Series[float] = pa.Field(
        coerce=True,
        description="Depth to the base of description",
        # example=17.25,
        metadata={"unit": "m"},
    )
    GEOL_DESC: Series[str] = pa.Field(
        coerce=True,
        description="General description of stratum",
        # example="Stiff grey silty CLAY",
    )
    GEOL_LEG: Series[str] = pa.Field(
        nullable=True,
        description="Legend code",
        # example="102",
    )
    GEOL_GEOL: Series[str] = pa.Field(
        coerce=True,
        description="Geology code",
        # example="LC",
    )
    GEOL_GEO2: Series[str] = pa.Field(
        coerce=True,
        nullable=True,
        description="Second geology code",
        # example="SAND",
    )
|
123
|
+
|
124
|
+
|
125
|
+
class Ags3GEOL(BaseGEOL):
    """AGS 3 'GEOL' group schema: BaseGEOL keyed on HOLE_ID."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
|
132
|
+
|
133
|
+
|
134
|
+
class Ags4GEOL(BaseGEOL):
    """AGS 4 'GEOL' group schema: BaseGEOL keyed on LOCA_ID."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
|
141
|
+
|
142
|
+
|
143
|
+
class BaseISPT(pa.DataFrameModel):
    """Fields shared by the AGS 3 and AGS 4 'ISPT' (Standard Penetration Test) schemas."""

    ISPT_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to top of test",
        # example=13.50,
        metadata={"unit": "m"},
    )
    ISPT_NVAL: Series[int] = pa.Field(
        coerce=True,
        # Fix: description was a copy-paste leftover from the GEOL group
        # ("Depth to the base of description"); ISPT_NVAL is the SPT blow count.
        description="SPT N-value",
        # example=35,
        ge=0,
    )
|
156
|
+
|
157
|
+
|
158
|
+
class Ags3ISPT(BaseISPT):
    """AGS 3 'ISPT' group schema: BaseISPT keyed on HOLE_ID."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
|
165
|
+
|
166
|
+
|
167
|
+
class Ags4ISPT(BaseISPT):
    """AGS 4 'ISPT' group schema: BaseISPT keyed on LOCA_ID."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
|
174
|
+
|
175
|
+
|
176
|
+
class BaseCORE(pa.DataFrameModel):
    """Fields shared by the AGS 3 and AGS 4 'CORE' (rock core run) group schemas."""

    CORE_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to TOP of core run",
        # example=2.54,
        metadata={"unit": "m"},
    )
    # NOTE(review): the three recovery percentages are typed Series[int] while
    # being nullable — confirm int64 coercion behaves as intended when the
    # source column contains missing values.
    CORE_PREC: Series[int] = pa.Field(
        coerce=True,
        nullable=True,
        description="Percentage of core recovered in core run (TCR)",
        # example="32",
        metadata={"unit": "%"},
        ge=0,
        le=100,
    )
    CORE_SREC: Series[int] = pa.Field(
        coerce=True,
        nullable=True,
        description="Percentage of solid core recovered in core run (SCR)",
        # example="23",
        metadata={"unit": "%"},
        ge=0,
        le=100,
    )
    CORE_RQD: Series[int] = pa.Field(
        coerce=True,
        nullable=True,
        description="Rock Quality Designation for core run (RQD)",
        # example="20",
        metadata={"unit": "%"},
        ge=0,
        le=100,
    )
|
210
|
+
|
211
|
+
|
212
|
+
class Ags3CORE(BaseCORE):
    """AGS 3 'CORE' group schema: BaseCORE keyed on HOLE_ID, with AGS 3's CORE_BOT heading."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
    # AGS 3 names the bottom-of-run heading CORE_BOT (AGS 4 uses CORE_BASE).
    CORE_BOT: Series[float] = pa.Field(
        coerce=True,
        description="Depth to BOTTOM of core run",
        # example=3.54,
        metadata={"unit": "m"},
    )
|
225
|
+
|
226
|
+
|
227
|
+
class Ags4CORE(BaseCORE):
    """AGS 4 'CORE' group schema: BaseCORE keyed on LOCA_ID, with AGS 4's CORE_BASE heading."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
    # AGS 4 names the bottom-of-run heading CORE_BASE (AGS 3 uses CORE_BOT).
    CORE_BASE: Series[float] = pa.Field(
        coerce=True,
        description="Depth to BASE of core run",
        # example=3.54,
        metadata={"unit": "m"},
    )
|
240
|
+
|
241
|
+
|
242
|
+
class BaseWETH(pa.DataFrameModel):
    """Fields shared by the AGS 3 and AGS 4 'WETH' (weathering) group schemas."""

    WETH_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to top of weathering subdivision",
        # example=3.50,
        metadata={"unit": "m"},
    )
    WETH_BASE: Series[float] = pa.Field(
        coerce=True,
        description="Depth to base of weathering subdivision",
        # example=3.95,
        metadata={"unit": "m"},
    )
|
255
|
+
|
256
|
+
|
257
|
+
class Ags3WETH(BaseWETH):
    """AGS 3 'WETH' group schema: BaseWETH keyed on HOLE_ID, with WETH_GRAD."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
    # NOTE(review): description "Weather Gradient" looks like a typo for
    # "Weathering grade" — confirm against the AGS 3 data dictionary.
    WETH_GRAD: Series[str] = pa.Field(
        coerce=True,
        description="Weather Gradient",
        # example="IV",
    )
|
269
|
+
|
270
|
+
|
271
|
+
class Ags4WETH(BaseWETH):
    """AGS 4 'WETH' group schema: BaseWETH keyed on LOCA_ID, with WETH_WETH."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
    WETH_WETH: Series[str] = pa.Field(
        coerce=True,
        description="Weathering classifier for WETH_SCH and WETH_SYS",
        # example="IV",
    )
|
@@ -0,0 +1,230 @@
|
|
1
|
+
"""Transforms, i.e. maps, AGS data to Bedrock's schema"""
|
2
|
+
|
3
|
+
from typing import Dict
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
import pandera as pa
|
7
|
+
from pandera.typing import DataFrame
|
8
|
+
from pyproj import CRS
|
9
|
+
|
10
|
+
from bedrock_ge.gi.ags.schemas import Ags3HOLE, Ags3SAMP, BaseSAMP
|
11
|
+
from bedrock_ge.gi.schemas import BaseInSitu, BaseLocation, BaseSample, Project
|
12
|
+
from bedrock_ge.gi.validate import check_foreign_key
|
13
|
+
|
14
|
+
|
15
|
+
def ags3_db_to_no_gis_brgi_db(
    ags3_db: Dict[str, pd.DataFrame], crs: CRS
) -> Dict[str, pd.DataFrame]:
    """Transform an AGS 3 database into a Bedrock GI database without GIS geometry.

    The 'PROJ' group becomes the 'Project' table, 'HOLE' becomes 'Location',
    'SAMP' becomes 'Sample', remaining groups with a HOLE_ID column become
    'InSitu_<GROUP>' tables, and everything else is carried over unchanged.

    Args:
        ags3_db (Dict[str, pd.DataFrame]): AGS 3 groups keyed by group name.
        crs (CRS): Coordinate reference system of the project.

    Returns:
        Dict[str, pd.DataFrame]: The Bedrock GI database keyed by table name.
    """
    # Make sure that the AGS 3 database is not changed outside this function.
    # Fix: `dict.copy()` is shallow, so the helpers below still mutated the
    # caller's DataFrames; copy every DataFrame as well.
    ags3_db = {group: df.copy() for group, df in ags3_db.items()}

    print("Transforming AGS 3 groups to Bedrock tables...")

    # Instantiate Bedrock dictionary of pd.DataFrames
    brgi_db = {}

    # Project
    print("Transforming AGS 3 group 'PROJ' to Bedrock GI 'Project' table...")
    brgi_db["Project"] = ags_proj_to_brgi_project(ags3_db["PROJ"], crs)
    project_uid = brgi_db["Project"]["project_uid"].item()
    del ags3_db["PROJ"]

    # Locations
    # NOTE(review): if HOLE is missing but SAMP or in-situ groups exist, the
    # brgi_db["Location"] lookups below raise KeyError — confirm intended.
    if "HOLE" in ags3_db.keys():
        print("Transforming AGS 3 group 'HOLE' to Bedrock GI 'Location' table...")
        brgi_db["Location"] = ags3_hole_to_brgi_location(ags3_db["HOLE"], project_uid)  # type: ignore
        del ags3_db["HOLE"]
    else:
        print(
            "Your AGS 3 data doesn't contain a HOLE group, i.e. Ground Investigation locations."
        )

    # Samples
    if "SAMP" in ags3_db.keys():
        print("Transforming AGS 3 group 'SAMP' to Bedrock GI 'Sample' table...")
        check_foreign_key("HOLE_ID", brgi_db["Location"], ags3_db["SAMP"])
        ags3_db["SAMP"] = generate_sample_ids_for_ags3(ags3_db["SAMP"])  # type: ignore
        brgi_db["Sample"] = ags3_samp_to_brgi_sample(ags3_db["SAMP"], project_uid)  # type: ignore
        del ags3_db["SAMP"]
    else:
        print("Your AGS 3 data doesn't contain a SAMP group, i.e. samples.")

    # The rest of the tables: 1. Lab Tests 2. In-Situ Measurements 3. Other tables
    for group, group_df in ags3_db.items():
        if "SAMP_REF" in group_df.columns:
            # Groups referencing samples are lab test data; carried over as-is.
            print(f"Project {project_uid} has lab test data: {group}.")
            brgi_db[group] = group_df  # type: ignore
        elif "HOLE_ID" in group_df.columns:
            print(
                f"Transforming AGS 3 group '{group}' to Bedrock GI 'InSitu_{group}' table..."
            )
            check_foreign_key("HOLE_ID", brgi_db["Location"], group_df)
            brgi_db[f"InSitu_{group}"] = ags3_in_situ_to_brgi_in_situ(  # type: ignore
                group, group_df, project_uid
            )
        else:
            # Groups without sample or location references are carried over as-is.
            brgi_db[group] = group_df  # type: ignore

    print(
        "Done",
        "The Bedrock database contains the following tables:",
        list(brgi_db.keys()),
        sep="\n",
        end="\n\n",
    )
    return brgi_db  # type: ignore
|
76
|
+
|
77
|
+
|
78
|
+
@pa.check_types(lazy=True)
def ags_proj_to_brgi_project(ags_proj: pd.DataFrame, crs: CRS) -> DataFrame[Project]:
    """Maps the AGS 3 'PROJ' group to a Bedrock GI 'Project' table.

    Args:
        ags_proj (pd.DataFrame): The AGS 3 'PROJ' group.
        crs (CRS): The coordinate reference system of the project.

    Returns:
        DataFrame[Project]: The Bedrock GI 'Project' table.
    """
    # Fix: operate on a copy so the caller's 'PROJ' DataFrame isn't mutated.
    ags_proj = ags_proj.copy()

    # Default the project's unique ID to the AGS project ID when not supplied.
    if "project_uid" not in ags_proj.columns:
        ags_proj["project_uid"] = ags_proj["PROJ_ID"]

    ags_proj["crs_wkt"] = crs.to_wkt()

    return ags_proj  # type: ignore
|
95
|
+
|
96
|
+
|
97
|
+
@pa.check_types(lazy=True)
def ags3_hole_to_brgi_location(
    ags3_hole: DataFrame[Ags3HOLE], project_uid: str
) -> DataFrame[BaseLocation]:
    """Maps the AGS 3 'HOLE' group to a Bedrock GI 'Location' table.

    Args:
        ags3_hole (DataFrame[Ags3HOLE]): The AGS 3 'HOLE' group.
        project_uid (str): The project's unique identifier.

    Returns:
        DataFrame[BaseLocation]: The Bedrock GI 'Location' table.
    """
    # Fix: work on a copy so the caller's DataFrame isn't mutated, and use the
    # `brgi_location` name consistently (the original assigned an alias but
    # then kept mutating and returning the input `ags3_hole`).
    brgi_location = ags3_hole.copy()
    brgi_location["project_uid"] = project_uid
    brgi_location["location_source_id"] = brgi_location["HOLE_ID"]
    brgi_location["location_uid"] = (
        brgi_location["HOLE_ID"] + "_" + brgi_location["project_uid"]
    )
    brgi_location["location_type"] = brgi_location["HOLE_TYPE"]
    brgi_location["easting"] = brgi_location["HOLE_NATE"]
    brgi_location["northing"] = brgi_location["HOLE_NATN"]
    brgi_location["ground_level_elevation"] = brgi_location["HOLE_GL"]
    brgi_location["depth_to_base"] = brgi_location["HOLE_FDEP"]

    return brgi_location  # type: ignore
|
114
|
+
|
115
|
+
|
116
|
+
@pa.check_types(lazy=True)
def ags3_samp_to_brgi_sample(
    ags3_samp: DataFrame[Ags3SAMP],
    project_uid: str,
) -> DataFrame[BaseSample]:
    """Maps the AGS 3 'SAMP' group to a Bedrock GI 'Sample' table.

    Args:
        ags3_samp (DataFrame[Ags3SAMP]): The AGS 3 'SAMP' group, including the
            generated 'sample_id' column.
        project_uid (str): The project's unique identifier.

    Returns:
        DataFrame[BaseSample]: The Bedrock GI 'Sample' table.
    """
    # Fix: operate on a copy so the caller's DataFrame isn't mutated.
    brgi_sample = ags3_samp.copy()
    brgi_sample["project_uid"] = project_uid
    brgi_sample["location_source_id"] = brgi_sample["HOLE_ID"]
    brgi_sample["location_uid"] = (
        brgi_sample["HOLE_ID"] + "_" + brgi_sample["project_uid"]
    )
    brgi_sample["sample_source_id"] = brgi_sample["sample_id"]
    brgi_sample["sample_uid"] = (
        brgi_sample["sample_id"] + "_" + brgi_sample["project_uid"]
    )
    brgi_sample["depth_to_top"] = brgi_sample["SAMP_TOP"]
    brgi_sample["depth_to_base"] = brgi_sample["SAMP_BASE"]

    return brgi_sample  # type: ignore
|
131
|
+
|
132
|
+
|
133
|
+
@pa.check_types(lazy=True)
def ags3_in_situ_to_brgi_in_situ(
    group_name: str, ags3_in_situ: pd.DataFrame, project_uid: str
) -> DataFrame[BaseInSitu]:
    """Transform, i.e. map, AGS 3 in-situ measurement data to Bedrock's in-situ data schema.

    Args:
        group_name (str): The AGS 3 group name.
        ags3_in_situ (pd.DataFrame): The AGS 3 data for that group.
        project_uid (str): The project uid.

    Returns:
        DataFrame[BaseInSitu]: The Bedrock in-situ data.
    """
    # Fix: operate on a copy so the caller's DataFrame isn't mutated.
    brgi_in_situ = ags3_in_situ.copy()
    brgi_in_situ["project_uid"] = project_uid
    brgi_in_situ["location_uid"] = brgi_in_situ["HOLE_ID"] + "_" + project_uid

    # Most AGS 3 groups follow the "<GROUP>_TOP"/"<GROUP>_BASE" depth heading
    # convention; this table holds the groups that deviate from it.
    # (None means "keep the default heading".)
    non_default_depth_headings = {
        "CDIA": ("CDIA_CDEP", None),
        "FLSH": ("FLSH_FROM", "FLSH_TO"),
        "CORE": (None, "CORE_BOT"),
        "HDIA": ("HDIA_HDEP", None),
        "PTIM": ("PTIM_DEP", None),
        "IVAN": ("IVAN_DPTH", None),
        "STCN": ("STCN_DPTH", None),
        "POBS": ("PREF_TDEP", None),
        "PREF": ("PREF_TDEP", None),
        "DREM": ("DREM_DPTH", None),
        "PRTD": ("PRTD_DPTH", None),
        "PRTG": ("PRTD_DPTH", None),
        "PRTL": ("PRTD_DPTH", None),
    }
    top_depth = f"{group_name}_TOP"
    base_depth = f"{group_name}_BASE"
    custom_top, custom_base = non_default_depth_headings.get(group_name, (None, None))
    if custom_top:
        top_depth = custom_top
    if custom_base:
        base_depth = custom_base

    # Some AGS 3 files provide IPRM_BASE but no IPRM_TOP.
    if group_name == "IPRM" and top_depth not in brgi_in_situ.columns:
        print(
            "\n🚨 CAUTION: The IPRM group in this AGS 3 file does not contain a 'IPRM_TOP' heading!",
            "🚨 CAUTION: Making the 'IPRM_BASE' heading the 'depth_to_top'...",
            sep="\n",
            end="\n\n",
        )
        top_depth = "IPRM_BASE"
        # "None" is not a real column, so the `.get` below yields None.
        base_depth = "None"

    brgi_in_situ["depth_to_top"] = brgi_in_situ[top_depth]
    # `.get` returns None when the group has no base-depth column.
    brgi_in_situ["depth_to_base"] = brgi_in_situ.get(base_depth)

    return brgi_in_situ  # type: ignore
|
190
|
+
|
191
|
+
|
192
|
+
@pa.check_types(lazy=True)
def generate_sample_ids_for_ags3(
    ags3_with_samp: DataFrame[BaseSAMP],
) -> DataFrame[Ags3SAMP]:
    """Add a unique 'sample_id' column to an AGS 3 SAMP group.

    The ID is built as "{SAMP_REF}_{SAMP_TYPE}_{SAMP_TOP}_{HOLE_ID}".

    Args:
        ags3_with_samp (DataFrame[BaseSAMP]): The AGS 3 SAMP group.

    Returns:
        DataFrame[Ags3SAMP]: The SAMP group with the added 'sample_id' column.
    """
    # Fix: operate on a copy so the caller's DataFrame isn't mutated, and
    # removed a large block of commented-out alternative ID-generation code.
    ags3_with_samp = ags3_with_samp.copy()
    ags3_with_samp["sample_id"] = (
        ags3_with_samp["SAMP_REF"].astype(str)
        + "_"
        + ags3_with_samp["SAMP_TYPE"].astype(str)
        + "_"
        + ags3_with_samp["SAMP_TOP"].astype(str)
        + "_"
        + ags3_with_samp["HOLE_ID"].astype(str)
    )
    return ags3_with_samp  # type: ignore
|