bedrock-ge 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,192 @@
1
+ import io
2
+ from typing import Any, Dict, List, Union
3
+
4
+ import pandas as pd
5
+ from python_ags4 import AGS4
6
+
7
+ from bedrock_ge.gi.ags.validate import check_ags_proj_group
8
+
9
+
10
def ags_to_dfs(ags_data: str) -> Dict[str, pd.DataFrame]:
    """
    Convert AGS 3 or AGS 4 data to a dictionary of pandas DataFrames.

    The AGS version is detected from the first non-empty line:
    AGS 3 group headers start with '"**', AGS 4 files start with '"GROUP"'.

    Args:
        ags_data (str): The AGS data as a string.

    Raises:
        ValueError: If the data is empty, contains only blank lines, or the
            first non-empty line does not match AGS 3 or AGS 4 format.

    Returns:
        Dict[str, pd.DataFrame]: A dictionary where keys represent AGS group
            names with corresponding DataFrames for the corresponding group data.
    """
    ags_version = None
    ags_dfs = None

    # Find the first non-empty line to determine the AGS version and
    # delegate parsing to the version-specific function.
    for line in ags_data.splitlines():
        stripped_line = line.strip()  # Remove leading/trailing whitespace
        if stripped_line:  # Skip empty lines at the start of the file
            if stripped_line.startswith('"**'):
                ags_version = 3
                ags_dfs = ags3_to_dfs(ags_data)
            elif stripped_line.startswith('"GROUP"'):
                ags_version = 4
                ags_dfs = ags4_to_dfs(ags_data)
            # The first non-empty line decides; stop scanning either way.
            break

    # Covers both an unrecognized first non-empty line and completely
    # empty input. (The original code raised a NameError on empty input.)
    if ags_dfs is None:
        raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")

    is_proj_group_correct = check_ags_proj_group(ags_dfs["PROJ"])
    if is_proj_group_correct:
        project_id = ags_dfs["PROJ"]["PROJ_ID"].iloc[0]
        print(
            f"AGS {ags_version} data was read for Project {project_id}",
            "This Ground Investigation data contains groups:",
            list(ags_dfs.keys()),
            sep="\n",
            end="\n\n",
        )

    return ags_dfs
52
+
53
+
54
def ags3_to_dfs(ags3_data: str) -> Dict[str, pd.DataFrame]:
    """Convert AGS 3 data to a dictionary of pandas DataFrames.

    Parses the AGS 3 text line by line, tracking the current group, its
    headers, and its data rows. A group's DataFrame is created when the next
    group header is encountered (and once more at the end of input for the
    final group).

    Args:
        ags3_data (str): The AGS 3 data as a string.

    Returns:
        Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key represents a group name from AGS 3 data,
        and the corresponding value is a pandas DataFrame containing the data for that group.
    """

    # Initialize dictionary and variables used in the AGS 3 read loop
    ags3_dfs = {}
    line_type = "line_0"
    group = ""
    headers: List[str] = ["", "", ""]
    group_data: List[List[Any]] = [[], [], []]

    for i, line in enumerate(ags3_data.splitlines()):
        # Remember the previous line's classification to detect multi-line headers.
        last_line_type = line_type

        # In AGS 3.1 group names are prefixed with **
        if line.startswith('"**'):
            line_type = "group_name"
            # Flush the previous group's accumulated rows before starting a new group.
            if group:
                ags3_dfs[group] = pd.DataFrame(group_data, columns=headers)

            group = line.strip(' ,"*')
            group_data = []

        # In AGS 3 header names are prefixed with "*
        elif line.startswith('"*'):
            line_type = "headers"
            new_headers = line.split('","')
            new_headers = [h.strip(' ,"*') for h in new_headers]

            # Some groups have so many headers that they span multiple lines.
            # Therefore we need to check whether the new headers are
            # a continuation of the previous headers from the last line.
            if line_type == last_line_type:
                headers = headers + new_headers
            else:
                headers = new_headers

        # Skip lines where group units are defined, these are defined in the AGS 3 data dictionary.
        elif line.startswith('"<UNITS>"'):
            line_type = "units"
            continue

        # The rest of the lines contain:
        # 1. GI data
        # 2. a continuation of the previous line. These lines contain "<CONT>" in the first column.
        # 3. are empty or contain worthless data
        else:
            line_type = "data_row"
            data_row = line.split('","')
            if len("".join(data_row)) == 0:
                # print(f"Line {i} is empty. Last Group: {group}")
                continue
            elif len(data_row) != len(headers):
                # Column-count mismatches are reported and the row is dropped
                # rather than raising, so one bad line doesn't abort the parse.
                print(
                    f"\n🚨 CAUTION: The number of columns on line {i + 1} ({len(data_row)}) doesn't match the number of columns of group {group} ({len(headers)})!",
                    f"{group} headers: {headers}",
                    f"Line {i + 1}: {data_row}",
                    sep="\n",
                    end="\n\n",
                )
                continue
            # Append continued lines (<CONT>) to the last data_row
            # NOTE(review): assumes a <CONT> row is always preceded by a valid
            # data row in the same group; a leading <CONT> row would raise
            # IndexError on group_data[-1] — confirm against real AGS 3 files.
            elif data_row[0] == '"<CONT>':
                last_data_row = group_data[-1]
                for j, data in enumerate(data_row):
                    data = data.strip(' "')
                    if data and data != "<CONT>":
                        if last_data_row[j] is None:
                            # Last data row didn't contain data for this column
                            last_data_row[j] = coerce_string(data)
                        else:
                            # Last data row already contains data for this column
                            last_data_row[j] = str(last_data_row[j]) + data
            # Lines that are assumed to contain valid data are added to the group data
            else:
                cleaned_data_row = []
                for data in data_row:
                    cleaned_data_row.append(coerce_string(data.strip(' "')))
                group_data.append(cleaned_data_row)

    # Also add the last group's df to the dictionary of AGS dfs
    ags3_dfs[group] = pd.DataFrame(group_data, columns=headers).dropna(
        axis=1, how="all"
    )

    if not group:
        print(
            '🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
        )

    return ags3_dfs
152
+
153
+
154
def ags4_to_dfs(ags4_data: str) -> Dict[str, pd.DataFrame]:
    """Convert AGS 4 data to a dictionary of pandas DataFrames.

    Args:
        ags4_data (str): The AGS 4 data as a string.

    Returns:
        Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key represents a group name from AGS 4 data,
        and the corresponding value is a pandas DataFrame containing the data for that group.
    """
    # AGS4.AGS4_to_dataframe expects a file-like object, not a data string,
    # so wrap the string in an in-memory text buffer.
    ags4_tups = AGS4.AGS4_to_dataframe(io.StringIO(ags4_data))

    # ags4_tups[0] maps group names to raw DataFrames. Drop the first two
    # rows (metadata rows, presumably UNIT/TYPE — per python_ags4's layout)
    # and the HEADING column, keeping only the actual group data.
    return {
        group_name: raw_df.loc[2:].drop(columns=["HEADING"]).reset_index(drop=True)
        for group_name, raw_df in ags4_tups[0].items()
    }
175
+
176
+
177
def coerce_string(string: str) -> Union[None, bool, float, str]:
    """Coerce a raw AGS field string to None, bool, int, float, or str.

    'none'/'null'/'' (case-insensitive) become None, 'true'/'false' become
    booleans, numeric strings become int (when whole) or float, and anything
    else is returned unchanged.
    """
    lowered = string.lower()

    # Null-like markers and empty fields carry no data.
    if lowered in {"none", "null", ""}:
        return None
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    # Try a numeric interpretation; fall back to the original string.
    try:
        number = float(string)
    except ValueError:
        return string

    # Whole numbers are narrowed to int (e.g. "42" -> 42, "1e3" -> 1000).
    return int(number) if number.is_integer() else number
@@ -0,0 +1,282 @@
1
+ import pandera as pa
2
+ from pandera.typing import Series
3
+
4
+
5
class Ags3HOLE(pa.DataFrameModel):
    """Pandera schema for the AGS 3 'HOLE' group: exploratory hole (GI location) metadata."""

    HOLE_ID: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="327/16A",
    )
    HOLE_TYPE: Series[str] = pa.Field(
        coerce=True,
        # isin=["CP", "TP", "TPS", "TPS2", "TPS3", "TPS4", "TPS5", "TPS6", "TPS7", "TPS8"],
        description="Type of exploratory hole",
        # example="CP (See Appendix 1)",
    )
    # Grid coordinates and ground level: mapped to easting / northing /
    # ground_level_elevation in the Bedrock 'Location' table.
    HOLE_NATE: Series[float] = pa.Field(coerce=True)
    HOLE_NATN: Series[float] = pa.Field(coerce=True)
    HOLE_GL: Series[float] = pa.Field(coerce=True)
    HOLE_FDEP: Series[float] = pa.Field(
        coerce=True,
        description="Final depth of hole",
        # example=32.60,
        metadata={"unit": "m"},
    )
28
+
29
+
30
class BaseSAMP(pa.DataFrameModel):
    """Common pandera schema for the 'SAMP' (sample) group, shared by AGS 3 and AGS 4."""

    SAMP_REF: Series[str] = pa.Field(
        coerce=True,
        nullable=True,
        description="Sample reference number",
        # example="24",
    )
    SAMP_TYPE: Series[str] = pa.Field(
        coerce=True,
        nullable=True,
        description="Sample type",
        # example="U (See Appendix 1)",
    )
    SAMP_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to TOP of sample",
        # example=24.55,
        metadata={"unit": "m"},
    )
    SAMP_BASE: Series[float] = pa.Field(
        coerce=True,
        nullable=True,
        description="Depth to BASE of sample",
        # example=24.55,
        metadata={"unit": "m"},
    )
56
+
57
+
58
class Ags3SAMP(BaseSAMP):
    """AGS 3 'SAMP' schema: adds the generated `sample_id` and the HOLE_ID foreign key."""

    # `sample_id` is generated (not native AGS 3), since SAMP_REF alone
    # isn't guaranteed unique: "{SAMP_REF}_{SAMP_TYPE}_{SAMP_TOP}_{HOLE_ID}".
    sample_id: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
        coerce=True,
        description="Sample unique identifier",
        # example="REF_TYPE_TOP_HOLE_ID",
    )
    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        description="Exploratory hole or location equivalent",
        # example="327/16A",
    )
71
+
72
+
73
class Ags4SAMP(BaseSAMP):
    """AGS 4 'SAMP' schema: native SAMP_ID plus the LOCA_ID foreign key."""

    SAMP_ID: Series[str] = pa.Field(
        # primary_key=True,
        unique=True,
        coerce=True,
        description="Sample unique identifier",
        # example="ABC121415010",
    )
    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
87
+
88
+
89
class BaseGEOL(pa.DataFrameModel):
    """Common pandera schema for the 'GEOL' (field geological description) group."""

    GEOL_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to the top of stratum",
        # example=16.21,
        metadata={"unit": "m"},
    )
    GEOL_BASE: Series[float] = pa.Field(
        coerce=True,
        description="Depth to the base of description",
        # example=17.25,
        metadata={"unit": "m"},
    )
    GEOL_DESC: Series[str] = pa.Field(
        coerce=True,
        description="General description of stratum",
        # example="Stiff grey silty CLAY",
    )
    GEOL_LEG: Series[str] = pa.Field(
        nullable=True,
        description="Legend code",
        # example="102",
    )
    GEOL_GEOL: Series[str] = pa.Field(
        coerce=True,
        description="Geology code",
        # example="LC",
    )
    GEOL_GEO2: Series[str] = pa.Field(
        coerce=True,
        nullable=True,
        description="Second geology code",
        # example="SAND",
    )
123
+
124
+
125
class Ags3GEOL(BaseGEOL):
    """AGS 3 'GEOL' schema: adds the HOLE_ID foreign key."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
132
+
133
+
134
class Ags4GEOL(BaseGEOL):
    """AGS 4 'GEOL' schema: adds the LOCA_ID foreign key."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
141
+
142
+
143
class BaseISPT(pa.DataFrameModel):
    """Common pandera schema for the 'ISPT' (Standard Penetration Test) group."""

    ISPT_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to top of test",
        # example=13.50,
        metadata={"unit": "m"},
    )
    # NOTE(review): this description looks copy-pasted from GEOL_BASE;
    # ISPT_NVAL is the SPT 'N' value (blow count) per the AGS data dictionary
    # — confirm and fix the description text.
    ISPT_NVAL: Series[int] = pa.Field(
        coerce=True,
        description="Depth to the base of description",
        # example=35,
        ge=0,
    )
156
+
157
+
158
class Ags3ISPT(BaseISPT):
    """AGS 3 'ISPT' schema: adds the HOLE_ID foreign key."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
165
+
166
+
167
class Ags4ISPT(BaseISPT):
    """AGS 4 'ISPT' schema: adds the LOCA_ID foreign key."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
174
+
175
+
176
class BaseCORE(pa.DataFrameModel):
    """Common pandera schema for the 'CORE' (rotary core run) group.

    TCR/SCR/RQD recovery percentages are nullable ints constrained to 0-100.
    """

    CORE_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to TOP of core run",
        # example=2.54,
        metadata={"unit": "m"},
    )
    CORE_PREC: Series[int] = pa.Field(
        coerce=True,
        nullable=True,
        description="Percentage of core recovered in core run (TCR)",
        # example="32",
        metadata={"unit": "%"},
        ge=0,
        le=100,
    )
    CORE_SREC: Series[int] = pa.Field(
        coerce=True,
        nullable=True,
        description="Percentage of solid core recovered in core run (SCR)",
        # example="23",
        metadata={"unit": "%"},
        ge=0,
        le=100,
    )
    CORE_RQD: Series[int] = pa.Field(
        coerce=True,
        nullable=True,
        description="Rock Quality Designation for core run (RQD)",
        # example="20",
        metadata={"unit": "%"},
        ge=0,
        le=100,
    )
210
+
211
+
212
class Ags3CORE(BaseCORE):
    """AGS 3 'CORE' schema: HOLE_ID foreign key; run base is named CORE_BOT in AGS 3."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
    CORE_BOT: Series[float] = pa.Field(
        coerce=True,
        description="Depth to BOTTOM of core run",
        # example=3.54,
        metadata={"unit": "m"},
    )
225
+
226
+
227
class Ags4CORE(BaseCORE):
    """AGS 4 'CORE' schema: LOCA_ID foreign key; run base is named CORE_BASE in AGS 4."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
    CORE_BASE: Series[float] = pa.Field(
        coerce=True,
        description="Depth to BASE of core run",
        # example=3.54,
        metadata={"unit": "m"},
    )
240
+
241
+
242
class BaseWETH(pa.DataFrameModel):
    """Common pandera schema for the 'WETH' (weathering) group."""

    WETH_TOP: Series[float] = pa.Field(
        coerce=True,
        description="Depth to top of weathering subdivision",
        # example=3.50,
        metadata={"unit": "m"},
    )
    WETH_BASE: Series[float] = pa.Field(
        coerce=True,
        description="Depth to base of weathering subdivision",
        # example=3.95,
        metadata={"unit": "m"},
    )
255
+
256
+
257
class Ags3WETH(BaseWETH):
    """AGS 3 'WETH' schema: HOLE_ID foreign key plus the weathering grade."""

    HOLE_ID: Series[str] = pa.Field(
        # foreign_key="Ags3HOLE.HOLE_ID",
        coerce=True,
        description="Exploratory hole or location equivalent",
        # example="6421/A",
    )
    # NOTE(review): "Weather Gradient" is likely a typo for "Weathering
    # Grade/Gradient" — confirm against the AGS 3 data dictionary before
    # changing the runtime description string.
    WETH_GRAD: Series[str] = pa.Field(
        coerce=True,
        description="Weather Gradient",
        # example="IV",
    )
269
+
270
+
271
class Ags4WETH(BaseWETH):
    """AGS 4 'WETH' schema: LOCA_ID foreign key plus the weathering classifier."""

    LOCA_ID: Series[str] = pa.Field(
        # foreign_key="Ags4LOCA.LOCA_ID",
        coerce=True,
        description="Location identifier",
        # example="327/16A",
    )
    WETH_WETH: Series[str] = pa.Field(
        coerce=True,
        description="Weathering classifier for WETH_SCH and WETH_SYS",
        # example="IV",
    )
@@ -0,0 +1,230 @@
1
+ """Transforms, i.e. maps, AGS data to Bedrock's schema"""
2
+
3
+ from typing import Dict
4
+
5
+ import pandas as pd
6
+ import pandera as pa
7
+ from pandera.typing import DataFrame
8
+ from pyproj import CRS
9
+
10
+ from bedrock_ge.gi.ags.schemas import Ags3HOLE, Ags3SAMP, BaseSAMP
11
+ from bedrock_ge.gi.schemas import BaseInSitu, BaseLocation, BaseSample, Project
12
+ from bedrock_ge.gi.validate import check_foreign_key
13
+
14
+
15
def ags3_db_to_no_gis_brgi_db(
    ags3_db: Dict[str, pd.DataFrame], crs: CRS
) -> Dict[str, pd.DataFrame]:
    """Transforms a dictionary of AGS 3 group DataFrames into Bedrock GI tables (without GIS geometry).

    Maps PROJ -> 'Project', HOLE -> 'Location', SAMP -> 'Sample', groups with
    a HOLE_ID column -> 'InSitu_<GROUP>' tables; groups with a SAMP_REF
    column (lab tests) and everything else are passed through unchanged.

    Args:
        ags3_db (Dict[str, pd.DataFrame]): AGS 3 groups keyed by group name.
            Must contain a 'PROJ' group.
        crs (CRS): The coordinate reference system of the project.

    Returns:
        Dict[str, pd.DataFrame]: Bedrock GI tables keyed by table name.
    """
    # Make sure that the AGS 3 database is not changed outside this function.
    # NOTE(review): dict.copy() is shallow — the `del` statements below don't
    # affect the caller's dict, but the mapping functions mutate the contained
    # DataFrames in place; confirm whether that side effect is intended.
    ags3_db = ags3_db.copy()

    print("Transforming AGS 3 groups to Bedrock tables...")

    # Instantiate Bedrock dictionary of pd.DataFrames
    brgi_db = {}

    # Project
    print("Transforming AGS 3 group 'PROJ' to Bedrock GI 'Project' table...")
    brgi_db["Project"] = ags_proj_to_brgi_project(ags3_db["PROJ"], crs)
    # .item() assumes exactly one PROJ row, i.e. a single project per file.
    project_uid = brgi_db["Project"]["project_uid"].item()
    del ags3_db["PROJ"]

    # Locations
    if "HOLE" in ags3_db.keys():
        print("Transforming AGS 3 group 'HOLE' to Bedrock GI 'Location' table...")
        brgi_db["Location"] = ags3_hole_to_brgi_location(ags3_db["HOLE"], project_uid)  # type: ignore
        del ags3_db["HOLE"]
    else:
        print(
            "Your AGS 3 data doesn't contain a HOLE group, i.e. Ground Investigation locations."
        )

    # Samples
    if "SAMP" in ags3_db.keys():
        print("Transforming AGS 3 group 'SAMP' to Bedrock GI 'Sample' table...")
        check_foreign_key("HOLE_ID", brgi_db["Location"], ags3_db["SAMP"])
        ags3_db["SAMP"] = generate_sample_ids_for_ags3(ags3_db["SAMP"])  # type: ignore
        brgi_db["Sample"] = ags3_samp_to_brgi_sample(ags3_db["SAMP"], project_uid)  # type: ignore
        del ags3_db["SAMP"]
    else:
        print("Your AGS 3 data doesn't contain a SAMP group, i.e. samples.")

    # The rest of the tables: 1. Lab Tests 2. In-Situ Measurements 3. Other tables
    for group, group_df in ags3_db.items():
        # SAMP_REF marks sample-linked (lab test) data: passed through as-is.
        if "SAMP_REF" in ags3_db[group].columns:
            print(f"Project {project_uid} has lab test data: {group}.")
            brgi_db[group] = group_df  # type: ignore
        # HOLE_ID (without SAMP_REF) marks in-situ measurement data.
        elif "HOLE_ID" in ags3_db[group].columns:
            print(
                f"Transforming AGS 3 group '{group}' to Bedrock GI 'InSitu_{group}' table..."
            )
            check_foreign_key("HOLE_ID", brgi_db["Location"], group_df)
            brgi_db[f"InSitu_{group}"] = ags3_in_situ_to_brgi_in_situ(  # type: ignore
                group, group_df, project_uid
            )
        else:
            # Everything else (e.g. dictionary/abbreviation groups) is kept unchanged.
            brgi_db[group] = ags3_db[group]  # type: ignore

    print(
        "Done",
        "The Bedrock database contains the following tables:",
        list(brgi_db.keys()),
        sep="\n",
        end="\n\n",
    )
    return brgi_db  # type: ignore
76
+
77
+
78
@pa.check_types(lazy=True)
def ags_proj_to_brgi_project(ags_proj: pd.DataFrame, crs: CRS) -> DataFrame[Project]:
    """Maps the AGS 3 'PROJ' group to a Bedrock GI 'Project' table.

    Args:
        ags_proj (pd.DataFrame): The AGS 3 'PROJ' group.
        crs (CRS): The coordinate reference system of the project.

    Returns:
        DataFrame[Project]: The Bedrock GI 'Project' table, with 'project_uid'
            (defaulting to 'PROJ_ID') and 'crs_wkt' columns.
    """
    # Work on a copy so the caller's DataFrame isn't mutated as a side effect
    # (the original version added columns to `ags_proj` in place).
    ags_proj = ags_proj.copy()

    if "project_uid" not in ags_proj.columns:
        ags_proj["project_uid"] = ags_proj["PROJ_ID"]

    ags_proj["crs_wkt"] = crs.to_wkt()

    return ags_proj  # type: ignore
95
+
96
+
97
@pa.check_types(lazy=True)
def ags3_hole_to_brgi_location(
    ags3_hole: DataFrame[Ags3HOLE], project_uid: str
) -> DataFrame[BaseLocation]:
    """Maps the AGS 3 'HOLE' group to a Bedrock GI 'Location' table.

    NOTE: `brgi_location` aliases `ags3_hole`, so the new columns are added to
    the input DataFrame in place (same as the other AGS 3 mapping functions).

    Args:
        ags3_hole (DataFrame[Ags3HOLE]): The AGS 3 'HOLE' group.
        project_uid (str): Unique identifier of the project the holes belong to.

    Returns:
        DataFrame[BaseLocation]: The Bedrock GI 'Location' table.
    """
    brgi_location = ags3_hole
    brgi_location["project_uid"] = project_uid
    brgi_location["location_source_id"] = ags3_hole["HOLE_ID"]
    # location_uid is globally unique: "<HOLE_ID>_<project_uid>".
    brgi_location["location_uid"] = (
        ags3_hole["HOLE_ID"] + "_" + ags3_hole["project_uid"]
    )
    brgi_location["location_type"] = ags3_hole["HOLE_TYPE"]
    brgi_location["easting"] = ags3_hole["HOLE_NATE"]
    brgi_location["northing"] = ags3_hole["HOLE_NATN"]
    brgi_location["ground_level_elevation"] = ags3_hole["HOLE_GL"]
    brgi_location["depth_to_base"] = ags3_hole["HOLE_FDEP"]

    # Return the mapped table under its Bedrock name, consistent with the
    # sibling mapping functions (the original returned `ags3_hole`, which is
    # the very same object).
    return brgi_location  # type: ignore
114
+
115
+
116
@pa.check_types(lazy=True)
def ags3_samp_to_brgi_sample(
    ags3_samp: DataFrame[Ags3SAMP],
    project_uid: str,
) -> DataFrame[BaseSample]:
    """Maps the AGS 3 'SAMP' group to a Bedrock GI 'Sample' table.

    Requires the 'sample_id' column produced by `generate_sample_ids_for_ags3`.
    NOTE: `brgi_sample` aliases `ags3_samp`, so the new columns are added to
    the input DataFrame in place.

    Args:
        ags3_samp (DataFrame[Ags3SAMP]): The AGS 3 'SAMP' group, including 'sample_id'.
        project_uid (str): Unique identifier of the project the samples belong to.

    Returns:
        DataFrame[BaseSample]: The Bedrock GI 'Sample' table.
    """
    brgi_sample = ags3_samp
    brgi_sample["project_uid"] = project_uid
    brgi_sample["location_source_id"] = ags3_samp["HOLE_ID"]
    # location_uid / sample_uid are made globally unique by appending project_uid.
    brgi_sample["location_uid"] = ags3_samp["HOLE_ID"] + "_" + ags3_samp["project_uid"]
    brgi_sample["sample_source_id"] = ags3_samp["sample_id"]
    brgi_sample["sample_uid"] = ags3_samp["sample_id"] + "_" + ags3_samp["project_uid"]
    brgi_sample["depth_to_top"] = ags3_samp["SAMP_TOP"]
    brgi_sample["depth_to_base"] = ags3_samp["SAMP_BASE"]

    return brgi_sample  # type: ignore
131
+
132
+
133
@pa.check_types(lazy=True)
def ags3_in_situ_to_brgi_in_situ(
    group_name: str, ags3_in_situ: pd.DataFrame, project_uid: str
) -> DataFrame[BaseInSitu]:
    """Transform, i.e. map, AGS 3 in-situ measurement data to Bedrock's in-situ data schema.

    Args:
        group_name (str): The AGS 3 group name.
        ags3_in_situ (pd.DataFrame): The AGS 3 in-situ measurement data.
        project_uid (str): The project uid.

    Returns:
        DataFrame[BaseInSitu]: The Bedrock in-situ data.
    """
    brgi_in_situ = ags3_in_situ
    brgi_in_situ["project_uid"] = project_uid
    brgi_in_situ["location_uid"] = ags3_in_situ["HOLE_ID"] + "_" + project_uid

    # Default AGS 3 depth headings are "<GROUP>_TOP" / "<GROUP>_BASE";
    # the table below overrides them for groups that deviate from that
    # convention (None means "keep the default").
    top_depth = f"{group_name}_TOP"
    base_depth = f"{group_name}_BASE"

    depth_heading_overrides = {
        "CDIA": ("CDIA_CDEP", None),
        "FLSH": ("FLSH_FROM", "FLSH_TO"),
        "CORE": (None, "CORE_BOT"),
        "HDIA": ("HDIA_HDEP", None),
        "PTIM": ("PTIM_DEP", None),
        "IVAN": ("IVAN_DPTH", None),
        "STCN": ("STCN_DPTH", None),
        "POBS": ("PREF_TDEP", None),
        "PREF": ("PREF_TDEP", None),
        "DREM": ("DREM_DPTH", None),
        "PRTD": ("PRTD_DPTH", None),
        "PRTG": ("PRTD_DPTH", None),
        "PRTL": ("PRTD_DPTH", None),
    }

    if group_name in depth_heading_overrides:
        top_override, base_override = depth_heading_overrides[group_name]
        if top_override is not None:
            top_depth = top_override
        if base_override is not None:
            base_depth = base_override
    elif group_name == "IPRM":
        # IPRM files sometimes lack IPRM_TOP; fall back to treating
        # IPRM_BASE as the top depth (and no base depth).
        if top_depth not in ags3_in_situ.columns:
            print(
                "\n🚨 CAUTION: The IPRM group in this AGS 3 file does not contain a 'IPRM_TOP' heading!",
                "🚨 CAUTION: Making the 'IPRM_BASE' heading the 'depth_to_top'...",
                sep="\n",
                end="\n\n",
            )
            top_depth = "IPRM_BASE"
            base_depth = "None"

    brgi_in_situ["depth_to_top"] = ags3_in_situ[top_depth]
    # .get() returns None when the base-depth column is absent.
    brgi_in_situ["depth_to_base"] = ags3_in_situ.get(base_depth)

    return brgi_in_situ  # type: ignore
190
+
191
+
192
@pa.check_types(lazy=True)
def generate_sample_ids_for_ags3(
    ags3_with_samp: DataFrame[BaseSAMP],
) -> DataFrame[Ags3SAMP]:
    """Adds a unique 'sample_id' column to an AGS 3 'SAMP' DataFrame.

    AGS 3 has no single unique sample identifier, so one is built as
    'sample_id' = '{SAMP_REF}_{SAMP_TYPE}_{SAMP_TOP}_{HOLE_ID}'
    (SAMP_REF alone is apparently not reliably unique/non-null).
    The column is added to `ags3_with_samp` in place.

    Args:
        ags3_with_samp (DataFrame[BaseSAMP]): The AGS 3 'SAMP' group.

    Returns:
        DataFrame[Ags3SAMP]: The same DataFrame with the 'sample_id' column added.
    """
    # Removed a large block of commented-out alternative ID-generation code;
    # see version control history if the SAMP_REF-based scheme is revisited.
    ags3_with_samp["sample_id"] = (
        ags3_with_samp["SAMP_REF"].astype(str)
        + "_"
        + ags3_with_samp["SAMP_TYPE"].astype(str)
        + "_"
        + ags3_with_samp["SAMP_TOP"].astype(str)
        + "_"
        + ags3_with_samp["HOLE_ID"].astype(str)
    )

    return ags3_with_samp  # type: ignore