bedrock-ge 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
bedrock_ge/gi/ags/read.py DELETED
@@ -1,190 +0,0 @@
- import io
- from typing import Any, Dict, List, Union
-
- import pandas as pd
- from python_ags4 import AGS4
-
- from bedrock_ge.gi.ags.validate import check_ags_proj_group
-
-
- def ags_to_dfs(ags_data: str) -> Dict[str, pd.DataFrame]:
-     """Converts AGS 3 or AGS 4 data to a dictionary of pandas DataFrames.
-
-     Args:
-         ags_data (str): The AGS data as a string.
-
-     Raises:
-         ValueError: If the data does not match AGS 3 or AGS 4 format.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary where keys represent AGS group
-             names, with corresponding DataFrames for each group's data.
-     """
-     # Process each line to find the AGS version and delegate parsing
-     for line in ags_data.splitlines():
-         stripped_line = line.strip()  # Remove leading/trailing whitespace
-         if stripped_line:  # Skip empty lines at the start of the file
-             if stripped_line.startswith('"**'):
-                 ags_version = 3
-                 ags_dfs = ags3_to_dfs(ags_data)
-                 break
-             elif stripped_line.startswith('"GROUP"'):
-                 ags_version = 4
-                 ags_dfs = ags4_to_dfs(ags_data)
-                 break
-             else:
-                 # The first non-empty line matches neither the AGS 3 nor the AGS 4 format
-                 raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")
-
-     is_proj_group_correct = check_ags_proj_group(ags_dfs["PROJ"])
-     if is_proj_group_correct:
-         project_id = ags_dfs["PROJ"]["PROJ_ID"].iloc[0]
-         print(
-             f"AGS {ags_version} data was read for Project {project_id}",
-             "This Ground Investigation data contains groups:",
-             list(ags_dfs.keys()),
-             sep="\n",
-             end="\n\n",
-         )
-
-     return ags_dfs
-
-
- def ags3_to_dfs(ags3_data: str) -> Dict[str, pd.DataFrame]:
-     """Converts AGS 3 data to a dictionary of pandas DataFrames.
-
-     Args:
-         ags3_data (str): The AGS 3 data as a string.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each
-             key is a group name from the AGS 3 data, and the corresponding value
-             is a pandas DataFrame containing the data for that group.
-     """
-     # Initialize dictionary and variables used in the AGS 3 read loop
-     ags3_dfs = {}
-     line_type = "line_0"
-     group = ""
-     headers: List[str] = ["", "", ""]
-     group_data: List[List[Any]] = [[], [], []]
-
-     for i, line in enumerate(ags3_data.splitlines()):
-         last_line_type = line_type
-
-         # In AGS 3.1 group names are prefixed with **
-         if line.startswith('"**'):
-             line_type = "group_name"
-             if group:
-                 ags3_dfs[group] = pd.DataFrame(group_data, columns=headers)
-
-             group = line.strip(' ,"*')
-             group_data = []
-
-         # In AGS 3 header names are prefixed with "*
-         elif line.startswith('"*'):
-             line_type = "headers"
-             new_headers = line.split('","')
-             new_headers = [h.strip(' ,"*') for h in new_headers]
-
-             # Some groups have so many headers that they span multiple lines.
-             # Therefore we need to check whether the new headers are
-             # a continuation of the previous headers from the last line.
-             if line_type == last_line_type:
-                 headers = headers + new_headers
-             else:
-                 headers = new_headers
-
-         # Skip lines where group units are defined; these are defined in the AGS 3 data dictionary.
-         elif line.startswith('"<UNITS>"'):
-             line_type = "units"
-             continue
-
-         # The rest of the lines contain:
-         # 1. GI data
-         # 2. a continuation of the previous line, marked by "<CONT>" in the first column
-         # 3. empty lines or worthless data
-         else:
-             line_type = "data_row"
-             data_row = line.split('","')
-             if len("".join(data_row)) == 0:
-                 # print(f"Line {i} is empty. Last Group: {group}")
-                 continue
-             elif len(data_row) != len(headers):
-                 print(
-                     f"\n🚨 CAUTION: The number of columns on line {i + 1} ({len(data_row)}) doesn't match the number of columns of group {group} ({len(headers)})!",
-                     f"{group} headers: {headers}",
-                     f"Line {i + 1}: {data_row}",
-                     sep="\n",
-                     end="\n\n",
-                 )
-                 continue
-             # Append continued lines (<CONT>) to the last data_row
-             elif data_row[0] == '"<CONT>':
-                 last_data_row = group_data[-1]
-                 for j, data in enumerate(data_row):
-                     data = data.strip(' "')
-                     if data and data != "<CONT>":
-                         if last_data_row[j] is None:
-                             # Last data row didn't contain data for this column
-                             last_data_row[j] = coerce_string(data)
-                         else:
-                             # Last data row already contains data for this column
-                             last_data_row[j] = str(last_data_row[j]) + data
-             # Lines that are assumed to contain valid data are added to the group data
-             else:
-                 cleaned_data_row = []
-                 for data in data_row:
-                     cleaned_data_row.append(coerce_string(data.strip(' "')))
-                 group_data.append(cleaned_data_row)
-
-     # Also add the last group's df to the dictionary of AGS dfs
-     ags3_dfs[group] = pd.DataFrame(group_data, columns=headers).dropna(
-         axis=1, how="all"
-     )
-
-     if not group:
-         print(
-             '🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
-         )
-
-     return ags3_dfs
-
-
- def ags4_to_dfs(ags4_data: str) -> Dict[str, pd.DataFrame]:
-     """Converts AGS 4 data to a dictionary of pandas DataFrames.
-
-     Args:
-         ags4_data (str): The AGS 4 data as a string.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each
-             key is a group name from the AGS 4 data, and the corresponding value
-             is a pandas DataFrame containing the data for that group.
-     """
-     # AGS4.AGS4_to_dataframe accepts a file object, not a data string
-     ags4_file = io.StringIO(ags4_data)
-
-     ags4_tups = AGS4.AGS4_to_dataframe(ags4_file)
-
-     ags4_dfs = {}
-     for group, df in ags4_tups[0].items():
-         df = df.loc[2:].drop(columns=["HEADING"]).reset_index(drop=True)
-         ags4_dfs[group] = df
-
-     return ags4_dfs
-
-
- def coerce_string(string: str) -> Union[None, bool, float, str]:
-     if string.lower() in {"none", "null", ""}:
-         return None
-     elif string.lower() == "true":
-         return True
-     elif string.lower() == "false":
-         return False
-     else:
-         try:
-             value = float(string)
-             if value.is_integer():
-                 return int(value)
-             else:
-                 return value
-         except ValueError:
-             return string
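
For orientation, a minimal sketch of how this removed reader was used. The file name example.ags and the expected group names are illustrative assumptions, not taken from the package:

    from pathlib import Path

    # Read an AGS file from disk; "example.ags" is an assumed, illustrative path.
    ags_text = Path("example.ags").read_text(encoding="utf-8")
    ags_dfs = ags_to_dfs(ags_text)  # e.g. {"PROJ": ..., "HOLE": ..., "SAMP": ...}

    # coerce_string() turned AGS string values into Python types:
    assert coerce_string("1.0") == 1      # whole-number floats become ints
    assert coerce_string("2.5") == 2.5    # other numbers become floats
    assert coerce_string("TRUE") is True  # booleans are recognized case-insensitively
    assert coerce_string("") is None      # "", "none" and "null" become None
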
@@ -1,264 +0,0 @@
- """Transforms, i.e. maps, AGS data to Bedrock's schema."""
-
- from typing import Dict
-
- import pandas as pd
- import pandera as pa
- from pandera.typing import DataFrame
- from pyproj import CRS
-
- from bedrock_ge.gi.ags.schemas import Ags3HOLE, Ags3SAMP, BaseSAMP
- from bedrock_ge.gi.schemas import BaseInSitu, BaseLocation, BaseSample, Project
- from bedrock_ge.gi.validate import check_foreign_key
-
-
- # What this function really does is add the CRS and Bedrock columns:
- # - `project_uid`
- # - `location_uid`
- # - `sample_id`
- # - `sample_uid`
- # - `depth_to_`
- # There really isn't any mapping going on here...
- # TODO: Make sure that the name of the function and docstrings reflect this.
- def ags3_db_to_no_gis_brgi_db(
-     ags3_db: Dict[str, pd.DataFrame], crs: CRS
- ) -> Dict[str, pd.DataFrame]:
-     """Maps a database with GI data from a single AGS 3 file to a database with Bedrock's schema.
-
-     This function converts an AGS 3 formatted geotechnical database into Bedrock's
-     internal database format, maintaining data relationships and structure. It handles
-     various types of geotechnical data, including project information, locations,
-     samples, lab tests, and in-situ measurements.
-
-     The mapping process:
-     1. Project Data: Converts the AGS 3 'PROJ' group to Bedrock's 'Project' table
-     2. Location Data: Converts the AGS 3 'HOLE' group to Bedrock's 'Location' table
-     3. Sample Data: Converts the AGS 3 'SAMP' group to Bedrock's 'Sample' table
-     4. Other Data: Handles lab tests, in-situ measurements, and miscellaneous tables
-
-     Args:
-         ags3_db (Dict[str, pd.DataFrame]): A dictionary containing AGS 3 data tables,
-             where keys are table names and values are pandas DataFrames.
-         crs (CRS): Coordinate Reference System for the project data.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary containing Bedrock GI database tables,
-             where keys are table names and values are transformed pandas DataFrames.
-
-     Note:
-         The function creates a copy of the input database to avoid modifying the
-         original data. It performs foreign key checks to maintain data integrity
-         during the mapping.
-     """
-     # Make sure that the AGS 3 database is not changed outside this function.
-     ags3_db = ags3_db.copy()
-
-     print("Transforming AGS 3 groups to Bedrock tables...")
-
-     # Instantiate Bedrock dictionary of pd.DataFrames
-     brgi_db = {}
-
-     # Project
-     print("Transforming AGS 3 group 'PROJ' to Bedrock GI 'Project' table...")
-     brgi_db["Project"] = ags_proj_to_brgi_project(ags3_db["PROJ"], crs)
-     project_uid = brgi_db["Project"]["project_uid"].item()
-     del ags3_db["PROJ"]
-
-     # Locations
-     if "HOLE" in ags3_db.keys():
-         print("Transforming AGS 3 group 'HOLE' to Bedrock GI 'Location' table...")
-         brgi_db["Location"] = ags3_hole_to_brgi_location(ags3_db["HOLE"], project_uid)  # type: ignore
-         del ags3_db["HOLE"]
-     else:
-         print(
-             "Your AGS 3 data doesn't contain a HOLE group, i.e. Ground Investigation locations."
-         )
-
-     # Samples
-     if "SAMP" in ags3_db.keys():
-         print("Transforming AGS 3 group 'SAMP' to Bedrock GI 'Sample' table...")
-         check_foreign_key("HOLE_ID", brgi_db["Location"], ags3_db["SAMP"])
-         ags3_db["SAMP"] = generate_sample_ids_for_ags3(ags3_db["SAMP"])  # type: ignore
-         brgi_db["Sample"] = ags3_samp_to_brgi_sample(ags3_db["SAMP"], project_uid)  # type: ignore
-         del ags3_db["SAMP"]
-     else:
-         print("Your AGS 3 data doesn't contain a SAMP group, i.e. samples.")
-
-     # The rest of the tables: 1. Lab Tests, 2. In-Situ Measurements, 3. Other tables
-     for group, group_df in ags3_db.items():
-         if "SAMP_REF" in ags3_db[group].columns:
-             print(f"Project {project_uid} has lab test data: {group}.")
-             brgi_db[group] = group_df  # type: ignore
-         elif "HOLE_ID" in ags3_db[group].columns:
-             print(
-                 f"Transforming AGS 3 group '{group}' to Bedrock GI 'InSitu_{group}' table..."
-             )
-             check_foreign_key("HOLE_ID", brgi_db["Location"], group_df)
-             brgi_db[f"InSitu_{group}"] = ags3_in_situ_to_brgi_in_situ(  # type: ignore
-                 group, group_df, project_uid
-             )
-         else:
-             brgi_db[group] = ags3_db[group]  # type: ignore
-
-     print(
-         "Done",
-         "The Bedrock database contains the following tables:",
-         list(brgi_db.keys()),
-         sep="\n",
-         end="\n\n",
-     )
-     return brgi_db  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags_proj_to_brgi_project(ags_proj: pd.DataFrame, crs: CRS) -> DataFrame[Project]:
-     """Maps the AGS 3 'PROJ' group to a Bedrock GI 'Project' table.
-
-     Args:
-         ags_proj (pd.DataFrame): The AGS 3 'PROJ' group.
-         crs (CRS): The coordinate reference system of the project.
-
-     Returns:
-         DataFrame[Project]: The Bedrock GI 'Project' table.
-     """
-     if "project_uid" not in ags_proj.columns:
-         ags_proj["project_uid"] = ags_proj["PROJ_ID"]
-
-     ags_proj["crs_wkt"] = crs.to_wkt()
-
-     return ags_proj  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags3_hole_to_brgi_location(
-     ags3_hole: DataFrame[Ags3HOLE], project_uid: str
- ) -> DataFrame[BaseLocation]:
-     brgi_location = ags3_hole
-     brgi_location["project_uid"] = project_uid
-     brgi_location["location_source_id"] = ags3_hole["HOLE_ID"]
-     brgi_location["location_uid"] = (
-         ags3_hole["HOLE_ID"] + "_" + ags3_hole["project_uid"]
-     )
-     brgi_location["location_type"] = ags3_hole["HOLE_TYPE"]
-     brgi_location["easting"] = ags3_hole["HOLE_NATE"]
-     brgi_location["northing"] = ags3_hole["HOLE_NATN"]
-     brgi_location["ground_level_elevation"] = ags3_hole["HOLE_GL"]
-     brgi_location["depth_to_base"] = ags3_hole["HOLE_FDEP"]
-
-     return ags3_hole  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags3_samp_to_brgi_sample(
-     ags3_samp: DataFrame[Ags3SAMP],
-     project_uid: str,
- ) -> DataFrame[BaseSample]:
-     brgi_sample = ags3_samp
-     brgi_sample["project_uid"] = project_uid
-     brgi_sample["location_source_id"] = ags3_samp["HOLE_ID"]
-     brgi_sample["location_uid"] = ags3_samp["HOLE_ID"] + "_" + ags3_samp["project_uid"]
-     brgi_sample["sample_source_id"] = ags3_samp["sample_id"]
-     brgi_sample["sample_uid"] = ags3_samp["sample_id"] + "_" + ags3_samp["project_uid"]
-     brgi_sample["depth_to_top"] = ags3_samp["SAMP_TOP"]
-     brgi_sample["depth_to_base"] = ags3_samp["SAMP_BASE"]
-
-     return brgi_sample  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags3_in_situ_to_brgi_in_situ(
-     group_name: str, ags3_in_situ: pd.DataFrame, project_uid: str
- ) -> DataFrame[BaseInSitu]:
-     """Maps AGS 3 in-situ measurement data to Bedrock's in-situ data schema.
-
-     Args:
-         group_name (str): The AGS 3 group name.
-         ags3_in_situ (pd.DataFrame): The AGS 3 in-situ data.
-         project_uid (str): The project uid.
-
-     Returns:
-         DataFrame[BaseInSitu]: The Bedrock in-situ data.
-     """
-     brgi_in_situ = ags3_in_situ
-     brgi_in_situ["project_uid"] = project_uid
-     brgi_in_situ["location_uid"] = ags3_in_situ["HOLE_ID"] + "_" + project_uid
-
-     top_depth = f"{group_name}_TOP"
-     base_depth = f"{group_name}_BASE"
-
-     if group_name == "CDIA":
-         top_depth = "CDIA_CDEP"
-     elif group_name == "FLSH":
-         top_depth = "FLSH_FROM"
-         base_depth = "FLSH_TO"
-     elif group_name == "CORE":
-         base_depth = "CORE_BOT"
-     elif group_name == "HDIA":
-         top_depth = "HDIA_HDEP"
-     elif group_name == "PTIM":
-         top_depth = "PTIM_DEP"
-     elif group_name == "IVAN":
-         top_depth = "IVAN_DPTH"
-     elif group_name == "STCN":
-         top_depth = "STCN_DPTH"
-     elif group_name == "POBS" or group_name == "PREF":
-         top_depth = "PREF_TDEP"
-     elif group_name == "DREM":
-         top_depth = "DREM_DPTH"
-     elif group_name == "PRTD" or group_name == "PRTG" or group_name == "PRTL":
-         top_depth = "PRTD_DPTH"
-     elif group_name == "IPRM":
-         if top_depth not in ags3_in_situ.columns:
-             print(
-                 "\n🚨 CAUTION: The IPRM group in this AGS 3 file does not contain an 'IPRM_TOP' heading!",
-                 "🚨 CAUTION: Making the 'IPRM_BASE' heading the 'depth_to_top'...",
-                 sep="\n",
-                 end="\n\n",
-             )
-             top_depth = "IPRM_BASE"
-             base_depth = "None"
-
-     brgi_in_situ["depth_to_top"] = ags3_in_situ[top_depth]
-     brgi_in_situ["depth_to_base"] = ags3_in_situ.get(base_depth)
-
-     return brgi_in_situ  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def generate_sample_ids_for_ags3(
-     ags3_with_samp: DataFrame[BaseSAMP],
- ) -> DataFrame[Ags3SAMP]:
-     ags3_with_samp["sample_id"] = (
-         ags3_with_samp["SAMP_REF"].astype(str)
-         + "_"
-         + ags3_with_samp["SAMP_TYPE"].astype(str)
-         + "_"
-         + ags3_with_samp["SAMP_TOP"].astype(str)
-         + "_"
-         + ags3_with_samp["HOLE_ID"].astype(str)
-     )
-     # try:
-     #     # SAMP_REF really should not be able to be null... Right?
-     #     # Maybe SAMP_REF can be null when the
-     #     Ags3SAMP_REF.validate(ags3_samp)
-     #     print(
-     #         "Generating unique sample IDs for AGS 3 data: 'sample_id'='{SAMP_REF}_{HOLE_ID}'"
-     #     )
-     #     ags3_samp["sample_id"] = (
-     #         ags3_samp["SAMP_REF"].astype(str) + "_" + ags3_samp["HOLE_ID"].astype(str)
-     #     )
-     # except pa.errors.SchemaError as exc:
-     #     print(f"🚨 CAUTION: The AGS 3 SAMP group contains rows without SAMP_REF:\n{exc}")
-
-     #     if "non-nullable series 'SAMP_REF'" in str(exc):
-     #         print(
-     #             "\nTo ensure unique sample IDs: 'sample_id'='{SAMP_REF}_{SAMP_TOP}_{HOLE_ID}'\n"
-     #         )
-     #         ags3_samp["sample_id"] = (
-     #             ags3_samp["SAMP_REF"].astype(str)
-     #             + "_"
-     #             + ags3_samp["SAMP_TOP"].astype(str)
-     #             + "_"
-     #             + ags3_samp["HOLE_ID"].astype(str)
-     #         )
-
-     return ags3_with_samp  # type: ignore
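
A sketch of how this removed mapping function fit together with the reader above. The EPSG code 27700 (British National Grid) is an assumed example, and ags_text is the AGS 3 string from the previous sketch:

    from pyproj import CRS

    # Parse AGS 3 groups, then map them to Bedrock's (non-GIS) schema.
    ags3_dfs = ags_to_dfs(ags_text)
    brgi_db = ags3_db_to_no_gis_brgi_db(ags3_dfs, crs=CRS.from_epsg(27700))

    # Typical resulting keys: "Project", "Location", "Sample", "InSitu_<GROUP>", ...
    print(list(brgi_db.keys()))
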
@@ -1,25 +0,0 @@
- import pandas as pd
-
-
- def check_ags_proj_group(ags_proj: pd.DataFrame) -> bool:
-     """Checks whether the AGS 3 or AGS 4 PROJ group is correct.
-
-     Args:
-         ags_proj (pd.DataFrame): The DataFrame with the PROJ group.
-
-     Raises:
-         ValueError: If the AGS 3 or AGS 4 PROJ group is not correct.
-
-     Returns:
-         bool: True if the AGS 3 or AGS 4 PROJ group is correct.
-     """
-     if len(ags_proj) != 1:
-         raise ValueError("The PROJ group must contain exactly one row.")
-
-     project_id = ags_proj["PROJ_ID"].iloc[0]
-     if not project_id:
-         raise ValueError(
-             'The project ID ("PROJ_ID" in the "PROJ" group) is missing from the AGS data.'
-         )
-
-     return True
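
The removed validator had a small contract, sketched below with toy DataFrames (illustrative data, not from the package):

    import pandas as pd

    # A single row with a non-empty PROJ_ID passes:
    assert check_ags_proj_group(pd.DataFrame({"PROJ_ID": ["P001"]})) is True

    # More than one row (or a missing PROJ_ID) raises ValueError:
    try:
        check_ags_proj_group(pd.DataFrame({"PROJ_ID": ["P001", "P002"]}))
    except ValueError as err:
        print(err)  # The PROJ group must contain exactly one row.
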
@@ -1,36 +0,0 @@
- {
-     "Location": {
-         "attributes": {},
-         "geometry_type": "Point / 3D LineString",
-         "children": {
-             "MaterialClassification": {
-                 "attributes": {},
-                 "geometry_type": "3D LineString"
-             },
-             "SPT": {
-                 "attributes": {},
-                 "geometry_type": "3D Point"
-             },
-             "RQD": {
-                 "attributes": {},
-                 "geometry_type": "3D LineString"
-             },
-             "OtherInSituTests": {
-                 "attributes": {},
-                 "geometry_type": "3D Point or 3D LineString"
-             },
-             "Sample": {
-                 "attributes": {},
-                 "geometry_type": "3D Point",
-                 "children": {
-                     "grainSizeDistribution": {},
-                     "atterbergLimits": {},
-                     "oedometerTest": {},
-                     "triaxialTest": {},
-                     "unconfinedCompressiveStrength": {},
-                     "otherLabTests": {}
-                 }
-             }
-         }
-     }
- }
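
This removed JSON file encodes the parent-child hierarchy of Bedrock GI tables and their geometry types. A sketch of walking that hierarchy; the file name brgi_hierarchy.json is an assumption, only the structure above is given:

    import json

    def walk(node: dict, depth: int = 0) -> None:
        # Print each table name with its geometry type, indented by nesting depth.
        for name, spec in node.items():
            geometry = spec.get("geometry_type", "-") if isinstance(spec, dict) else "-"
            print("  " * depth + f"{name}: {geometry}")
            if isinstance(spec, dict):
                walk(spec.get("children", {}), depth + 1)

    with open("brgi_hierarchy.json") as file:  # assumed file name
        walk(json.load(file))
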
@@ -1,38 +0,0 @@
- from typing import Dict, Union
-
- import geopandas as gpd
- import pandas as pd
-
-
- def concatenate_databases(
-     db1: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
-     db2: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
- ) -> Dict[str, pd.DataFrame]:
-     """Concatenates two dictionaries of DataFrames into one dict of DataFrames.
-
-     The function concatenates the pandas DataFrames of the second dict of
-     DataFrames to the first dict of DataFrames for the keys they have in common.
-     Keys that are unique to either dictionary are also included in the final
-     concatenated dictionary.
-
-     Args:
-         db1 (Dict[str, pd.DataFrame]): A dictionary of pandas DataFrames, i.e. a database.
-         db2 (Dict[str, pd.DataFrame]): A dictionary of pandas DataFrames, i.e. a database.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary of concatenated pandas DataFrames.
-     """
-     # Create a new dict to store the concatenated DataFrames
-     concatenated_dict = {key: df.dropna(axis=1, how="all") for key, df in db1.items()}
-
-     # Iterate over the keys in the second dict
-     for key, df in db2.items():
-         df = df.dropna(axis=1, how="all")
-         # If the key is also in the first dict, concatenate the DataFrames,
-         # using the already-cleaned copy so all-NaN columns stay dropped
-         if key in db1:
-             concatenated_dict[key] = pd.concat(
-                 [concatenated_dict[key], df], ignore_index=True
-             )
-         # If the key is not in the first dict, just add it to the new dict
-         else:
-             concatenated_dict[key] = df
-
-     return concatenated_dict
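
A sketch of the removed concatenation helper in use, with toy single-column DataFrames (illustrative data):

    import pandas as pd

    db1 = {"Location": pd.DataFrame({"location_uid": ["BH1_P1"]})}
    db2 = {
        "Location": pd.DataFrame({"location_uid": ["BH2_P2"]}),
        "Sample": pd.DataFrame({"sample_uid": ["S1_P2"]}),
    }

    merged = concatenate_databases(db1, db2)
    # merged["Location"] now has two rows; merged["Sample"] is taken from db2 as-is.
    print(merged["Location"])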