bedrock-ge 0.2.4__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bedrock_ge/gi/validate.py CHANGED
@@ -1,151 +1,88 @@
-from typing import Dict, Union
-
 import geopandas as gpd  # type: ignore
 import pandas as pd
 
 from bedrock_ge.gi.schemas import (
-    BaseInSitu,
-    BaseLocation,
-    BaseSample,
-    InSitu,
-    Location,
-    Project,
-    Sample,
+    BedrockGIDatabase,
+    BedrockGIGeospatialDatabase,
 )
 
 
-# TODO: rename to check_brgi_geodb
-# TODO: make this check actually work...
-def check_brgi_database(brgi_db: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]]):
-    """Validates the structure and relationships of a 'Bedrock Ground Investigation' (BRGI) database (which is a dictionary of DataFrames).
+def check_brgi_geodb(
+    brgi_geodb: BedrockGIGeospatialDatabase,
+):
+    """Validates the structure and relationships of a 'Bedrock Ground Investigation' (BrGI) geospatial database.
 
-    This function checks that all tables in the BRGI database conform to their respective schemas
+    This function checks that all tables in the BrGI geospatial database conform to their respective schemas
     and that all foreign key relationships are properly maintained. It validates the following tables:
     - Project
     - Location
+    - LonLatHeight
+    - All In-Situ test tables
     - Sample
-    - InSitu_TESTX
-    - Lab_TESTY (not yet implemented)
+    - All Lab test tables
 
     Args:
-        brgi_db (Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]]): A dictionary
-            containing the BRGI database tables, where keys are table names and
-            values are the corresponding data tables (DataFrame or GeoDataFrame).
+        brgi_geodb (BedrockGIGeospatialDatabase): A Bedrock GI geospatial database object.
 
     Returns:
         is_valid (bool): True if all tables are valid and relationships are properly maintained.
 
     Example:
         ```python
-        brgi_db = {
-            "Project": project_df,
-            "Location": location_gdf,
-            "Sample": sample_gdf,
-            "InSitu_ISPT": in_situ_ispt_gdf,
-        }
-        check_brgi_database(brgi_db)
+        brgi_geodb = BedrockGIGeospatialDatabase(
+            Project=project_df,
+            Location=location_geodf,
+            LonLatHeight=lon_lat_height_geodf,
+            InSituTests={"ISPT": ispt_geodf},
+            Sample=sample_geodf,
+            LabTests={"LLPL": llpl_df},
+        )
+        check_brgi_geodb(brgi_geodb)
         ```
     """
-    for table_name, table in brgi_db.items():
-        if table_name == "Project":
-            Project.validate(table)
-            print("'Project' table aligns with Bedrock's 'Project' table schema.")
-        elif table_name == "Location":
-            Location.validate(table)
-            check_foreign_key("project_uid", brgi_db["Project"], table)
-            print("'Location' table aligns with Bedrock's 'Location' table schema.")
-        elif table_name == "Sample":
-            Sample.validate(table)
-            check_foreign_key("project_uid", brgi_db["Project"], table)
-            check_foreign_key("location_uid", brgi_db["Location"], table)
-            print("'Sample' table aligns with Bedrock's 'Sample' table schema.")
-        # ! JG is pretty sure that this doesn't work
-        # ! The line below should be:
-        # ! elif table_name.startswith("InSitu_"):
-        elif table_name == "InSitu":
-            InSitu.validate(table)
-            check_foreign_key("project_uid", brgi_db["Project"], table)
-            check_foreign_key("location_uid", brgi_db["Location"], table)
-            print(
-                f"'{table_name}' table aligns with Bedrock's table schema for In-Situ measurements."
-            )
-        elif table_name.startswith("Lab_"):
-            print(
-                "🚨 !NOT IMPLEMENTED! We haven't come across Lab data yet. !NOT IMPLEMENTED!"
-            )
-
+    # TODO: implement this
     return True
 
 
-# TODO: rename to check_brgi_db
-def check_no_gis_brgi_database(
-    brgi_db: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
+def check_brgi_db(
+    brgi_db: BedrockGIDatabase,
 ):
-    """Validates the structure and relationships of a 'Bedrock Ground Investigation' (BGI) database without GIS geometry.
+    """Validates the structure and relationships of a 'Bedrock Ground Investigation' (BrGI) database.
 
-    This function performs the same validation as `check_brgi_database` but uses schemas
-    that don't require GIS geometry. It validates the following tables:
-    - Project (never has GIS geometry)
-    - Location (without GIS geometry)
-    - Sample (without GIS geometry)
-    - InSitu_TESTX (without GIS geometry)
-    - Lab_TESTY (not yet implemented)
+    This function performs the same validation as `check_brgi_geodb`, but uses schemas
+    that don't require geospatial geometry. It validates the following tables:
+    - Project (never has geospatial geometry)
+    - Location (without geospatial geometry)
+    - All In-Situ test tables (without geospatial geometry)
+    - Sample (without geospatial geometry)
+    - All Lab test tables (never have geospatial geometry)
 
     Args:
-        brgi_db (Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]]): A dictionary
-            containing the Bedrock GI database tables, where keys are table names
-            and values are the corresponding data tables (DataFrame or GeoDataFrame).
+        brgi_db (BedrockGIDatabase): A Bedrock GI database object.
 
     Returns:
         bool: True if all tables are valid and relationships are properly maintained.
 
     Example:
         ```python
-        brgi_db = {
-            "Project": projects_df,
-            "Location": locations_df,
-            "Sample": samples_df,
-            "InSitu_measurements": insitu_df,
-        }
-        check_no_gis_brgi_database(brgi_db)
+        brgi_db = BedrockGIDatabase(
+            Project=project_df,
+            Location=location_df,
+            InSituTests={"ISPT": ispt_df},
+            Sample=sample_df,
+            LabTests={"LLPL": llpl_df},
+        )
+        check_brgi_db(brgi_db)
         ```
     """
-    for table_name, table in brgi_db.items():
-        if table_name == "Project":
-            Project.validate(table)
-            print("'Project' table aligns with Bedrock's 'Project' table schema.")
-        elif table_name == "Location":
-            BaseLocation.validate(table)
-            check_foreign_key("project_uid", brgi_db["Project"], table)
-            print(
-                "'Location' table aligns with Bedrock's 'Location' table schema without GIS geometry."
-            )
-        elif table_name == "Sample":
-            BaseSample.validate(table)
-            check_foreign_key("project_uid", brgi_db["Project"], table)
-            check_foreign_key("location_uid", brgi_db["Location"], table)
-            print(
-                "'Sample' table aligns with Bedrock's 'Sample' table schema without GIS geometry."
-            )
-        elif table_name.startswith("InSitu_"):
-            BaseInSitu.validate(table)
-            check_foreign_key("project_uid", brgi_db["Project"], table)
-            check_foreign_key("location_uid", brgi_db["Location"], table)
-            print(
-                f"'{table_name}' table aligns with Bedrock's '{table_name}' table schema without GIS geometry."
-            )
-        elif table_name.startswith("Lab_"):
-            print(
-                "🚨 !NOT IMPLEMENTED! We haven't come across Lab data yet. !NOT IMPLEMENTED!"
-            )
-
+    # TODO: implement this
     return True
 
 
 def check_foreign_key(
     foreign_key: str,
-    parent_table: Union[pd.DataFrame, gpd.GeoDataFrame],
-    table_with_foreign_key: Union[pd.DataFrame, gpd.GeoDataFrame],
+    parent_table: pd.DataFrame | gpd.GeoDataFrame,
+    table_with_foreign_key: pd.DataFrame | gpd.GeoDataFrame,
 ) -> bool:
     """Validates referential integrity between two tables by checking foreign key relationships.
 
@@ -154,8 +91,8 @@ def check_foreign_key(
 
     Args:
         foreign_key (str): The name of the column that serves as the foreign key.
-        parent_table (Union[pd.DataFrame, gpd.GeoDataFrame]): The parent table containing the primary keys.
-        table_with_foreign_key (Union[pd.DataFrame, gpd.GeoDataFrame]): The child table containing the foreign keys.
+        parent_table (pd.DataFrame | gpd.GeoDataFrame): The parent table containing the primary keys.
+        table_with_foreign_key (pd.DataFrame | gpd.GeoDataFrame): The child table containing the foreign keys.
 
     Returns:
         bool: True if all foreign keys exist in the parent table.
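The body of `check_foreign_key` falls outside these hunks. As a point of reference, here is a minimal sketch of the referential-integrity check the docstring describes, built on a plain pandas membership test; the function name and error message are illustrative, not necessarily the package's actual implementation:

```python
import geopandas as gpd
import pandas as pd


def check_foreign_key_sketch(
    foreign_key: str,
    parent_table: pd.DataFrame | gpd.GeoDataFrame,
    table_with_foreign_key: pd.DataFrame | gpd.GeoDataFrame,
) -> bool:
    # Child rows whose foreign key doesn't occur among the parent table's
    # keys violate referential integrity.
    orphans = ~table_with_foreign_key[foreign_key].isin(parent_table[foreign_key])
    if orphans.any():
        raise ValueError(
            f"{orphans.sum()} row(s) reference a '{foreign_key}' that doesn't "
            "exist in the parent table."
        )
    return True


# How the TODO'd check_brgi_db might wire this up, assuming the attribute
# names shown in the BedrockGIDatabase docstring examples:
#   check_foreign_key_sketch("project_uid", brgi_db.Project, brgi_db.Location)
#   for test_name, test_df in brgi_db.InSituTests.items():
#       check_foreign_key_sketch("location_uid", brgi_db.Location, test_df)
```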
bedrock_ge/gi/write.py CHANGED
@@ -1,13 +1,44 @@
 from pathlib import Path
-from typing import Dict, Union
+from typing import Literal
 
 import geopandas as gpd
 import pandas as pd
 
+from bedrock_ge.gi.io_utils import brgi_db_to_dfs, geodf_to_df
+from bedrock_ge.gi.schemas import BedrockGIDatabase, BedrockGIGeospatialDatabase
+
+
+# ? Should this function be made a to_file(s) method of BedrockGIDatabase?
+def write_brgi_db_to_file(
+    brgi_db: BedrockGIDatabase | BedrockGIGeospatialDatabase,
+    path: str | Path,
+    driver: Literal["EXCEL", "GPKG"] = "GPKG",
+) -> None:
+    """Writes a Bedrock GI (geospatial) database to a file.
+
+    The file type is determined by the `driver` argument. Possible values are
+    "GPKG" and "EXCEL".
+
+    Args:
+        brgi_db (BedrockGIDatabase | BedrockGIGeospatialDatabase): The Bedrock GI (geospatial) database.
+        path (str | Path): The path of the output file.
+        driver (str): The type of the output file. Possible values are "GPKG"
+            and "EXCEL". Defaults to "GPKG".
+
+    Returns:
+        None
+    """
+    dict_of_dfs = brgi_db_to_dfs(brgi_db)
+    if driver.upper() == "GPKG":
+        write_gi_db_to_gpkg(dict_of_dfs, path)
+    elif driver.upper() == "EXCEL":
+        write_gi_db_to_excel(dict_of_dfs, path)
+    else:
+        raise ValueError(f"Invalid driver: {driver}")
+
 
 def write_gi_db_to_gpkg(
-    brgi_db: Dict[str, gpd.GeoDataFrame],
-    gpkg_path: Union[str, Path],
+    dict_of_dfs: dict[str, pd.DataFrame | gpd.GeoDataFrame],
+    gpkg_path: str | Path,
 ) -> None:
     """Writes a database with Bedrock Ground Investigation data to a GeoPackage file.
 
@@ -16,32 +47,28 @@ def write_gi_db_to_gpkg(
     separate table named by the keys of the dictionary.
 
     Args:
-        brgi_db (Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]]): A dictionary where
+        dict_of_dfs (dict[str, pd.DataFrame | gpd.GeoDataFrame]): A dictionary where
             keys are brgi table names and values are pandas DataFrames or GeoDataFrames
             with brgi data.
-        gpkg_path (str): The name of the output GeoPackage file.
+        gpkg_path (str | Path): The name of the output GeoPackage file.
 
     Returns:
         None
     """
     # Create a GeoDataFrame from the dictionary of DataFrames
-    for sheet_name, brgi_table in brgi_db.items():
-        sanitized_table_name = sanitize_table_name(sheet_name)
-
-        if isinstance(brgi_table, pd.DataFrame):
-            brgi_table = gpd.GeoDataFrame(brgi_table)
+    for table_name, df in dict_of_dfs.items():
+        sanitized_table_name = sanitize_table_name(table_name)
+        if isinstance(df, pd.DataFrame):
+            df = gpd.GeoDataFrame(df)
 
-        if isinstance(brgi_table, gpd.GeoDataFrame):
-            brgi_table.to_file(
-                gpkg_path, driver="GPKG", layer=sanitized_table_name, overwrite=True
-            )
+        df.to_file(gpkg_path, driver="GPKG", layer=sanitized_table_name, overwrite=True)
 
     print(f"Ground Investigation data has been written to '{gpkg_path}'.")
 
 
 def write_gi_db_to_excel(
-    gi_dfs: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
-    excel_path: Union[str, Path],
+    dict_of_dfs: dict[str, pd.DataFrame | gpd.GeoDataFrame],
+    excel_path: str | Path,
 ) -> None:
     """Writes a database with Ground Investigation data to an Excel file.
 
@@ -50,27 +77,27 @@ def write_gi_db_to_excel(
     AGS, Bedrock, or another format.
 
     Args:
-        gi_dfs (Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]]): A dictionary where
+        dict_of_dfs (dict[str, pd.DataFrame | gpd.GeoDataFrame]): A dictionary where
             keys are GI table names and values are DataFrames with GI data.
-        excel_path (Union[str, Path]): Path to the output Excel file. Can be provided as a
+        excel_path (str | Path): Path to the output Excel file. Can be provided as a
             string or Path object.
 
     Returns:
         None
     """
-    # Create an Excel writer object
     with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
-        for sheet_name, df in gi_dfs.items():
-            sanitized_sheet_name = sanitize_table_name(sheet_name)
-            if isinstance(df, pd.DataFrame) or isinstance(df, gpd.GeoDataFrame):
-                df.to_excel(writer, sheet_name=sanitized_sheet_name, index=False)
+        for sheet_name, df in dict_of_dfs.items():
+            sanitized_sheet_name = sanitize_table_name(sheet_name)[:31]
+            if isinstance(df, gpd.GeoDataFrame):
+                df = geodf_to_df(df)
+
+            df.to_excel(writer, sheet_name=sanitized_sheet_name, index=False)
 
     print(f"Ground Investigation data has been written to '{excel_path}'.")
 
 
-# TODO: Make the 31 character table name length truncation a separate function. Only necessary for Excel.
 def sanitize_table_name(sheet_name):
-    """Replaces invalid characters and spaces in GI table names with underscores and truncates to 31 characters.
+    """Replaces invalid characters and spaces in GI table names with underscores.
 
     Makes table names consistent with SQL, GeoPackage and Excel naming conventions by
     replacing invalid characters and spaces with underscores.
@@ -81,12 +108,8 @@ def sanitize_table_name(sheet_name):
     Returns:
         sanitized_name (str): A sanitized sheet name with invalid characters and spaces replaced.
     """
-    # Trim to a maximum length of 31 characters
-    trimmed_name = sheet_name.strip()[:31]
-
-    # Define invalid characters and replace with underscores
     invalid_chars = [":", "/", "\\", "?", "*", "[", "]"]
-    sanitized_name = trimmed_name
+    sanitized_name = sheet_name.strip()
     for char in invalid_chars:
         sanitized_name = sanitized_name.replace(char, "_")
 
@@ -96,16 +119,10 @@ def sanitize_table_name(sheet_name):
     # Collapse multiple underscores to one
     sanitized_name = "_".join(filter(None, sanitized_name.split("_")))
 
-    if trimmed_name != sanitized_name:
+    if sheet_name != sanitized_name:
         print(
             f"Table names shouldn't contain {invalid_chars} or spaces and shouldn't be longer than 31 characters.\n",
             f"Replaced '{sheet_name}' with '{sanitized_name}'.",
         )
 
-    # Ensure name isn't empty after sanitization
-    # ! "Table1" doesn't make a lot of sense?!? It could be that there are more than 1 table without a name...
-    if not sanitized_name:
-        sanitized_name = "Table1"
-        print("The table name was completely invalid or empty. Replaced with 'Table1'.")
-
     return sanitized_name
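A hedged illustration of the sanitization behavior (the space-to-underscore replacement mentioned in the docstring happens on lines outside these hunks); the sample names are made up:

```python
from bedrock_ge.gi.write import sanitize_table_name

# Invalid characters and spaces become underscores, and runs of
# underscores collapse to one (per the docstring and hunks above).
print(sanitize_table_name("InSitu: ISPT / 2024"))  # -> "InSitu_ISPT_2024"

# The 31-character Excel sheet-name limit is now applied at the call site
# in write_gi_db_to_excel rather than inside sanitize_table_name:
sheet_name = sanitize_table_name("A very long Ground Investigation table name")[:31]
```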
bedrock_ge-0.3.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bedrock-ge
-Version: 0.2.4
+Version: 0.3.1
 Summary: Bedrock's Python library for geotechnical engineering.
 Project-URL: Homepage, https://bedrock.engineer/
 Project-URL: Source, https://github.com/bedrock-engineer/bedrock-ge
@@ -17,14 +17,14 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Scientific/Engineering :: GIS
-Requires-Python: >=3.9
+Requires-Python: >=3.10
+Requires-Dist: chardet>=5.2.0
 Requires-Dist: geopandas~=1.0
 Requires-Dist: openpyxl~=3.0
 Requires-Dist: pandera>=0.23.0
bedrock_ge-0.3.1.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
+bedrock_ge/__init__.py,sha256=_SKQpwL2hyrtdWQOcbHmo86vQEJbICRTU-Delt085g4,89
+bedrock_ge/plot.py,sha256=C95aj8CXjFVZRGYYBssJMm5MyljLbdt_TKyvmQyWZBE,149
+bedrock_ge/gi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bedrock_ge/gi/ags.py,sha256=k2ZotuEt08hGvLAjKCDFR_HLFCRHVuMej1dnw-T6WI4,4388
+bedrock_ge/gi/ags3.py,sha256=HNdX1avwzzZsrkTm54aqs9neUrTXa2e784Q8mSy6Zso,10161
+bedrock_ge/gi/ags3_data_dictionary.json,sha256=Wx20_oJRdAlzEo-cKD6FgN9B9zOMDTcsp5dgc8QWofI,188588
+bedrock_ge/gi/ags4.py,sha256=pDKf-l1jheeQAU2bHkiJiIgjUGvD3Iv8of77rYDwUQA,916
+bedrock_ge/gi/ags4_data_dictionary.json,sha256=XE5XJNo8GBPZTUPgvVr3QgO1UfEIAxzlSeXi-P1VLTs,609670
+bedrock_ge/gi/ags_schemas.py,sha256=R5yubnRacAlQBqb7W7Rj_Y4canhg6Tls38e66xXQNRA,8065
+bedrock_ge/gi/db_operations.py,sha256=Pjtslv9syB-_xumH38F2XWt6XLsvrT8MHLgwAGCYEw0,5153
+bedrock_ge/gi/geospatial.py,sha256=w9sP3SIZZceSW98z3LQT_aJKqs0rSd4DDunTFFSJygY,13739
+bedrock_ge/gi/io_utils.py,sha256=Yd1RGEo_DbYoOklJbEKWdaeTw7KckkHDfKZrr91fu1o,9456
+bedrock_ge/gi/mapper.py,sha256=8vFVPlgLY37iNw_5pkSyze6zOmeQjlBHGY4OAFdx5B0,8665
+bedrock_ge/gi/mapping_models.py,sha256=cvepeKwqwdmVqbNBORkgIDgHq0eOPiRIERjO4RYeAQo,1876
+bedrock_ge/gi/schemas.py,sha256=w0tb3c6YBTXdvpdFWWIGmlE7CYsJfo352nWnD9bmXfM,6883
+bedrock_ge/gi/sqlmodels.py,sha256=_h3H9UP91I_1Ya_SZuL6gZbqL7uNCd5Y-u-yTf7CNto,2253
+bedrock_ge/gi/validate.py,sha256=hgT5qZHLeeXR_cgXf1bhzJnJ-wMhE0_0if_H1rtwsiM,3918
+bedrock_ge/gi/write.py,sha256=N8i1oerOaR7-XJnycmN9gXLkpjMdT5PFFB3GduogyKs,4749
+bedrock_ge-0.3.1.dist-info/METADATA,sha256=bCB5WqqWu_BLYMTsYxb5ZHrRtSqYXoKKnk9S_Ykkzi4,11708
+bedrock_ge-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+bedrock_ge-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+bedrock_ge-0.3.1.dist-info/RECORD,,
bedrock_ge/gi/ags/read.py DELETED
@@ -1,192 +0,0 @@
-import io
-from typing import Any, Dict, List, Union
-
-import pandas as pd
-from python_ags4 import AGS4
-
-from bedrock_ge.gi.ags.validate import check_ags_proj_group
-
-
-def ags_to_dfs(ags_data: str) -> Dict[str, pd.DataFrame]:
-    """Converts AGS 3 or AGS 4 data to a dictionary of pandas DataFrames.
-
-    Args:
-        ags_data (str): The AGS data as a string.
-
-    Raises:
-        ValueError: If the data does not match AGS 3 or AGS 4 format.
-
-    Returns:
-        Dict[str, pd.DataFrame]: A dictionary where keys represent AGS group
-        names with corresponding DataFrames for the corresponding group data.
-    """
-    # Process each line to find the AGS version and delegate parsing
-    for line in ags_data.splitlines():
-        stripped_line = line.strip()  # Remove leading/trailing whitespace
-        if stripped_line:  # Skip empty lines at the start of the file
-            if stripped_line.startswith('"**'):
-                ags_version = 3
-                ags_dfs = ags3_to_dfs(ags_data)
-                break
-            elif stripped_line.startswith('"GROUP"'):
-                ags_version = 4
-                ags_dfs = ags4_to_dfs(ags_data)
-                break
-            else:
-                # If first non-empty line doesn't match AGS 3 or AGS 4 format
-                raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")
-
-    is_proj_group_correct = check_ags_proj_group(ags_dfs["PROJ"])
-    if is_proj_group_correct:
-        project_id = ags_dfs["PROJ"]["PROJ_ID"].iloc[0]
-        print(
-            f"AGS {ags_version} data was read for Project {project_id}",
-            "This Ground Investigation data contains groups:",
-            list(ags_dfs.keys()),
-            sep="\n",
-            end="\n\n",
-        )
-
-    return ags_dfs
-
-
-def ags3_to_dfs(ags3_data: str) -> Dict[str, pd.DataFrame]:
-    """Converts AGS 3 data to a dictionary of pandas DataFrames.
-
-    Args:
-        ags3_data (str): The AGS 3 data as a string.
-
-    Returns:
-        Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key
-        represents a group name from AGS 3 data, and the corresponding value is a
-        pandas DataFrame containing the data for that group.
-    """
-    # Initialize dictionary and variables used in the AGS 3 read loop
-    ags3_dfs = {}
-    line_type = "line_0"
-    group = ""
-    headers: List[str] = ["", "", ""]
-    group_data: List[List[Any]] = [[], [], []]
-
-    for i, line in enumerate(ags3_data.splitlines()):
-        last_line_type = line_type
-
-        # In AGS 3.1 group names are prefixed with **
-        if line.startswith('"**'):
-            line_type = "group_name"
-            if group:
-                ags3_dfs[group] = pd.DataFrame(group_data, columns=headers)
-
-            group = line.strip(' ,"*')
-            group_data = []
-
-        # In AGS 3 header names are prefixed with "*
-        elif line.startswith('"*'):
-            line_type = "headers"
-            new_headers = line.split('","')
-            new_headers = [h.strip(' ,"*') for h in new_headers]
-
-            # Some groups have so many headers that they span multiple lines.
-            # Therefore we need to check whether the new headers are
-            # a continuation of the previous headers from the last line.
-            if line_type == last_line_type:
-                headers = headers + new_headers
-            else:
-                headers = new_headers
-
-        # Skip lines where group units are defined, these are defined in the AGS 3 data dictionary.
-        elif line.startswith('"<UNITS>"'):
-            line_type = "units"
-            continue
-
-        # The rest of the lines contain:
-        # 1. GI data
-        # 2. a continuation of the previous line. These lines contain "<CONT>" in the first column.
-        # 3. are empty or contain worthless data
-        else:
-            line_type = "data_row"
-            data_row = line.split('","')
-            if len("".join(data_row)) == 0:
-                # print(f"Line {i} is empty. Last Group: {group}")
-                continue
-            elif len(data_row) != len(headers):
-                print(
-                    f"\n🚨 CAUTION: The number of columns on line {i + 1} ({len(data_row)}) doesn't match the number of columns of group {group} ({len(headers)})!",
-                    f"{group} headers: {headers}",
-                    f"Line {i + 1}: {data_row}",
-                    sep="\n",
-                    end="\n\n",
-                )
-                continue
-            # Append continued lines (<CONT>) to the last data_row
-            elif data_row[0] == '"<CONT>':
-                last_data_row = group_data[-1]
-                for j, data in enumerate(data_row):
-                    data = data.strip(' "')
-                    if data and data != "<CONT>":
-                        if last_data_row[j] is None:
-                            # Last data row didn't contain data for this column
-                            last_data_row[j] = coerce_string(data)
-                        else:
-                            # Last data row already contains data for this column
-                            last_data_row[j] = str(last_data_row[j]) + data
-            # Lines that are assumed to contain valid data are added to the group data
-            else:
-                cleaned_data_row = []
-                for data in data_row:
-                    cleaned_data_row.append(coerce_string(data.strip(' "')))
-                group_data.append(cleaned_data_row)
-
-    # Also add the last group's df to the dictionary of AGS dfs
-    ags3_dfs[group] = pd.DataFrame(group_data, columns=headers).dropna(
-        axis=1, how="all"
-    )
-
-    if not group:
-        print(
-            '🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
-        )
-
-    return ags3_dfs
-
-
-def ags4_to_dfs(ags4_data: str) -> Dict[str, pd.DataFrame]:
-    """Converts AGS 4 data to a dictionary of pandas DataFrames.
-
-    Args:
-        ags4_data (str): The AGS 4 data as a string.
-
-    Returns:
-        Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key
-        represents a group name from AGS 4 data, and the corresponding value is a
-        pandas DataFrame containing the data for that group.
-    """
-    # AGS4.AGS4_to_dataframe accepts the file, not the data string
-    ags4_file = io.StringIO(ags4_data)
-
-    ags4_tups = AGS4.AGS4_to_dataframe(ags4_file)
-
-    ags4_dfs = {}
-    for group, df in ags4_tups[0].items():
-        df = df.loc[2:].drop(columns=["HEADING"]).reset_index(drop=True)
-        ags4_dfs[group] = df
-
-    return ags4_dfs
-
-
-def coerce_string(string: str) -> Union[None, bool, float, str]:
-    if string.lower() in {"none", "null", ""}:
-        return None
-    elif string.lower() == "true":
-        return True
-    elif string.lower() == "false":
-        return False
-    else:
-        try:
-            value = float(string)
-            if value.is_integer():
-                return int(value)
-            else:
-                return value
-        except ValueError:
-            return string
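For context on the removed module: `coerce_string` typed the raw AGS 3 field values. A few illustrative calls, with results following the function body above:

```python
# Assumes the coerce_string definition shown above.
coerce_string("")       # -> None    ("none", "null" and "" map to None)
coerce_string("TRUE")   # -> True    (case-insensitive booleans)
coerce_string("30.0")   # -> 30      (whole-number floats become ints)
coerce_string("29.97")  # -> 29.97   (other numerics stay floats)
coerce_string("BH-1")   # -> "BH-1"  (everything else stays a string)
```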