bedrock-ge 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bedrock_ge/__init__.py +1 -1
- bedrock_ge/gi/ags.py +103 -0
- bedrock_ge/gi/ags3.py +275 -0
- bedrock_ge/gi/ags4.py +29 -0
- bedrock_ge/gi/{ags/schemas.py → ags_schemas.py} +29 -8
- bedrock_ge/gi/db_operations.py +128 -0
- bedrock_ge/gi/geospatial.py +349 -0
- bedrock_ge/gi/io_utils.py +271 -0
- bedrock_ge/gi/mapper.py +221 -0
- bedrock_ge/gi/mapping_models.py +69 -0
- bedrock_ge/gi/schemas.py +136 -36
- bedrock_ge/gi/validate.py +45 -108
- bedrock_ge/gi/write.py +54 -37
- {bedrock_ge-0.2.4.dist-info → bedrock_ge-0.3.0.dist-info}/METADATA +2 -3
- bedrock_ge-0.3.0.dist-info/RECORD +22 -0
- bedrock_ge/gi/ags/__init__.py +0 -0
- bedrock_ge/gi/ags/read.py +0 -192
- bedrock_ge/gi/ags/transform.py +0 -264
- bedrock_ge/gi/ags/validate.py +0 -25
- bedrock_ge/gi/brgi-schema.json +0 -36
- bedrock_ge/gi/concatenate.py +0 -38
- bedrock_ge/gi/gis_geometry.py +0 -282
- bedrock_ge-0.2.4.dist-info/RECORD +0 -21
- /bedrock_ge/gi/{ags/ags3_data_dictionary.json → ags3_data_dictionary.json} +0 -0
- /bedrock_ge/gi/{ags/ags4_data_dictionary.json → ags4_data_dictionary.json} +0 -0
- {bedrock_ge-0.2.4.dist-info → bedrock_ge-0.3.0.dist-info}/WHEEL +0 -0
- {bedrock_ge-0.2.4.dist-info → bedrock_ge-0.3.0.dist-info}/licenses/LICENSE +0 -0
bedrock_ge/gi/validate.py
CHANGED
@@ -1,151 +1,88 @@
|
|
1
|
-
from typing import Dict, Union
|
2
|
-
|
3
1
|
import geopandas as gpd # type: ignore
|
4
2
|
import pandas as pd
|
5
3
|
|
6
4
|
from bedrock_ge.gi.schemas import (
|
7
|
-
|
8
|
-
|
9
|
-
BaseSample,
|
10
|
-
InSitu,
|
11
|
-
Location,
|
12
|
-
Project,
|
13
|
-
Sample,
|
5
|
+
BedrockGIDatabase,
|
6
|
+
BedrockGIGeospatialDatabase,
|
14
7
|
)
|
15
8
|
|
16
9
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
"""Validates the structure and relationships of a 'Bedrock Ground Investigation' (
|
10
|
+
def check_brgi_geodb(
|
11
|
+
brgi_geodb: BedrockGIGeospatialDatabase,
|
12
|
+
):
|
13
|
+
"""Validates the structure and relationships of a 'Bedrock Ground Investigation' (BrGI) geospatial database.
|
21
14
|
|
22
|
-
This function checks that all tables in the
|
15
|
+
This function checks that all tables in the BrGI geospatial database conform to their respective schemas
|
23
16
|
and that all foreign key relationships are properly maintained. It validates the following tables:
|
24
17
|
- Project
|
25
18
|
- Location
|
19
|
+
- LonLatHeight
|
20
|
+
- All In-Situ test tables
|
26
21
|
- Sample
|
27
|
-
-
|
28
|
-
- Lab_TESTY (not yet implemented)
|
22
|
+
- All Lab test tables
|
29
23
|
|
30
24
|
Args:
|
31
|
-
|
32
|
-
containing the BRGI database tables, where keys are table names and
|
33
|
-
values are the corresponding data tables (DataFrame or GeoDataFrame).
|
25
|
+
brgi_geodb (BedrockGIGeospatialDatabase): Bedrock GI geospatial database object.
|
34
26
|
|
35
27
|
Returns:
|
36
28
|
is_valid (bool): True if all tables are valid and relationships are properly maintained.
|
37
29
|
|
38
30
|
Example:
|
39
31
|
```python
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
"
|
45
|
-
|
46
|
-
|
32
|
+
brgi_geodb = BedrockGIGeospatialDatabase(
|
33
|
+
Project=project_df,
|
34
|
+
Location=location_geodf,
|
35
|
+
LonLatHeight=lon_lat_height_geodf,
|
36
|
+
InSituTests={"ISPT": ispt_geodf},
|
37
|
+
Sample=sample_geodf,
|
38
|
+
LabTests={"LLPL": llpl_df},
|
39
|
+
)
|
40
|
+
check_brgi_geodb(brgi_geodb)
|
47
41
|
```
|
48
42
|
"""
|
49
|
-
|
50
|
-
if table_name == "Project":
|
51
|
-
Project.validate(table)
|
52
|
-
print("'Project' table aligns with Bedrock's 'Project' table schema.")
|
53
|
-
elif table_name == "Location":
|
54
|
-
Location.validate(table)
|
55
|
-
check_foreign_key("project_uid", brgi_db["Project"], table)
|
56
|
-
print("'Location' table aligns with Bedrock's 'Location' table schema.")
|
57
|
-
elif table_name == "Sample":
|
58
|
-
Sample.validate(table)
|
59
|
-
check_foreign_key("project_uid", brgi_db["Project"], table)
|
60
|
-
check_foreign_key("location_uid", brgi_db["Location"], table)
|
61
|
-
print("'Sample' table aligns with Bedrock's 'Sample' table schema.")
|
62
|
-
# ! JG is pretty sure that this doesn't work
|
63
|
-
# ! The line below should be:
|
64
|
-
# ! elif table_name.startswith("InSitu_"):
|
65
|
-
elif table_name == "InSitu":
|
66
|
-
InSitu.validate(table)
|
67
|
-
check_foreign_key("project_uid", brgi_db["Project"], table)
|
68
|
-
check_foreign_key("location_uid", brgi_db["Location"], table)
|
69
|
-
print(
|
70
|
-
f"'{table_name}' table aligns with Bedrock's table schema for In-Situ measurements."
|
71
|
-
)
|
72
|
-
elif table_name.startswith("Lab_"):
|
73
|
-
print(
|
74
|
-
"🚨 !NOT IMPLEMENTED! We haven't come across Lab data yet. !NOT IMPLEMENTED!"
|
75
|
-
)
|
76
|
-
|
43
|
+
# TODO: implement this
|
77
44
|
return True
|
78
45
|
|
79
46
|
|
80
|
-
|
81
|
-
|
82
|
-
brgi_db: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
|
47
|
+
def check_brgi_db(
|
48
|
+
brgi_db: BedrockGIDatabase,
|
83
49
|
):
|
84
|
-
"""Validates the structure and relationships of a 'Bedrock Ground Investigation' (
|
50
|
+
"""Validates the structure and relationships of a 'Bedrock Ground Investigation' (BrGI) database.
|
85
51
|
|
86
|
-
This function performs the same validation as `
|
87
|
-
that don't require
|
88
|
-
- Project (never has
|
89
|
-
- Location (without
|
90
|
-
-
|
91
|
-
-
|
92
|
-
-
|
52
|
+
This function performs the same validation as `check_brgi_geodb`, but uses schemas
|
53
|
+
that don't require geospatial geometry. It validates the following tables:
|
54
|
+
- Project (never has geospatial geometry)
|
55
|
+
- Location (without geospatial geometry)
|
56
|
+
- All In-Situ test tables (without geospatial geometry)
|
57
|
+
- Sample (without geospatial geometry)
|
58
|
+
- All Lab test tables (never has geospatial geometry)
|
93
59
|
|
94
60
|
Args:
|
95
|
-
brgi_db (
|
96
|
-
containing the Bedrock GI database tables, where keys are table names
|
97
|
-
and values are the corresponding data tables (DataFrame or GeoDataFrame).
|
61
|
+
brgi_db (BedrockGIDatabase): A Bedrock GI database object.
|
98
62
|
|
99
63
|
Returns:
|
100
64
|
bool: True if all tables are valid and relationships are properly maintained.
|
101
65
|
|
102
66
|
Example:
|
103
67
|
```python
|
104
|
-
brgi_db =
|
105
|
-
|
106
|
-
|
107
|
-
"
|
108
|
-
|
109
|
-
|
110
|
-
|
68
|
+
brgi_db = BedrockGIDatabase(
|
69
|
+
Project=project_df,
|
70
|
+
Location=location_df,
|
71
|
+
InSituTests={"ISPT": ispt_df},
|
72
|
+
Sample=sample_df,
|
73
|
+
LabTests={"LLPL": llpl_df},
|
74
|
+
)
|
75
|
+
check_brgi_db(brgi_db)
|
111
76
|
```
|
112
77
|
"""
|
113
|
-
|
114
|
-
if table_name == "Project":
|
115
|
-
Project.validate(table)
|
116
|
-
print("'Project' table aligns with Bedrock's 'Project' table schema.")
|
117
|
-
elif table_name == "Location":
|
118
|
-
BaseLocation.validate(table)
|
119
|
-
check_foreign_key("project_uid", brgi_db["Project"], table)
|
120
|
-
print(
|
121
|
-
"'Location' table aligns with Bedrock's 'Location' table schema without GIS geometry."
|
122
|
-
)
|
123
|
-
elif table_name == "Sample":
|
124
|
-
BaseSample.validate(table)
|
125
|
-
check_foreign_key("project_uid", brgi_db["Project"], table)
|
126
|
-
check_foreign_key("location_uid", brgi_db["Location"], table)
|
127
|
-
print(
|
128
|
-
"'Sample' table aligns with Bedrock's 'Sample' table schema without GIS geometry."
|
129
|
-
)
|
130
|
-
elif table_name.startswith("InSitu_"):
|
131
|
-
BaseInSitu.validate(table)
|
132
|
-
check_foreign_key("project_uid", brgi_db["Project"], table)
|
133
|
-
check_foreign_key("location_uid", brgi_db["Location"], table)
|
134
|
-
print(
|
135
|
-
f"'{table_name}' table aligns with Bedrock's '{table_name}' table schema without GIS geometry."
|
136
|
-
)
|
137
|
-
elif table_name.startswith("Lab_"):
|
138
|
-
print(
|
139
|
-
"🚨 !NOT IMPLEMENTED! We haven't come across Lab data yet. !NOT IMPLEMENTED!"
|
140
|
-
)
|
141
|
-
|
78
|
+
# TODO: implement this
|
142
79
|
return True
|
143
80
|
|
144
81
|
|
145
82
|
def check_foreign_key(
|
146
83
|
foreign_key: str,
|
147
|
-
parent_table:
|
148
|
-
table_with_foreign_key:
|
84
|
+
parent_table: pd.DataFrame | gpd.GeoDataFrame,
|
85
|
+
table_with_foreign_key: pd.DataFrame | gpd.GeoDataFrame,
|
149
86
|
) -> bool:
|
150
87
|
"""Validates referential integrity between two tables by checking foreign key relationships.
|
151
88
|
|
@@ -154,8 +91,8 @@ def check_foreign_key(
|
|
154
91
|
|
155
92
|
Args:
|
156
93
|
foreign_key (str): The name of the column that serves as the foreign key.
|
157
|
-
parent_table (
|
158
|
-
table_with_foreign_key (
|
94
|
+
parent_table (pd.DataFrame | gpd.GeoDataFrame): The parent table containing the primary keys.
|
95
|
+
table_with_foreign_key (pd.DataFrame | gpd.GeoDataFrame): The child table containing the foreign keys.
|
159
96
|
|
160
97
|
Returns:
|
161
98
|
bool: True if all foreign keys exist in the parent table.
|
bedrock_ge/gi/write.py
CHANGED
@@ -1,13 +1,44 @@
|
|
1
1
|
from pathlib import Path
|
2
|
-
from typing import
|
2
|
+
from typing import Literal
|
3
3
|
|
4
4
|
import geopandas as gpd
|
5
5
|
import pandas as pd
|
6
6
|
|
7
|
+
from bedrock_ge.gi.io_utils import brgi_db_to_dfs, geodf_to_df
|
8
|
+
from bedrock_ge.gi.schemas import BedrockGIDatabase, BedrockGIGeospatialDatabase
|
9
|
+
|
10
|
+
|
11
|
+
# ? Should this function be made a to_file(s) method of BedrockGIDatabase?
|
12
|
+
def write_brgi_db_to_file(
|
13
|
+
brgi_db: BedrockGIDatabase | BedrockGIGeospatialDatabase,
|
14
|
+
path: str | Path,
|
15
|
+
driver: Literal["EXCEL", "GPKG"] = "GPKG",
|
16
|
+
) -> None:
|
17
|
+
"""Writes a Bedrock GI (geospatial) database to a file.
|
18
|
+
|
19
|
+
Writes a Bedrock GI (geospatial) database to a file. The file type is
|
20
|
+
determined by the `driver` argument. Possible values are "GPKG" and "EXCEL".
|
21
|
+
|
22
|
+
Args:
|
23
|
+
brgi_db (BedrockGIDatabase | BedrockGIGeospatialDatabase): The Bedrock GI (geospatial) database.
|
24
|
+
path (str | Path): The path of the output file.
|
25
|
+
driver (Literal["EXCEL", "GPKG"]): The type of the output file. Possible values are "GPKG" and "EXCEL". Defaults to "GPKG".
|
26
|
+
|
27
|
+
Returns:
|
28
|
+
None
|
29
|
+
"""
|
30
|
+
dict_of_dfs = brgi_db_to_dfs(brgi_db)
|
31
|
+
if driver.upper() == "GPKG":
|
32
|
+
write_gi_db_to_gpkg(dict_of_dfs, path)
|
33
|
+
elif driver.upper() == "EXCEL":
|
34
|
+
write_gi_db_to_excel(dict_of_dfs, path)
|
35
|
+
else:
|
36
|
+
raise ValueError(f"Invalid driver: {driver}")
|
37
|
+
|
7
38
|
|
8
39
|
def write_gi_db_to_gpkg(
|
9
|
-
|
10
|
-
gpkg_path:
|
40
|
+
dict_of_dfs: dict[str, pd.DataFrame | gpd.GeoDataFrame],
|
41
|
+
gpkg_path: str | Path,
|
11
42
|
) -> None:
|
12
43
|
"""Writes a database with Bedrock Ground Investigation data to a GeoPackage file.
|
13
44
|
|
@@ -16,32 +47,28 @@ def write_gi_db_to_gpkg(
|
|
16
47
|
separate table named by the keys of the dictionary.
|
17
48
|
|
18
49
|
Args:
|
19
|
-
|
50
|
+
dict_of_dfs (dict[str, pd.DataFrame | gpd.GeoDataFrame]): A dictionary where
|
20
51
|
keys are brgi table names and values are pandas DataFrames or GeoDataFrames
|
21
52
|
with brgi data.
|
22
|
-
gpkg_path (str): The name of the output GeoPackage file.
|
53
|
+
gpkg_path (str | Path): The name of the output GeoPackage file.
|
23
54
|
|
24
55
|
Returns:
|
25
56
|
None
|
26
57
|
"""
|
27
58
|
# Create a GeoDataFrame from the dictionary of DataFrames
|
28
|
-
for
|
29
|
-
sanitized_table_name = sanitize_table_name(
|
30
|
-
|
31
|
-
|
32
|
-
brgi_table = gpd.GeoDataFrame(brgi_table)
|
59
|
+
for table_name, df in dict_of_dfs.items():
|
60
|
+
sanitized_table_name = sanitize_table_name(table_name)
|
61
|
+
if isinstance(df, pd.DataFrame):
|
62
|
+
df = gpd.GeoDataFrame(df)
|
33
63
|
|
34
|
-
|
35
|
-
brgi_table.to_file(
|
36
|
-
gpkg_path, driver="GPKG", layer=sanitized_table_name, overwrite=True
|
37
|
-
)
|
64
|
+
df.to_file(gpkg_path, driver="GPKG", layer=sanitized_table_name, overwrite=True)
|
38
65
|
|
39
66
|
print(f"Ground Investigation data has been written to '{gpkg_path}'.")
|
40
67
|
|
41
68
|
|
42
69
|
def write_gi_db_to_excel(
|
43
|
-
|
44
|
-
excel_path:
|
70
|
+
dict_of_dfs: dict[str, pd.DataFrame | gpd.GeoDataFrame],
|
71
|
+
excel_path: str | Path,
|
45
72
|
) -> None:
|
46
73
|
"""Writes a database with Ground Investigation data to an Excel file.
|
47
74
|
|
@@ -50,27 +77,27 @@ def write_gi_db_to_excel(
|
|
50
77
|
AGS, Bedrock, or another format.
|
51
78
|
|
52
79
|
Args:
|
53
|
-
|
80
|
+
dict_of_dfs (dict[str, pd.DataFrame | gpd.GeoDataFrame]): A dictionary where
|
54
81
|
keys are GI table names and values are DataFrames with GI data.
|
55
|
-
excel_path (
|
82
|
+
excel_path (str | Path): Path to the output Excel file. Can be provided as a
|
56
83
|
string or Path object.
|
57
84
|
|
58
85
|
Returns:
|
59
86
|
None
|
60
87
|
"""
|
61
|
-
# Create an Excel writer object
|
62
88
|
with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
|
63
|
-
for sheet_name, df in
|
64
|
-
sanitized_sheet_name = sanitize_table_name(sheet_name)
|
65
|
-
if isinstance(df,
|
66
|
-
df
|
89
|
+
for sheet_name, df in dict_of_dfs.items():
|
90
|
+
sanitized_sheet_name = sanitize_table_name(sheet_name)[:31]
|
91
|
+
if isinstance(df, gpd.GeoDataFrame):
|
92
|
+
df = geodf_to_df(df)
|
93
|
+
|
94
|
+
df.to_excel(writer, sheet_name=sanitized_sheet_name, index=False)
|
67
95
|
|
68
96
|
print(f"Ground Investigation data has been written to '{excel_path}'.")
|
69
97
|
|
70
98
|
|
71
|
-
# TODO: Make the 31 character table name length truncation a separate function. Only necessary for Excel.
|
72
99
|
def sanitize_table_name(sheet_name):
|
73
|
-
"""Replaces invalid characters and spaces in GI table names with underscores
|
100
|
+
"""Replaces invalid characters and spaces in GI table names with underscores.
|
74
101
|
|
75
102
|
Makes table names consistent with SQL, GeoPackage and Excel naming conventions by
|
76
103
|
replacing invalid characters and spaces with underscores.
|
@@ -81,12 +108,8 @@ def sanitize_table_name(sheet_name):
|
|
81
108
|
Returns:
|
82
109
|
sanitized_name (str): A sanitized sheet name with invalid characters and spaces replaced.
|
83
110
|
"""
|
84
|
-
# Trim to a maximum length of 31 characters
|
85
|
-
trimmed_name = sheet_name.strip()[:31]
|
86
|
-
|
87
|
-
# Define invalid characters and replace with underscores
|
88
111
|
invalid_chars = [":", "/", "\\", "?", "*", "[", "]"]
|
89
|
-
sanitized_name =
|
112
|
+
sanitized_name = sheet_name.strip()
|
90
113
|
for char in invalid_chars:
|
91
114
|
sanitized_name = sanitized_name.replace(char, "_")
|
92
115
|
|
@@ -96,16 +119,10 @@ def sanitize_table_name(sheet_name):
|
|
96
119
|
# Collapse multiple underscores to one
|
97
120
|
sanitized_name = "_".join(filter(None, sanitized_name.split("_")))
|
98
121
|
|
99
|
-
if
|
122
|
+
if sheet_name != sanitized_name:
|
100
123
|
print(
|
101
124
|
f"Table names shouldn't contain {invalid_chars} or spaces and shouldn't be longer than 31 characters.\n",
|
102
125
|
f"Replaced '{sheet_name}' with '{sanitized_name}'.",
|
103
126
|
)
|
104
127
|
|
105
|
-
# Ensure name isn't empty after sanitization
|
106
|
-
# ! "Table1" doesn't make a lot of sense?!? It could be that there are more than 1 table without a name...
|
107
|
-
if not sanitized_name:
|
108
|
-
sanitized_name = "Table1"
|
109
|
-
print("The table name was completely invalid or empty. Replaced with 'Table1'.")
|
110
|
-
|
111
128
|
return sanitized_name
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: bedrock-ge
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Bedrock's Python library for geotechnical engineering.
|
5
5
|
Project-URL: Homepage, https://bedrock.engineer/
|
6
6
|
Project-URL: Source, https://github.com/bedrock-engineer/bedrock-ge
|
@@ -17,14 +17,13 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
17
17
|
Classifier: Operating System :: OS Independent
|
18
18
|
Classifier: Programming Language :: Python
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
|
-
Classifier: Programming Language :: Python :: 3.9
|
21
20
|
Classifier: Programming Language :: Python :: 3.10
|
22
21
|
Classifier: Programming Language :: Python :: 3.11
|
23
22
|
Classifier: Programming Language :: Python :: 3.12
|
24
23
|
Classifier: Programming Language :: Python :: 3.13
|
25
24
|
Classifier: Topic :: Scientific/Engineering
|
26
25
|
Classifier: Topic :: Scientific/Engineering :: GIS
|
27
|
-
Requires-Python: >=3.
|
26
|
+
Requires-Python: >=3.10
|
28
27
|
Requires-Dist: geopandas~=1.0
|
29
28
|
Requires-Dist: openpyxl~=3.0
|
30
29
|
Requires-Dist: pandera>=0.23.0
|
@@ -0,0 +1,22 @@
|
|
1
|
+
bedrock_ge/__init__.py,sha256=F4CYyFSMweApwDS139REvy262QKShDMRdP739NOv9Co,89
|
2
|
+
bedrock_ge/plot.py,sha256=C95aj8CXjFVZRGYYBssJMm5MyljLbdt_TKyvmQyWZBE,149
|
3
|
+
bedrock_ge/gi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
bedrock_ge/gi/ags.py,sha256=k2ZotuEt08hGvLAjKCDFR_HLFCRHVuMej1dnw-T6WI4,4388
|
5
|
+
bedrock_ge/gi/ags3.py,sha256=HNdX1avwzzZsrkTm54aqs9neUrTXa2e784Q8mSy6Zso,10161
|
6
|
+
bedrock_ge/gi/ags3_data_dictionary.json,sha256=Wx20_oJRdAlzEo-cKD6FgN9B9zOMDTcsp5dgc8QWofI,188588
|
7
|
+
bedrock_ge/gi/ags4.py,sha256=pDKf-l1jheeQAU2bHkiJiIgjUGvD3Iv8of77rYDwUQA,916
|
8
|
+
bedrock_ge/gi/ags4_data_dictionary.json,sha256=XE5XJNo8GBPZTUPgvVr3QgO1UfEIAxzlSeXi-P1VLTs,609670
|
9
|
+
bedrock_ge/gi/ags_schemas.py,sha256=R5yubnRacAlQBqb7W7Rj_Y4canhg6Tls38e66xXQNRA,8065
|
10
|
+
bedrock_ge/gi/db_operations.py,sha256=Pjtslv9syB-_xumH38F2XWt6XLsvrT8MHLgwAGCYEw0,5153
|
11
|
+
bedrock_ge/gi/geospatial.py,sha256=w9sP3SIZZceSW98z3LQT_aJKqs0rSd4DDunTFFSJygY,13739
|
12
|
+
bedrock_ge/gi/io_utils.py,sha256=Yd1RGEo_DbYoOklJbEKWdaeTw7KckkHDfKZrr91fu1o,9456
|
13
|
+
bedrock_ge/gi/mapper.py,sha256=8vFVPlgLY37iNw_5pkSyze6zOmeQjlBHGY4OAFdx5B0,8665
|
14
|
+
bedrock_ge/gi/mapping_models.py,sha256=cvepeKwqwdmVqbNBORkgIDgHq0eOPiRIERjO4RYeAQo,1876
|
15
|
+
bedrock_ge/gi/schemas.py,sha256=w0tb3c6YBTXdvpdFWWIGmlE7CYsJfo352nWnD9bmXfM,6883
|
16
|
+
bedrock_ge/gi/sqlmodels.py,sha256=_h3H9UP91I_1Ya_SZuL6gZbqL7uNCd5Y-u-yTf7CNto,2253
|
17
|
+
bedrock_ge/gi/validate.py,sha256=hgT5qZHLeeXR_cgXf1bhzJnJ-wMhE0_0if_H1rtwsiM,3918
|
18
|
+
bedrock_ge/gi/write.py,sha256=N8i1oerOaR7-XJnycmN9gXLkpjMdT5PFFB3GduogyKs,4749
|
19
|
+
bedrock_ge-0.3.0.dist-info/METADATA,sha256=dGqh8KV7QCwnhi67RrQOfGtj5USY8Ml6Pr627jDKeMA,11678
|
20
|
+
bedrock_ge-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
21
|
+
bedrock_ge-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
+
bedrock_ge-0.3.0.dist-info/RECORD,,
|
bedrock_ge/gi/ags/__init__.py
DELETED
File without changes
|
bedrock_ge/gi/ags/read.py
DELETED
@@ -1,192 +0,0 @@
|
|
1
|
-
import io
|
2
|
-
from typing import Any, Dict, List, Union
|
3
|
-
|
4
|
-
import pandas as pd
|
5
|
-
from python_ags4 import AGS4
|
6
|
-
|
7
|
-
from bedrock_ge.gi.ags.validate import check_ags_proj_group
|
8
|
-
|
9
|
-
|
10
|
-
def ags_to_dfs(ags_data: str) -> Dict[str, pd.DataFrame]:
|
11
|
-
"""Converts AGS 3 or AGS 4 data to a dictionary of pandas DataFrames.
|
12
|
-
|
13
|
-
Args:
|
14
|
-
ags_data (str): The AGS data as a string.
|
15
|
-
|
16
|
-
Raises:
|
17
|
-
ValueError: If the data does not match AGS 3 or AGS 4 format.
|
18
|
-
|
19
|
-
Returns:
|
20
|
-
Dict[str, pd.DataFrame]: A dictionary where keys represent AGS group
|
21
|
-
names with corresponding DataFrames for the corresponding group data.
|
22
|
-
"""
|
23
|
-
# Process each line to find the AGS version and delegate parsing
|
24
|
-
for line in ags_data.splitlines():
|
25
|
-
stripped_line = line.strip() # Remove leading/trailing whitespace
|
26
|
-
if stripped_line: # Skip empty lines at the start of the file
|
27
|
-
if stripped_line.startswith('"**'):
|
28
|
-
ags_version = 3
|
29
|
-
ags_dfs = ags3_to_dfs(ags_data)
|
30
|
-
break
|
31
|
-
elif stripped_line.startswith('"GROUP"'):
|
32
|
-
ags_version = 4
|
33
|
-
ags_dfs = ags4_to_dfs(ags_data)
|
34
|
-
break
|
35
|
-
else:
|
36
|
-
# If first non-empty line doesn't match AGS 3 or AGS 4 format
|
37
|
-
raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")
|
38
|
-
|
39
|
-
is_proj_group_correct = check_ags_proj_group(ags_dfs["PROJ"])
|
40
|
-
if is_proj_group_correct:
|
41
|
-
project_id = ags_dfs["PROJ"]["PROJ_ID"].iloc[0]
|
42
|
-
print(
|
43
|
-
f"AGS {ags_version} data was read for Project {project_id}",
|
44
|
-
"This Ground Investigation data contains groups:",
|
45
|
-
list(ags_dfs.keys()),
|
46
|
-
sep="\n",
|
47
|
-
end="\n\n",
|
48
|
-
)
|
49
|
-
|
50
|
-
return ags_dfs
|
51
|
-
|
52
|
-
|
53
|
-
def ags3_to_dfs(ags3_data: str) -> Dict[str, pd.DataFrame]:
|
54
|
-
"""Converts AGS 3 data to a dictionary of pandas DataFrames.
|
55
|
-
|
56
|
-
Args:
|
57
|
-
ags3_data (str): The AGS 3 data as a string.
|
58
|
-
|
59
|
-
Returns:
|
60
|
-
Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key
|
61
|
-
represents a group name from AGS 3 data, and the corresponding value is a
|
62
|
-
pandas DataFrame containing the data for that group.
|
63
|
-
"""
|
64
|
-
# Initialize dictionary and variables used in the AGS 3 read loop
|
65
|
-
ags3_dfs = {}
|
66
|
-
line_type = "line_0"
|
67
|
-
group = ""
|
68
|
-
headers: List[str] = ["", "", ""]
|
69
|
-
group_data: List[List[Any]] = [[], [], []]
|
70
|
-
|
71
|
-
for i, line in enumerate(ags3_data.splitlines()):
|
72
|
-
last_line_type = line_type
|
73
|
-
|
74
|
-
# In AGS 3.1 group names are prefixed with **
|
75
|
-
if line.startswith('"**'):
|
76
|
-
line_type = "group_name"
|
77
|
-
if group:
|
78
|
-
ags3_dfs[group] = pd.DataFrame(group_data, columns=headers)
|
79
|
-
|
80
|
-
group = line.strip(' ,"*')
|
81
|
-
group_data = []
|
82
|
-
|
83
|
-
# In AGS 3 header names are prefixed with "*
|
84
|
-
elif line.startswith('"*'):
|
85
|
-
line_type = "headers"
|
86
|
-
new_headers = line.split('","')
|
87
|
-
new_headers = [h.strip(' ,"*') for h in new_headers]
|
88
|
-
|
89
|
-
# Some groups have so many headers that they span multiple lines.
|
90
|
-
# Therefore we need to check whether the new headers are
|
91
|
-
# a continuation of the previous headers from the last line.
|
92
|
-
if line_type == last_line_type:
|
93
|
-
headers = headers + new_headers
|
94
|
-
else:
|
95
|
-
headers = new_headers
|
96
|
-
|
97
|
-
# Skip lines where group units are defined, these are defined in the AGS 3 data dictionary.
|
98
|
-
elif line.startswith('"<UNITS>"'):
|
99
|
-
line_type = "units"
|
100
|
-
continue
|
101
|
-
|
102
|
-
# The rest of the lines contain:
|
103
|
-
# 1. GI data
|
104
|
-
# 2. a continuation of the previous line. These lines contain "<CONT>" in the first column.
|
105
|
-
# 3. are empty or contain worthless data
|
106
|
-
else:
|
107
|
-
line_type = "data_row"
|
108
|
-
data_row = line.split('","')
|
109
|
-
if len("".join(data_row)) == 0:
|
110
|
-
# print(f"Line {i} is empty. Last Group: {group}")
|
111
|
-
continue
|
112
|
-
elif len(data_row) != len(headers):
|
113
|
-
print(
|
114
|
-
f"\n🚨 CAUTION: The number of columns on line {i + 1} ({len(data_row)}) doesn't match the number of columns of group {group} ({len(headers)})!",
|
115
|
-
f"{group} headers: {headers}",
|
116
|
-
f"Line {i + 1}: {data_row}",
|
117
|
-
sep="\n",
|
118
|
-
end="\n\n",
|
119
|
-
)
|
120
|
-
continue
|
121
|
-
# Append continued lines (<CONT>) to the last data_row
|
122
|
-
elif data_row[0] == '"<CONT>':
|
123
|
-
last_data_row = group_data[-1]
|
124
|
-
for j, data in enumerate(data_row):
|
125
|
-
data = data.strip(' "')
|
126
|
-
if data and data != "<CONT>":
|
127
|
-
if last_data_row[j] is None:
|
128
|
-
# Last data row didn't contain data for this column
|
129
|
-
last_data_row[j] = coerce_string(data)
|
130
|
-
else:
|
131
|
-
# Last data row already contains data for this column
|
132
|
-
last_data_row[j] = str(last_data_row[j]) + data
|
133
|
-
# Lines that are assumed to contain valid data are added to the group data
|
134
|
-
else:
|
135
|
-
cleaned_data_row = []
|
136
|
-
for data in data_row:
|
137
|
-
cleaned_data_row.append(coerce_string(data.strip(' "')))
|
138
|
-
group_data.append(cleaned_data_row)
|
139
|
-
|
140
|
-
# Also add the last group's df to the dictionary of AGS dfs
|
141
|
-
ags3_dfs[group] = pd.DataFrame(group_data, columns=headers).dropna(
|
142
|
-
axis=1, how="all"
|
143
|
-
)
|
144
|
-
|
145
|
-
if not group:
|
146
|
-
print(
|
147
|
-
'🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
|
148
|
-
)
|
149
|
-
|
150
|
-
return ags3_dfs
|
151
|
-
|
152
|
-
|
153
|
-
def ags4_to_dfs(ags4_data: str) -> Dict[str, pd.DataFrame]:
|
154
|
-
"""Converts AGS 4 data to a dictionary of pandas DataFrames.
|
155
|
-
|
156
|
-
Args:
|
157
|
-
ags4_data (str): The AGS 4 data as a string.
|
158
|
-
|
159
|
-
Returns:
|
160
|
-
Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each key
|
161
|
-
represents a group name from AGS 4 data, and the corresponding value is a
|
162
|
-
pandas DataFrame containing the data for that group.
|
163
|
-
"""
|
164
|
-
# AGS4.AGS4_to_dataframe accepts the file, not the data string
|
165
|
-
ags4_file = io.StringIO(ags4_data)
|
166
|
-
|
167
|
-
ags4_tups = AGS4.AGS4_to_dataframe(ags4_file)
|
168
|
-
|
169
|
-
ags4_dfs = {}
|
170
|
-
for group, df in ags4_tups[0].items():
|
171
|
-
df = df.loc[2:].drop(columns=["HEADING"]).reset_index(drop=True)
|
172
|
-
ags4_dfs[group] = df
|
173
|
-
|
174
|
-
return ags4_dfs
|
175
|
-
|
176
|
-
|
177
|
-
def coerce_string(string: str) -> Union[None, bool, float, str]:
|
178
|
-
if string.lower() in {"none", "null", ""}:
|
179
|
-
return None
|
180
|
-
elif string.lower() == "true":
|
181
|
-
return True
|
182
|
-
elif string.lower() == "false":
|
183
|
-
return False
|
184
|
-
else:
|
185
|
-
try:
|
186
|
-
value = float(string)
|
187
|
-
if value.is_integer():
|
188
|
-
return int(value)
|
189
|
-
else:
|
190
|
-
return value
|
191
|
-
except ValueError:
|
192
|
-
return string
|