bedrock-ge 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
bedrock_ge/gi/ags/read.py DELETED
@@ -1,190 +0,0 @@
- import io
- from typing import Any, Dict, List, Union
-
- import pandas as pd
- from python_ags4 import AGS4
-
- from bedrock_ge.gi.ags.validate import check_ags_proj_group
-
-
- def ags_to_dfs(ags_data: str) -> Dict[str, pd.DataFrame]:
-     """Converts AGS 3 or AGS 4 data to a dictionary of pandas DataFrames.
-
-     Args:
-         ags_data (str): The AGS data as a string.
-
-     Raises:
-         ValueError: If the data does not match AGS 3 or AGS 4 format.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary where keys represent AGS group
-             names, with corresponding DataFrames for each group's data.
-     """
-     # Process each line to find the AGS version and delegate parsing
-     for line in ags_data.splitlines():
-         stripped_line = line.strip()  # Remove leading/trailing whitespace
-         if stripped_line:  # Skip empty lines at the start of the file
-             if stripped_line.startswith('"**'):
-                 ags_version = 3
-                 ags_dfs = ags3_to_dfs(ags_data)
-                 break
-             elif stripped_line.startswith('"GROUP"'):
-                 ags_version = 4
-                 ags_dfs = ags4_to_dfs(ags_data)
-                 break
-             else:
-                 # The first non-empty line matches neither the AGS 3 nor the AGS 4 format
-                 raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")
-
-     is_proj_group_correct = check_ags_proj_group(ags_dfs["PROJ"])
-     if is_proj_group_correct:
-         project_id = ags_dfs["PROJ"]["PROJ_ID"].iloc[0]
-         print(
-             f"AGS {ags_version} data was read for Project {project_id}",
-             "This Ground Investigation data contains groups:",
-             list(ags_dfs.keys()),
-             sep="\n",
-             end="\n\n",
-         )
-
-     return ags_dfs
-
-
- def ags3_to_dfs(ags3_data: str) -> Dict[str, pd.DataFrame]:
-     """Converts AGS 3 data to a dictionary of pandas DataFrames.
-
-     Args:
-         ags3_data (str): The AGS 3 data as a string.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each
-             key is a group name from the AGS 3 data, and the corresponding value
-             is a pandas DataFrame containing the data for that group.
-     """
-     # Initialize dictionary and variables used in the AGS 3 read loop
-     ags3_dfs = {}
-     line_type = "line_0"
-     group = ""
-     headers: List[str] = ["", "", ""]
-     group_data: List[List[Any]] = [[], [], []]
-
-     for i, line in enumerate(ags3_data.splitlines()):
-         last_line_type = line_type
-
-         # In AGS 3.1 group names are prefixed with **
-         if line.startswith('"**'):
-             line_type = "group_name"
-             if group:
-                 ags3_dfs[group] = pd.DataFrame(group_data, columns=headers)
-
-             group = line.strip(' ,"*')
-             group_data = []
-
-         # In AGS 3 header names are prefixed with "*
-         elif line.startswith('"*'):
-             line_type = "headers"
-             new_headers = line.split('","')
-             new_headers = [h.strip(' ,"*') for h in new_headers]
-
-             # Some groups have so many headers that they span multiple lines.
-             # Therefore we need to check whether the new headers are
-             # a continuation of the previous headers from the last line.
-             if line_type == last_line_type:
-                 headers = headers + new_headers
-             else:
-                 headers = new_headers
-
-         # Skip lines where group units are defined; these are defined in the AGS 3 data dictionary.
-         elif line.startswith('"<UNITS>"'):
-             line_type = "units"
-             continue
-
-         # The rest of the lines contain:
-         # 1. GI data
-         # 2. a continuation of the previous line, marked by "<CONT>" in the first column
-         # 3. empty lines or worthless data
-         else:
-             line_type = "data_row"
-             data_row = line.split('","')
-             if len("".join(data_row)) == 0:
-                 # print(f"Line {i} is empty. Last Group: {group}")
-                 continue
-             elif len(data_row) != len(headers):
-                 print(
-                     f"\n🚨 CAUTION: The number of columns on line {i + 1} ({len(data_row)}) doesn't match the number of columns of group {group} ({len(headers)})!",
-                     f"{group} headers: {headers}",
-                     f"Line {i + 1}: {data_row}",
-                     sep="\n",
-                     end="\n\n",
-                 )
-                 continue
-             # Append continued lines (<CONT>) to the last data_row
-             elif data_row[0] == '"<CONT>':
-                 last_data_row = group_data[-1]
-                 for j, data in enumerate(data_row):
-                     data = data.strip(' "')
-                     if data and data != "<CONT>":
-                         if last_data_row[j] is None:
-                             # Last data row didn't contain data for this column
-                             last_data_row[j] = coerce_string(data)
-                         else:
-                             # Last data row already contains data for this column
-                             last_data_row[j] = str(last_data_row[j]) + data
-             # Lines that are assumed to contain valid data are added to the group data
-             else:
-                 cleaned_data_row = []
-                 for data in data_row:
-                     cleaned_data_row.append(coerce_string(data.strip(' "')))
-                 group_data.append(cleaned_data_row)
-
-     # Also add the last group's df to the dictionary of AGS dfs
-     ags3_dfs[group] = pd.DataFrame(group_data, columns=headers).dropna(
-         axis=1, how="all"
-     )
-
-     if not group:
-         print(
-             '🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
-         )
-
-     return ags3_dfs
-
-
- def ags4_to_dfs(ags4_data: str) -> Dict[str, pd.DataFrame]:
-     """Converts AGS 4 data to a dictionary of pandas DataFrames.
-
-     Args:
-         ags4_data (str): The AGS 4 data as a string.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary of pandas DataFrames, where each
-             key is a group name from the AGS 4 data, and the corresponding value
-             is a pandas DataFrame containing the data for that group.
-     """
-     # AGS4.AGS4_to_dataframe accepts a file object, not a data string
-     ags4_file = io.StringIO(ags4_data)
-
-     ags4_tups = AGS4.AGS4_to_dataframe(ags4_file)
-
-     ags4_dfs = {}
-     for group, df in ags4_tups[0].items():
-         df = df.loc[2:].drop(columns=["HEADING"]).reset_index(drop=True)
-         ags4_dfs[group] = df
-
-     return ags4_dfs
-
-
- def coerce_string(string: str) -> Union[None, bool, float, str]:
-     if string.lower() in {"none", "null", ""}:
-         return None
-     elif string.lower() == "true":
-         return True
-     elif string.lower() == "false":
-         return False
-     else:
-         try:
-             value = float(string)
-             if value.is_integer():
-                 return int(value)
-             else:
-                 return value
-         except ValueError:
-             return string
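
For orientation, a minimal sketch of how this removed reader was used. The file name example.ags and the expected group names are illustrative assumptions, not taken from the package:

    from pathlib import Path

    # Read an AGS file from disk; "example.ags" is an assumed, illustrative path.
    ags_text = Path("example.ags").read_text(encoding="utf-8")
    ags_dfs = ags_to_dfs(ags_text)  # e.g. {"PROJ": ..., "HOLE": ..., "SAMP": ...}

    # coerce_string() turned AGS string values into Python types:
    assert coerce_string("1.0") == 1      # whole-number floats become ints
    assert coerce_string("2.5") == 2.5    # other numbers become floats
    assert coerce_string("TRUE") is True  # booleans are recognized case-insensitively
    assert coerce_string("") is None      # "", "none" and "null" become None
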
@@ -1,264 +0,0 @@
- """Transforms, i.e. maps, AGS data to Bedrock's schema."""
-
- from typing import Dict
-
- import pandas as pd
- import pandera as pa
- from pandera.typing import DataFrame
- from pyproj import CRS
-
- from bedrock_ge.gi.ags.schemas import Ags3HOLE, Ags3SAMP, BaseSAMP
- from bedrock_ge.gi.schemas import BaseInSitu, BaseLocation, BaseSample, Project
- from bedrock_ge.gi.validate import check_foreign_key
-
-
- # What this function really does is add the CRS and Bedrock columns:
- # - `project_uid`
- # - `location_uid`
- # - `sample_id`
- # - `sample_uid`
- # - `depth_to_`
- # There really isn't any mapping going on here...
- # TODO: Make sure that the name of the function and docstrings reflect this.
- def ags3_db_to_no_gis_brgi_db(
-     ags3_db: Dict[str, pd.DataFrame], crs: CRS
- ) -> Dict[str, pd.DataFrame]:
-     """Maps a database with GI data from a single AGS 3 file to a database with Bedrock's schema.
-
-     This function converts an AGS 3 formatted geotechnical database into Bedrock's
-     internal database format, maintaining data relationships and structure. It handles
-     various types of geotechnical data, including project information, locations,
-     samples, lab tests, and in-situ measurements.
-
-     The mapping process:
-     1. Project Data: Converts the AGS 3 'PROJ' group to Bedrock's 'Project' table
-     2. Location Data: Converts the AGS 3 'HOLE' group to Bedrock's 'Location' table
-     3. Sample Data: Converts the AGS 3 'SAMP' group to Bedrock's 'Sample' table
-     4. Other Data: Handles lab tests, in-situ measurements, and miscellaneous tables
-
-     Args:
-         ags3_db (Dict[str, pd.DataFrame]): A dictionary containing AGS 3 data tables,
-             where keys are table names and values are pandas DataFrames.
-         crs (CRS): Coordinate Reference System for the project data.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary containing Bedrock GI database tables,
-             where keys are table names and values are transformed pandas DataFrames.
-
-     Note:
-         The function creates a copy of the input database to avoid modifying the
-         original data. It performs foreign key checks to maintain data integrity
-         during the mapping.
-     """
-     # Make sure that the AGS 3 database is not changed outside this function.
-     ags3_db = ags3_db.copy()
-
-     print("Transforming AGS 3 groups to Bedrock tables...")
-
-     # Instantiate Bedrock dictionary of pd.DataFrames
-     brgi_db = {}
-
-     # Project
-     print("Transforming AGS 3 group 'PROJ' to Bedrock GI 'Project' table...")
-     brgi_db["Project"] = ags_proj_to_brgi_project(ags3_db["PROJ"], crs)
-     project_uid = brgi_db["Project"]["project_uid"].item()
-     del ags3_db["PROJ"]
-
-     # Locations
-     if "HOLE" in ags3_db.keys():
-         print("Transforming AGS 3 group 'HOLE' to Bedrock GI 'Location' table...")
-         brgi_db["Location"] = ags3_hole_to_brgi_location(ags3_db["HOLE"], project_uid)  # type: ignore
-         del ags3_db["HOLE"]
-     else:
-         print(
-             "Your AGS 3 data doesn't contain a HOLE group, i.e. Ground Investigation locations."
-         )
-
-     # Samples
-     if "SAMP" in ags3_db.keys():
-         print("Transforming AGS 3 group 'SAMP' to Bedrock GI 'Sample' table...")
-         check_foreign_key("HOLE_ID", brgi_db["Location"], ags3_db["SAMP"])
-         ags3_db["SAMP"] = generate_sample_ids_for_ags3(ags3_db["SAMP"])  # type: ignore
-         brgi_db["Sample"] = ags3_samp_to_brgi_sample(ags3_db["SAMP"], project_uid)  # type: ignore
-         del ags3_db["SAMP"]
-     else:
-         print("Your AGS 3 data doesn't contain a SAMP group, i.e. samples.")
-
-     # The rest of the tables: 1. Lab Tests, 2. In-Situ Measurements, 3. Other tables
-     for group, group_df in ags3_db.items():
-         if "SAMP_REF" in ags3_db[group].columns:
-             print(f"Project {project_uid} has lab test data: {group}.")
-             brgi_db[group] = group_df  # type: ignore
-         elif "HOLE_ID" in ags3_db[group].columns:
-             print(
-                 f"Transforming AGS 3 group '{group}' to Bedrock GI 'InSitu_{group}' table..."
-             )
-             check_foreign_key("HOLE_ID", brgi_db["Location"], group_df)
-             brgi_db[f"InSitu_{group}"] = ags3_in_situ_to_brgi_in_situ(  # type: ignore
-                 group, group_df, project_uid
-             )
-         else:
-             brgi_db[group] = ags3_db[group]  # type: ignore
-
-     print(
-         "Done",
-         "The Bedrock database contains the following tables:",
-         list(brgi_db.keys()),
-         sep="\n",
-         end="\n\n",
-     )
-     return brgi_db  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags_proj_to_brgi_project(ags_proj: pd.DataFrame, crs: CRS) -> DataFrame[Project]:
-     """Maps the AGS 3 'PROJ' group to a Bedrock GI 'Project' table.
-
-     Args:
-         ags_proj (pd.DataFrame): The AGS 3 'PROJ' group.
-         crs (CRS): The coordinate reference system of the project.
-
-     Returns:
-         DataFrame[Project]: The Bedrock GI 'Project' table.
-     """
-     if "project_uid" not in ags_proj.columns:
-         ags_proj["project_uid"] = ags_proj["PROJ_ID"]
-
-     ags_proj["crs_wkt"] = crs.to_wkt()
-
-     return ags_proj  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags3_hole_to_brgi_location(
-     ags3_hole: DataFrame[Ags3HOLE], project_uid: str
- ) -> DataFrame[BaseLocation]:
-     brgi_location = ags3_hole
-     brgi_location["project_uid"] = project_uid
-     brgi_location["location_source_id"] = ags3_hole["HOLE_ID"]
-     brgi_location["location_uid"] = (
-         ags3_hole["HOLE_ID"] + "_" + ags3_hole["project_uid"]
-     )
-     brgi_location["location_type"] = ags3_hole["HOLE_TYPE"]
-     brgi_location["easting"] = ags3_hole["HOLE_NATE"]
-     brgi_location["northing"] = ags3_hole["HOLE_NATN"]
-     brgi_location["ground_level_elevation"] = ags3_hole["HOLE_GL"]
-     brgi_location["depth_to_base"] = ags3_hole["HOLE_FDEP"]
-
-     return ags3_hole  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags3_samp_to_brgi_sample(
-     ags3_samp: DataFrame[Ags3SAMP],
-     project_uid: str,
- ) -> DataFrame[BaseSample]:
-     brgi_sample = ags3_samp
-     brgi_sample["project_uid"] = project_uid
-     brgi_sample["location_source_id"] = ags3_samp["HOLE_ID"]
-     brgi_sample["location_uid"] = ags3_samp["HOLE_ID"] + "_" + ags3_samp["project_uid"]
-     brgi_sample["sample_source_id"] = ags3_samp["sample_id"]
-     brgi_sample["sample_uid"] = ags3_samp["sample_id"] + "_" + ags3_samp["project_uid"]
-     brgi_sample["depth_to_top"] = ags3_samp["SAMP_TOP"]
-     brgi_sample["depth_to_base"] = ags3_samp["SAMP_BASE"]
-
-     return brgi_sample  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def ags3_in_situ_to_brgi_in_situ(
-     group_name: str, ags3_in_situ: pd.DataFrame, project_uid: str
- ) -> DataFrame[BaseInSitu]:
-     """Maps AGS 3 in-situ measurement data to Bedrock's in-situ data schema.
-
-     Args:
-         group_name (str): The AGS 3 group name.
-         ags3_in_situ (pd.DataFrame): The AGS 3 in-situ data.
-         project_uid (str): The project uid.
-
-     Returns:
-         DataFrame[BaseInSitu]: The Bedrock in-situ data.
-     """
-     brgi_in_situ = ags3_in_situ
-     brgi_in_situ["project_uid"] = project_uid
-     brgi_in_situ["location_uid"] = ags3_in_situ["HOLE_ID"] + "_" + project_uid
-
-     top_depth = f"{group_name}_TOP"
-     base_depth = f"{group_name}_BASE"
-
-     if group_name == "CDIA":
-         top_depth = "CDIA_CDEP"
-     elif group_name == "FLSH":
-         top_depth = "FLSH_FROM"
-         base_depth = "FLSH_TO"
-     elif group_name == "CORE":
-         base_depth = "CORE_BOT"
-     elif group_name == "HDIA":
-         top_depth = "HDIA_HDEP"
-     elif group_name == "PTIM":
-         top_depth = "PTIM_DEP"
-     elif group_name == "IVAN":
-         top_depth = "IVAN_DPTH"
-     elif group_name == "STCN":
-         top_depth = "STCN_DPTH"
-     elif group_name == "POBS" or group_name == "PREF":
-         top_depth = "PREF_TDEP"
-     elif group_name == "DREM":
-         top_depth = "DREM_DPTH"
-     elif group_name == "PRTD" or group_name == "PRTG" or group_name == "PRTL":
-         top_depth = "PRTD_DPTH"
-     elif group_name == "IPRM":
-         if top_depth not in ags3_in_situ.columns:
-             print(
-                 "\n🚨 CAUTION: The IPRM group in this AGS 3 file does not contain an 'IPRM_TOP' heading!",
-                 "🚨 CAUTION: Making the 'IPRM_BASE' heading the 'depth_to_top'...",
-                 sep="\n",
-                 end="\n\n",
-             )
-             top_depth = "IPRM_BASE"
-             base_depth = "None"
-
-     brgi_in_situ["depth_to_top"] = ags3_in_situ[top_depth]
-     brgi_in_situ["depth_to_base"] = ags3_in_situ.get(base_depth)
-
-     return brgi_in_situ  # type: ignore
-
-
- @pa.check_types(lazy=True)
- def generate_sample_ids_for_ags3(
-     ags3_with_samp: DataFrame[BaseSAMP],
- ) -> DataFrame[Ags3SAMP]:
-     ags3_with_samp["sample_id"] = (
-         ags3_with_samp["SAMP_REF"].astype(str)
-         + "_"
-         + ags3_with_samp["SAMP_TYPE"].astype(str)
-         + "_"
-         + ags3_with_samp["SAMP_TOP"].astype(str)
-         + "_"
-         + ags3_with_samp["HOLE_ID"].astype(str)
-     )
-     # try:
-     #     # SAMP_REF really should not be able to be null... Right?
-     #     # Maybe SAMP_REF can be null when the
-     #     Ags3SAMP_REF.validate(ags3_samp)
-     #     print(
-     #         "Generating unique sample IDs for AGS 3 data: 'sample_id'='{SAMP_REF}_{HOLE_ID}'"
-     #     )
-     #     ags3_samp["sample_id"] = (
-     #         ags3_samp["SAMP_REF"].astype(str) + "_" + ags3_samp["HOLE_ID"].astype(str)
-     #     )
-     # except pa.errors.SchemaError as exc:
-     #     print(f"🚨 CAUTION: The AGS 3 SAMP group contains rows without SAMP_REF:\n{exc}")
-
-     #     if "non-nullable series 'SAMP_REF'" in str(exc):
-     #         print(
-     #             "\nTo ensure unique sample IDs: 'sample_id'='{SAMP_REF}_{SAMP_TOP}_{HOLE_ID}'\n"
-     #         )
-     #         ags3_samp["sample_id"] = (
-     #             ags3_samp["SAMP_REF"].astype(str)
-     #             + "_"
-     #             + ags3_samp["SAMP_TOP"].astype(str)
-     #             + "_"
-     #             + ags3_samp["HOLE_ID"].astype(str)
-     #         )
-
-     return ags3_with_samp  # type: ignore
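
A sketch of how this removed mapping function fit together with the reader above. The EPSG code 27700 (British National Grid) is an assumed example, and ags_text is the AGS 3 string from the previous sketch:

    from pyproj import CRS

    # Parse AGS 3 groups, then map them to Bedrock's (non-GIS) schema.
    ags3_dfs = ags_to_dfs(ags_text)
    brgi_db = ags3_db_to_no_gis_brgi_db(ags3_dfs, crs=CRS.from_epsg(27700))

    # Typical resulting keys: "Project", "Location", "Sample", "InSitu_<GROUP>", ...
    print(list(brgi_db.keys()))
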
@@ -1,25 +0,0 @@
- import pandas as pd
-
-
- def check_ags_proj_group(ags_proj: pd.DataFrame) -> bool:
-     """Checks whether the AGS 3 or AGS 4 PROJ group is correct.
-
-     Args:
-         ags_proj (pd.DataFrame): The DataFrame with the PROJ group.
-
-     Raises:
-         ValueError: If the AGS 3 or AGS 4 PROJ group is not correct.
-
-     Returns:
-         bool: True if the AGS 3 or AGS 4 PROJ group is correct.
-     """
-     if len(ags_proj) != 1:
-         raise ValueError("The PROJ group must contain exactly one row.")
-
-     project_id = ags_proj["PROJ_ID"].iloc[0]
-     if not project_id:
-         raise ValueError(
-             'The project ID ("PROJ_ID" in the "PROJ" group) is missing from the AGS data.'
-         )
-
-     return True
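
The removed validator had a small contract, sketched below with toy DataFrames (illustrative data, not from the package):

    import pandas as pd

    # A single row with a non-empty PROJ_ID passes:
    assert check_ags_proj_group(pd.DataFrame({"PROJ_ID": ["P001"]})) is True

    # More than one row (or a missing PROJ_ID) raises ValueError:
    try:
        check_ags_proj_group(pd.DataFrame({"PROJ_ID": ["P001", "P002"]}))
    except ValueError as err:
        print(err)  # The PROJ group must contain exactly one row.
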
@@ -1,36 +0,0 @@
- {
-     "Location": {
-         "attributes": {},
-         "geometry_type": "Point / 3D LineString",
-         "children": {
-             "MaterialClassification": {
-                 "attributes": {},
-                 "geometry_type": "3D LineString"
-             },
-             "SPT": {
-                 "attributes": {},
-                 "geometry_type": "3D Point"
-             },
-             "RQD": {
-                 "attributes": {},
-                 "geometry_type": "3D LineString"
-             },
-             "OtherInSituTests": {
-                 "attributes": {},
-                 "geometry_type": "3D Point or 3D LineString"
-             },
-             "Sample": {
-                 "attributes": {},
-                 "geometry_type": "3D Point",
-                 "children": {
-                     "grainSizeDistribution": {},
-                     "atterbergLimits": {},
-                     "oedometerTest": {},
-                     "triaxialTest": {},
-                     "unconfinedCompressiveStrength": {},
-                     "otherLabTests": {}
-                 }
-             }
-         }
-     }
- }
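
This removed JSON file encodes the parent-child hierarchy of Bedrock GI tables and their geometry types. A sketch of walking that hierarchy; the file name brgi_hierarchy.json is an assumption, only the structure above is given:

    import json

    def walk(node: dict, depth: int = 0) -> None:
        # Print each table name with its geometry type, indented by nesting depth.
        for name, spec in node.items():
            geometry = spec.get("geometry_type", "-") if isinstance(spec, dict) else "-"
            print("  " * depth + f"{name}: {geometry}")
            if isinstance(spec, dict):
                walk(spec.get("children", {}), depth + 1)

    with open("brgi_hierarchy.json") as file:  # assumed file name
        walk(json.load(file))
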
@@ -1,38 +0,0 @@
- from typing import Dict, Union
-
- import geopandas as gpd
- import pandas as pd
-
-
- def concatenate_databases(
-     db1: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
-     db2: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
- ) -> Dict[str, pd.DataFrame]:
-     """Concatenates two dictionaries of DataFrames into one dict of DataFrames.
-
-     The function concatenates the pandas DataFrames of the second dict of
-     DataFrames to the first dict of DataFrames for the keys they have in common.
-     Keys that are unique to either dictionary are also included in the final
-     concatenated dictionary.
-
-     Args:
-         db1 (Dict[str, pd.DataFrame]): A dictionary of pandas DataFrames, i.e. a database.
-         db2 (Dict[str, pd.DataFrame]): A dictionary of pandas DataFrames, i.e. a database.
-
-     Returns:
-         Dict[str, pd.DataFrame]: A dictionary of concatenated pandas DataFrames.
-     """
-     # Create a new dict to store the concatenated DataFrames
-     concatenated_dict = {key: df.dropna(axis=1, how="all") for key, df in db1.items()}
-
-     # Iterate over the keys in the second dict
-     for key, df in db2.items():
-         df = df.dropna(axis=1, how="all")
-         # If the key is also in the first dict, concatenate the DataFrames,
-         # using the already-cleaned copy so all-NaN columns stay dropped
-         if key in db1:
-             concatenated_dict[key] = pd.concat(
-                 [concatenated_dict[key], df], ignore_index=True
-             )
-         # If the key is not in the first dict, just add it to the new dict
-         else:
-             concatenated_dict[key] = df
-
-     return concatenated_dict
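
A sketch of the removed concatenation helper in use, with toy single-column DataFrames (illustrative data):

    import pandas as pd

    db1 = {"Location": pd.DataFrame({"location_uid": ["BH1_P1"]})}
    db2 = {
        "Location": pd.DataFrame({"location_uid": ["BH2_P2"]}),
        "Sample": pd.DataFrame({"sample_uid": ["S1_P2"]}),
    }

    merged = concatenate_databases(db1, db2)
    # merged["Location"] now has two rows; merged["Sample"] is taken from db2 as-is.
    print(merged["Location"])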