PyPI - buckpy-dev - Versions diffs - 0.0.1__py3-none-any.whl - Mend

buckpy-dev 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

buckpy/__init__.py +13 -0
buckpy/_static/logo.png +0 -0
buckpy/_static/logo.svg +4 -0
buckpy/buckfast_input_file_writer_.py +1233 -0
buckpy/buckpy.py +132 -0
buckpy/buckpy_gui.py +359 -0
buckpy/buckpy_postprocessing.py +1305 -0
buckpy/buckpy_preprocessing_current.py +1142 -0
buckpy/buckpy_preprocessing_legacy.py +900 -0
buckpy/buckpy_solver.py +777 -0
buckpy/buckpy_variables.py +98 -0
buckpy/buckpy_visualisation.py +419 -0
buckpy_dev-0.0.1.dist-info/METADATA +51 -0
buckpy_dev-0.0.1.dist-info/RECORD +18 -0
buckpy_dev-0.0.1.dist-info/WHEEL +5 -0
buckpy_dev-0.0.1.dist-info/entry_points.txt +2 -0
buckpy_dev-0.0.1.dist-info/licenses/LICENSE +674 -0
buckpy_dev-0.0.1.dist-info/top_level.txt +1 -0

buckpy/buckpy_preprocessing_legacy.py ADDED Viewed

@@ -0,0 +1,900 @@
+"""
+This module contains the pre-processing functions of BuckPy.
+"""
+import time
+import numpy as np
+import pandas as pd
+from scipy.stats import lognorm
+import pysubsea as ss
+from .buckpy_variables import KP_TO
+def calc_expand_kp(df):
+    """
+    Add KP points at every multiple of 1000 to a route table and size its elements.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Route points with a 'KP To' column, which is renamed to 'KP From' and used as the
+        start KP of each segment.
+    Returns
+    -------
+    frame : pandas.DataFrame
+        One row per segment with the columns 'KP From', 'KP To', 'Length', 'Elem No.' and
+        'Elem Size' added or updated.
+    Notes
+    -----
+    Extra points are inserted from 1000 up to the largest multiple of 1000 that does not
+    exceed the maximum KP. Each segment is then divided into equal elements whose size
+    is at most 100.
+    """
+    # The incoming 'KP To' values are the start KP of each segment
+    frame = df.rename(columns={'KP To': 'KP From'})
+    # Multiples of 1000 from 1000 up to the last one not exceeding the maximum KP
+    last_round_kp = np.floor(frame['KP From'].max() / 1000.0) * 1000.0
+    round_kps = np.arange(1000, last_round_kp + 1.0, 1000)
+    filler = pd.DataFrame({'Point ID From': [np.nan] * len(round_kps), 'KP From': round_kps})
+    # Merge the extra points in, order by KP and keep a single row per KP value
+    frame = pd.concat([frame, filler], ignore_index=True)
+    frame = frame.sort_values(by='KP From')
+    frame = frame.drop_duplicates('KP From')
+    frame = frame.reset_index(drop=True)
+    # Inserted points inherit the attributes of the preceding route point
+    frame = frame.ffill()
+    # The end of a segment is the start of the next one; the last row has no end
+    frame['KP To'] = frame['KP From'].shift(-1)
+    frame = frame.dropna()
+    frame['Length'] = frame['KP To'] - frame['KP From']
+    # Smallest number of equal elements that keeps the element size at or below 100
+    frame['Elem No.'] = np.ceil(frame['Length'] / 100.0)
+    frame['Elem Size'] = frame['Length'] / frame['Elem No.']
+    return frame
+def calc_element_array(df):
+    """
+    Build the sorted array of element-boundary KP values for all route segments.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Segment table with 'KP From', 'KP To' and 'Elem No.' columns.
+    Returns
+    -------
+    elem_array : numpy.ndarray
+        1D array of unique, ascending KP values; each segment contributes
+        'Elem No.' + 1 evenly spaced points between 'KP From' and 'KP To'.
+    """
+    # No segments means no element boundaries (np.concatenate rejects an empty list)
+    if df.empty:
+        return np.empty(0)
+    # One evenly spaced array per segment, end points included
+    segments = [
+        np.linspace(kp_from, kp_to, int(elem_no + 1.0))
+        for kp_from, kp_to, elem_no in zip(df['KP From'], df['KP To'], df['Elem No.'])
+    ]
+    # Adjacent segments share an end point: np.unique removes the repeats and sorts
+    elem_array = np.unique(np.concatenate(segments))
+    # Drop any NaN that came from incomplete segment bounds
+    return elem_array[~np.isnan(elem_array)]
+def calc_kp_interpolation(elem_array, df_oper):
+    """
+    Linearly interpolate the operating profile onto the element KP values.
+    Parameters
+    ----------
+    elem_array : numpy.ndarray
+        KP values of the elements.
+    df_oper : pandas.DataFrame
+        Operating profile with a 'KP' column and the pressure, temperature and 'RLT'
+        columns listed below.
+    Returns
+    -------
+    df : pandas.DataFrame
+        'KP' followed by the interpolated pressure, temperature and 'RLT' columns.
+    """
+    # Profile columns, in the order they appear in the output
+    profile_columns = (
+        'Pressure Installation', 'Pressure Hydrotest', 'Pressure Operation',
+        'Temperature Installation', 'Temperature Hydrotest', 'Temperature Operation',
+        'RLT',
+    )
+    df = pd.DataFrame({'KP': elem_array})
+    for column in profile_columns:
+        df[column] = np.interp(df['KP'], df_oper['KP'], df_oper[column])
+    return df
+def calc_operating_profiles(df, df_route, pipeline_set, loadcase_set):
+    """
+    Interpolate the operating profile of one pipeline and loadcase set onto the route mesh.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Operating profiles data for all pipelines and loadcase sets.
+    df_route : pandas.DataFrame
+        Route data with 'Point ID From' and 'KP To' columns.
+    pipeline_set : str
+        Identifier of the pipeline set.
+    loadcase_set : str
+        Identifier of the loadcase set.
+    Returns
+    -------
+    df_interp : pandas.DataFrame
+        'Pipeline', 'Loadcase Set', 'KP' and the interpolated operating profile columns,
+        one row per element boundary.
+    """
+    # Operating profile rows of the requested pipeline and loadcase set
+    is_selected = (df['Pipeline'] == pipeline_set) & (df['Loadcase Set'] == loadcase_set)
+    df_profile = df.loc[is_selected]
+    # Only the point identifier and its KP are needed to build the mesh
+    route_points = df_route[['Point ID From', 'KP To']].reset_index(drop=True)
+    # Append a closing 'End' row so the last route point also gets an end KP
+    terminator = pd.DataFrame({'Point ID From': 'End', 'KP To': np.nan}, index=[99999])
+    route_points = pd.concat([route_points, terminator], ignore_index=True)
+    # Shift the KP down one row so each row holds its start KP; gaps become 0.0
+    route_points['KP To'] = route_points['KP To'].shift().fillna(0.0)
+    # Insert the points at multiples of 1000 and size the elements
+    route_points = calc_expand_kp(route_points)
+    # KP of every element boundary along the route
+    elem_array = calc_element_array(route_points)
+    # Operating profile evaluated at the element boundaries
+    df_interp = calc_kp_interpolation(elem_array, df_profile)
+    # Label every row with the pipeline and loadcase set, as the leading two columns
+    n_rows = df_interp.shape[0]
+    df_interp.insert(0, 'Pipeline', [pipeline_set] * n_rows)
+    df_interp.insert(1, 'Loadcase Set', [loadcase_set] * n_rows)
+    return df_interp
+def calc_route_data(df, layout_set, pipeline_set):
+    """
+    Split the route table of one pipeline and layout set into interior rows and end rows.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Route data for all pipelines and layout sets.
+    layout_set : str
+        Identifier of the layout set.
+    pipeline_set : str
+        Identifier of the pipeline set.
+    Returns
+    -------
+    df_route : pandas.DataFrame
+        Rows between the first and last selected rows, with 'KP From' and 'KP To' as float.
+    df_ends : pandas.DataFrame
+        First and last selected rows, restricted to 'Route Type', 'KP From', 'KP To' and
+        the three 'Reaction ...' columns, with 'Route Type' as float.
+    Notes
+    -----
+    The end 'Route Type' is encoded as 1.0 for 'Spool' and 2.0 for 'Fixed'. Any other
+    value must already be convertible to float, otherwise the conversion raises. Both
+    returned frames are independent copies; the input frame is not modified.
+    """
+    # Select the rows of the requested pipeline and layout set once
+    selected = df.loc[(df['Pipeline'] == pipeline_set) & (df['Layout Set'] == layout_set)]
+    end_columns = ['Route Type', 'KP From', 'KP To', 'Reaction Installation',
+                   'Reaction Hydrotest', 'Reaction Operation']
+    # The first and last rows describe the pipeline ends, the rest is the route itself.
+    # Copies are taken so the assignments below never write through to a slice of `df`.
+    df_ends = selected.iloc[[0, -1]][end_columns].copy()
+    df_route = selected.iloc[1:-1].copy()
+    # Encode the end type numerically; unmapped values are passed through to astype
+    end_type_codes = {'Spool': 1.0, 'Fixed': 2.0}
+    df_ends['Route Type'] = df_ends['Route Type'].map(
+        lambda route_type: end_type_codes.get(route_type, route_type)).astype(float)
+    # Convert KP From and KP To to float
+    df_route[['KP From', 'KP To']] = df_route[['KP From', 'KP To']].astype(float)
+    return df_route, df_ends
+def calc_pipe_data(df, pipeline_set):
+    """
+    Compute section properties and characteristic buckling forces for one pipeline set.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Pipe data with 'Pipeline', 'OD', 'WT', 'E', 'sw Hydrotest' and 'sw Operation'
+        columns.
+    pipeline_set : str
+        Identifier of the pipeline set.
+    Returns
+    -------
+    df : pandas.DataFrame
+        Rows of the selected pipeline with the added columns 'ID', 'As', 'Ai', 'I',
+        'SChar HT' and 'SChar OP'.
+    Notes
+    -----
+    The rows are filtered first and copied, so the properties are computed only for the
+    selected pipeline and the caller's DataFrame is left untouched. The characteristic
+    buckling forces are 2.26 * (E*As)**0.25 * (E*I)**0.25 * sw**0.5, evaluated with the
+    hydrotest and operation values of sw respectively.
+    """
+    # Work on an independent copy of the selected pipeline only
+    df = df.loc[df['Pipeline'] == pipeline_set].copy()
+    # Inner diameter (ID)
+    df['ID'] = df['OD'] - 2.0 * df['WT']
+    # Cross-sectional area of the wall (As)
+    df['As'] = np.pi / 4.0 * (df['OD'] ** 2 - df['ID'] ** 2)
+    # Inner (bore) area (Ai)
+    df['Ai'] = np.pi / 4.0 * df['ID'] ** 2
+    # Moment of inertia (I)
+    df['I'] = np.pi / 64.0 * (df['OD'] ** 4 - df['ID'] ** 4)
+    # Part of the characteristic buckling force shared by hydrotest and operation
+    buckling_factor = 2.26 * (df['E'] * df['As']) ** 0.25 * (df['E'] * df['I']) ** 0.25
+    # Hydrotest and operation characteristic buckling forces
+    df['SChar HT'] = buckling_factor * df['sw Hydrotest'] ** 0.5
+    df['SChar OP'] = buckling_factor * df['sw Operation'] ** 0.5
+    return df
+def calc_oper_data(df, df_route_ends, pipeline_set, loadcase_set):
+    """
+    Convert the operating data at element boundaries into per-element averages.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Operating data with 'Pipeline', 'Loadcase Set' and 'KP' columns.
+    df_route_ends : pandas.DataFrame
+        End boundary conditions; the 'KP To' of its last row is the upper KP limit.
+    pipeline_set : str
+        Identifier of the pipeline set.
+    loadcase_set : str
+        Identifier of the loadcase set.
+    Returns
+    -------
+    pair_mean : pandas.DataFrame
+        One row per pair of consecutive input rows: every column is the mean of the
+        pair and 'Length' is the difference between the two KP values.
+    Notes
+    -----
+    The first row of each group has no predecessor and is removed together with any
+    other row containing NaN.
+    """
+    # Keep the requested pipeline / loadcase set up to the KP of the last route end
+    kp_limit = df_route_ends['KP To'].iloc[-1]
+    in_scope = ((df['Pipeline'] == pipeline_set) &
+                (df['Loadcase Set'] == loadcase_set) &
+                (df['KP'] <= kp_limit))
+    grouped = df.loc[in_scope].groupby(['Pipeline', 'Loadcase Set'])
+    # Mean over each pair of consecutive rows
+    pair_mean = grouped.rolling(2).mean()
+    # Span (max - min) over the same pairs; for 'KP' this is the element length
+    pair_span = grouped.rolling(2).max() - grouped.rolling(2).min()
+    pair_mean['Length'] = pair_span['KP']
+    # Bring the group keys back as columns and discard the original row index
+    pair_mean = pair_mean.reset_index().drop(columns='level_2')
+    # The first row of each group has no pair and is NaN
+    return pair_mean.dropna()
+def calc_soil_data(df, pipeline_set):
+    """
+    Attach sampled friction factor distributions to the soil table and select one pipeline.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Soil data with 'Pipeline' and the mean / STD columns of the axial, lateral
+        hydrotest and lateral operation friction factors.
+    pipeline_set : str
+        Identifier of the pipeline set.
+    Returns
+    -------
+    df : pandas.DataFrame
+        Rows of the selected pipeline, including the value and CDF array columns.
+    Notes
+    -----
+    For every row and every friction factor, `calc_lognorm_soil` provides an array of
+    values and the matching CDF array. The array columns are added to the passed-in
+    DataFrame itself, for all pipelines, before the pipeline filter is applied.
+    """
+    # (mean column, STD column, value array column, CDF array column) per friction factor
+    friction_specs = (
+        ('Axial Mean', 'Axial STD', 'muax Array', 'muax CDF Array'),
+        ('Lateral Hydrotest Mean', 'Lateral Hydrotest STD', 'mul HT Array', 'mul HT CDF Array'),
+        ('Lateral Operation Mean', 'Lateral Operation STD', 'mul OP Array', 'mul OP CDF Array'),
+    )
+    for mean_col, std_col, values_col, cdf_col in friction_specs:
+        # One (values, cdf) pair per soil row
+        sampled = df.apply(
+            lambda row, m=mean_col, s=std_col: calc_lognorm_soil(row[m], row[s]), axis=1)
+        # Split the pairs into the two array columns
+        df[values_col], df[cdf_col] = zip(*sampled.apply(np.array))
+    # Keep the soil data of the requested pipeline set only
+    return df[df['Pipeline'] == pipeline_set]
+def calc_scenario_data(df_scen, df_route, df_pipe, df_oper, df_soil):
+    """
+    Assemble the per-element scenario table from route, pipe, operating and soil data.
+    Parameters
+    ----------
+    df_scen : pandas.DataFrame
+        DataFrame containing scenario data. Only its first row is read, to label the output.
+    df_route : pandas.DataFrame
+        DataFrame containing route data, matched to the elements on 'Pipeline' and 'KP From'.
+    df_pipe : pandas.DataFrame
+        DataFrame containing pipe data, joined on 'Pipeline' and 'Pipe Set'.
+    df_oper : pandas.DataFrame
+        DataFrame containing operating data per element, keyed by 'Pipeline' and 'KP'.
+    df_soil : pandas.DataFrame
+        DataFrame containing soil data, joined on 'Pipeline' and 'Friction Set'.
+    Returns
+    -------
+    df: pandas.DataFrame
+        DataFrame containing the calculated scenario data.
+    Notes
+    -----
+    Each operating row is matched to the route row with the closest 'KP From' at or below
+    its 'KP', then joined with the pipe and soil data. The function adds the HOOS
+    distribution arrays, the 'FRF ...' and 'Sv ...' force columns and the section
+    bookkeeping columns, keeps a fixed subset of columns, encodes 'Route Type' as a number
+    and fills missing values with 0. The scenario labels are copied from the first row of
+    `df_scen`.
+    NOTE(review): because only the first row of `df_scen` is used, the caller presumably
+    has to pass the row of the selected scenario first - confirm against the caller.
+    """
+    # Merge operating data with route data based on 'KP'
+    # (backward search: the route row whose 'KP From' is the closest one not above 'KP')
+    df = pd.merge_asof(left=df_oper, right=df_route, left_on='KP', right_on='KP From',
+                             direction='backward', left_by='Pipeline', right_by='Pipeline')
+    # Merge resulting DataFrame with pipe data
+    df = pd.merge(left=df, right=df_pipe, left_on=['Pipeline', 'Pipe Set'],
+                       right_on=['Pipeline', 'Pipe Set'])
+    # Merge resulting DataFrame with soil data
+    df = pd.merge(left=df, right=df_soil, left_on=['Pipeline', 'Friction Set'],
+                       right_on=['Pipeline', 'Friction Set'])
+    # Compute the HOOS distribution of every element and assign the value and CDF arrays
+    # to DataFrame columns ('Route Type' is still a string here, as calc_lognorm_hoos needs)
+    df['HOOS X Array'], df['HOOS CDF Array'] = zip(*df.apply(
+        lambda x: calc_lognorm_hoos(x['Route Type'], x['Length'], x['HOOS Mean'],
+                                     x['HOOS STD'], x['HOOS Reference Length'], x['RCM Buckling Force']), axis=1)
+                                     .apply(np.array))
+    # Compute the force columns based on calculated parameters
+    # FRF HT / FRF OP: RLT plus a temperature term, E * Alpha * As * (T - T installation),
+    # plus a pressure term, (1 - 2 * Poisson) * (P - P installation) * Ai
+    df['FRF HT'] = df['RLT'] + df['E'] * df['Alpha'] * df['As'] * (
+                df['Temperature Hydrotest'] - df['Temperature Installation']) + (
+                                1 - 2 * df['Poisson']) * (
+                                df['Pressure Hydrotest'] - df['Pressure Installation']) * df['Ai']
+    df['FRF OP'] = df['RLT'] + df['E'] * df['Alpha'] * df['As'] * (
+                df['Temperature Operation'] - df['Temperature Installation']) + (
+                                 1 - 2 * df['Poisson']) * (
+                                 df['Pressure Operation'] - df['Pressure Installation']) * df['Ai']
+    # Pressure-only part; it uses the operating pressure itself, not the difference to
+    # the installation pressure used in 'FRF OP'
+    df['FRF OP Pressure'] = df['RLT'] + (
+                         1 - 2 * df['Poisson']) * df['Pressure Operation'] * df['Ai']
+    # Temperature-only part (no RLT)
+    df['FRF OP Temperature'] = df['E'] * df['As'] * df['Alpha'] * (
+                            df['Temperature Operation'] - df['Temperature Installation'])
+    # Sv = 4 * sqrt(E * I * sw / Sleeper Height), for hydrotest and operation
+    df['Sv HT'] = 4.0 * np.sqrt(df['E'] * df['I'] * df['sw Hydrotest'] / df['Sleeper Height'])
+    df['Sv OP'] = 4.0 * np.sqrt(df['E'] * df['I'] * df['sw Operation'] / df['Sleeper Height'])
+    # Calculate section-related parameters
+    # KP measured from the start of the matched route row
+    df['KP Section'] = df['KP'] - df['KP From']
+    # Index of the HOOS reference length the element falls in, within its route row
+    df['Reference Section'] = (df['KP Section'] / df['HOOS Reference Length']).apply(np.floor)
+    # A new section starts whenever the route type or the reference section differs from
+    # the previous row; the cumulative sum turns these starts into a running section number
+    df['Section Count'] = 0.0
+    df.loc[
+        (df['Route Type'] != df['Route Type'].shift()) |
+        (df['Reference Section'] != df['Reference Section'].shift()), 'Section Count'] = 1.0
+    df['Section Count'] = df['Section Count'].cumsum()
+    # Select relevant columns and rename them for clarity
+    df = df[[
+        'KP', 'Length', 'Route Type', 'KP From', 'KP To', 'Point ID From', 'Point ID To',
+        'Bend Radius', 'muax Array', 'muax CDF Array',
+        'mul HT Array', 'mul HT CDF Array', 'mul OP Array', 'mul OP CDF Array',
+        'HOOS X Array', 'HOOS CDF Array', 'sw Installation', 'sw Hydrotest', 'sw Operation',
+        'SChar HT', 'SChar OP', 'Sv HT', 'Sv OP', 'RCM Buckling Force', 'RLT', 'FRF HT',
+        'FRF OP Pressure', 'FRF OP Temperature', 'FRF OP', 'Residual Buckle Length Hydrotest',
+        'Residual Buckle Force Hydrotest', 'Residual Buckle Length Operation',
+        'Residual Buckle Force Operation', 'Section Count', 'KP Section', 'Reference Section',
+        'Axial Mean', 'Lateral Hydrotest Mean', 'Lateral Operation Mean', 'HOOS Mean'
+    ]]
+    df = df.rename(columns={'sw Installation': 'sw IN',
+                            'sw Hydrotest': 'sw HT',
+                            'sw Operation': 'sw OP',
+                            'Residual Buckle Length Hydrotest': 'buckleLength HT',
+                            'Residual Buckle Force Hydrotest': 'buckleEAF HT',
+                            'Residual Buckle Length Operation': 'buckleLength OP',
+                            'Residual Buckle Force Operation': 'buckleEAF OP'})
+    # Convert route type strings to numerical representation
+    # (only these four exact strings are mapped; anything else must convert to float)
+    df.loc[df['Route Type'] == 'Straight', 'Route Type'] = 1
+    df.loc[df['Route Type'] == 'Bend', 'Route Type'] = 2
+    df.loc[df['Route Type'] == 'Sleeper', 'Route Type'] = 3
+    df.loc[df['Route Type'] == 'RCM', 'Route Type'] = 4
+    df['Route Type'] = df['Route Type'].astype(float)
+    # Fill missing values with 0
+    df = df.fillna(0)
+    # Add scenario parameters to the DataFrame (taken from the first row of df_scen)
+    df["Pipeline"] = df_scen["Pipeline"].values[0]
+    df["Scenario"] = df_scen["Scenario"].values[0]
+    df["Layout Set"] = df_scen["Layout Set"].values[0]
+    df["Simulations"] = df_scen["Simulations"].values[0]
+    df["Friction Sampling"] = df_scen["Friction Sampling"].values[0]
+    df["Char. Friction Prob."] = df_scen["Char. Friction Prob."].values[0]
+    return df
+def calc_monte_carlo_data(df, df_ends):
+    """
+    Convert the scenario data and pipeline end boundary conditions data to NumPy arrays for
+    Monte Carlo simulations.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        DataFrame containing the scenario data.
+    df_ends : pandas.DataFrame
+        DataFrame containing the pipeline end boundary conditions data.
+    Returns
+    -------
+    np_distr : numpy.ndarray
+        3D float array indexed as (distribution, route mesh element, sample point).
+    np_scen : numpy.ndarray
+        2D array with scenario properties (rows) along the route mesh (columns).
+    np_ends : numpy.ndarray
+        2D array with end properties (rows) for the pipeline ends.
+    Notes
+    -----
+    The distribution cells are read positionally, so `df` does not need a default
+    0..n-1 index. All distribution arrays must have the same length.
+    The arrays have the following row layout (index : meaning):
+    np_distr:
+      - 0 : MUAX_ARRAY
+      - 1 : MUAX_CDF_ARRAY
+      - 2 : MULAT_ARRAY_HT
+      - 3 : MULAT_CDF_ARRAY_HT
+      - 4 : MULAT_ARRAY_OP
+      - 5 : MULAT_CDF_ARRAY_OP
+      - 6 : HOOS_ARRAY
+      - 7 : HOOS_CDF_ARRAY
+    np_scen:
+      - 0  : KP
+      - 1  : LENGTH
+      - 2  : ROUTE_TYPE
+      - 3  : BEND_RADIUS
+      - 4  : SW_INST
+      - 5  : SW_HT
+      - 6  : SW_OP
+      - 7  : SCHAR_HT
+      - 8  : SCHAR_OP
+      - 9  : SV_HT
+      - 10 : SV_OP
+      - 11 : CBF_RCM
+      - 12 : RLT
+      - 13 : FRF_HT
+      - 14 : FRF_P_OP
+      - 15 : FRF_T_OP
+      - 16 : FRF_OP
+      - 17 : L_BUCKLE_HT
+      - 18 : EAF_BUCKLE_HT
+      - 19 : L_BUCKLE_OP
+      - 20 : EAF_BUCKLE_OP
+      - 21 : SECTION_ID
+      - 22 : SECTION_KP
+      - 23 : SECTION_REF
+      - 24 : MUAX_MEAN
+      - 25 : MULAT_HT_MEAN
+      - 26 : MULAT_OP_MEAN
+      - 27 : HOOS_MEAN
+    np_ends:
+      - 0 : ROUTE_TYPE
+      - 1 : KP_FROM
+      - 2 : KP_TO
+      - 3 : REAC_INST
+      - 4 : REAC_HT
+      - 5 : REAC_OP
+    """
+    # Columns holding one distribution array per route mesh element
+    prob_label_list = [
+        'muax Array', 'muax CDF Array', 'mul HT Array', 'mul HT CDF Array',
+        'mul OP Array', 'mul OP CDF Array', 'HOOS X Array', 'HOOS CDF Array'
+    ]
+    # Stack the distributions; tolist() reads the cells in row order whatever the index
+    np_distr = np.array(
+        [df[array_label].tolist() for array_label in prob_label_list], dtype='float64')
+    # Labels, identifiers and the distribution arrays are not scenario properties
+    # NOTE(review): 'Layout Set' is not dropped, so when `df` has that column it becomes
+    # an extra trailing row of np_scen - confirm this is intended.
+    columns_drop = [
+        "Pipeline", "Scenario", "Simulations", "Friction Sampling", "Char. Friction Prob.",
+        'KP From', 'KP To', 'Point ID From', 'Point ID To'
+    ] + prob_label_list
+    # Convert scenario properties to numpy array (properties as rows)
+    np_scen = df.drop(columns=columns_drop).to_numpy().transpose()
+    # Convert end properties to numpy array (properties as rows)
+    np_ends = df_ends.to_numpy().transpose()
+    return np_distr, np_scen, np_ends
+def calc_pp_data(df, np_array, pipeline_id, layout_set):
+    """
+    Calculate post-processing data set for a given pipeline and layout set.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        DataFrame containing post-processing data.
+    np_array : numpy.ndarray
+        NumPy array containing pipeline end boundary conditions (properties as rows,
+        pipeline ends as columns).
+    pipeline_id : str
+        Identifier of the pipeline.
+    layout_set : str
+        Identifier of the layout set.
+    Returns
+    -------
+    df : pandas.DataFrame
+        Columns 'pp_set' (int64), 'KP_from' and 'KP_to' (float64), 'description' and
+        'Characteristic VAS Probability' for the selected pipeline and layout set.
+    Notes
+    -----
+    The last 'KP_to' is raised to the KP_TO value of the last pipeline end when that
+    value is larger. The columns are cast to their numeric types before this adjustment,
+    so a float KP is never written into an integer column.
+    """
+    # Filter DataFrame based on pipeline_id and layout_set
+    df = df.loc[(df['Pipeline'] == pipeline_id) & (df['Layout Set'] == layout_set)]
+    # Reset index, rename columns, and select relevant columns
+    df = df.reset_index(drop=True).rename(columns={'Post-Processing Set': 'pp_set',
+                                                    'KP From': 'KP_from',
+                                                    'KP To': 'KP_to',
+                                                    'Post-Processing Description': 'description'})
+    # Independent copy, so the assignments below do not target a slice
+    df = df[['pp_set', 'KP_from', 'KP_to', 'description',
+             'Characteristic VAS Probability']].copy()
+    # Convert columns to appropriate numeric types
+    df['pp_set'] = df['pp_set'].astype(np.int64)
+    df['KP_from'] = df['KP_from'].astype(np.float64)
+    df['KP_to'] = df['KP_to'].astype(np.float64)
+    # Extend the last set to the end of the pipeline if it stops short of it
+    kp_max = np_array[KP_TO, -1]
+    if kp_max > df['KP_to'].iloc[-1]:
+        df.loc[df.index[-1], 'KP_to'] = kp_max
+    return df
+def run(work_dir, file_name, pipeline_id, scenario_no, bl_verbose=False):
+    """
+    Import scenario data from an Excel file and preprocess it.
+    Parameters
+    ----------
+    work_dir : str
+        Directory where the Excel file is located.
+    file_name : str
+        Name of the Excel file.
+    pipeline_id : str
+        Identifier of the pipeline.
+    scenario_no : int
+        Identifier of the scenario (converted with `int`).
+    Returns
+    -------
+    np_distr : numpy.ndarray
+        Array containing the friction factor and HOOS distributions
+    np_scen : numpy.ndarray
+        Array containing the scenario data
+    np_ends : numpy.ndarray
+        Array containing the end boundary conditions
+    df_scen : pandas.DataFrame
+        Dataframe containing the scenario data
+    df_route_input : pandas.DataFrame
+        Unmodified copy of the 'Route' sheet
+    df_pp : pandas.DataFrame
+        Dataframe containing the post-processing data
+    Raises
+    ------
+    IndexError
+        If the 'Scenario' sheet has no row for `pipeline_id` and `scenario_no`.
+    Notes
+    -----
+    The sheets 'Scenario', 'Route', 'Pipe', 'Operating', 'Soils' and 'Post-Processing' are
+    read from the workbook in a single pass. The layout, pipeline and loadcase sets come
+    from the row of the selected scenario, and the same row labels the scenario data.
+    Other Parameters
+    ----------------
+    bl_verbose : boolean, optional
+        True if intermediate printouts are required (False by default).
+    """
+    # Starting time of the pre-processing module
+    start_time = time.time()
+    # Print out in the terminal that the assembly of the main dataframe has started
+    if bl_verbose:
+        print("1. Assembly of the main dataframe")
+    # Read all input sheets at once, so the workbook is opened and parsed only once
+    sheets = pd.read_excel(
+        rf'{work_dir}/{file_name}',
+        sheet_name=['Scenario', 'Route', 'Pipe', 'Operating', 'Soils', 'Post-Processing'])
+    df_sens = sheets['Scenario']
+    scenario_no = int(scenario_no)
+    # Row of the requested pipeline and scenario
+    df_selected = df_sens.loc[(df_sens['Pipeline'] == pipeline_id) &
+                              (df_sens['Scenario'] == scenario_no)]
+    if df_selected.empty:
+        raise IndexError(
+            f"No scenario {scenario_no} found for pipeline {pipeline_id!r} "
+            "in the 'Scenario' sheet")
+    # Define layout, pipeline and loadcase sets
+    layout_set = df_selected['Layout Set'].values[0]
+    pipeline_set = df_selected['Pipeline'].values[0]
+    loadcase_set = df_selected['Loadcase Set'].values[0]
+    # Postprocess route data, keeping an untouched copy of the input sheet
+    df_route = sheets['Route']
+    df_route_input = df_route.copy()
+    df_route, df_route_ends = calc_route_data(df_route, layout_set, pipeline_set)
+    # Postprocess pipe data
+    df_pipe = calc_pipe_data(sheets['Pipe'], pipeline_set)
+    # Interpolate operating data onto the route mesh and average it per element
+    df_oper = calc_operating_profiles(sheets['Operating'], df_route, pipeline_set, loadcase_set)
+    df_oper = calc_oper_data(df_oper, df_route_ends, pipeline_set, loadcase_set)
+    # Postprocess soil data: the 'Mean' and 'STD' columns of each friction factor are the
+    # first two values returned by pysubsea for its LE / BE / HE estimates and fit type
+    df_soil = sheets['Soils']
+    for prefix in ('Axial', 'Lateral Hydrotest', 'Lateral Operation'):
+        df_soil[f'{prefix} Mean'], df_soil[f'{prefix} STD'] = ss.LBSoilDistributions(
+            friction_factor_le=df_soil[f'{prefix} LE'],
+            friction_factor_be=df_soil[f'{prefix} BE'],
+            friction_factor_he=df_soil[f'{prefix} HE'],
+            friction_factor_fit_type=df_soil[f'{prefix} Fit Bounds']
+        ).friction_distribution_parameters()[:2]
+    df_soil = calc_soil_data(df_soil, pipeline_set)
+    # Postprocess scenario data; calc_scenario_data labels its output with the first row
+    # it receives, so only the selected scenario row is passed
+    df_scen = calc_scenario_data(df_selected, df_route, df_pipe, df_oper, df_soil)
+    # Define the NumPy arrays used in the Monte Carlo Simulations
+    np_distr, np_scen, np_ends = calc_monte_carlo_data(df_scen, df_route_ends)
+    # Postprocess the post-processing sets
+    df_pp = calc_pp_data(sheets['Post-Processing'], np_ends, pipeline_id, layout_set)
+    # Print out in the terminal time taken to create main dataframe
+    if bl_verbose:
+        print(f'   Time taken to create main dataframe: {time.time() - start_time:.1f}s')
+    return np_distr, np_scen, np_ends, df_scen, df_route_input, df_pp
+def calc_lognorm_soil(mu_mean, mu_std):
+    """
+    Sample the lognormal distribution of a friction factor (axial or lateral).
+    Parameters
+    ----------
+    mu_mean : float
+        The mean of the friction factor distribution.
+    mu_std : float
+        The standard deviation of the friction factor distribution.
+    Returns
+    -------
+    mu_range : numpy.ndarray
+        10000 evenly spaced friction factor values between the 0.01% and 99.99%
+        quantiles of the distribution.
+    cdf_range : numpy.ndarray
+        Cumulative distribution function (CDF) values corresponding to `mu_range`.
+    Notes
+    -----
+    The lognormal distribution is the one whose mean and standard deviation equal
+    `mu_mean` and `mu_std`.
+    """
+    # Standard deviation and mean of the logarithm of the friction factor
+    sigma_log = np.sqrt(np.log(1 + mu_std**2 / mu_mean**2))
+    mean_log = np.log(mu_mean**2 / np.sqrt(mu_mean**2 + mu_std**2))
+    # Frozen distribution, reused for the quantiles and the CDF
+    distribution = lognorm(sigma_log, 0.0, np.exp(mean_log))
+    # Sample evenly between the 0.01% and 99.99% quantiles
+    lowest = distribution.ppf(0.0001)
+    highest = distribution.ppf(0.9999)
+    mu_range = np.linspace(lowest, highest, 10000)
+    cdf_range = distribution.cdf(mu_range)
+    return mu_range, cdf_range
+def calc_lognorm_hoos(type_elt, length_elt, hoos_mean, hoos_std, length_ref, rcm_charac):
+    """
+    Sample the horizontal out-of-straightness (HOOS) lognormal distribution of an element.
+    Parameters
+    ----------
+    type_elt : str
+        Type of the element; only the first word is used and it must be 'Straight',
+        'Bend', 'Sleeper' or 'RCM'.
+    length_elt : float
+        Length of the element (used for 'Straight' and 'Bend' only).
+    hoos_mean : float
+        Mean of the HOOS distribution.
+    hoos_std : float
+        Standard deviation of the HOOS distribution.
+    length_ref : float
+        Reference length of the HOOS distribution (used for 'Straight' and 'Bend' only).
+    rcm_charac : float
+        Characteristic buckling force for the Residual Curvature Method (RCM); used for
+        'RCM' elements only.
+    Returns
+    -------
+    x_range : numpy.ndarray
+        10000 values of the HOOS variable.
+    cdf_range : numpy.ndarray
+        Cumulative distribution function (CDF) values corresponding to `x_range`.
+    Raises
+    ------
+    ValueError
+        If the element type is not one of the four supported types.
+    Notes
+    -----
+    'Straight' and 'Bend': the CDF of the reference length, F, is scaled to the element
+    length as 1 - (1 - F)**(length_elt / length_ref), evaluated between 0 and 20, and
+    `x_range` holds the values at the probabilities 0, 0.0001, ..., 0.9999.
+    'Sleeper': `x_range` is evenly spaced between the 0.01% and 99.99% quantiles of the
+    unscaled distribution.
+    'RCM': as 'Sleeper', with the distribution multiplied by `rcm_charac`, so the values
+    are buckling forces rather than factors.
+    """
+    # Only the first word of the type identifies the kind of element
+    kind = type_elt.split(' ')[0]
+    # Standard deviation and mean of log(HOOS), common to all element types
+    shape_hoos = np.sqrt(np.log(1 + hoos_std**2 / hoos_mean**2))
+    scale_hoos = np.log(hoos_mean**2 / (np.sqrt(hoos_mean**2 + hoos_std**2)))
+    if kind in ('Straight', 'Bend'):
+        # Number of elements in one reference length
+        n = length_ref / length_elt
+        # Fixed evaluation grid of the HOOS distribution
+        x = np.linspace(0.0, 20.0, 200000)
+        # CDF of the element, scaled from the CDF of the reference length
+        cdf = 1 - (1 - lognorm.cdf(x, shape_hoos, 0.0, np.exp(scale_hoos)))**(1 / n)
+        # Invert the CDF on an even probability grid
+        cdf_range = np.arange(0.0, 1.0, 0.0001)
+        x_range = np.interp(cdf_range, cdf, x)
+    elif kind in ('Sleeper', 'RCM'):
+        if kind == 'RCM':
+            # Multiplying a lognormal variable by a constant shifts the mean of its log
+            scale_hoos = scale_hoos + np.log(rcm_charac)
+        distribution = lognorm(shape_hoos, 0.0, np.exp(scale_hoos))
+        # Sample evenly between the 0.01% and 99.99% quantiles
+        hoos_lower = distribution.ppf(0.0001)
+        hoos_upper = distribution.ppf(0.9999)
+        x_range = np.linspace(hoos_lower, hoos_upper, 10000)
+        cdf_range = distribution.cdf(x_range)
+    else:
+        # Fail with a clear message instead of an UnboundLocalError at the return
+        raise ValueError(
+            f"Unsupported element type {type_elt!r}: "
+            "expected 'Straight', 'Bend', 'Sleeper' or 'RCM'")
+    return x_range, cdf_range