PyPI - emod-api - Versions diffs - 3.0.2__tar.gz → 3.1.1__tar.gz - Mend

emod-api 3.0.2__tar.gz → 3.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

{emod_api-3.0.2/emod_api.egg-info → emod_api-3.1.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: emod-api
-Version: 3.0.2
+Version: 3.1.1
 Summary: Core tools for modeling using EMOD
 Author-email: Sharon Chen <sharon.chen@gatesfoundation.org>, Zhaowei Du <zhaowei.du@gatesfoundation.org>, Clark Kirkman IV <clark.kirkmand@gatesfoundation.org>, Daniel Bridenbecker <daniel.bridenbecker@gatesfoundation.org>, Svetlana Titova <svetlana.titova@gatesfoundation.org>, Ye Chen <ye.chen@gatesfoundation.org>
 License-Expression: MIT
@@ -20,12 +20,9 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: matplotlib
 Requires-Dist: scipy
-Requires-Dist: pandas
 Requires-Dist: numpy
-Requires-Dist: shapely
 Requires-Dist: pyproj
 Requires-Dist: geographiclib
-Requires-Dist: scikit-learn
 Requires-Dist: lz4
 Provides-Extra: docs
 Requires-Dist: mkdocs-material; extra == "docs"

emod_api-3.1.1/emod_api/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "3.1.1"

{emod_api-3.0.2 → emod_api-3.1.1}/emod_api/channelreports/channels.py RENAMED Viewed

@@ -4,9 +4,9 @@
 from datetime import datetime
 import json
+import csv
 from pathlib import Path
 from typing import Union
-import pandas as pd
 _CHANNELS = "Channels"
 _DTK_VERSION = "DTK_Version"
@@ -328,13 +328,6 @@ class ChannelReport(object):
         """Return Channel object by channel name/title"""
         return self._channels[item]
-    def as_dataframe(self) -> pd.DataFrame:
-        """Return underlying data as a Pandas DataFrame"""
-        dataframe = pd.DataFrame(
-            {key: self.channels[key].data for key in self.channel_names}
-        )
-        return dataframe
     def write_file(self, filename: str, indent: int = 0, separators=(",", ":")) -> None:
         """Write inset chart to specified text file."""
@@ -423,11 +416,17 @@ class ChannelReport(object):
         if channel_names is None:
             channel_names = self.channel_names
-        if not transpose:   # default
-            data_frame = pd.DataFrame([[channel_name] + list(self[channel_name]) for channel_name in channel_names])
-            # data_frame = pd.DataFrame(([channel_name] + list(self[channel_name]) for channel_name in channel_names))
-            data_frame.to_csv(filename, header=False, index=False)
-        else:               # transposed
-            self.as_dataframe().to_csv(filename, header=True, index=True, index_label="timestep")
+        if not transpose:  # default
+            with open(filename, "w") as g_f:
+                csv_obj = csv.writer(g_f, dialect='unix', quoting=csv.QUOTE_MINIMAL)
+                for cname in channel_names:
+                    csv_obj.writerow([cname] + list(self[cname]))
+        else:  # transposed
+            with open(filename, "w") as g_f:
+                csv_obj = csv.writer(g_f, dialect='unix', quoting=csv.QUOTE_MINIMAL)
+                csv_obj.writerow(channel_names)
+                for row_idx in range(self.num_time_steps):
+                    csv_obj.writerow([self[cname][row_idx] for cname in channel_names])
         return

{emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/calculators.py RENAMED Viewed

@@ -1,14 +1,10 @@
 import math
 import numpy as np
-import pandas as pd
-import os
 from scipy import sparse as sp
 from scipy.sparse import linalg as la
-from typing import Union
 from emod_api.demographics.age_distribution import AgeDistribution
-from emod_api.demographics.mortality_distribution import MortalityDistribution
 def generate_equilibrium_age_distribution(birth_rate: float = 40.0, mortality_rate: float = 20.0) -> AgeDistribution:
@@ -99,61 +95,3 @@ def _computeAgeDist(bval, mvecX, mvecY, fVec, max_yr=90):
     avecX = np.insert(avecX, 0, np.zeros(1))
     return gR.tolist()[0], avecX[:-1].tolist(), avecY.tolist()
-def generate_mortality_over_time_from_data(data_csv: Union[str, os.PathLike],
-                                           base_year: int) -> MortalityDistribution:
-    """
-    Generate a MortalityDistribution object from a data csv file.
-    Args:
-        data_csv: Path to csv file with the mortality rates by calendar year and age bucket.
-        base_year: The calendar year the sim is treating as the base.
-    Returns:
-        a MortalityDistribution object.
-    """
-    if base_year < 0:
-        raise ValueError(f"User passed negative value of base_year: {base_year}.")
-    if base_year > 2050:
-        raise ValueError(f"User passed too large value of base_year: {base_year}.")
-    # Load csv. Convert rate arrays into DTK-compatiable JSON structures.
-    rates = []  # array of arrays, but leave that for a minute
-    df = pd.read_csv(data_csv)
-    header = df.columns
-    year_start = int(header[1]) # someone's going to come along with 1990.5, etc. Sigh.
-    year_end = int(header[-1])
-    if year_end <= year_start:
-        raise ValueError(f"Failed check that {year_end} is greater than {year_start} in csv dataset.")
-    num_years = year_end - year_start + 1
-    rel_years = list()
-    for year in range(year_start, year_start + num_years):
-        mort_data = list(df[str(year)])
-        rel_years.append(year - base_year)
-    age_key = None
-    for trykey in df.keys():
-        if trykey.lower().startswith("age"):
-            age_key = trykey
-            raw_age_bins = list(df[age_key])
-    if age_key is None:
-        raise ValueError("Failed to find 'Age_Bin' (or similar) column in the csv dataset. Cannot process.")
-    age_bins = list()
-    try:
-        for age_bin in raw_age_bins:
-            left_age = float(age_bin.split("-")[0])
-            age_bins.append(left_age)
-    except Exception as ex:
-        raise ValueError(f"Ran into error processing the values in the Age-Bin column. {ex}")
-    for idx in range(len(age_bins)):  # 18 of these
-        # mort_data is the array of mortality rates (by year bin) for age_bin
-        mort_data = list(df.transpose()[idx][1:])
-        rates.append(mort_data)  # 28 of these, 1 for each year, eg
-    distribution = MortalityDistribution(ages_years=age_bins, mortality_rate_matrix=rates, calendar_years=rel_years)
-    return distribution

{emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/demographics.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import json
+import csv
 import numpy as np
-import pandas as pd
 from pathlib import Path
 from typing import Union
@@ -145,57 +145,93 @@ class Demographics(DemographicsBase):
         Returns:
             A Demographics object
         """
-        def get_value(row, headers):
-            for h in headers:
-                if row.get(h) is not None:
-                    return float(row.get(h))
-            return None
         print(f"{input_file} found and being read for demographics.json file creation.")
-        node_info = pd.read_csv(input_file, encoding='iso-8859-1')
-        out_nodes = []
-        for index, row in node_info.iterrows():
-            if 'under5_pop' in row:
-                pop = int(6 * row['under5_pop'])
-                if pop < 25000:
-                    continue
+        out_nodes = list()
+        with open(input_file, errors='ignore') as csv_file:
+            csv_obj = csv.reader(csv_file, dialect='unix')
+            headers = next(csv_obj, None)
+            # Find header column indicies
+            loc_idx = None
+            for hval in ['loc']:
+                if hval in headers:
+                    loc_idx = headers.index(hval)
+            nid_idx = None
+            for hval in ['node_id']:
+                if hval in headers:
+                    nid_idx = headers.index(hval)
+            lat_idx = None
+            for hval in ["lat", "latitude", "LAT", "LATITUDE", "Latitude", "Lat"]:
+                if hval in headers:
+                    lat_idx = headers.index(hval)
+            lon_idx = None
+            for hval in ["lon", "longitude", "LON", "LONGITUDE", "Longitude", "Lon"]:
+                if hval in headers:
+                    lon_idx = headers.index(hval)
+            cbr_idx = None
+            for hval in ["birth", "Birth", "birth_rate", "birthrate", "BirthRate",
+                         "Birth_Rate", "BIRTH", "birth rate", "Birth Rate"]:
+                if hval in headers:
+                    cbr_idx = headers.index(hval)
+            # Assume either under5 pop or total pop
+            if ('under5_pop' in headers):
+                pop_mult = 6.0
+                pop_idx = headers.index('under5_pop')
             else:
-                pop = int(row['pop'])
+                pop_mult = 1.0
+                pop_idx = headers.index('pop')
-            latitude_headers = ["lat", "latitude", "LAT", "LATITUDE", "Latitude", "Lat"]
-            lat = get_value(row, latitude_headers)
+            # Iterate over rows
+            for csv_row in csv_obj:
+                pop_val = int(float(csv_row[pop_idx]) * pop_mult)
+                if (pop_val < 25000 and pop_mult == 6.0):
+                    continue
-            longitude_headers = ["lon", "longitude", "LON", "LONGITUDE", "Longitude", "Lon"]
-            lon = get_value(row, longitude_headers)
+                if (loc_idx is not None):
+                    loc_val = csv_row[loc_idx]
+                else:
+                    loc_val = None
-            birth_rate_headers = ["birth", "Birth", "birth_rate", "birthrate", "BirthRate", "Birth_Rate", "BIRTH",
-                                  "birth rate", "Birth Rate"]
-            birth_rate = get_value(row, birth_rate_headers)
-            if birth_rate is not None and birth_rate < 0.0:
-                raise ValueError("Birth rate defined in " + input_file + " must be greater 0.")
+                if (lat_idx is not None):
+                    lat_val = float(csv_row[lat_idx])
+                else:
+                    lat_val = None
-            node_id = row.get('node_id')
-            if node_id is not None and int(node_id) == 0:
-                raise ValueError("Node ids can not be '0'.")
+                if (lon_idx is not None):
+                    lon_val = float(csv_row[lon_idx])
+                else:
+                    lon_val = None
-            forced_id = int(cls._node_id_from_lat_lon_res(lat=lat, lon=lon, res=res)) if node_id is None else int(node_id)
+                if (cbr_idx is not None):
+                    cbr_val = float(csv_row[cbr_idx])
+                else:
+                    cbr_val = None
+                if cbr_val is not None and cbr_val < 0.0:
+                    raise ValueError("Birth rate defined in " + input_file + " must be greater 0.")
+                if (nid_idx is not None):
+                    nid_val = int(csv_row[nid_idx])
+                else:
+                    nid_val = None
+                if nid_val is not None and nid_val == 0:
+                    raise ValueError("Node ids can not be '0'.")
+                forced_id = int(cls._node_id_from_lat_lon_res(lat=lat_val, lon=lon_val, res=res)) if nid_val is None else nid_val
+                node_attributes = NodeAttributes(name=loc_val, birth_rate=cbr_val)
+                node = Node(lat_val, lon_val, pop_val, node_attributes=node_attributes, forced_id=forced_id, meta=dict())
+                out_nodes.append(node)
+        print(out_nodes)
-            if 'loc' in row:
-                place_name = str(row['loc'])
-            else:
-                place_name = None
-            meta = {}
-            """
-            meta = {'dot_name': (row['ADM0_NAME']+':'+row['ADM1_NAME']+':'+row['ADM2_NAME']),
-                    'GUID': row['GUID'],
-                    'density': row['under5_pop_weighted_density']}
-            """
-            node_attributes = NodeAttributes(name=place_name, birth_rate=birth_rate)
-            node = Node(lat, lon, pop,
-                        node_attributes=node_attributes,
-                        forced_id=forced_id, meta=meta)
-            out_nodes.append(node)
         return cls(nodes=out_nodes, idref=id_ref)
     # This will be the long-term API for this function.

{emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/demographics_base.py RENAMED Viewed

@@ -2,12 +2,7 @@ import warnings
 from collections import Counter
 from functools import partial
 from collections.abc import Iterable
-from typing import Union, Optional, Callable
-import numpy as np
-import pandas as pd
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import StandardScaler
+from typing import Union, Callable
 from emod_api.demographics.age_distribution import AgeDistribution
 from emod_api.demographics.base_input_file import BaseInputFile
@@ -265,189 +260,6 @@ class DemographicsBase(BaseInputFile):
         self.implicits.append(partial(_set_demographic_filenames, filenames=filenames))
-    def infer_natural_mortality(self,
-                                file_male,
-                                file_female,
-                                interval_fit: Optional[list[Union[int, float]]] = None,
-                                which_point='mid',
-                                predict_horizon=2050,
-                                csv_out=False,
-                                n=0,  # I don't know what this means
-                                results_scale_factor=1.0 / 365.0) -> [dict, dict]:
-        """
-        Calculate and set the expected natural mortality by age, sex, and year from data, predicting what it would
-        have been without disease (HIV-only).
-        """
-        from collections import OrderedDict
-        from sklearn.linear_model import LinearRegression
-        from functools import reduce
-        from emod_api.demographics.implicit_functions import _set_mortality_age_gender_year
-        warnings.warn('infer_natural_mortality() is deprecated. Please use modern country model loading.',
-                      DeprecationWarning, stacklevel=2)
-        if interval_fit is None:
-            interval_fit = [1970, 1980]
-        name_conversion_dict = {'Age (x)': 'Age',
-                                'Central death rate m(x,n)': 'Mortality_mid',
-                                'Age interval (n)': 'Interval',
-                                'Period': 'Years'
-                                }
-        sex_dict = {'Male': 0, 'Female': 1}
-        def construct_interval(x, y):
-            return x, x + y
-        def midpoint(x, y):
-            return (x + y) / 2.0
-        def generate_dict_order(tuple_list, which_entry=1):
-            my_unordered_list = tuple_list.apply(lambda x: x[which_entry])
-            dict_to_order = OrderedDict(zip(tuple_list, my_unordered_list))
-            return dict_to_order
-        def map_year(x_tuple, flag='mid'):
-            valid_entries_loc = ['mid', 'end', 'start']
-            if flag not in valid_entries_loc:
-                raise ValueError('invalid endpoint specified')
-            if flag == 'mid':
-                return (x_tuple[0] + x_tuple[1]) / 2.0
-            elif flag == 'start':
-                return x_tuple[0]
-            else:
-                return x_tuple[1]
-        df_mort_male = pd.read_csv(file_male, usecols=name_conversion_dict)
-        df_mort_male['Sex'] = 'Male'
-        df_mort_female = pd.read_csv(file_female, usecols=name_conversion_dict)
-        df_mort_female['Sex'] = 'Female'
-        df_mort = pd.concat([df_mort_male, df_mort_female], axis=0)
-        df_mort.rename(columns=name_conversion_dict, inplace=True)
-        df_mort['Years'] = df_mort['Years'].apply(lambda x: tuple(
-            [float(zz) for zz in x.split('-')]))  # this might be a bit too format specific (ie dashes in input)
-        # log transform the data and drop unneeded columns
-        df_mort['log_Mortality_mid'] = df_mort['Mortality_mid'].apply(lambda x: np.log(x))
-        df_mort['Age'] = df_mort[['Age', 'Interval']].apply(lambda zz: construct_interval(*zz), axis=1)
-        year_order_dict = generate_dict_order(df_mort['Years'])
-        age_order_dict = generate_dict_order(df_mort['Age'])
-        df_mort['sortby2'] = df_mort['Age'].map(age_order_dict)
-        df_mort['sortby1'] = df_mort['Sex'].map(sex_dict)
-        df_mort['sortby3'] = df_mort['Years'].map(year_order_dict)
-        df_mort.sort_values(['sortby1', 'sortby2', 'sortby3'], inplace=True)
-        df_mort.drop(columns=['Mortality_mid', 'Interval', 'sortby1', 'sortby2', 'sortby3'], inplace=True)
-        # convert to years (and to string for age_list due to really annoying practical slicing reasons
-        df_mort['Years'] = df_mort['Years'].apply(lambda x: map_year(x, which_point))
-        df_mort['Age'] = df_mort['Age'].apply(lambda x: str(x))
-        df_before_time = df_mort[df_mort['Years'].between(0, interval_fit[0])].copy()
-        df_mort.set_index(['Sex', 'Age'], inplace=True)
-        sex_list = list(set(df_mort.index.get_level_values('Sex')))
-        age_list = list(set(df_mort.index.get_level_values('Age')))
-        df_list = []
-        for sex in sex_list:
-            for age in age_list:
-                tmp_data = df_mort.loc[(sex, age, slice(None)), :]
-                extrap_model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
-                first_extrap_df = tmp_data[tmp_data['Years'].between(interval_fit[0], interval_fit[1])]
-                xx = tmp_data[tmp_data['Years'].between(interval_fit[0], predict_horizon)].values[:, 0]
-                values = first_extrap_df.values
-                extrap_model.fit(values[:, 0].reshape(-1, 1), values[:, 1])
-                extrap_predictions = extrap_model.predict(xx.reshape(-1, 1))
-                loc_df = pd.DataFrame.from_dict({'Sex': sex, 'Age': age, 'Years': xx, 'Extrap': extrap_predictions})
-                loc_df.set_index(['Sex', 'Age', 'Years'], inplace=True)
-                df_list.append(loc_df.copy())
-        df_e1 = pd.concat(df_list, axis=0)
-        df_list_final = [df_mort, df_e1]
-        df_total = reduce(lambda left, right: pd.merge(left, right, on=['Sex', 'Age', 'Years']), df_list_final)
-        df_total = df_total.reset_index(inplace=False).set_index(['Sex', 'Age'], inplace=False)
-        df_total['Extrap'] = df_total['Extrap'].apply(np.exp)
-        df_total['Data'] = df_total['log_Mortality_mid'].apply(np.exp)
-        df_before_time['Data'] = df_before_time['log_Mortality_mid'].apply(np.exp)
-        df_before_time.set_index(['Sex', 'Age'], inplace=True)
-        df_total = pd.concat([df_total, df_before_time], axis=0, join='outer', sort=True)
-        df_total.reset_index(inplace=True)
-        df_total['sortby2'] = df_total['Age'].map(age_order_dict)
-        df_total['sortby1'] = df_total['Sex'].map(sex_dict)
-        df_total.sort_values(by=['sortby1', 'sortby2', 'Years'], inplace=True)
-        df_total.drop(columns=['sortby1', 'sortby2'], inplace=True)
-        estimates_list = []
-        estimates_list.append(df_total.copy())
-        # estimates_list = [df_total.copy()] alternative
-        def min_not_nan(x_list):
-            loc_in = list(filter(lambda x: not np.isnan(x), x_list))
-            return np.min(loc_in)
-        # This was in another function before
-        df = estimates_list[n]
-        df['FE'] = df[['Data', 'Extrap']].apply(min_not_nan, axis=1)
-        df['Age'] = df['Age'].apply(lambda x: int(x.split(',')[1].split(')')[0]))
-        male_df = df[df['Sex'] == 'Male']
-        female_df = df[df['Sex'] == 'Female']
-        male_df.set_index(['Sex', 'Age', 'Years'], inplace=True)
-        female_df.set_index(['Sex', 'Age', 'Years'], inplace=True)
-        male_data = male_df['FE']
-        female_data = female_df['FE']
-        male_data = male_data.unstack(-1)
-        male_data.sort_index(level='Age', inplace=True)
-        female_data = female_data.unstack(-1)
-        female_data.sort_index(level='Age', inplace=True)
-        years_out_male = list(male_data.columns)
-        years_out_female = list(female_data.columns)
-        age_out_male = list(male_data.index.get_level_values('Age'))
-        age_out_female = list(male_data.index.get_level_values('Age'))
-        male_output = male_data.values
-        female_output = female_data.values
-        if csv_out:
-            male_data.to_csv(f'Male{csv_out}')
-            female_data.to_csv(f'Female{csv_out}')
-        # TBD: This is the part that should use base file functionality
-        dict_female = {'AxisNames': ['age', 'year'],
-                       'AxisScaleFactors': [365.0, 1],
-                       'AxisUnits': ['years', 'years'],
-                       'PopulationGroups': [age_out_female, years_out_female],
-                       'ResultScaleFactor': results_scale_factor,
-                       'ResultUnits': 'annual deaths per capita',
-                       'ResultValues': female_output.tolist()
-                       }
-        dict_male = {'AxisNames': ['age', 'year'],
-                     'AxisScaleFactors': [365.0, 1],
-                     'AxisUnits': ['years', 'years'],
-                     'PopulationGroups': [age_out_male, years_out_male],
-                     'ResultScaleFactor': results_scale_factor,
-                     'ResultUnits': 'annual deaths per capita',
-                     'ResultValues': male_output.tolist()
-                     }
-        self.implicits.append(_set_mortality_age_gender_year)
-        return dict_female, dict_male
     def to_dict(self) -> dict:
         self.verify_demographics_integrity()
         demographics_dict = {

emod_api-3.1.1/emod_api/demographics/service/grid_construction.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""
+- construct a grid from a bounding box
+- label a collection of points by grid cells
+- input:     - points csv file with required columns lat,lon # see example input files (structures_households.csv)
+- output:    - csv file of grid locations
+             - csv with grid cell id added for each point record
+"""
+import numpy as np
+import pyproj
+# square grid cell/pixel side (in m)
+cell_size = 1000.0
+# projection param
+geod = pyproj.Geod(ellps='WGS84')
+def get_grid_cell_id(idx, idy):
+    return str(idx) + "_" + str(idy)
+def point_2_grid_cell_id_lookup(point, grid_id_2_cell_id, origin):
+    (_, _, dx) = geod.inv(origin[0], origin[1], point[0], origin[1])
+    (_, _, dy) = geod.inv(origin[0], origin[1], origin[0], point[1])
+    idx = int(dx / cell_size) + 1
+    idy = int(dy / cell_size) + 1
+    grid_id = get_grid_cell_id(idx, idy)
+    if grid_id in grid_id_2_cell_id:
+        cid = int(grid_id_2_cell_id[grid_id])
+    else:
+        cid = None
+    return (cid, idx, idy)
+def construct(x_min, y_min, x_max, y_max):
+    '''
+    Creating grid
+    '''
+    print("Creating grid...")
+    # get the centroid of the cell left-down from the grid min corner; that is the origin of the grid
+    origin = geod.fwd(x_min, y_min, -135, cell_size / np.sqrt(2))
+    # get the centroid of the cell right-up from the grid max corner; that is the final point of the grid
+    final = geod.fwd(x_max, y_max, 45, cell_size / np.sqrt(2))
+    (fwdax, _, dx) = geod.inv(origin[0], origin[1], final[0], origin[1])
+    (fwday, _, dy) = geod.inv(origin[0], origin[1], origin[0], final[1])
+    # construct grid
+    x = origin[0]
+    y = origin[1]
+    current_point = (x, y)
+    grid_id_2_cell_id = dict()
+    idx = 0
+    cell_id = 0
+    grid_lons = list()
+    grid_lats = list()
+    gcids = list()
+    while x < final[0]:
+        y = origin[1]
+        idy = 0
+        while y < final[1]:
+            y = geod.fwd(current_point[0], y, fwday, cell_size)[1]
+            current_point = (x, y)
+            grid_lats.append(current_point[1])
+            grid_lons.append(current_point[0])
+            grid_id = get_grid_cell_id(idx, idy)
+            grid_id_2_cell_id[grid_id] = cell_id
+            cell_id += 1
+            gcids.append(cell_id)
+            idy += 1
+        x = geod.fwd(current_point[0], current_point[1], fwdax, cell_size)[0]
+        current_point = (x, current_point[1])
+        idx += 1
+    grid_dict = {"lat": grid_lats, "lon": grid_lons, "gcid": gcids}
+    print("Created grid of size")
+    print(str(len(set(grid_lons))) + "x" + str(len(set(grid_lats))))
+    return grid_dict, grid_id_2_cell_id, origin, final

emod-api 3.0.2__tar.gz → 3.1.1__tar.gz

emod-api 3.0.2__tar.gz → 3.1.1__tar.gz