emod-api 3.0.2__tar.gz → 3.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {emod_api-3.0.2/emod_api.egg-info → emod_api-3.1.1}/PKG-INFO +1 -4
  2. emod_api-3.1.1/emod_api/__init__.py +1 -0
  3. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/channelreports/channels.py +13 -14
  4. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/calculators.py +0 -62
  5. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/demographics.py +80 -44
  6. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/demographics_base.py +1 -189
  7. emod_api-3.1.1/emod_api/demographics/service/grid_construction.py +99 -0
  8. emod_api-3.1.1/emod_api/demographics/service/service.py +117 -0
  9. {emod_api-3.0.2 → emod_api-3.1.1/emod_api.egg-info}/PKG-INFO +1 -4
  10. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api.egg-info/SOURCES.txt +0 -1
  11. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api.egg-info/requires.txt +0 -3
  12. {emod_api-3.0.2 → emod_api-3.1.1}/pyproject.toml +2 -5
  13. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_channel_reports.py +7 -58
  14. emod_api-3.1.1/tests/test_demog_from_pop.py +101 -0
  15. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_demographics.py +121 -108
  16. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_migration.py +17 -7
  17. emod_api-3.0.2/emod_api/__init__.py +0 -1
  18. emod_api-3.0.2/emod_api/channelreports/icj_to_csv.py +0 -65
  19. emod_api-3.0.2/emod_api/demographics/service/grid_construction.py +0 -143
  20. emod_api-3.0.2/emod_api/demographics/service/service.py +0 -55
  21. emod_api-3.0.2/tests/test_demog_from_pop.py +0 -74
  22. {emod_api-3.0.2 → emod_api-3.1.1}/LICENSE +0 -0
  23. {emod_api-3.0.2 → emod_api-3.1.1}/MANIFEST.in +0 -0
  24. {emod_api-3.0.2 → emod_api-3.1.1}/README.md +0 -0
  25. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/campaign.py +0 -0
  26. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/channelreports/__init__.py +0 -0
  27. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/channelreports/plot_icj_means.py +0 -0
  28. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/channelreports/plot_prop_report.py +0 -0
  29. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/channelreports/utils.py +0 -0
  30. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/config/__init__.py +0 -0
  31. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/config/default_from_schema.py +0 -0
  32. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/config/default_from_schema_no_validation.py +0 -0
  33. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/config/from_overrides.py +0 -0
  34. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/__init__.py +0 -0
  35. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/age_distribution.py +0 -0
  36. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/base_input_file.py +0 -0
  37. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/demographic_exceptions.py +0 -0
  38. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/demographics_overlay.py +0 -0
  39. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/fertility_distribution.py +0 -0
  40. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/implicit_functions.py +0 -0
  41. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/mortality_distribution.py +0 -0
  42. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/node.py +0 -0
  43. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/overlay_node.py +0 -0
  44. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/properties_and_attributes.py +0 -0
  45. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/service/__init__.py +0 -0
  46. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/susceptibility_distribution.py +0 -0
  47. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/demographics/updateable.py +0 -0
  48. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/legacy/__init__.py +0 -0
  49. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/legacy/plotAllCharts.py +0 -0
  50. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/migration/__init__.py +0 -0
  51. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/migration/__main__.py +0 -0
  52. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/migration/migration.py +0 -0
  53. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/multidim_plotter.py +0 -0
  54. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/schema_to_class.py +0 -0
  55. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/serialization/__init__.py +0 -0
  56. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/serialization/census_and_mod_pop.py +0 -0
  57. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/serialization/dtk_file_support.py +0 -0
  58. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/serialization/dtk_file_tools.py +0 -0
  59. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/serialization/dtk_file_utility.py +0 -0
  60. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/serialization/serialized_population.py +0 -0
  61. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/spatialreports/__init__.py +0 -0
  62. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/spatialreports/__main__.py +0 -0
  63. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/spatialreports/plot_spat_means.py +0 -0
  64. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/spatialreports/spatial.py +0 -0
  65. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/__init__.py +0 -0
  66. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/__init__.py +0 -0
  67. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/base_distribution.py +0 -0
  68. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/bimodal_distribution.py +0 -0
  69. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/constant_distribution.py +0 -0
  70. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/demographic_distribution_flag.py +0 -0
  71. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/distribution_type.py +0 -0
  72. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/dual_constant_distribution.py +0 -0
  73. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/dual_exponential_distribution.py +0 -0
  74. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/exponential_distribution.py +0 -0
  75. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/gaussian_distribution.py +0 -0
  76. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/log_normal_distribution.py +0 -0
  77. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/poisson_distribution.py +0 -0
  78. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/uniform_distribution.py +0 -0
  79. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/distributions/weibull_distribution.py +0 -0
  80. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/utils/str_enum.py +0 -0
  81. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/weather/__init__.py +0 -0
  82. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api/weather/weather.py +0 -0
  83. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api.egg-info/dependency_links.txt +0 -0
  84. {emod_api-3.0.2 → emod_api-3.1.1}/emod_api.egg-info/top_level.txt +0 -0
  85. {emod_api-3.0.2 → emod_api-3.1.1}/setup.cfg +0 -0
  86. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_campaign_module.py +0 -0
  87. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_config.py +0 -0
  88. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_config_demog.py +0 -0
  89. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_demographics_calculators.py +0 -0
  90. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_distributions.py +0 -0
  91. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_node.py +0 -0
  92. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_property_reports.py +0 -0
  93. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_schema.py +0 -0
  94. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_serialization.py +0 -0
  95. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_spatial_reports.py +0 -0
  96. {emod_api-3.0.2 → emod_api-3.1.1}/tests/test_weather_files.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: emod-api
3
- Version: 3.0.2
3
+ Version: 3.1.1
4
4
  Summary: Core tools for modeling using EMOD
5
5
  Author-email: Sharon Chen <sharon.chen@gatesfoundation.org>, Zhaowei Du <zhaowei.du@gatesfoundation.org>, Clark Kirkman IV <clark.kirkmand@gatesfoundation.org>, Daniel Bridenbecker <daniel.bridenbecker@gatesfoundation.org>, Svetlana Titova <svetlana.titova@gatesfoundation.org>, Ye Chen <ye.chen@gatesfoundation.org>
6
6
  License-Expression: MIT
@@ -20,12 +20,9 @@ Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
21
  Requires-Dist: matplotlib
22
22
  Requires-Dist: scipy
23
- Requires-Dist: pandas
24
23
  Requires-Dist: numpy
25
- Requires-Dist: shapely
26
24
  Requires-Dist: pyproj
27
25
  Requires-Dist: geographiclib
28
- Requires-Dist: scikit-learn
29
26
  Requires-Dist: lz4
30
27
  Provides-Extra: docs
31
28
  Requires-Dist: mkdocs-material; extra == "docs"
@@ -0,0 +1 @@
1
+ __version__ = "3.1.1"
@@ -4,9 +4,9 @@
4
4
 
5
5
  from datetime import datetime
6
6
  import json
7
+ import csv
7
8
  from pathlib import Path
8
9
  from typing import Union
9
- import pandas as pd
10
10
 
11
11
  _CHANNELS = "Channels"
12
12
  _DTK_VERSION = "DTK_Version"
@@ -328,13 +328,6 @@ class ChannelReport(object):
328
328
  """Return Channel object by channel name/title"""
329
329
  return self._channels[item]
330
330
 
331
- def as_dataframe(self) -> pd.DataFrame:
332
- """Return underlying data as a Pandas DataFrame"""
333
- dataframe = pd.DataFrame(
334
- {key: self.channels[key].data for key in self.channel_names}
335
- )
336
- return dataframe
337
-
338
331
  def write_file(self, filename: str, indent: int = 0, separators=(",", ":")) -> None:
339
332
  """Write inset chart to specified text file."""
340
333
 
@@ -423,11 +416,17 @@ class ChannelReport(object):
423
416
  if channel_names is None:
424
417
  channel_names = self.channel_names
425
418
 
426
- if not transpose: # default
427
- data_frame = pd.DataFrame([[channel_name] + list(self[channel_name]) for channel_name in channel_names])
428
- # data_frame = pd.DataFrame(([channel_name] + list(self[channel_name]) for channel_name in channel_names))
429
- data_frame.to_csv(filename, header=False, index=False)
430
- else: # transposed
431
- self.as_dataframe().to_csv(filename, header=True, index=True, index_label="timestep")
419
+ if not transpose: # default
420
+ with open(filename, "w") as g_f:
421
+ csv_obj = csv.writer(g_f, dialect='unix', quoting=csv.QUOTE_MINIMAL)
422
+ for cname in channel_names:
423
+ csv_obj.writerow([cname] + list(self[cname]))
424
+
425
+ else: # transposed
426
+ with open(filename, "w") as g_f:
427
+ csv_obj = csv.writer(g_f, dialect='unix', quoting=csv.QUOTE_MINIMAL)
428
+ csv_obj.writerow(channel_names)
429
+ for row_idx in range(self.num_time_steps):
430
+ csv_obj.writerow([self[cname][row_idx] for cname in channel_names])
432
431
 
433
432
  return
@@ -1,14 +1,10 @@
1
1
  import math
2
2
  import numpy as np
3
- import pandas as pd
4
- import os
5
3
 
6
4
  from scipy import sparse as sp
7
5
  from scipy.sparse import linalg as la
8
- from typing import Union
9
6
 
10
7
  from emod_api.demographics.age_distribution import AgeDistribution
11
- from emod_api.demographics.mortality_distribution import MortalityDistribution
12
8
 
13
9
 
14
10
  def generate_equilibrium_age_distribution(birth_rate: float = 40.0, mortality_rate: float = 20.0) -> AgeDistribution:
@@ -99,61 +95,3 @@ def _computeAgeDist(bval, mvecX, mvecY, fVec, max_yr=90):
99
95
  avecX = np.insert(avecX, 0, np.zeros(1))
100
96
 
101
97
  return gR.tolist()[0], avecX[:-1].tolist(), avecY.tolist()
102
-
103
-
104
- def generate_mortality_over_time_from_data(data_csv: Union[str, os.PathLike],
105
- base_year: int) -> MortalityDistribution:
106
- """
107
- Generate a MortalityDistribution object from a data csv file.
108
-
109
- Args:
110
- data_csv: Path to csv file with the mortality rates by calendar year and age bucket.
111
- base_year: The calendar year the sim is treating as the base.
112
-
113
- Returns:
114
- a MortalityDistribution object.
115
- """
116
- if base_year < 0:
117
- raise ValueError(f"User passed negative value of base_year: {base_year}.")
118
- if base_year > 2050:
119
- raise ValueError(f"User passed too large value of base_year: {base_year}.")
120
-
121
- # Load csv. Convert rate arrays into DTK-compatiable JSON structures.
122
- rates = [] # array of arrays, but leave that for a minute
123
- df = pd.read_csv(data_csv)
124
- header = df.columns
125
- year_start = int(header[1]) # someone's going to come along with 1990.5, etc. Sigh.
126
- year_end = int(header[-1])
127
- if year_end <= year_start:
128
- raise ValueError(f"Failed check that {year_end} is greater than {year_start} in csv dataset.")
129
- num_years = year_end - year_start + 1
130
- rel_years = list()
131
- for year in range(year_start, year_start + num_years):
132
- mort_data = list(df[str(year)])
133
- rel_years.append(year - base_year)
134
-
135
- age_key = None
136
- for trykey in df.keys():
137
- if trykey.lower().startswith("age"):
138
- age_key = trykey
139
- raw_age_bins = list(df[age_key])
140
-
141
- if age_key is None:
142
- raise ValueError("Failed to find 'Age_Bin' (or similar) column in the csv dataset. Cannot process.")
143
-
144
- age_bins = list()
145
- try:
146
- for age_bin in raw_age_bins:
147
- left_age = float(age_bin.split("-")[0])
148
- age_bins.append(left_age)
149
-
150
- except Exception as ex:
151
- raise ValueError(f"Ran into error processing the values in the Age-Bin column. {ex}")
152
-
153
- for idx in range(len(age_bins)): # 18 of these
154
- # mort_data is the array of mortality rates (by year bin) for age_bin
155
- mort_data = list(df.transpose()[idx][1:])
156
- rates.append(mort_data) # 28 of these, 1 for each year, eg
157
-
158
- distribution = MortalityDistribution(ages_years=age_bins, mortality_rate_matrix=rates, calendar_years=rel_years)
159
- return distribution
@@ -1,6 +1,6 @@
1
1
  import json
2
+ import csv
2
3
  import numpy as np
3
- import pandas as pd
4
4
 
5
5
  from pathlib import Path
6
6
  from typing import Union
@@ -145,57 +145,93 @@ class Demographics(DemographicsBase):
145
145
  Returns:
146
146
  A Demographics object
147
147
  """
148
- def get_value(row, headers):
149
- for h in headers:
150
- if row.get(h) is not None:
151
- return float(row.get(h))
152
- return None
153
-
154
148
  print(f"{input_file} found and being read for demographics.json file creation.")
155
- node_info = pd.read_csv(input_file, encoding='iso-8859-1')
156
- out_nodes = []
157
- for index, row in node_info.iterrows():
158
- if 'under5_pop' in row:
159
- pop = int(6 * row['under5_pop'])
160
- if pop < 25000:
161
- continue
149
+
150
+ out_nodes = list()
151
+ with open(input_file, errors='ignore') as csv_file:
152
+ csv_obj = csv.reader(csv_file, dialect='unix')
153
+ headers = next(csv_obj, None)
154
+
155
+ # Find header column indicies
156
+ loc_idx = None
157
+ for hval in ['loc']:
158
+ if hval in headers:
159
+ loc_idx = headers.index(hval)
160
+
161
+ nid_idx = None
162
+ for hval in ['node_id']:
163
+ if hval in headers:
164
+ nid_idx = headers.index(hval)
165
+
166
+ lat_idx = None
167
+ for hval in ["lat", "latitude", "LAT", "LATITUDE", "Latitude", "Lat"]:
168
+ if hval in headers:
169
+ lat_idx = headers.index(hval)
170
+
171
+ lon_idx = None
172
+ for hval in ["lon", "longitude", "LON", "LONGITUDE", "Longitude", "Lon"]:
173
+ if hval in headers:
174
+ lon_idx = headers.index(hval)
175
+
176
+ cbr_idx = None
177
+ for hval in ["birth", "Birth", "birth_rate", "birthrate", "BirthRate",
178
+ "Birth_Rate", "BIRTH", "birth rate", "Birth Rate"]:
179
+ if hval in headers:
180
+ cbr_idx = headers.index(hval)
181
+
182
+ # Assume either under5 pop or total pop
183
+ if ('under5_pop' in headers):
184
+ pop_mult = 6.0
185
+ pop_idx = headers.index('under5_pop')
162
186
  else:
163
- pop = int(row['pop'])
187
+ pop_mult = 1.0
188
+ pop_idx = headers.index('pop')
164
189
 
165
- latitude_headers = ["lat", "latitude", "LAT", "LATITUDE", "Latitude", "Lat"]
166
- lat = get_value(row, latitude_headers)
190
+ # Iterate over rows
191
+ for csv_row in csv_obj:
192
+ pop_val = int(float(csv_row[pop_idx]) * pop_mult)
193
+ if (pop_val < 25000 and pop_mult == 6.0):
194
+ continue
167
195
 
168
- longitude_headers = ["lon", "longitude", "LON", "LONGITUDE", "Longitude", "Lon"]
169
- lon = get_value(row, longitude_headers)
196
+ if (loc_idx is not None):
197
+ loc_val = csv_row[loc_idx]
198
+ else:
199
+ loc_val = None
170
200
 
171
- birth_rate_headers = ["birth", "Birth", "birth_rate", "birthrate", "BirthRate", "Birth_Rate", "BIRTH",
172
- "birth rate", "Birth Rate"]
173
- birth_rate = get_value(row, birth_rate_headers)
174
- if birth_rate is not None and birth_rate < 0.0:
175
- raise ValueError("Birth rate defined in " + input_file + " must be greater 0.")
201
+ if (lat_idx is not None):
202
+ lat_val = float(csv_row[lat_idx])
203
+ else:
204
+ lat_val = None
176
205
 
177
- node_id = row.get('node_id')
178
- if node_id is not None and int(node_id) == 0:
179
- raise ValueError("Node ids can not be '0'.")
206
+ if (lon_idx is not None):
207
+ lon_val = float(csv_row[lon_idx])
208
+ else:
209
+ lon_val = None
180
210
 
181
- forced_id = int(cls._node_id_from_lat_lon_res(lat=lat, lon=lon, res=res)) if node_id is None else int(node_id)
211
+ if (cbr_idx is not None):
212
+ cbr_val = float(csv_row[cbr_idx])
213
+ else:
214
+ cbr_val = None
215
+
216
+ if cbr_val is not None and cbr_val < 0.0:
217
+ raise ValueError("Birth rate defined in " + input_file + " must be greater 0.")
218
+
219
+ if (nid_idx is not None):
220
+ nid_val = int(csv_row[nid_idx])
221
+ else:
222
+ nid_val = None
223
+
224
+ if nid_val is not None and nid_val == 0:
225
+ raise ValueError("Node ids can not be '0'.")
226
+
227
+ forced_id = int(cls._node_id_from_lat_lon_res(lat=lat_val, lon=lon_val, res=res)) if nid_val is None else nid_val
228
+
229
+ node_attributes = NodeAttributes(name=loc_val, birth_rate=cbr_val)
230
+ node = Node(lat_val, lon_val, pop_val, node_attributes=node_attributes, forced_id=forced_id, meta=dict())
231
+ out_nodes.append(node)
232
+
233
+ print(out_nodes)
182
234
 
183
- if 'loc' in row:
184
- place_name = str(row['loc'])
185
- else:
186
- place_name = None
187
- meta = {}
188
- """
189
- meta = {'dot_name': (row['ADM0_NAME']+':'+row['ADM1_NAME']+':'+row['ADM2_NAME']),
190
- 'GUID': row['GUID'],
191
- 'density': row['under5_pop_weighted_density']}
192
- """
193
- node_attributes = NodeAttributes(name=place_name, birth_rate=birth_rate)
194
- node = Node(lat, lon, pop,
195
- node_attributes=node_attributes,
196
- forced_id=forced_id, meta=meta)
197
-
198
- out_nodes.append(node)
199
235
  return cls(nodes=out_nodes, idref=id_ref)
200
236
 
201
237
  # This will be the long-term API for this function.
@@ -2,12 +2,7 @@ import warnings
2
2
  from collections import Counter
3
3
  from functools import partial
4
4
  from collections.abc import Iterable
5
- from typing import Union, Optional, Callable
6
-
7
- import numpy as np
8
- import pandas as pd
9
- from sklearn.pipeline import make_pipeline
10
- from sklearn.preprocessing import StandardScaler
5
+ from typing import Union, Callable
11
6
 
12
7
  from emod_api.demographics.age_distribution import AgeDistribution
13
8
  from emod_api.demographics.base_input_file import BaseInputFile
@@ -265,189 +260,6 @@ class DemographicsBase(BaseInputFile):
265
260
 
266
261
  self.implicits.append(partial(_set_demographic_filenames, filenames=filenames))
267
262
 
268
- def infer_natural_mortality(self,
269
- file_male,
270
- file_female,
271
- interval_fit: Optional[list[Union[int, float]]] = None,
272
- which_point='mid',
273
- predict_horizon=2050,
274
- csv_out=False,
275
- n=0, # I don't know what this means
276
- results_scale_factor=1.0 / 365.0) -> [dict, dict]:
277
- """
278
- Calculate and set the expected natural mortality by age, sex, and year from data, predicting what it would
279
- have been without disease (HIV-only).
280
- """
281
- from collections import OrderedDict
282
- from sklearn.linear_model import LinearRegression
283
- from functools import reduce
284
- from emod_api.demographics.implicit_functions import _set_mortality_age_gender_year
285
- warnings.warn('infer_natural_mortality() is deprecated. Please use modern country model loading.',
286
- DeprecationWarning, stacklevel=2)
287
-
288
- if interval_fit is None:
289
- interval_fit = [1970, 1980]
290
-
291
- name_conversion_dict = {'Age (x)': 'Age',
292
- 'Central death rate m(x,n)': 'Mortality_mid',
293
- 'Age interval (n)': 'Interval',
294
- 'Period': 'Years'
295
- }
296
- sex_dict = {'Male': 0, 'Female': 1}
297
-
298
- def construct_interval(x, y):
299
- return x, x + y
300
-
301
- def midpoint(x, y):
302
- return (x + y) / 2.0
303
-
304
- def generate_dict_order(tuple_list, which_entry=1):
305
- my_unordered_list = tuple_list.apply(lambda x: x[which_entry])
306
- dict_to_order = OrderedDict(zip(tuple_list, my_unordered_list))
307
- return dict_to_order
308
-
309
- def map_year(x_tuple, flag='mid'):
310
- valid_entries_loc = ['mid', 'end', 'start']
311
-
312
- if flag not in valid_entries_loc:
313
- raise ValueError('invalid endpoint specified')
314
-
315
- if flag == 'mid':
316
- return (x_tuple[0] + x_tuple[1]) / 2.0
317
- elif flag == 'start':
318
- return x_tuple[0]
319
- else:
320
- return x_tuple[1]
321
-
322
- df_mort_male = pd.read_csv(file_male, usecols=name_conversion_dict)
323
- df_mort_male['Sex'] = 'Male'
324
- df_mort_female = pd.read_csv(file_female, usecols=name_conversion_dict)
325
- df_mort_female['Sex'] = 'Female'
326
- df_mort = pd.concat([df_mort_male, df_mort_female], axis=0)
327
- df_mort.rename(columns=name_conversion_dict, inplace=True)
328
- df_mort['Years'] = df_mort['Years'].apply(lambda x: tuple(
329
- [float(zz) for zz in x.split('-')])) # this might be a bit too format specific (ie dashes in input)
330
-
331
- # log transform the data and drop unneeded columns
332
- df_mort['log_Mortality_mid'] = df_mort['Mortality_mid'].apply(lambda x: np.log(x))
333
- df_mort['Age'] = df_mort[['Age', 'Interval']].apply(lambda zz: construct_interval(*zz), axis=1)
334
-
335
- year_order_dict = generate_dict_order(df_mort['Years'])
336
- age_order_dict = generate_dict_order(df_mort['Age'])
337
- df_mort['sortby2'] = df_mort['Age'].map(age_order_dict)
338
- df_mort['sortby1'] = df_mort['Sex'].map(sex_dict)
339
- df_mort['sortby3'] = df_mort['Years'].map(year_order_dict)
340
- df_mort.sort_values(['sortby1', 'sortby2', 'sortby3'], inplace=True)
341
- df_mort.drop(columns=['Mortality_mid', 'Interval', 'sortby1', 'sortby2', 'sortby3'], inplace=True)
342
-
343
- # convert to years (and to string for age_list due to really annoying practical slicing reasons
344
- df_mort['Years'] = df_mort['Years'].apply(lambda x: map_year(x, which_point))
345
- df_mort['Age'] = df_mort['Age'].apply(lambda x: str(x))
346
- df_before_time = df_mort[df_mort['Years'].between(0, interval_fit[0])].copy()
347
-
348
- df_mort.set_index(['Sex', 'Age'], inplace=True)
349
- sex_list = list(set(df_mort.index.get_level_values('Sex')))
350
- age_list = list(set(df_mort.index.get_level_values('Age')))
351
-
352
- df_list = []
353
- for sex in sex_list:
354
- for age in age_list:
355
- tmp_data = df_mort.loc[(sex, age, slice(None)), :]
356
- extrap_model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
357
-
358
- first_extrap_df = tmp_data[tmp_data['Years'].between(interval_fit[0], interval_fit[1])]
359
- xx = tmp_data[tmp_data['Years'].between(interval_fit[0], predict_horizon)].values[:, 0]
360
-
361
- values = first_extrap_df.values
362
- extrap_model.fit(values[:, 0].reshape(-1, 1), values[:, 1])
363
-
364
- extrap_predictions = extrap_model.predict(xx.reshape(-1, 1))
365
-
366
- loc_df = pd.DataFrame.from_dict({'Sex': sex, 'Age': age, 'Years': xx, 'Extrap': extrap_predictions})
367
- loc_df.set_index(['Sex', 'Age', 'Years'], inplace=True)
368
-
369
- df_list.append(loc_df.copy())
370
-
371
- df_e1 = pd.concat(df_list, axis=0)
372
-
373
- df_list_final = [df_mort, df_e1]
374
- df_total = reduce(lambda left, right: pd.merge(left, right, on=['Sex', 'Age', 'Years']), df_list_final)
375
-
376
- df_total = df_total.reset_index(inplace=False).set_index(['Sex', 'Age'], inplace=False)
377
-
378
- df_total['Extrap'] = df_total['Extrap'].apply(np.exp)
379
- df_total['Data'] = df_total['log_Mortality_mid'].apply(np.exp)
380
- df_before_time['Data'] = df_before_time['log_Mortality_mid'].apply(np.exp)
381
-
382
- df_before_time.set_index(['Sex', 'Age'], inplace=True)
383
- df_total = pd.concat([df_total, df_before_time], axis=0, join='outer', sort=True)
384
-
385
- df_total.reset_index(inplace=True)
386
- df_total['sortby2'] = df_total['Age'].map(age_order_dict)
387
- df_total['sortby1'] = df_total['Sex'].map(sex_dict)
388
- df_total.sort_values(by=['sortby1', 'sortby2', 'Years'], inplace=True)
389
- df_total.drop(columns=['sortby1', 'sortby2'], inplace=True)
390
-
391
- estimates_list = []
392
- estimates_list.append(df_total.copy())
393
- # estimates_list = [df_total.copy()] alternative
394
-
395
- def min_not_nan(x_list):
396
- loc_in = list(filter(lambda x: not np.isnan(x), x_list))
397
- return np.min(loc_in)
398
-
399
- # This was in another function before
400
- df = estimates_list[n]
401
- df['FE'] = df[['Data', 'Extrap']].apply(min_not_nan, axis=1)
402
- df['Age'] = df['Age'].apply(lambda x: int(x.split(',')[1].split(')')[0]))
403
- male_df = df[df['Sex'] == 'Male']
404
- female_df = df[df['Sex'] == 'Female']
405
-
406
- male_df.set_index(['Sex', 'Age', 'Years'], inplace=True)
407
- female_df.set_index(['Sex', 'Age', 'Years'], inplace=True)
408
- male_data = male_df['FE']
409
- female_data = female_df['FE']
410
-
411
- male_data = male_data.unstack(-1)
412
- male_data.sort_index(level='Age', inplace=True)
413
- female_data = female_data.unstack(-1)
414
- female_data.sort_index(level='Age', inplace=True)
415
-
416
- years_out_male = list(male_data.columns)
417
- years_out_female = list(female_data.columns)
418
-
419
- age_out_male = list(male_data.index.get_level_values('Age'))
420
- age_out_female = list(male_data.index.get_level_values('Age'))
421
-
422
- male_output = male_data.values
423
- female_output = female_data.values
424
-
425
- if csv_out:
426
- male_data.to_csv(f'Male{csv_out}')
427
- female_data.to_csv(f'Female{csv_out}')
428
-
429
- # TBD: This is the part that should use base file functionality
430
-
431
- dict_female = {'AxisNames': ['age', 'year'],
432
- 'AxisScaleFactors': [365.0, 1],
433
- 'AxisUnits': ['years', 'years'],
434
- 'PopulationGroups': [age_out_female, years_out_female],
435
- 'ResultScaleFactor': results_scale_factor,
436
- 'ResultUnits': 'annual deaths per capita',
437
- 'ResultValues': female_output.tolist()
438
- }
439
-
440
- dict_male = {'AxisNames': ['age', 'year'],
441
- 'AxisScaleFactors': [365.0, 1],
442
- 'AxisUnits': ['years', 'years'],
443
- 'PopulationGroups': [age_out_male, years_out_male],
444
- 'ResultScaleFactor': results_scale_factor,
445
- 'ResultUnits': 'annual deaths per capita',
446
- 'ResultValues': male_output.tolist()
447
- }
448
- self.implicits.append(_set_mortality_age_gender_year)
449
- return dict_female, dict_male
450
-
451
263
  def to_dict(self) -> dict:
452
264
  self.verify_demographics_integrity()
453
265
  demographics_dict = {
@@ -0,0 +1,99 @@
1
+ """
2
+ - construct a grid from a bounding box
3
+ - label a collection of points by grid cells
4
+
5
+ - input: - points csv file with required columns lat,lon # see example input files (structures_households.csv)
6
+
7
+ - output: - csv file of grid locations
8
+ - csv with grid cell id added for each point record
9
+ """
10
+ import numpy as np
11
+ import pyproj
12
+
13
+ # square grid cell/pixel side (in m)
14
+ cell_size = 1000.0
15
+
16
+ # projection param
17
+ geod = pyproj.Geod(ellps='WGS84')
18
+
19
+
20
+ def get_grid_cell_id(idx, idy):
21
+
22
+ return str(idx) + "_" + str(idy)
23
+
24
+
25
+ def point_2_grid_cell_id_lookup(point, grid_id_2_cell_id, origin):
26
+
27
+ (_, _, dx) = geod.inv(origin[0], origin[1], point[0], origin[1])
28
+ (_, _, dy) = geod.inv(origin[0], origin[1], origin[0], point[1])
29
+
30
+ idx = int(dx / cell_size) + 1
31
+ idy = int(dy / cell_size) + 1
32
+
33
+ grid_id = get_grid_cell_id(idx, idy)
34
+
35
+ if grid_id in grid_id_2_cell_id:
36
+ cid = int(grid_id_2_cell_id[grid_id])
37
+ else:
38
+ cid = None
39
+
40
+ return (cid, idx, idy)
41
+
42
+
43
+ def construct(x_min, y_min, x_max, y_max):
44
+ '''
45
+ Creating grid
46
+ '''
47
+
48
+ print("Creating grid...")
49
+
50
+ # get the centroid of the cell left-down from the grid min corner; that is the origin of the grid
51
+ origin = geod.fwd(x_min, y_min, -135, cell_size / np.sqrt(2))
52
+
53
+ # get the centroid of the cell right-up from the grid max corner; that is the final point of the grid
54
+ final = geod.fwd(x_max, y_max, 45, cell_size / np.sqrt(2))
55
+
56
+ (fwdax, _, dx) = geod.inv(origin[0], origin[1], final[0], origin[1])
57
+ (fwday, _, dy) = geod.inv(origin[0], origin[1], origin[0], final[1])
58
+
59
+ # construct grid
60
+ x = origin[0]
61
+ y = origin[1]
62
+
63
+ current_point = (x, y)
64
+ grid_id_2_cell_id = dict()
65
+
66
+ idx = 0
67
+ cell_id = 0
68
+ grid_lons = list()
69
+ grid_lats = list()
70
+ gcids = list()
71
+
72
+ while x < final[0]:
73
+ y = origin[1]
74
+ idy = 0
75
+
76
+ while y < final[1]:
77
+ y = geod.fwd(current_point[0], y, fwday, cell_size)[1]
78
+ current_point = (x, y)
79
+
80
+ grid_lats.append(current_point[1])
81
+ grid_lons.append(current_point[0])
82
+
83
+ grid_id = get_grid_cell_id(idx, idy)
84
+ grid_id_2_cell_id[grid_id] = cell_id
85
+
86
+ cell_id += 1
87
+ gcids.append(cell_id)
88
+ idy += 1
89
+
90
+ x = geod.fwd(current_point[0], current_point[1], fwdax, cell_size)[0]
91
+ current_point = (x, current_point[1])
92
+ idx += 1
93
+
94
+ grid_dict = {"lat": grid_lats, "lon": grid_lons, "gcid": gcids}
95
+
96
+ print("Created grid of size")
97
+ print(str(len(set(grid_lons))) + "x" + str(len(set(grid_lats))))
98
+
99
+ return grid_dict, grid_id_2_cell_id, origin, final