ladim-1.3.4-py3-none-any.whl → ladim-2.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ladim/output.py ADDED
@@ -0,0 +1,246 @@
+ from .model import Model, Module
+ import netCDF4 as nc
+ import numpy as np
+
+
+ class Output(Module):
+     def __init__(self, model: Model):
+         super().__init__(model)
+
+
+ class RaggedOutput(Output):
+     def __init__(self, model: Model, variables: dict, file: str, frequency):
+         """
+         Writes simulation output to a netCDF file in ragged array format
+
+         :param model: Parent model
+         :param variables: Simulation variables to include in the output, and their
+             formatting
+         :param file: Name of output file, or empty if a diskless dataset is desired
+         :param frequency: Output frequency in seconds. Alternatively, a two-element
+             tuple (freq_value, freq_unit) where freq_unit can be any numpy-compatible
+             time unit.
+         """
+         super().__init__(model)
+
+         # Convert output format specification from ladim.yaml config to OutputFormat
+         self._formats = {
+             k: OutputFormat.from_ladim_conf(v)
+             for k, v in variables.items()
+         }
+
+         self._init_vars = {k for k, v in self._formats.items() if v.is_initial()}
+         self._inst_vars = {k for k, v in self._formats.items() if v.is_instance()}
+
+         if not file:
+             from uuid import uuid4
+             self._fname = str(uuid4())  # Placeholder name for the diskless dataset
+             self._diskless = True
+         else:
+             self._fname = file
+             self._diskless = False
+
+         try:
+             freq_num, freq_unit = frequency
+         except TypeError:
+             freq_num = frequency
+             freq_unit = 's'
+         self._write_frequency = np.timedelta64(freq_num, freq_unit).astype('timedelta64[s]').astype('int64')
+
+         self._dset = None
+         self._num_writes = 0
+         self._last_write_time = np.int64(-4611686018427387904)
+
+     @property
+     def dataset(self) -> nc.Dataset:
+         """Returns a handle to the netCDF dataset currently being written to"""
+         return self._dset
+
+     def update(self):
+         if self._dset is None:
+             self._create_dset()
+
+         self._write_init_vars()
+         self._write_instance_vars()
+
+     def _write_init_vars(self):
+         """
+         Write the initial state of new particles
+         """
+
+         # Check if there are any new particles
+         part_size = self._dset.dimensions['particle'].size
+         num_new = self.model.state.released - part_size
+         if num_new == 0:
+             return
+
+         # Write variable data
+         idx = self.model.state['pid'] > part_size - 1
+         pid = self.model.state['pid'][idx]
+         for v in set(self._init_vars) - {'release_time'}:
+             # The selected pid values are not necessarily consecutive. We
+             # therefore scatter the data into a temporary, contiguous array.
+             data_raw = self.model.state[v][idx]
+             data = np.zeros(num_new, dtype=data_raw.dtype)
+             data[pid - part_size] = data_raw
+             self._dset.variables[v][part_size:part_size + num_new] = data
+
+         # Write release time variable
+         data = np.broadcast_to(self.model.solver.time, shape=(num_new, ))
+         self._dset.variables['release_time'][part_size:part_size + num_new] = data
+
+     def _write_instance_vars(self):
+         """
+         Write the current state of dynamic variables
+         """
+
+         # Check if this is a write time step
+         current_time = self.model.solver.time
+         elapsed_since_last_write = current_time - self._last_write_time
+         if elapsed_since_last_write < self._write_frequency:
+             return
+         self._last_write_time = current_time
+
+         # Write current time
+         time_size = self._dset.dimensions['time'].size
+         time_value = current_time.astype('datetime64[s]').astype('int64')
+         self._dset.variables['time'][time_size] = time_value
+
+         # Write variable values
+         inst_size = self._dset.dimensions['particle_instance'].size
+         inst_num = self.model.state.size
+         inst_vars = {k: self.model.state[k] for k in set(self._inst_vars) - {'lat', 'lon'}}
+         if {'lat', 'lon'}.intersection(self._inst_vars):
+             x, y = self.model.state['X'], self.model.state['Y']
+             inst_vars['lon'], inst_vars['lat'] = self.model.grid.xy2ll(x, y)
+         for name, data in inst_vars.items():
+             self._dset.variables[name][inst_size:inst_size + inst_num] = data
+
+         # Write particle count
+         self._dset.variables['particle_count'][time_size] = inst_num
+
+     def _create_dset(self):
+         default_formats = dict(
+             time=OutputFormat(
+                 ncformat='i8',
+                 dimensions='time',
+                 attributes=dict(
+                     long_name="time",
+                     standard_name="time",
+                     units="seconds since 1970-01-01",
+                 ),
+             ),
+             instance_offset=OutputFormat(
+                 ncformat='i8',
+                 dimensions=(),
+                 attributes=dict(long_name='particle instance offset for file'),
+             ),
+             particle_count=OutputFormat(
+                 ncformat='i4',
+                 dimensions='time',
+                 attributes=dict(
+                     long_name='number of particles in a given timestep',
+                     ragged_row_count='particle count at nth timestep',
+                 ),
+             ),
+             release_time=OutputFormat(
+                 ncformat='i8',
+                 dimensions='particle',
+                 attributes=dict(
+                     long_name='particle release time',
+                     units='seconds since 1970-01-01',
+                 ),
+             ),
+         )
+
+         self._dset = create_netcdf_file(
+             fname=self._fname,
+             formats={**default_formats, **self._formats},
+             diskless=self._diskless,
+         )
+
+         self._dset.variables['instance_offset'][:] = 0
+
+     def close(self):
+         if self._dset is not None:
+             self._dset.close()
+             self._dset = None
+
+
+ class OutputFormat:
+     def __init__(self, ncformat, dimensions, attributes, kind=None):
+         self.ncformat = ncformat
+         self.dimensions = dimensions
+         self.attributes = attributes
+         self.kind = kind
+
+     def is_initial(self):
+         return self.kind == 'initial'
+
+     def is_instance(self):
+         return self.kind == 'instance'
+
+     @staticmethod
+     def from_ladim_conf(conf) -> "OutputFormat":
+         def get_keywords(ncformat='f4', kind='instance', **kwargs):
+             return dict(
+                 props=dict(ncformat=ncformat, kind=kind),
+                 attrs=kwargs,
+             )
+
+         keywords = get_keywords(**conf)
+         vkind = keywords['props']['kind']
+         if vkind == 'initial':
+             dims = 'particle'
+         elif vkind == 'instance':
+             dims = 'particle_instance'
+         else:
+             raise ValueError(f"Unknown kind: {vkind}")
+
+         return OutputFormat(
+             ncformat=keywords['props']['ncformat'],
+             dimensions=dims,
+             attributes=keywords['attrs'],
+             kind=vkind,
+         )
+
+
+ def create_netcdf_file(fname: str, formats: dict[str, OutputFormat], diskless=False) -> nc.Dataset:
+     """
+     Create a new netCDF file
+
+     :param fname: File name
+     :param formats: Formats, one entry for each variable
+     :param diskless: True if an in-memory dataset should be generated
+     :return: Empty, initialized dataset
+     """
+     from . import __version__ as ladim_version
+
+     dset = nc.Dataset(filename=fname, mode='w', format='NETCDF4', diskless=diskless)
+     dset.set_auto_mask(False)
+
+     # Create attributes
+     dset.Conventions = "CF-1.8"
+     dset.institution = "Institute of Marine Research"
+     dset.source = "Lagrangian Advection and Diffusion Model"
+     dset.history = "Created by ladim " + ladim_version
+     dset.date = str(np.datetime64('now', 'D'))
+
+     # Create dimensions
+     dset.createDimension(dimname="particle", size=None)
+     dset.createDimension(dimname="particle_instance", size=None)
+     dset.createDimension(dimname="time", size=None)
+
+     # Create variables
+     for varname, item in formats.items():
+         dset.createVariable(
+             varname=varname,
+             datatype=item.ncformat,
+             dimensions=item.dimensions,
+         )
+         dset.variables[varname].set_auto_mask(False)
+         dset.variables[varname].setncatts(item.attributes)
+
+     return dset
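For orientation, here is a minimal sketch of reading a RaggedOutput file back in: `particle_count` holds the number of particle instances per time step, so its cumulative sum gives the slice boundaries along the `particle_instance` dimension. The file name `out.nc` and the instance variable `X` are illustrative assumptions, not fixed names in ladim.

    # Illustrative reader sketch; assumes the output was configured with
    # an instance variable named "X" and written to "out.nc"
    import netCDF4 as nc
    import numpy as np

    with nc.Dataset('out.nc') as dset:
        counts = dset.variables['particle_count'][:]
        offsets = np.concatenate(([0], np.cumsum(counts)))
        t = 3  # time index to extract
        start, stop = offsets[t], offsets[t + 1]
        x = dset.variables['X'][start:stop]  # X for all particles at step t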
ladim/release.py ADDED
@@ -0,0 +1,241 @@
+ import contextlib
+
+ from .model import Model, Module
+ import numpy as np
+ import pandas as pd
+ from .utilities import read_timedelta
+ import logging
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class Releaser(Module):
+     def __init__(self, model: Model):
+         super().__init__(model)
+
+
+ class TextFileReleaser(Releaser):
+     def __init__(
+         self, model: Model, file, colnames: list = None, formats: dict = None,
+         frequency=(0, 's'), defaults=None,
+     ):
+         """
+         Release module which reads from a text file
+
+         The text file must be a whitespace-separated CSV file.
+
+         :param model: Parent model
+         :param file: Release file
+
+         :param colnames: Column names, if the release file does not contain any
+
+         :param formats: Data column formats, one dict entry per column. If any
+             column is missing, the default format is used. Keys should correspond
+             to column names. Values should be either ``"float"``, ``"int"`` or
+             ``"time"``. The default value is ``"float"`` for all columns except
+             ``release_time``, which has the default value ``"time"``.
+
+         :param frequency: A two-element list with entries ``[value, unit]``, where
+             ``unit`` can be any numpy-compatible timedelta unit (such as "s", "m",
+             "h", "D").
+
+         :param defaults: A dict of variables to be added to each particle. The keys
+             are the variable names, the values are the initial values at particle
+             release.
+         """
+
+         super().__init__(model)
+
+         # Release file
+         self._csv_fname = file  # Path name
+         self._csv_column_names = colnames  # Column headers
+         self._csv_column_formats = formats or dict()
+         self._dataframe = None
+
+         # Continuous release variables
+         self._frequency = read_timedelta(frequency) / np.timedelta64(1, 's')
+         self._last_release_dataframe = pd.DataFrame()
+         self._last_release_time = np.int64(-4611686018427387904)
+
+         # Other parameters
+         self._defaults = defaults or dict()
+
+     def update(self):
+         self._add_new()
+         self._kill_old()
+
+     def _kill_old(self):
+         state = self.model.state
+         if 'alive' in state:
+             alive = state['alive']
+             alive &= self.model.grid.ingrid(state['X'], state['Y'])
+             state.remove(~alive)
+
+     def _add_new(self):
+         # Get the portion of the release dataset that corresponds to the
+         # current simulation time
+         df = release_data_subset(
+             dataframe=self.dataframe,
+             start_time=self.model.solver.time,
+             stop_time=self.model.solver.time + self.model.solver.step,
+         ).copy(deep=True)
+
+         # If there are no new particles, but the state is empty, we should
+         # still initialize the state by adding the appropriate columns
+         if (len(df) == 0) and ('X' not in self.model.state):
+             self.model.state.append(df.to_dict(orient='list'))
+             self._last_release_dataframe = df
+
+         # If there are no new particles and we don't use continuous release,
+         # we are done.
+         continuous_release = bool(self._frequency)
+         if (len(df) == 0) and not continuous_release:
+             return
+
+         # If we have continuous release, but there are no new particles and
+         # the last release is recent, we are also done
+         current_time = self.model.solver.time
+         elapsed_since_last_release = current_time - self._last_release_time
+         last_release_is_recent = (elapsed_since_last_release < self._frequency)
+         if continuous_release and (len(df) == 0) and last_release_is_recent:
+             return
+
+         # If we are at the final time step, we should not release any more particles
+         if continuous_release and self.model.solver.time >= self.model.solver.stop:
+             return
+
+         # If we have continuous release, but there are no new particles and
+         # the last release is NOT recent, we should replace the empty
+         # dataframe with the previously released dataframe
+         if continuous_release:
+             if (len(df) == 0) and not last_release_is_recent:
+                 df = self._last_release_dataframe
+             self._last_release_dataframe = df  # Update release dataframe
+             self._last_release_time = current_time
+
+         # If positions are given as lat/lon coordinates, we should convert
+         if "X" not in df.columns or "Y" not in df.columns:
+             if "lon" not in df.columns or "lat" not in df.columns:
+                 logger.critical("Particle release must have position")
+                 raise ValueError("Particle release must have position")
+             X, Y = self.model.grid.ll2xy(df["lon"].values, df["lat"].values)
+             df.rename(columns=dict(lon="X", lat="Y"), inplace=True)
+             df["X"] = X
+             df["Y"] = Y
+
+         # Add default variables, if any
+         for k, v in self._defaults.items():
+             if k not in df:
+                 df[k] = v
+
+         # Expand multiplicity variable, if any
+         if 'mult' in df:
+             df = df.loc[np.repeat(df.index, df['mult'].values.astype('i4'))]
+             df = df.reset_index(drop=True).drop(columns='mult')
+
+         # Add new particles
+         new_particles = df.to_dict(orient='list')
+         state = self.model.state
+         state.append(new_particles)
+
+     @property
+     def dataframe(self):
+         @contextlib.contextmanager
+         def open_or_relay(file_or_buf, *args, **kwargs):
+             if hasattr(file_or_buf, 'read'):
+                 yield file_or_buf
+             else:
+                 with open(file_or_buf, *args, **kwargs) as f:
+                     yield f
+
+         if self._dataframe is None:
+             if isinstance(self._csv_fname, pd.DataFrame):
+                 self._dataframe = self._csv_fname
+             else:
+                 # noinspection PyArgumentList
+                 with open_or_relay(self._csv_fname, 'r', encoding='utf-8') as fp:
+                     self._dataframe = load_release_file(
+                         stream=fp,
+                         names=self._csv_column_names,
+                         formats=self._csv_column_formats,
+                     )
+         return self._dataframe
+
+
+ def release_data_subset(dataframe, start_time, stop_time):
+     start_idx, stop_idx = sorted_interval(
+         dataframe['release_time'].values,
+         start_time,
+         stop_time,
+     )
+     return dataframe.iloc[start_idx:stop_idx]
+
+
+ def load_release_file(stream, names: list, formats: dict) -> pd.DataFrame:
+     if names is None:
+         import re
+         first_line = stream.readline()
+         names = re.split(pattern=r'\s+', string=first_line.strip())
+
+     converters = get_converters(varnames=names, conf=formats)
+
+     df = pd.read_csv(
+         stream,
+         names=names,
+         converters=converters,
+         sep=r'\s+',
+     )
+     df = df.sort_values(by='release_time')
+     return df
+
+
+ def sorted_interval(v, a, b):
+     """
+     Searches for an interval in a sorted array
+
+     Returns the start (inclusive) and stop (exclusive) indices of
+     elements in *v* that are greater than or equal to *a* and
+     less than *b*. In other words, returns *start* and *stop* such
+     that v[start:stop] == v[(v >= a) & (v < b)]
+
+     :param v: Sorted input array
+     :param a: Lower bound of array values (inclusive)
+     :param b: Upper bound of array values (exclusive)
+     :returns: A tuple (start, stop) defining the output interval
+     """
+     start = np.searchsorted(v, a, side='left')
+     stop = np.searchsorted(v, b, side='left')
+     return start, stop
+
+
+ def get_converters(varnames: list, conf: dict) -> dict:
+     """
+     Given a list of varnames and config keywords, return a dict of converters
+
+     Returns a dict where the keys are ``varnames`` and the values are
+     callables.
+
+     :param varnames: For instance, ['release_time', 'X', 'Y']
+     :param conf: For instance, {'release_time': 'time', 'X': 'float'}
+     :return: A mapping of varnames to converters
+     """
+     dtype_funcs = dict(
+         time=lambda item: np.datetime64(item, 's').astype('int64'),
+         int=int,
+         float=float,
+     )
+
+     dtype_defaults = dict(
+         release_time='time',
+     )
+
+     converters = {}
+     for varname in varnames:
+         dtype_default = dtype_defaults.get(varname, 'float')
+         dtype_str = conf.get(varname, dtype_default)
+         dtype_func = dtype_funcs[dtype_str]
+         converters[varname] = dtype_func
+
+     return converters
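The helper functions at the bottom of the module can be exercised standalone. A small sketch, assuming the module is importable as `ladim.release`; the release rows are made-up sample data:

    import io
    import numpy as np
    from ladim.release import load_release_file, sorted_interval

    text = "release_time X Y\n2020-01-01 10.0 20.0\n2020-01-02 11.0 21.0\n"
    df = load_release_file(io.StringIO(text), names=None, formats={})
    # release_time is stored as integer seconds since 1970-01-01
    a = np.datetime64('2020-01-01', 's').astype('int64')
    b = np.datetime64('2020-01-02', 's').astype('int64')
    start, stop = sorted_interval(df['release_time'].values, a, b)
    print(df.iloc[start:stop])  # only the first row: the interval is half-open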
ladim/sample.py CHANGED
@@ -277,6 +277,9 @@ def bilin_inv(f, g, F, G, maxiter=7, tol=1.0e-7):
          i = x.astype("i")
          j = y.astype("i")

+         i = np.maximum(np.minimum(i, imax - 2), 0)
+         j = np.maximum(np.minimum(j, jmax - 2), 0)
+
          p, q = x - i, y - j

          # Bilinear estimate of F[x,y] and G[x,y]
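The two added lines clamp the bilinear cell indices to the valid interior of the grid, so an iteration step that wanders outside the domain cannot trigger out-of-bounds indexing. The construct is equivalent to `np.clip`:

    import numpy as np

    i = np.array([-3, 0, 5, 99])
    imax = 10
    np.maximum(np.minimum(i, imax - 2), 0)  # array([0, 0, 5, 8])
    np.clip(i, 0, imax - 2)                 # identical result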
ladim/solver.py CHANGED
@@ -1,13 +1,13 @@
  import numpy as np


- class TimeStepper:
+ class Solver:
      def __init__(self, modules, start, stop, step, order=None, seed=None):
-         self.order = order or ('forcing', 'release', 'output', 'ibm', 'tracker')
+         self.order = order or ('release', 'forcing', 'tracker', 'ibm', 'output')
          self.modules = modules
-         self.start = np.datetime64(start)
-         self.stop = np.datetime64(stop)
-         self.step = np.timedelta64(step, 's')
+         self.start = np.datetime64(start, 's').astype('int64')
+         self.stop = np.datetime64(stop, 's').astype('int64')
+         self.step = np.timedelta64(step, 's').astype('int64')
          self.time = None

          if seed is not None:
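Times are now plain int64 epoch seconds rather than datetime64/timedelta64 objects, so modules compare and subtract them with ordinary integer arithmetic. A pure-numpy sketch of the round trip, matching the conversions used in the constructor above and in `state.timestamp`:

    import numpy as np

    start = np.datetime64('2021-06-01T00:00', 's').astype('int64')  # epoch seconds
    step = np.timedelta64(600, 's').astype('int64')                 # 600
    time = start + 3 * step                                         # plain integers
    np.int64(time).astype('datetime64[s]')  # numpy.datetime64('2021-06-01T00:30:00')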
ladim/state.py ADDED
@@ -0,0 +1,142 @@
+ import pandas as pd
+ import numpy as np
+ from .model import Model, Module
+
+
+ class State(Module):
+     def __init__(self, model: Model):
+         """
+         The state module contains static and dynamic particle properties
+
+         The other modules interact with the state module mostly through
+         the getitem and setitem methods. For instance, to increase the
+         depth of all particles by 1, use
+
+         >>> model.state['Z'] += 1
+
+         :param model: Parent model
+         """
+         super().__init__(model)
+
+     @property
+     def size(self):
+         """
+         Current number of particles
+         """
+         raise NotImplementedError
+
+     @property
+     def released(self):
+         """
+         Total number of released particles
+         """
+         raise NotImplementedError
+
+     def append(self, particles: dict):
+         """
+         Add new particles
+
+         Missing variables are assigned a default value of 0.
+
+         :param particles: A mapping from variable names to values
+         """
+         raise NotImplementedError
+
+     def remove(self, particles):
+         """
+         Remove particles
+
+         :param particles: Boolean index of particles to remove
+         """
+         raise NotImplementedError
+
+     def __getitem__(self, item):
+         raise NotImplementedError
+
+     def __setitem__(self, key, value):
+         raise NotImplementedError
+
+     def __len__(self):
+         return self.size
+
+     def __contains__(self, item):
+         raise NotImplementedError
+
+
+ class DynamicState(State):
+     def __init__(self, model: Model):
+         super().__init__(model)
+
+         self._num_released = 0
+         self._varnames = set()
+
+         self._data = pd.DataFrame()
+
+     @property
+     def released(self):
+         return self._num_released
+
+     def append(self, particles: dict):
+         # If there are no new particles, do nothing
+         if not particles:
+             return
+
+         num_new_particles = next(len(v) for v in particles.values())
+         particles['pid'] = np.arange(num_new_particles) + self._num_released
+         particles['alive'] = np.ones(num_new_particles, dtype=bool)
+         if 'active' in particles:
+             particles['active'] = np.array(particles['active'], dtype=bool)
+         else:
+             particles['active'] = np.ones(num_new_particles, dtype=bool)
+
+         new_particles = pd.DataFrame(data=particles)
+         self._data = pd.concat(
+             objs=[self._data, new_particles],
+             axis='index',
+             ignore_index=True,
+             join='outer',
+         )
+
+         self._num_released += num_new_particles
+
+     def remove(self, particles):
+         if not np.any(particles):
+             return
+
+         keep = ~particles
+         self._data = self._data.iloc[keep]
+
+     @property
+     def size(self):
+         return len(self._data)
+
+     def __getitem__(self, item):
+         return self._data[item].values
+
+     def __setitem__(self, item, value):
+         self._data[item] = value
+
+     def __getattr__(self, item):
+         if item not in self:
+             raise AttributeError(f'Attribute not defined: {item}')
+         return self[item]
+
+     def __contains__(self, item):
+         return item in self._data
+
+     @property
+     def dt(self):
+         """Backwards-compatibility property returning model.solver.step"""
+         return self.model.solver.step
+
+     @property
+     def timestamp(self):
+         """Backwards-compatibility property returning the solver time as a numpy datetime"""
+         return np.int64(self.model.solver.time).astype('datetime64[s]')
+
+     @property
+     def timestep(self):
+         """Backwards-compatibility property returning the solver time as a timestep index"""
+         elapsed = self.model.solver.time - self.model.solver.start
+         return elapsed // self.model.solver.step
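The bookkeeping in `DynamicState.append` and `remove` can be mirrored with plain pandas/numpy to see the mechanics; the variable names below are illustrative:

    import numpy as np
    import pandas as pd

    data = pd.DataFrame()
    num_released = 0

    particles = {'X': [1.0, 2.0], 'Y': [5.0, 6.0]}
    n = next(len(v) for v in particles.values())
    particles['pid'] = np.arange(n) + num_released  # stable particle IDs
    data = pd.concat([data, pd.DataFrame(particles)], ignore_index=True, join='outer')
    num_released += n

    remove_mask = np.array([True, False])  # drop the first particle
    data = data.iloc[~remove_mask]
    print(data)  # one row left, with pid == 1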