pyxla 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyxla/__about__.py +1 -0
- pyxla/__init__.py +1380 -0
- pyxla/sampling.py +508 -0
- pyxla/util.py +573 -0
- pyxla-0.0.1.dist-info/METADATA +63 -0
- pyxla-0.0.1.dist-info/RECORD +7 -0
- pyxla-0.0.1.dist-info/WHEEL +4 -0
pyxla/util.py
ADDED
|
@@ -0,0 +1,573 @@
|
|
|
1
|
+
"""Utility functions.
|
|
2
|
+
|
|
3
|
+
A set of utility functions that are not meant to be part of
|
|
4
|
+
pyXla.
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
import moocore
|
|
10
|
+
import os
|
|
11
|
+
import json
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import random
|
|
14
|
+
import numpy as np
|
|
15
|
+
import scipy
|
|
16
|
+
from tqdm.auto import tqdm
|
|
17
|
+
import inspect
|
|
18
|
+
import logging
|
|
19
|
+
from typing import List, Generator, Callable, Union
|
|
20
|
+
import matplotlib
|
|
21
|
+
import matplotlib.pyplot as plt
|
|
22
|
+
from matplotlib import cm
|
|
23
|
+
from matplotlib.ticker import MaxNLocator
|
|
24
|
+
import math
|
|
25
|
+
|
|
26
|
+
logging.basicConfig(level=logging.INFO)
|
|
27
|
+
|
|
28
|
+
# load all CSV files provided by the user and update the sample structure
|
|
29
|
+
def load_data(sample, sep = ' '):
    """Load the input files referenced by a sample and complete it in place.

    For each input type, a ``'<letter>csv'`` key holding a file path is
    read with ``pandas.read_csv`` and stored under ``'<letter>'``.
    Missing optional inputs and settings are filled with defaults, and
    the rank table is finally built through ``compute_R``.

    Parameters
    ----------
    sample : dict
        The sample to complete. It must contain either ``'F'`` or
        ``'Fcsv'``. The keys ``'X'``/``'Xcsv'``, ``'V'``/``'Vcsv'``,
        ``'N'``/``'Ncsv'``, ``'D'``/``'Dcsv'``, ``'max'``, ``'p'``,
        ``'representation'``, ``'d_metric_func'`` and
        ``'neighbourhood_func'`` are optional.
    sep : str, optional
        Column separator of the CSV files, by default a single space.

    Notes
    -----
    The function mutates ``sample`` and returns ``None``. It sets
    ``'size'``, ``'numF'`` and ``'numV'`` and adds a boolean ``'feasible'``
    column to V, which is ``True`` where the violations of a row sum to 0.
    """
    # read F (required)
    if 'F' not in sample:
        sample['F'] = pd.read_csv(sample['Fcsv'], sep=sep).drop(columns=['id'], errors='ignore')
    sample['size'] = len(sample['F'])
    sample['numF'] = len(sample['F'].columns)
    if 'max' not in sample:
        sample['max'] = False  # we minimize by default

    # read X (optional)
    if 'X' not in sample:
        sample['X'] = None
    if 'Xcsv' in sample:
        sample['X'] = pd.read_csv(sample['Xcsv'], sep=sep).drop(columns=['id'], errors='ignore')
        assert sample['size'] == len(sample['X']), 'X and F must have the same number of rows'

    # read V (optional)
    if not present(sample, 'V'):
        sample['V'] = None
    if 'Vcsv' in sample:
        sample['V'] = pd.read_csv(sample['Vcsv'], sep=sep)
        assert sample['size'] == len(sample['V']), 'V and F must have the same number of rows'
    if sample['V'] is None:
        sample['numV'] = 0
    else:
        V = sample['V']
        # A V that was already processed (or saved and reloaded) carries a
        # `feasible` column. It is derived data, so it must neither be
        # counted as a violation nor be summed into the feasibility test.
        violations = V.loc[:, V.columns != 'feasible']
        sample['numV'] = len(violations.columns)
        V['feasible'] = violations.sum(axis=1) == 0

    # read N (optional)
    if 'N' not in sample:
        sample['N'] = None
    if 'Ncsv' in sample:
        sample['N'] = pd.read_csv(sample['Ncsv'], sep=sep)

    # read D (optional)
    if 'D' not in sample:
        sample['D'] = None
    elif present(sample, 'D') and {'id1', 'id2'}.issubset(sample['D'].columns):
        # Index only a frame that still has both id columns: a `None`
        # placeholder or an already indexed frame is left untouched.
        sample['D'].set_index(['id1', 'id2'], inplace=True)
    if 'Dcsv' in sample:
        sample['D'] = pd.read_csv(sample['Dcsv'], sep=sep)
        # use multi-indexing to improve lookup
        sample['D'].set_index(['id1', 'id2'], inplace=True)

    # distance metric: euclidean distance by default
    # NOTE(review): `p` is presumably the order of a Minkowski distance -- confirm against its users.
    if 'p' not in sample:
        sample['p'] = 2
    if 'representation' not in sample:
        sample['representation'] = 'continuous'
    if 'd_metric_func' not in sample:
        sample['d_metric_func'] = None
    if 'neighbourhood_func' not in sample:
        sample['neighbourhood_func'] = None

    compute_R(sample)
|
|
83
|
+
|
|
84
|
+
def to_sample(dict: dict) -> dict:
    """Turn a plain dictionary of inputs into a pyxla sample.

    The dictionary is completed in place by ``load_data`` (using its
    default separator) and the very same object is handed back.

    Parameters
    ----------
    dict : dict
        Dictionary with input files.

    Returns
    -------
    dict
        The dictionary that was passed in, now usable with pyxla features.
    """
    sample = dict
    load_data(sample)
    return sample
|
|
99
|
+
|
|
100
|
+
def compute_R(sample):
    """Build the rank table ``sample['R']`` and store its size in ``sample['numR']``.

    The table holds one dense rank per objective, the Pareto rank over
    the objectives and, when a V frame is present, one dense rank per
    violation, the Pareto rank over the violations, a combined rank
    following Deb's feasibility rules, the Pareto rank over objectives and
    violations together, and the ``feasible`` flag copied from V.
    Pareto columns that are not meaningful for the number of objectives
    or violations are dropped again at the end.

    Parameters
    ----------
    sample : dict
        A sample providing ``'F'``, ``'V'``, ``'max'``, ``'numF'`` and
        ``'numV'``.
    """
    ranks = pd.DataFrame()
    sample['R'] = ranks
    objectives = sample['F']
    violations = sample['V']

    # objective-wise rank (dense, see pandas.DataFrame.rank)
    minimise = not sample['max']
    for name in objectives.columns:
        ranks[name] = objectives[name].rank(ascending=minimise, method='dense').astype(int)

    # Pareto rank on objectives
    ranks['paretoF'] = moocore.pareto_rank(objectives, maximise=sample['max'])

    if present(sample, 'V'):
        # `feasible` is derived data, not a violation
        v_only = violations.loc[:, violations.columns != 'feasible']

        # violation-wise rank (dense)
        for name in v_only:
            ranks[name] = violations[name].rank(ascending=True, method='dense').astype(int)

        # Pareto rank on violations
        ranks['paretoV'] = moocore.pareto_rank(v_only, maximise=False)

        # feasibility rules from Deb: the violation rank is the integer
        # part, the objective rank scaled below 1 only breaks ties
        div = max(ranks['paretoF']) + 1
        combined = ranks['paretoF'] / div + ranks['paretoV']
        ranks['Deb'] = combined.rank(ascending=True, method='dense').astype(int)

        # Pareto rank on objectives AND violations
        FV = pd.merge(objectives, v_only, left_index=True, right_index=True)
        # one maximise flag per column: objectives follow `max`, violations are minimised
        flags = [sample['max']] * sample['numF'] + [False] * sample['numV']
        ranks['paretoFV'] = moocore.pareto_rank(FV, maximise=flags)
        ranks['feasible'] = violations['feasible']

    # start from every possible rank column, then discard the pointless ones
    numR = sample['numF'] + sample['numV'] + 4
    if sample['numF'] <= 1:
        ranks.drop(columns='paretoF', inplace=True)
        numR -= 1
    if sample['numV'] <= 1:
        ranks.drop(columns='paretoV', inplace=True, errors='ignore')
        numR -= 1
    if sample['numF'] == 0 or sample['numV'] == 0:
        for name in ('paretoFV', 'Deb'):
            ranks.drop(columns=name, inplace=True, errors='ignore')
        numR -= 2
    sample['numR'] = numR
|
|
148
|
+
|
|
149
|
+
def present(sample: dict, input: str) -> bool:
    """Check presence of a given input file in a sample.

    An input is considered present if a ``pandas.DataFrame`` is stored
    under the key corresponding to the input file. A missing key, ``None``
    or any other kind of value counts as absent.

    Parameters
    ----------
    sample : dict
        A loaded sample containing input files.
    input : str
        A character representing an input file type i.e. 'X' or 'F'

    Returns
    -------
    bool
        `True` if the input file is present and `False` otherwise.
    """
    # `.get` folds the membership test and the lookup into a single access
    return isinstance(sample.get(input), pd.DataFrame)
|
|
169
|
+
|
|
170
|
+
def list_samples(test: bool = False) -> List[str]:
    """List the names of the samples stored on disk.

    Parameters
    ----------
    test : bool, optional
        If `True`, list ``../data/test_samples``, otherwise
        ``../data/samples``. Both paths are relative to the current
        working directory.

    Returns
    -------
    list of str
        The directory entries, without the ``.gitkeep`` placeholder.
    """
    path = f"../data/{'test_' if test else ''}samples"
    return [entry for entry in os.listdir(path) if entry != '.gitkeep']
|
|
175
|
+
|
|
176
|
+
def load_samples_with(input: str, test: bool = False) -> Generator:
    """Lazily yield every stored sample in which the given input is present.

    Parameters
    ----------
    input : str
        A character representing an input file type i.e. 'X' or 'F'
    test : bool, optional
        Forwarded to ``list_samples`` and ``load_sample``.

    Yields
    ------
    dict
        Each loaded sample for which ``present(sample, input)`` holds.
    """
    loaded = (load_sample(name, test=test) for name in list_samples(test=test))
    yield from (candidate for candidate in loaded if present(candidate, input))
|
|
180
|
+
|
|
181
|
+
def load_samples_without(input: str, test: bool = False) -> Generator:
    """Lazily yield every stored sample in which the given input is absent.

    Parameters
    ----------
    input : str
        A character representing an input file type i.e. 'X' or 'F'
    test : bool, optional
        Forwarded to ``list_samples`` and ``load_sample``.

    Yields
    ------
    dict
        Each loaded sample for which ``present(sample, input)`` is false.
    """
    loaded = (load_sample(name, test=test) for name in list_samples(test=test))
    yield from (candidate for candidate in loaded if not present(candidate, input))
|
|
185
|
+
|
|
186
|
+
def load_sample(name: str, test=False, exclude=None):
    """Load an example dataset.

    pyXla provides a few example datasets that can be used
    to try out the framework. They are looked up in ``../data/samples``
    (or ``../data/test_samples``), relative to the current working
    directory.

    Parameters
    ----------
    name : str
        Name of the dataset. [@todo: include link to folder of datasets]
    test : bool, optional
        If `True`, a lightweight test version of the sample is loaded.
        If `False`, the whole dataset is loaded.
    exclude : list[str], optional
        Input types (i.e. 'X' or 'D') whose files should not be loaded,
        to reduce memory footprint. This is useful where some input files
        are not needed for the desired operation. By default nothing is
        excluded.

    Returns
    -------
    sample : dict
        A dictionary with the input files of a problem sample i.e. F or V files.

    Examples
    --------
    >>> from pyxla.util import load_sample
    >>> import pandas as pd
    >>> sample = load_sample('cec2010_c18_2d_F1_V2')
    >>> type(sample)
    <class 'dict'>
    """
    samples_dir = f"../data/{'test_' if test else ''}samples"
    # `None` replaces the former mutable `[]` default; callers that pass
    # their own list are unaffected.
    excluded = [] if exclude is None else exclude
    return load_sample_from(samples_dir, name, exclude=excluded)
|
|
220
|
+
|
|
221
|
+
def load_sample_from(dir: str, name: str, exclude=None):
    """Load the sample stored in the directory ``<dir>/<name>``.

    Every entry of the directory other than ``metadata.json`` is
    registered as an input file, and ``load_data`` then reads them. The
    content of ``metadata.json`` is copied key by key into the sample.

    Parameters
    ----------
    dir : str
        Directory that contains the sample directory.
    name : str
        Name of the sample directory; also stored under ``'name'``.
    exclude : list[str], optional
        Input types (i.e. 'X' or 'D') whose files should be skipped. By
        default nothing is excluded.

    Returns
    -------
    dict
        The loaded sample.
    """
    # `None` replaces the former mutable `[]` default
    excluded = () if exclude is None else exclude
    sample = {'name': name}
    path = os.path.join(dir, name)
    for f in os.listdir(path):
        if f == "metadata.json":
            # JSON is UTF-8 by specification; do not depend on the locale
            with open(f'{path}/{f}', 'r', encoding='utf-8') as file:
                sample.update(json.load(file))
            continue
        # The input type is the character right before the last four
        # characters of the file name, e.g. `<name>_F.csv` -> `F`.
        kind = f[-5:-4]
        if kind in excluded: continue
        sample[f'{kind}csv'] = f"{path}/{f}"
    load_data(sample)
    return sample
|
|
237
|
+
|
|
238
|
+
def compute_D(sample: dict,
              metric: Union[Callable, str] = None,
              representation: str = None,
              force: bool = False) -> pd.DataFrame:
    """Compute the pairwise distances between the solutions of a sample.

    Parameters
    ----------
    sample : dict
        A sample whose ``'X'`` input is present.
    metric : Callable or str, optional
        A metric function or the name of a distance metric as listed in
        `scipy`'s `pdist function
        <https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html>`_.
        A metric function must take two solutions and return the distance
        between them, i.e. ``dist(Xa, Xb) -> d``.
        For example: `lambda Xa, Xb: abs(Xa.sum() - Xb.sum())`.
        By default `None`.
    representation : {'continuous', 'binary'}, optional
        Representation of the data, only used when no metric is given,
        by default `None`.
    force : bool, optional
        If set to True, the distances are recomputed even if a D input is
        already present, by default `False`.

    Returns
    -------
    pandas.DataFrame
        One row per unordered pair of solutions, with the columns
        ``id1`` and ``id2`` (row positions in X, ``id1 < id2``) and
        ``d`` (their distance).

    Raises
    ------
    ValueError
        Raised if no X file is in the sample. It is also raised if there
        is an attempt to compute a D file when one is already present and
        `force` is set to `False`.
    Exception
        Raised if neither a metric function nor a representation category
        is specified.
    """
    if present(sample, 'D') and not force:
        raise ValueError('A D file already exists. Are you sure you want to recompute it? To recompute it retry with `force=True`.')
    if not present(sample, 'X'):
        raise ValueError('An X file is required to compute a D file.')
    if not (metric or representation):
        raise Exception('Neither a metric function nor representation specified. Please specify a metric function or representation i.e. `continuous`, `discrete`.')

    solutions = sample['X']
    distances = pd.DataFrame(columns=['id1', 'id2', 'd'])

    # the strict upper triangle enumerates each unordered pair exactly once
    first, second = np.triu_indices(len(solutions), k=1)
    distances['id1'] = first
    distances['id2'] = second

    distances['d'] = calc_pairwise_dist(solutions, metric=metric, representation=representation)

    return distances
|
|
296
|
+
|
|
297
|
+
def calc_pairwise_dist(X,
                       metric: Union[Callable, str] = None,
                       representation: str = None,) -> np.ndarray:
    """Compute the condensed pairwise distances between the rows of ``X``.

    Parameters
    ----------
    X : pandas.DataFrame, pandas.Series or array-like
        The solutions, one per row. One-dimensional data is treated as a
        single column.
    metric : Callable or str, optional
        A metric function taking two rows, or the name of a metric known
        to ``scipy.spatial.distance.pdist``. A callable is first checked
        with ``check_metric_func``. Note that ``pdist`` receives the data
        as a NumPy array.
    representation : str, optional
        Only used when no metric is given: ``'continuous'`` selects the
        euclidean distance, anything else the hamming distance.

    Returns
    -------
    numpy.ndarray
        The condensed distance vector returned by ``pdist``.
    """
    # import the submodule explicitly: a bare `import scipy` does not
    # expose `scipy.spatial` on every SciPy version
    from scipy.spatial.distance import pdist

    if callable(metric): check_metric_func(X, metric)

    if not metric:
        # euclidean distance if continuous, hamming if binary
        metric = 'euclidean' if representation == 'continuous' else 'hamming'

    points = X.to_numpy() if hasattr(X, 'to_numpy') else np.asarray(X)

    # pdist needs a 2-D array; a 1-D input becomes a single column
    if points.ndim < 2: points = points.reshape(-1, 1)

    return pdist(points, metric)
|
|
313
|
+
|
|
314
|
+
def check_metric_func(nd_array_like, metric_func: callable):
    """Validate a user-supplied metric function.

    The function must declare exactly two parameters and must return
    something other than ``None`` when called on the first two rows of
    the data.

    Parameters
    ----------
    nd_array_like : pandas.DataFrame or sequence
        The data the metric will be applied to. Rows of a DataFrame are
        taken by position; anything else is indexed with ``[0]`` and ``[1]``.
    metric_func : callable
        The metric function to check.

    Returns
    -------
    bool
        Always `True` when the checks pass.

    Raises
    ------
    Exception
        If the function does not take exactly two arguments or returns
        ``None``.
    """
    sig = inspect.signature(metric_func)
    if len(sig.parameters) != 2:
        raise Exception('A metric function must take exactly two arguments.')

    n_rows = len(nd_array_like)
    if n_rows == 0:
        # nothing to probe the function with
        return True

    rows = nd_array_like.iloc if isinstance(nd_array_like, pd.DataFrame) else nd_array_like
    a = rows[0]
    # with a single row, probe the function with that row twice
    b = rows[1] if n_rows > 1 else rows[0]

    # identity test: `== None` is element-wise on arrays and Series, whose
    # truth value is ambiguous
    if metric_func(a, b) is None:
        raise Exception('A metric function must return a value.')
    return True
|
|
327
|
+
|
|
328
|
+
def save_input_file(sample: dict,
                    input: str,
                    dir: str = None,
                    sep: str = ' ',
                    index: bool = False) -> str:
    """Write one in-memory input of a sample to a CSV file.

    The file is named ``<sample name>_<input>.csv``.

    Parameters
    ----------
    sample : dict
        A sample containing the various input files i.e `F`, `V`.
    input : {'F', 'X', 'V', 'D', 'N'}
        A single character string indicating the type of input.
    dir : str, optional
        Path to the directory where the file should be saved.
        If `None` the file is saved in the current working directory.
    sep : str, optional
        Column separator of the written file, by default a single space.
    index : bool, optional
        Whether the index of the dataframe is written, by default `False`.

    Returns
    -------
    str
        Path to the saved input file, or `None` when the input is not
        present in the sample (nothing is written in that case).
    """
    if not present(sample, input): return

    target_dir = dir if dir else '.'
    path = f"{target_dir}/{sample['name']}_{input}.csv"
    frame = sample[input]
    frame.to_csv(path, sep=sep, index=index)
    return path
|
|
356
|
+
|
|
357
|
+
def sample_test_X(sample: dict, num_x: int = 10) -> List[int]:
    """Sample a few solutions from a large sample.

    For every violation column, the first solution that satisfies the
    constraint (violation equal to 0) and, when there are at least two,
    the first two solutions that violate it are selected. The selection
    is then completed with randomly drawn solutions until it holds
    `num_x` of them.

    Parameters
    ----------
    sample : dict
        A sample whose ``'V'`` input is present.
    num_x : int, optional
        Number of solutions to select, by default 10. It is capped at the
        number of solutions in V.

    Returns
    -------
    list[int]
        Index labels of the selected solutions, without duplicates.
        Labels are assumed to be integers.
    """
    V = sample['V']
    # Work on the violations only: `feasible` is derived data. Every row
    # is kept, so the last solution can be selected as well.
    violations = V.loc[:, V.columns != 'feasible']
    labels = list(violations.index)
    target = max(0, min(num_x, len(labels)))

    picked = {}  # a dict serves as an insertion-ordered set
    for col in violations.columns:
        # boolean masks work for any column name, unlike `DataFrame.query`
        satisfied = violations[col] == 0
        feas_idx = violations.index[satisfied]
        infeas_idx = violations.index[~satisfied]
        if len(feas_idx) > 0:
            picked[int(feas_idx[0])] = None
        if len(infeas_idx) >= 2:
            picked[int(infeas_idx[0])] = None
            picked[int(infeas_idx[1])] = None

    # `<` with a capped target always terminates; the former `!=` looped
    # forever when the target was overshot or unreachable
    while len(picked) < target:
        picked[int(random.choice(labels))] = None
    return list(picked)[:target]
|
|
386
|
+
|
|
387
|
+
def gen_test_samples(num_x: int = 10):
    """Generate small test samples for use in tests.

    The samples are generated from the samples provided by the
    pyXla core team, as listed by ``list_samples``.

    Parameters
    ----------
    num_x : int, optional
        Number of solutions to include in the test sample.

    Returns
    -------
    test_samples: list
        List of lightweight samples of size `num_x`
    """
    inputs = ["F", "X", "V"]
    test_samples = []
    # `list_samples` skips the `.gitkeep` placeholder, which is not a sample
    for s in list_samples():
        sample = load_sample(s)
        if present(sample, 'V'):
            X_idx = sample_test_X(sample, num_x=num_x)
            # `feasible` is derived data, keep it out of the saved V file
            sample['V'].drop(columns=['feasible'], inplace=True)
        else:
            # without V, take the first solutions, never more than available
            X_idx = list(range(min(num_x, len(sample['F']))))
        for i in inputs:
            if not present(sample, i): continue
            # keep the selected solutions only
            sample[i] = sample[i].loc[X_idx]
        if present(sample, 'D'):
            # load the recomputed D file into the sample
            sample['D'] = compute_D(sample, representation='continuous', force=True)
        if present(sample, 'N') and present(sample, 'X'):
            # a random neighbourhood relation is good enough for test data
            N = compute_N(sample, neighbourhood_func=lambda x, y: random.choice([True, False]), force=True)
            # load N file into the sample
            sample['N'] = N
        if present(sample, 'N') and not present(sample, 'X'):
            # N cannot be recomputed without X: remove it
            sample.pop('N')
        test_samples.append(sample)
    return test_samples
|
|
430
|
+
|
|
431
|
+
def save_test_samples(samples: List[dict]):
    """Save samples to file system

    The function saves the samples generated by `gen_test_samples`.
    Every sample gets its own directory ``../data/test_samples/<name>``
    (relative to the current working directory), with one
    space-separated CSV file per input that is present.

    Parameters
    ----------
    samples : list of dict
        Samples containing the various input files i.e `F`, `V`.
    """
    inputs = ["F", "X", "V", "D", "N"]
    for s in samples:
        # single quotes inside the f-strings: reusing the enclosing quote
        # is a SyntaxError before Python 3.12
        s_dir = f"../data/test_samples/{s['name']}"
        os.makedirs(s_dir, exist_ok=True)
        # for each input present
        for i in inputs:
            if not present(s, i): continue
            path = f"{s_dir}/{s['name']}_{i}.csv"
            s[i].to_csv(path, sep=' ', index=False)
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def generate_D_file(sample, dir: str = None) -> str:
    """Compute the D input of a sample, load it into the sample and save it.

    The metric function and the representation are taken from the
    sample's ``'d_metric_func'`` and ``'representation'`` entries
    (``None`` when missing).

    Parameters
    ----------
    sample : dict
        The sample to compute the distances for.
    dir : str, optional
        Directory in which the file is saved, forwarded to
        ``save_input_file``.

    Returns
    -------
    str
        The path returned by ``save_input_file``.
    """
    distances = compute_D(sample,
                          metric=sample.get('d_metric_func'),
                          representation=sample.get('representation'))
    # load D file into the sample
    sample['D'] = distances
    path = save_input_file(sample, 'D', dir=dir)
    # index only after saving, for efficient lookup in memory
    distances.set_index(['id1', 'id2'], inplace=True)
    return path
|
|
462
|
+
|
|
463
|
+
def generate_N_file(sample, dir: str = None) -> str:
    """Compute the N input of a sample, load it into the sample and save it.

    Parameters
    ----------
    sample : dict
        The sample to compute the neighbourhood for; its
        ``'neighbourhood_func'`` entry is handed to ``compute_N``.
    dir : str, optional
        Directory in which the file is saved, forwarded to
        ``save_input_file``.

    Returns
    -------
    str
        The path returned by ``save_input_file``.
    """
    # load N file into the sample
    sample['N'] = compute_N(sample, neighbourhood_func=sample['neighbourhood_func'])
    return save_input_file(sample, 'N', dir=dir)
|
|
469
|
+
|
|
470
|
+
def compute_N(sample: dict,
              neighbourhood_func: Callable = None,
              force=False) -> pd.DataFrame:
    """Compute the pairs of neighbouring solutions of a sample.

    Parameters
    ----------
    sample : dict
        A sample whose ``'X'`` input is present.
    neighbourhood_func : Callable, optional
        A function of the form ``f(X1, X2) -> bool`` telling whether two
        solutions are neighbours. It is only used when the sample has no
        ``'neighbourhood_func'`` of its own.
    force : bool, optional
        If set to True, the neighbourhood is recomputed even if an N
        input is already present, by default `False`.

    Returns
    -------
    pandas.DataFrame
        One row per pair of neighbours, with the columns ``id1`` and
        ``id2`` (row positions in X, ``id1 < id2``).

    Raises
    ------
    ValueError
        Raised if no X file is in the sample, or if an N file is already
        present and `force` is `False`.
    Exception
        Raised if no neighbourhood function is available.
    """
    if not force and present(sample, 'N'):
        raise ValueError('A N file already exists. Are you sure you want to recompute it? To recompute it retry with `force=True`.')
    if not present(sample, 'X'):
        raise ValueError('An X file is required to compute a N file.')
    # `.get`: samples that did not go through `load_data` have no such key
    n_func = sample.get('neighbourhood_func') or neighbourhood_func
    if not n_func:
        raise Exception('No neighbourhood function specified. Please specify a neighbourhood function of the form form f(X1, X2) -> bool.')

    X = sample['X']
    N = pd.DataFrame(columns=['id1', 'id2', 'n'])

    # the strict upper triangle enumerates each unordered pair exactly once
    a, b = np.triu_indices(len(X), k=1)
    N['id1'] = list(a)
    N['id2'] = list(b)

    # the pairwise machinery evaluates the neighbourhood function on every pair
    N['n'] = calc_pairwise_dist(X, metric=n_func)

    # retain only the pairs flagged as neighbours, without the flag itself
    return N.loc[N['n'] == True, ['id1', 'id2']]
|
|
495
|
+
|
|
496
|
+
def handle_missing_D_file(sample: dict, compute_D_file: bool, warn: bool = True):
    """Handle scenarios where a required D file is missing.

    Nothing happens when D is present. Otherwise X must be present, and
    the D file is generated through ``generate_D_file`` if requested.

    Parameters
    ----------
    sample : dict
        The sample to check.
    compute_D_file : bool
        Determine whether the missing D file should be computed.
    warn : bool, optional
        Whether to log a warning that computing the D file can take a
        while, by default `True`.

    Raises
    ------
    Exception
        Raised if both D and X inputs are absent.
    """
    if present(sample, 'D'):
        return
    if not present(sample, 'X'):
        raise Exception('Both D and X are absent. Please provide either D or X.')
    if not compute_D_file:
        return

    if warn:
        logging.warning('No D file is present, thus, computing the D file... Computing an entire D file can be time consuming. Instead, you can call the function with the keyword argument `compute_D_file` set to `False` to speed up computation, as only the required distances will be calculated.')
    path = generate_D_file(sample)
    logging.info(f'D file has been loaded to the current sample and is saved to {path}')
|
|
503
|
+
|
|
504
|
+
def handle_missing_N_file(sample: dict, compute_N_file: bool, warn: bool = True):
    """Handle scenarios where a required N file is missing.

    This function abstracts away checking for the presence of an N
    file when it is required. Nothing happens when N is present.
    Otherwise X must be present, and the N file is generated through
    ``generate_N_file`` if requested.

    Parameters
    ----------
    sample : dict
        The sample to check.
    compute_N_file : bool
        Determine whether the missing N file should be computed.
    warn : bool, optional
        Whether to log a warning that computing the N file can take a
        while, by default `True`.

    Raises
    ------
    Exception
        Raised if both N and X inputs are absent, as neighbourhood
        cannot be determined without having either.
    """
    if present(sample, 'N'):
        return
    if not present(sample, 'X'):
        raise Exception('Both N and X are absent. Please provide either N or X.')
    if not compute_N_file:
        return

    if warn:
        logging.warning('No N file is present, thus, computing the N file... Computing an entire N file can be time consuming. Instead, you can call the function with the keyword argument `compute_N_file` set to `False` to speed up computation, as only the required distances will be calculated.')
    path = generate_N_file(sample)
    logging.info(f'N file has been loaded to the current sample and is saved to {path}')
|
|
533
|
+
|
|
534
|
+
def equalize_axes_(ax: matplotlib.axes.Axes):
    """Give both axes of a plot the same limits, scale and tick density.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        The axes to modify in place.
    """
    # common range: the extremes over the current x and y limits
    bounds = [ax.get_xlim(), ax.get_ylim()]
    low, high = np.min(bounds), np.max(bounds)

    ax.set_xlim(low, high)
    ax.set_ylim(low, high)

    # make plot aspect ratio square
    ax.set_aspect('equal', adjustable='box')

    def _within_limits(ticks):
        # ticks lying inside the common range
        return ticks[(ticks >= low) & (ticks <= high)]

    visible_x = _within_limits(ax.get_xticks())
    visible_y = _within_limits(ax.get_yticks())

    # use the smaller number of visible ticks on both axes
    tick_count = min(len(visible_x), len(visible_y))
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(MaxNLocator(nbins=tick_count))
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def plot_3d_(x, y, z, ax=None):
    """Draw a triangulated surface through the points ``(x, y, z)``.

    Parameters
    ----------
    x, y, z
        Coordinates handed to ``plot_trisurf``.
    ax : optional
        The 3D axes to draw on. When none is given, a new figure with a
        single 3D subplot is created.
    """
    target = ax if ax else plt.figure().add_subplot(projection='3d')
    target.plot_trisurf(x, y, z, cmap=cm.coolwarm)
|
|
560
|
+
|
|
561
|
+
def save_ax_(i: int, from_fig: matplotlib.figure.Figure, filename: str = 'fig.png', area=(1.25, 1.25)):
    """
    Extract an axis from a figure and save it. This function is only used
    for writing Hons research report, it should be removed in future.

    Parameters
    ----------
    i : int
        Position of the axis in ``from_fig.get_axes()``.
    from_fig : matplotlib.figure.Figure
        The figure that holds the axis.
    filename : str, optional
        Destination file, by default ``'fig.png'``.
    area : tuple, optional
        Horizontal and vertical factors by which the bounding box of the
        axis is expanded before saving, by default ``(1.25, 1.25)``.

    Raises
    ------
    Exception
        If `i` is beyond the last axis of the figure.
    """
    all_axes = from_fig.get_axes()

    last_index = len(all_axes) - 1
    if i > last_index:
        raise Exception('Axis index out of bounds.')

    chosen = all_axes[i]
    chosen.set_title('')  # the title is removed from the axis itself
    # bounding box of the axis, converted from pixels to inches
    to_inches = from_fig.dpi_scale_trans.inverted()
    extent = chosen.get_window_extent().transformed(to_inches)
    from_fig.savefig(filename, bbox_inches=extent.expanded(*area), dpi=300)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyxla
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A flexible and generic framework for explainable landscape analysis.
|
|
5
|
+
Project-URL: Documentation, https://gitlab.com/aliefooghe/pyxla-wg/main/README.md
|
|
6
|
+
Project-URL: Issues, https://gitlab.com/aliefooghe/pyxla-wg/-/issues
|
|
7
|
+
Project-URL: Source, https://gitlab.com/aliefooghe/pyxla-wg
|
|
8
|
+
Author-email: "T. Ombaso" <mogoa.tonny@gmail.com>, "A. Liefooghe" <arnaud.liefooghe@univ-littoral.fr>, "K. M. Malan" <malankm@unisa.ac.za>, "S. Verel" <verel@univ-littoral.fr>, "A. Bosman" <a.bosman@unisa.ac.za>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Keywords: Explainable landscape analysis,Landscape analysis
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
|
+
Requires-Python: >=3.8
|
|
21
|
+
Requires-Dist: hilbertcurve
|
|
22
|
+
Requires-Dist: iprogress
|
|
23
|
+
Requires-Dist: ipykernel
|
|
24
|
+
Requires-Dist: ipywidgets
|
|
25
|
+
Requires-Dist: matplotlib
|
|
26
|
+
Requires-Dist: moocore
|
|
27
|
+
Requires-Dist: more-itertools
|
|
28
|
+
Requires-Dist: pandas
|
|
29
|
+
Requires-Dist: pytest
|
|
30
|
+
Requires-Dist: scikit-learn
|
|
31
|
+
Requires-Dist: scipy
|
|
32
|
+
Requires-Dist: seaborn
|
|
33
|
+
Requires-Dist: sphinx
|
|
34
|
+
Requires-Dist: tqdm
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# pyXla
|
|
38
|
+
|
|
39
|
+
A flexible and generic framework for explainable landscape analysis.
|
|
40
|
+
|
|
41
|
+
### Install required packages (e.g. pytest, sphinx)
|
|
42
|
+
Navigate to the `src/` directory and then run:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
$ pip install -r requirements.txt
|
|
46
|
+
```
|
|
47
|
+
### How to run tests
|
|
48
|
+
Navigate to the `src/` directory and then run:
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
$ pytest
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Some tests will fail on the first run only. Re-run `pytest` to get all tests to pass.
|
|
55
|
+
|
|
56
|
+
### How to build docs
|
|
57
|
+
Navigate to the `docs/` directory and then run:
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
$ make clean html
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
It might take a while. The built docs will be saved in the `docs/build/` directory.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pyxla/__about__.py,sha256=3AOtWwQ4fRRYQ-YAd-edBCDPPQoIuESpwSl3Oo3HRJ4,22
|
|
2
|
+
pyxla/__init__.py,sha256=qvb5P_r3Gp9CstY8r4SILTSZpmvrpdl-9023G2qzauY,49279
|
|
3
|
+
pyxla/sampling.py,sha256=iTilF8M_4cgTOuw-Ge5KmQw1jtxNAQ0cou1H-DAJjfc,18537
|
|
4
|
+
pyxla/util.py,sha256=DRUGl2ipLt0eWoPWvUrC-Grk7whKgnTY8uREmWwyt20,20979
|
|
5
|
+
pyxla-0.0.1.dist-info/METADATA,sha256=HV6fMjrAraZJdyxwPp8Dtsyl326JoON-yvv8ZOQNE8g,2090
|
|
6
|
+
pyxla-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
+
pyxla-0.0.1.dist-info/RECORD,,
|