modulo-vki 2.0.6__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,339 +1,339 @@
1
- import numpy as np
2
- import os
3
- from tqdm import tqdm
4
- import math
5
-
6
-
7
class ReadData:
    """
    A MODULO helper class for input data.

    ReadData loads the data before MODULO is used and assembles the data
    matrix D (one row per spatial point/component, one column per snapshot).
    """

    def __init__(self):
        pass

    @staticmethod
    def _read_snapshot(folder, pattern, index, skip_header, skip_footer, first_col, n_cols):
        """Read snapshot number `index` (0-based) and stack its selected columns into one vector."""
        if n_cols < 1:
            # Without at least one component there is nothing to stack; the
            # previous implementation failed here with an unbound local.
            raise ValueError("N (number of components to read) must be at least 1.")
        # Files are numbered starting from 1, hence index + 1.
        Name = folder + os.sep + pattern % (index + 1) + '.dat'
        DATA = np.genfromtxt(Name, skip_header=skip_header, skip_footer=skip_footer)
        # Columns first_col .. first_col + n_cols - 1 are stacked one after the other.
        return np.concatenate([DATA[:, col] for col in range(first_col, first_col + n_cols)], axis=0)

    @classmethod
    def _data_processing(cls, D: np.ndarray, FOLDER_OUT: str = './',
                         N_PARTITIONS: int = 1,
                         MR: bool = False, SAVE_D: bool = False,
                         FOLDER_IN: str = './', filename: str = '',
                         h: int = 0, f: int = 0, c: int = 0,
                         N: int = 0, N_S: int = 0, N_T: int = 0):
        """
        Assemble (if needed) and pre-process the data matrix D.

        If D is a numpy array it is used directly; N_S and N_T are then taken
        from its shape. Otherwise the snapshots are read from
        FOLDER_IN and the matrix is assembled column by column.
        If MR is truthy, the temporal mean (the average over the snapshots,
        one value per row of D) is subtracted from every column.
        If N_PARTITIONS > 1 the matrix is split column-wise and the blocks
        are stored in FOLDER_OUT/data_partitions/di_<i>.npz.

        :param D: np.ndarray or None
            Data matrix D. When it is an array and MR is truthy, it is modified in place.
        :param FOLDER_OUT: str
            Folder in which the partitions and/or the data matrix are saved.
        :param N_PARTITIONS: int
            Number of column blocks.
            When D is provided and N_T is not a multiple of N_PARTITIONS, an
            additional partition (number N_PARTITIONS + 1) stores the remaining columns.
            When the data are read from files, the remaining snapshots are
            discarded (a warning is printed) and do not contribute to the mean.
        :param MR: bool, optional
            If True, the temporal mean is removed from each snapshot.
        :param SAVE_D: bool, optional
            If True and D is provided, D is saved (as float32) in
            FOLDER_OUT/data_matrix/database.npz. This is always done when D is
            provided and N_PARTITIONS > 1. It is not used when reading from files.
        :param FOLDER_IN: str, optional. Needed only if D is not provided
            Folder containing the snapshot files.
        :param filename: str, optional. Needed only if D is not provided
            %-format pattern of the file names, without extension. Snapshot k
            (k = 1, ..., N_T) is read from FOLDER_IN/<filename % k>.dat
        :param h: int, optional. Needed only if D is not provided
            Lines to be skipped from the header of each file.
        :param f: int, optional. Needed only if D is not provided
            Lines to be skipped from the footer of each file.
        :param c: int, optional. Needed only if D is not provided
            Columns to be skipped (for example if the first c columns contain the mesh grid).
        :param N: int, optional. Needed only if D is not provided
            Number of components (columns) to be read and stacked from each file.
        :param N_S: int, optional. Needed only if D is not provided
            Number of rows of D (length of one stacked snapshot).
        :param N_T: int, optional. Needed only if D is not provided
            Number of snapshots (files) to be read.

        :return:
            1. if N_PARTITIONS == 1 and MR is truthy: D, D_MEAN (the mean snapshot)
            2. if N_PARTITIONS == 1 and MR is falsy: D
            3. if N_PARTITIONS > 1 and MR is truthy: D_MEAN
            4. otherwise: None

        :raises AttributeError: if the first snapshot file cannot be read.
        :raises TypeError: if D is not provided and N_PARTITIONS is smaller than 1.
        :raises ValueError: if D is not provided and N < 1, or N_PARTITIONS > N_T.
        """
        D_MEAN = None

        if isinstance(D, np.ndarray):  # D was already initialised
            N_S = int(np.shape(D)[0])
            N_T = int(np.shape(D)[1])
            if MR:
                print("Removing the mean from D ...")
                D_MEAN = np.mean(D, 1)  # Temporal average (along the columns)
                print("Computing the mean-removed D ... ")
                # Broadcasting avoids building a tiled copy of the mean and a
                # second full-size temporary; D is still updated in place.
                D -= D_MEAN.reshape(-1, 1)

            D_f32 = None  # float32 copy, created at most once
            if N_PARTITIONS > 1:
                # Partitioning implies that the whole matrix is saved as well.
                SAVE_D = True
                D_f32 = D.astype('float32', casting='same_kind')
                os.makedirs(FOLDER_OUT + "/data_partitions/", exist_ok=True)
                print("Memory Saving feature is active. Partitioning Data Matrix...")
                dim_col = math.floor(N_T / N_PARTITIONS)
                columns_to_part = dim_col * N_PARTITIONS
                blocks = np.hsplit(D_f32[:, :columns_to_part], N_PARTITIONS)
                for ii, block in enumerate(blocks, start=1):
                    np.savez(FOLDER_OUT + f"/data_partitions/di_{ii}", di=block)
                if columns_to_part != N_T:
                    # Left-over columns go into one extra partition.
                    np.savez(FOLDER_OUT + f"/data_partitions/di_{N_PARTITIONS + 1}",
                             di=D_f32[:, columns_to_part:])

                print("\n Data Matrix has been successfully splitted. \n")

            if SAVE_D:
                os.makedirs(FOLDER_OUT + "/data_matrix", exist_ok=True)
                print(f"Saving the matrix D in {FOLDER_OUT}")
                if D_f32 is None:
                    D_f32 = D.astype('float32', casting='same_kind')
                np.savez(FOLDER_OUT + '/data_matrix/database', D=D_f32, n_t=N_T, n_s=N_S)
        else:  # try to read the data
            print("Data matrix was not provided, reading it from {}".format(FOLDER_IN))
            # First check that the first snapshot can be read at all.
            try:
                Name = FOLDER_IN + os.sep + filename % (0 + 1) + '.dat'
                np.genfromtxt(Name, skip_header=h, skip_footer=f)
            except Exception as err:
                # AttributeError is kept for backward compatibility; the
                # original cause is chained instead of being discarded.
                raise AttributeError(
                    "FOLDER_IN {} does not exist or filename {} has not the good format. Check the help!".format(
                        FOLDER_IN, filename)) from err

            if N_PARTITIONS == 1:  # one single matrix
                D = np.zeros((N_S, N_T))

                print("\n \n Importing data with no partitions... \n \n")

                if MR:
                    print("Mean removal activated")
                    D_MEAN = np.zeros(N_S)

                for k in tqdm(range(0, N_T)):
                    V = cls._read_snapshot(FOLDER_IN, filename, k, h, f, c, N)
                    if MR:
                        D_MEAN += 1 / N_T * V  # Snapshot contribution to the mean
                    D[:, k] = V

                if MR:
                    print("Removing the mean from D ...")
                    print("Computing the mean-removed D ... ")
                    D -= D_MEAN.reshape(-1, 1)

            elif N_PARTITIONS > 1:  # memory saving loop
                os.makedirs(FOLDER_OUT + "/data_partitions/", exist_ok=True)
                print("Memory Saving feature is active. Partitioning Data Matrix...")

                dim_col = math.floor(N_T / N_PARTITIONS)
                if dim_col == 0:
                    raise ValueError("N_PARTITIONS cannot be larger than the number of snapshots N_T.")
                columns_to_part = dim_col * N_PARTITIONS  # snapshots that are actually read
                # Snapshot indices of each block
                splitted_tmp = np.hsplit(np.arange(0, columns_to_part), N_PARTITIONS)
                if columns_to_part != N_T:
                    print("WARNING: the last " + str(N_T - columns_to_part) + ' snapshots are not considered')

                if MR:
                    print("Mean removal activated")
                    D_MEAN = np.zeros(N_S)

                for ii, block_idx in enumerate(splitted_tmp, start=1):
                    print('Working on block ' + str(ii) + '/' + str(N_PARTITIONS))
                    D = np.zeros((N_S, len(block_idx)))
                    first, last = int(block_idx[0]), int(block_idx[-1])
                    for count, k in enumerate(tqdm(range(first, last + 1))):
                        V = cls._read_snapshot(FOLDER_IN, filename, k, h, f, c, N)
                        if MR:
                            # Only columns_to_part snapshots are read, so the
                            # mean is taken over that number (not over N_T).
                            D_MEAN += 1 / columns_to_part * V
                        D[:, count] = V
                    np.savez(FOLDER_OUT + f"/data_partitions/di_{ii}", di=D)
                    print('Partition ' + str(ii) + '/' + str(N_PARTITIONS) + ' saved')

                if MR:
                    # The mean is known only after all blocks were read, so the
                    # stored blocks are reloaded, centred and written again.
                    print('Reloading the data for removing the mean')
                    for ii in range(1, len(splitted_tmp) + 1):
                        print(f"Mean centering block {ii}")
                        # Close the archive before overwriting the same file.
                        with np.load(FOLDER_OUT + f"/data_partitions/di_{ii}.npz") as stored:
                            di = stored['di']
                        np.savez(FOLDER_OUT + f"/data_partitions/di_{ii}", di=di - D_MEAN.reshape(-1, 1))
            else:
                raise TypeError("number of partitions not valid.")

        # The return value follows what was actually done: MR is tested by
        # truthiness, exactly like in the processing above.
        if N_PARTITIONS == 1:
            return (D, D_MEAN) if MR else D
        if N_PARTITIONS > 1 and MR:
            return D_MEAN
        return None
224
-
225
- '''
226
- @classmethod
227
- def from_xls(cls, filename, **kwargs):
228
- """
229
- This class method builds the df from an excel file.
230
-
231
- work
232
-
233
- """
234
- ## TBD
235
- return
236
-
237
- @classmethod
238
- def _from_csv(cls, folder, filename, N, N_S,
239
- h: int = 0, f: int = 0,
240
- c: int = 0):
241
- """
242
- This method imports data (in the specified format) and then assemblies the corresponding
243
- data matrix, D.
244
-
245
- :param folder: str
246
- Folder in which the data is stored
247
- :param filename: str
248
- Name of the files to be imported
249
- :param N number of components: int
250
- Components to be analysed
251
- :param h: int
252
- Lines to be skipped from header
253
- :param f: int
254
- Lines to be skipped from footer
255
- :param c: int
256
- Columns to be skipped
257
-
258
- :return: np.array
259
- Assembled DataMarix
260
-
261
- """
262
- path, dirs, files = next(os.walk(folder))
263
- files = [f for f in files if f.endswith('.csv')]
264
- N_T = len(files)
265
- D = np.zeros((N_S, N_T))
266
-
267
- print("\n \n Importing data... \n \n")
268
-
269
- for k in tqdm(range(0, N_T)):
270
- Name = folder + files[k] #os.sep + filename % (k + 1) + '.csv' # Name of the file to read
271
- # Read data from a file
272
- DATA = np.genfromtxt(Name, # usecols=np.arange(0, 2),
273
- skip_header=h, skip_footer=f) # Here we have the two colums
274
- # Dat = DATA[1:, :] # Here we remove the first raw, containing the header
275
- for ii in range(c, N + c):
276
- tmp = DATA[:, ii]
277
- if ii == c:
278
- V = np.copy(tmp)
279
- else:
280
- V = np.concatenate([V, tmp], axis=0)
281
-
282
- D[:, k] = V # Reshape and assign
283
-
284
- return D
285
-
286
- @classmethod
287
- def _from_txt(cls, folder, filename, N, N_S,
288
- h: int = 0, f: int = 0,
289
- c: int = 0):
290
- """
291
- This method imports data (in the specified format) and then assemblies the corresponding
292
- data matrix, D.
293
-
294
- :param folder: str
295
- Folder in which the data is stored
296
- :param filename: str
297
- Name of the files to be imported
298
- :param N number of components: int
299
- Components to be analysed
300
- :param h: int
301
- Lines to be skipped from header
302
- :param f: int
303
- Lines to be skipped from footer
304
- :param c: int
305
- Columns to be skipped
306
-
307
- :return: np.array
308
- Assembled DataMarix
309
-
310
- """
311
- path, dirs, files = next(os.walk(folder))
312
- N_T = len(files)
313
- D = np.zeros((N_S, N_T))
314
-
315
- print("\n \n Importing data... \n \n")
316
-
317
- for k in tqdm(range(0, N_T)):
318
- Name = folder + os.sep + filename % (k + 1) + '.txt' # Name of the file to read
319
- # Read data from a file
320
- DATA = np.genfromtxt(Name, # usecols=np.arange(0, 2),
321
- skip_header=h, skip_footer=f) # Here we have the two colums
322
- # Dat = DATA[1:, :] # Here we remove the first raw, containing the header
323
- for ii in range(c, N + c):
324
- tmp = DATA[:, ii]
325
- if ii == c:
326
- V = np.copy(tmp)
327
- else:
328
- V = np.concatenate([V, tmp], axis=0)
329
-
330
- D[:, k] = V # Reshape and assign
331
-
332
- return D
333
-
334
-
335
- '''
336
-
337
-
338
- #%%
339
-
1
+ import numpy as np
2
+ import os
3
+ from tqdm import tqdm
4
+ import math
5
+
6
+
7
class ReadData:
    """
    A MODULO helper class for input data.

    ReadData loads the data before MODULO is used and assembles the data
    matrix D (one row per spatial point/component, one column per snapshot).
    """

    def __init__(self):
        pass

    @staticmethod
    def _read_snapshot(folder, pattern, index, skip_header, skip_footer, first_col, n_cols):
        """Read snapshot number `index` (0-based) and stack its selected columns into one vector."""
        if n_cols < 1:
            # Without at least one component there is nothing to stack; the
            # previous implementation failed here with an unbound local.
            raise ValueError("N (number of components to read) must be at least 1.")
        # Files are numbered starting from 1, hence index + 1.
        Name = folder + os.sep + pattern % (index + 1) + '.dat'
        DATA = np.genfromtxt(Name, skip_header=skip_header, skip_footer=skip_footer)
        # Columns first_col .. first_col + n_cols - 1 are stacked one after the other.
        return np.concatenate([DATA[:, col] for col in range(first_col, first_col + n_cols)], axis=0)

    @classmethod
    def _data_processing(cls, D: np.ndarray, FOLDER_OUT: str = './',
                         N_PARTITIONS: int = 1,
                         MR: bool = False, SAVE_D: bool = False,
                         FOLDER_IN: str = './', filename: str = '',
                         h: int = 0, f: int = 0, c: int = 0,
                         N: int = 0, N_S: int = 0, N_T: int = 0):
        """
        Assemble (if needed) and pre-process the data matrix D.

        If D is a numpy array it is used directly; N_S and N_T are then taken
        from its shape. Otherwise the snapshots are read from
        FOLDER_IN and the matrix is assembled column by column.
        If MR is truthy, the temporal mean (the average over the snapshots,
        one value per row of D) is subtracted from every column.
        If N_PARTITIONS > 1 the matrix is split column-wise and the blocks
        are stored in FOLDER_OUT/data_partitions/di_<i>.npz.

        :param D: np.ndarray or None
            Data matrix D. When it is an array and MR is truthy, it is modified in place.
        :param FOLDER_OUT: str
            Folder in which the partitions and/or the data matrix are saved.
        :param N_PARTITIONS: int
            Number of column blocks.
            When D is provided and N_T is not a multiple of N_PARTITIONS, an
            additional partition (number N_PARTITIONS + 1) stores the remaining columns.
            When the data are read from files, the remaining snapshots are
            discarded (a warning is printed) and do not contribute to the mean.
        :param MR: bool, optional
            If True, the temporal mean is removed from each snapshot.
        :param SAVE_D: bool, optional
            If True and D is provided, D is saved (as float32) in
            FOLDER_OUT/data_matrix/database.npz. This is always done when D is
            provided and N_PARTITIONS > 1. It is not used when reading from files.
        :param FOLDER_IN: str, optional. Needed only if D is not provided
            Folder containing the snapshot files.
        :param filename: str, optional. Needed only if D is not provided
            %-format pattern of the file names, without extension. Snapshot k
            (k = 1, ..., N_T) is read from FOLDER_IN/<filename % k>.dat
        :param h: int, optional. Needed only if D is not provided
            Lines to be skipped from the header of each file.
        :param f: int, optional. Needed only if D is not provided
            Lines to be skipped from the footer of each file.
        :param c: int, optional. Needed only if D is not provided
            Columns to be skipped (for example if the first c columns contain the mesh grid).
        :param N: int, optional. Needed only if D is not provided
            Number of components (columns) to be read and stacked from each file.
        :param N_S: int, optional. Needed only if D is not provided
            Number of rows of D (length of one stacked snapshot).
        :param N_T: int, optional. Needed only if D is not provided
            Number of snapshots (files) to be read.

        :return:
            1. if N_PARTITIONS == 1 and MR is truthy: D, D_MEAN (the mean snapshot)
            2. if N_PARTITIONS == 1 and MR is falsy: D
            3. if N_PARTITIONS > 1 and MR is truthy: D_MEAN
            4. otherwise: None

        :raises AttributeError: if the first snapshot file cannot be read.
        :raises TypeError: if D is not provided and N_PARTITIONS is smaller than 1.
        :raises ValueError: if D is not provided and N < 1, or N_PARTITIONS > N_T.
        """
        D_MEAN = None

        if isinstance(D, np.ndarray):  # D was already initialised
            N_S = int(np.shape(D)[0])
            N_T = int(np.shape(D)[1])
            if MR:
                print("Removing the mean from D ...")
                D_MEAN = np.mean(D, 1)  # Temporal average (along the columns)
                print("Computing the mean-removed D ... ")
                # Broadcasting avoids building a tiled copy of the mean and a
                # second full-size temporary; D is still updated in place.
                D -= D_MEAN.reshape(-1, 1)

            D_f32 = None  # float32 copy, created at most once
            if N_PARTITIONS > 1:
                # Partitioning implies that the whole matrix is saved as well.
                SAVE_D = True
                D_f32 = D.astype('float32', casting='same_kind')
                os.makedirs(FOLDER_OUT + "/data_partitions/", exist_ok=True)
                print("Memory Saving feature is active. Partitioning Data Matrix...")
                dim_col = math.floor(N_T / N_PARTITIONS)
                columns_to_part = dim_col * N_PARTITIONS
                blocks = np.hsplit(D_f32[:, :columns_to_part], N_PARTITIONS)
                for ii, block in enumerate(blocks, start=1):
                    np.savez(FOLDER_OUT + f"/data_partitions/di_{ii}", di=block)
                if columns_to_part != N_T:
                    # Left-over columns go into one extra partition.
                    np.savez(FOLDER_OUT + f"/data_partitions/di_{N_PARTITIONS + 1}",
                             di=D_f32[:, columns_to_part:])

                print("\n Data Matrix has been successfully splitted. \n")

            if SAVE_D:
                os.makedirs(FOLDER_OUT + "/data_matrix", exist_ok=True)
                print(f"Saving the matrix D in {FOLDER_OUT}")
                if D_f32 is None:
                    D_f32 = D.astype('float32', casting='same_kind')
                np.savez(FOLDER_OUT + '/data_matrix/database', D=D_f32, n_t=N_T, n_s=N_S)
        else:  # try to read the data
            print("Data matrix was not provided, reading it from {}".format(FOLDER_IN))
            # First check that the first snapshot can be read at all.
            try:
                Name = FOLDER_IN + os.sep + filename % (0 + 1) + '.dat'
                np.genfromtxt(Name, skip_header=h, skip_footer=f)
            except Exception as err:
                # AttributeError is kept for backward compatibility; the
                # original cause is chained instead of being discarded.
                raise AttributeError(
                    "FOLDER_IN {} does not exist or filename {} has not the good format. Check the help!".format(
                        FOLDER_IN, filename)) from err

            if N_PARTITIONS == 1:  # one single matrix
                D = np.zeros((N_S, N_T))

                print("\n \n Importing data with no partitions... \n \n")

                if MR:
                    print("Mean removal activated")
                    D_MEAN = np.zeros(N_S)

                for k in tqdm(range(0, N_T)):
                    V = cls._read_snapshot(FOLDER_IN, filename, k, h, f, c, N)
                    if MR:
                        D_MEAN += 1 / N_T * V  # Snapshot contribution to the mean
                    D[:, k] = V

                if MR:
                    print("Removing the mean from D ...")
                    print("Computing the mean-removed D ... ")
                    D -= D_MEAN.reshape(-1, 1)

            elif N_PARTITIONS > 1:  # memory saving loop
                os.makedirs(FOLDER_OUT + "/data_partitions/", exist_ok=True)
                print("Memory Saving feature is active. Partitioning Data Matrix...")

                dim_col = math.floor(N_T / N_PARTITIONS)
                if dim_col == 0:
                    raise ValueError("N_PARTITIONS cannot be larger than the number of snapshots N_T.")
                columns_to_part = dim_col * N_PARTITIONS  # snapshots that are actually read
                # Snapshot indices of each block
                splitted_tmp = np.hsplit(np.arange(0, columns_to_part), N_PARTITIONS)
                if columns_to_part != N_T:
                    print("WARNING: the last " + str(N_T - columns_to_part) + ' snapshots are not considered')

                if MR:
                    print("Mean removal activated")
                    D_MEAN = np.zeros(N_S)

                for ii, block_idx in enumerate(splitted_tmp, start=1):
                    print('Working on block ' + str(ii) + '/' + str(N_PARTITIONS))
                    D = np.zeros((N_S, len(block_idx)))
                    first, last = int(block_idx[0]), int(block_idx[-1])
                    for count, k in enumerate(tqdm(range(first, last + 1))):
                        V = cls._read_snapshot(FOLDER_IN, filename, k, h, f, c, N)
                        if MR:
                            # Only columns_to_part snapshots are read, so the
                            # mean is taken over that number (not over N_T).
                            D_MEAN += 1 / columns_to_part * V
                        D[:, count] = V
                    np.savez(FOLDER_OUT + f"/data_partitions/di_{ii}", di=D)
                    print('Partition ' + str(ii) + '/' + str(N_PARTITIONS) + ' saved')

                if MR:
                    # The mean is known only after all blocks were read, so the
                    # stored blocks are reloaded, centred and written again.
                    print('Reloading the data for removing the mean')
                    for ii in range(1, len(splitted_tmp) + 1):
                        print(f"Mean centering block {ii}")
                        # Close the archive before overwriting the same file.
                        with np.load(FOLDER_OUT + f"/data_partitions/di_{ii}.npz") as stored:
                            di = stored['di']
                        np.savez(FOLDER_OUT + f"/data_partitions/di_{ii}", di=di - D_MEAN.reshape(-1, 1))
            else:
                raise TypeError("number of partitions not valid.")

        # The return value follows what was actually done: MR is tested by
        # truthiness, exactly like in the processing above.
        if N_PARTITIONS == 1:
            return (D, D_MEAN) if MR else D
        if N_PARTITIONS > 1 and MR:
            return D_MEAN
        return None
224
+
225
+ '''
226
+ @classmethod
227
+ def from_xls(cls, filename, **kwargs):
228
+ """
229
+ This class method builds the df from an excel file.
230
+
231
+ work
232
+
233
+ """
234
+ ## TBD
235
+ return
236
+
237
+ @classmethod
238
+ def _from_csv(cls, folder, filename, N, N_S,
239
+ h: int = 0, f: int = 0,
240
+ c: int = 0):
241
+ """
242
+ This method imports data (in the specified format) and then assemblies the corresponding
243
+ data matrix, D.
244
+
245
+ :param folder: str
246
+ Folder in which the data is stored
247
+ :param filename: str
248
+ Name of the files to be imported
249
+ :param N number of components: int
250
+ Components to be analysed
251
+ :param h: int
252
+ Lines to be skipped from header
253
+ :param f: int
254
+ Lines to be skipped from footer
255
+ :param c: int
256
+ Columns to be skipped
257
+
258
+ :return: np.array
259
+ Assembled DataMarix
260
+
261
+ """
262
+ path, dirs, files = next(os.walk(folder))
263
+ files = [f for f in files if f.endswith('.csv')]
264
+ N_T = len(files)
265
+ D = np.zeros((N_S, N_T))
266
+
267
+ print("\n \n Importing data... \n \n")
268
+
269
+ for k in tqdm(range(0, N_T)):
270
+ Name = folder + files[k] #os.sep + filename % (k + 1) + '.csv' # Name of the file to read
271
+ # Read data from a file
272
+ DATA = np.genfromtxt(Name, # usecols=np.arange(0, 2),
273
+ skip_header=h, skip_footer=f) # Here we have the two colums
274
+ # Dat = DATA[1:, :] # Here we remove the first raw, containing the header
275
+ for ii in range(c, N + c):
276
+ tmp = DATA[:, ii]
277
+ if ii == c:
278
+ V = np.copy(tmp)
279
+ else:
280
+ V = np.concatenate([V, tmp], axis=0)
281
+
282
+ D[:, k] = V # Reshape and assign
283
+
284
+ return D
285
+
286
+ @classmethod
287
+ def _from_txt(cls, folder, filename, N, N_S,
288
+ h: int = 0, f: int = 0,
289
+ c: int = 0):
290
+ """
291
+ This method imports data (in the specified format) and then assemblies the corresponding
292
+ data matrix, D.
293
+
294
+ :param folder: str
295
+ Folder in which the data is stored
296
+ :param filename: str
297
+ Name of the files to be imported
298
+ :param N number of components: int
299
+ Components to be analysed
300
+ :param h: int
301
+ Lines to be skipped from header
302
+ :param f: int
303
+ Lines to be skipped from footer
304
+ :param c: int
305
+ Columns to be skipped
306
+
307
+ :return: np.array
308
+ Assembled DataMarix
309
+
310
+ """
311
+ path, dirs, files = next(os.walk(folder))
312
+ N_T = len(files)
313
+ D = np.zeros((N_S, N_T))
314
+
315
+ print("\n \n Importing data... \n \n")
316
+
317
+ for k in tqdm(range(0, N_T)):
318
+ Name = folder + os.sep + filename % (k + 1) + '.txt' # Name of the file to read
319
+ # Read data from a file
320
+ DATA = np.genfromtxt(Name, # usecols=np.arange(0, 2),
321
+ skip_header=h, skip_footer=f) # Here we have the two colums
322
+ # Dat = DATA[1:, :] # Here we remove the first raw, containing the header
323
+ for ii in range(c, N + c):
324
+ tmp = DATA[:, ii]
325
+ if ii == c:
326
+ V = np.copy(tmp)
327
+ else:
328
+ V = np.concatenate([V, tmp], axis=0)
329
+
330
+ D[:, k] = V # Reshape and assign
331
+
332
+ return D
333
+
334
+
335
+ '''
336
+
337
+
338
+ #%%
339
+