mlpack 4.6.0__cp311-cp311-win_amd64.whl → 4.6.2__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlpack/__init__.py +4 -4
- mlpack/adaboost_classify.cp311-win_amd64.pyd +0 -0
- mlpack/adaboost_probabilities.cp311-win_amd64.pyd +0 -0
- mlpack/adaboost_train.cp311-win_amd64.pyd +0 -0
- mlpack/approx_kfn.cp311-win_amd64.pyd +0 -0
- mlpack/arma_numpy.cp311-win_amd64.pyd +0 -0
- mlpack/bayesian_linear_regression.cp311-win_amd64.pyd +0 -0
- mlpack/cf.cp311-win_amd64.pyd +0 -0
- mlpack/dbscan.cp311-win_amd64.pyd +0 -0
- mlpack/decision_tree.cp311-win_amd64.pyd +0 -0
- mlpack/det.cp311-win_amd64.pyd +0 -0
- mlpack/emst.cp311-win_amd64.pyd +0 -0
- mlpack/fastmks.cp311-win_amd64.pyd +0 -0
- mlpack/gmm_generate.cp311-win_amd64.pyd +0 -0
- mlpack/gmm_probability.cp311-win_amd64.pyd +0 -0
- mlpack/gmm_train.cp311-win_amd64.pyd +0 -0
- mlpack/hmm_generate.cp311-win_amd64.pyd +0 -0
- mlpack/hmm_loglik.cp311-win_amd64.pyd +0 -0
- mlpack/hmm_train.cp311-win_amd64.pyd +0 -0
- mlpack/hmm_viterbi.cp311-win_amd64.pyd +0 -0
- mlpack/hoeffding_tree.cp311-win_amd64.pyd +0 -0
- mlpack/image_converter.cp311-win_amd64.pyd +0 -0
- mlpack/include/mlpack/base.hpp +1 -0
- mlpack/include/mlpack/core/cv/k_fold_cv.hpp +21 -12
- mlpack/include/mlpack/core/cv/k_fold_cv_impl.hpp +49 -39
- mlpack/include/mlpack/core/data/data.hpp +5 -1
- mlpack/include/mlpack/core/data/data_options.hpp +219 -0
- mlpack/include/mlpack/core/data/detect_file_type.hpp +6 -8
- mlpack/include/mlpack/core/data/detect_file_type_impl.hpp +30 -76
- mlpack/include/mlpack/core/data/load.hpp +41 -3
- mlpack/include/mlpack/core/data/load_arff.hpp +4 -3
- mlpack/include/mlpack/core/data/load_arff_impl.hpp +68 -20
- mlpack/include/mlpack/core/data/{load_csv.hpp → load_categorical.hpp} +44 -80
- mlpack/include/mlpack/core/data/{load_categorical_csv.hpp → load_categorical_impl.hpp} +86 -46
- mlpack/include/mlpack/core/data/load_impl.hpp +264 -289
- mlpack/include/mlpack/core/data/load_model_impl.hpp +2 -1
- mlpack/include/mlpack/core/data/load_numeric.hpp +130 -0
- mlpack/include/mlpack/core/data/load_vec_impl.hpp +14 -10
- mlpack/include/mlpack/core/data/map_policies/missing_policy.hpp +3 -2
- mlpack/include/mlpack/core/data/matrix_options.hpp +172 -0
- mlpack/include/mlpack/core/data/save.hpp +32 -2
- mlpack/include/mlpack/core/data/save_impl.hpp +136 -167
- mlpack/include/mlpack/core/data/text_options.hpp +244 -0
- mlpack/include/mlpack/core/data/types.hpp +3 -4
- mlpack/include/mlpack/core/data/utilities.hpp +158 -0
- mlpack/include/mlpack/core/math/ccov.hpp +1 -0
- mlpack/include/mlpack/core/math/ccov_impl.hpp +4 -5
- mlpack/include/mlpack/core/math/make_alias.hpp +98 -3
- mlpack/include/mlpack/core/math/shuffle_data.hpp +68 -0
- mlpack/include/mlpack/core/metrics/bleu_impl.hpp +1 -1
- mlpack/include/mlpack/core/tree/binary_space_tree/traits.hpp +36 -178
- mlpack/include/mlpack/core/tree/space_split/hyperplane.hpp +20 -14
- mlpack/include/mlpack/core/tree/space_split/mean_space_split_impl.hpp +2 -2
- mlpack/include/mlpack/core/tree/space_split/midpoint_space_split_impl.hpp +1 -1
- mlpack/include/mlpack/core/tree/space_split/projection_vector.hpp +6 -5
- mlpack/include/mlpack/core/tree/space_split/space_split.hpp +4 -4
- mlpack/include/mlpack/core/tree/space_split/space_split_impl.hpp +18 -12
- mlpack/include/mlpack/core/tree/spill_tree/is_spill_tree.hpp +1 -1
- mlpack/include/mlpack/core/tree/spill_tree/spill_dual_tree_traverser.hpp +2 -1
- mlpack/include/mlpack/core/tree/spill_tree/spill_dual_tree_traverser_impl.hpp +4 -2
- mlpack/include/mlpack/core/tree/spill_tree/spill_single_tree_traverser.hpp +2 -1
- mlpack/include/mlpack/core/tree/spill_tree/spill_single_tree_traverser_impl.hpp +4 -2
- mlpack/include/mlpack/core/tree/spill_tree/spill_tree.hpp +13 -16
- mlpack/include/mlpack/core/tree/spill_tree/spill_tree_impl.hpp +78 -51
- mlpack/include/mlpack/core/tree/spill_tree/traits.hpp +2 -1
- mlpack/include/mlpack/core/tree/spill_tree/typedef.hpp +12 -4
- mlpack/include/mlpack/core/util/arma_traits.hpp +67 -2
- mlpack/include/mlpack/core/util/gitversion.hpp +1 -1
- mlpack/include/mlpack/core/util/sfinae_utility.hpp +24 -2
- mlpack/include/mlpack/core/util/version.hpp +1 -1
- mlpack/include/mlpack/methods/CMakeLists.txt +96 -96
- mlpack/include/mlpack/methods/amf/init_rules/no_init.hpp +1 -1
- mlpack/include/mlpack/methods/amf/update_rules/svd_batch_learning.hpp +0 -2
- mlpack/include/mlpack/methods/ann/dists/bernoulli_distribution_impl.hpp +1 -2
- mlpack/include/mlpack/methods/ann/init_rules/network_init.hpp +5 -5
- mlpack/include/mlpack/methods/ann/layer/batch_norm.hpp +3 -2
- mlpack/include/mlpack/methods/ann/layer/batch_norm_impl.hpp +19 -20
- mlpack/include/mlpack/methods/ann/layer/concat.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/concat_impl.hpp +6 -7
- mlpack/include/mlpack/methods/ann/layer/convolution_impl.hpp +3 -3
- mlpack/include/mlpack/methods/ann/layer/grouped_convolution_impl.hpp +3 -3
- mlpack/include/mlpack/methods/ann/layer/linear3d.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/linear3d_impl.hpp +11 -14
- mlpack/include/mlpack/methods/ann/layer/max_pooling.hpp +5 -4
- mlpack/include/mlpack/methods/ann/layer/max_pooling_impl.hpp +15 -14
- mlpack/include/mlpack/methods/ann/layer/mean_pooling.hpp +3 -2
- mlpack/include/mlpack/methods/ann/layer/mean_pooling_impl.hpp +14 -15
- mlpack/include/mlpack/methods/ann/layer/multihead_attention.hpp +6 -5
- mlpack/include/mlpack/methods/ann/layer/multihead_attention_impl.hpp +24 -25
- mlpack/include/mlpack/methods/ann/layer/nearest_interpolation.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/nearest_interpolation_impl.hpp +4 -4
- mlpack/include/mlpack/methods/ann/layer/padding.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/padding_impl.hpp +12 -13
- mlpack/include/mlpack/methods/ann/layer/recurrent_layer.hpp +3 -2
- mlpack/include/mlpack/methods/ann/loss_functions/cosine_embedding_loss_impl.hpp +5 -4
- mlpack/include/mlpack/methods/ann/loss_functions/empty_loss.hpp +1 -1
- mlpack/include/mlpack/methods/ann/loss_functions/mean_absolute_percentage_error.hpp +1 -1
- mlpack/include/mlpack/methods/ann/rnn.hpp +19 -18
- mlpack/include/mlpack/methods/ann/rnn_impl.hpp +24 -16
- mlpack/include/mlpack/methods/bayesian_linear_regression/bayesian_linear_regression_impl.hpp +3 -8
- mlpack/include/mlpack/methods/decision_tree/fitness_functions/gini_gain.hpp +5 -8
- mlpack/include/mlpack/methods/decision_tree/fitness_functions/information_gain.hpp +5 -8
- mlpack/include/mlpack/methods/gmm/diagonal_gmm_impl.hpp +2 -1
- mlpack/include/mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp +3 -3
- mlpack/include/mlpack/methods/gmm/gmm_impl.hpp +2 -1
- mlpack/include/mlpack/methods/hmm/hmm_impl.hpp +10 -5
- mlpack/include/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp +1 -1
- mlpack/include/mlpack/methods/lmnn/lmnn_impl.hpp +1 -1
- mlpack/include/mlpack/methods/random_forest/random_forest.hpp +57 -37
- mlpack/include/mlpack/methods/random_forest/random_forest_impl.hpp +69 -59
- mlpack/kde.cp311-win_amd64.pyd +0 -0
- mlpack/kernel_pca.cp311-win_amd64.pyd +0 -0
- mlpack/kfn.cp311-win_amd64.pyd +0 -0
- mlpack/kmeans.cp311-win_amd64.pyd +0 -0
- mlpack/knn.cp311-win_amd64.pyd +0 -0
- mlpack/krann.cp311-win_amd64.pyd +0 -0
- mlpack/lars.cp311-win_amd64.pyd +0 -0
- mlpack/linear_regression_predict.cp311-win_amd64.pyd +0 -0
- mlpack/linear_regression_train.cp311-win_amd64.pyd +0 -0
- mlpack/linear_svm.cp311-win_amd64.pyd +0 -0
- mlpack/lmnn.cp311-win_amd64.pyd +0 -0
- mlpack/local_coordinate_coding.cp311-win_amd64.pyd +0 -0
- mlpack/logistic_regression.cp311-win_amd64.pyd +0 -0
- mlpack/lsh.cp311-win_amd64.pyd +0 -0
- mlpack/mean_shift.cp311-win_amd64.pyd +0 -0
- mlpack/nbc.cp311-win_amd64.pyd +0 -0
- mlpack/nca.cp311-win_amd64.pyd +0 -0
- mlpack/nmf.cp311-win_amd64.pyd +0 -0
- mlpack/pca.cp311-win_amd64.pyd +0 -0
- mlpack/perceptron.cp311-win_amd64.pyd +0 -0
- mlpack/preprocess_binarize.cp311-win_amd64.pyd +0 -0
- mlpack/preprocess_describe.cp311-win_amd64.pyd +0 -0
- mlpack/preprocess_one_hot_encoding.cp311-win_amd64.pyd +0 -0
- mlpack/preprocess_scale.cp311-win_amd64.pyd +0 -0
- mlpack/preprocess_split.cp311-win_amd64.pyd +0 -0
- mlpack/radical.cp311-win_amd64.pyd +0 -0
- mlpack/random_forest.cp311-win_amd64.pyd +0 -0
- mlpack/softmax_regression.cp311-win_amd64.pyd +0 -0
- mlpack/sparse_coding.cp311-win_amd64.pyd +0 -0
- mlpack-4.6.2.dist-info/DELVEWHEEL +2 -0
- {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/METADATA +6 -2
- {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/RECORD +144 -140
- {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/WHEEL +1 -1
- mlpack/include/mlpack/core/data/load_numeric_csv.hpp +0 -192
- mlpack-4.6.0.dist-info/DELVEWHEEL +0 -2
- {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/top_level.txt +0 -0
mlpack/__init__.py
CHANGED
|
@@ -11,14 +11,14 @@ http://www.opensource.org/licenses/BSD-3-Clause for more information.
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
# start delvewheel patch
|
|
14
|
-
def
|
|
14
|
+
def _delvewheel_patch_1_10_1():
|
|
15
15
|
import os
|
|
16
16
|
if os.path.isdir(libs_dir := os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'mlpack.libs'))):
|
|
17
17
|
os.add_dll_directory(libs_dir)
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
del
|
|
20
|
+
_delvewheel_patch_1_10_1()
|
|
21
|
+
del _delvewheel_patch_1_10_1
|
|
22
22
|
# end delvewheel patch
|
|
23
23
|
|
|
24
24
|
import warnings
|
|
@@ -74,4 +74,4 @@ from .adaboost import *
|
|
|
74
74
|
from .linear_regression_train import linear_regression_train
|
|
75
75
|
from .linear_regression_predict import linear_regression_predict
|
|
76
76
|
from .linear_regression import *
|
|
77
|
-
__version__='4.6.
|
|
77
|
+
__version__='4.6.2'
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
mlpack/cf.cp311-win_amd64.pyd
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
mlpack/det.cp311-win_amd64.pyd
CHANGED
|
Binary file
|
mlpack/emst.cp311-win_amd64.pyd
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
mlpack/include/mlpack/base.hpp
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
#include <mlpack/core/cv/meta_info_extractor.hpp>
|
|
16
16
|
#include <mlpack/core/cv/cv_base.hpp>
|
|
17
|
+
#include <mlpack/core/util/arma_traits.hpp>
|
|
17
18
|
|
|
18
19
|
namespace mlpack {
|
|
19
20
|
|
|
@@ -280,30 +281,38 @@ class KFoldCV
|
|
|
280
281
|
/**
|
|
281
282
|
* Get the ith training subset from a variable of a matrix type.
|
|
282
283
|
*/
|
|
283
|
-
template<typename
|
|
284
|
-
inline
|
|
285
|
-
|
|
284
|
+
template<typename SubsetMatType>
|
|
285
|
+
inline SubsetMatType GetTrainingSubset(
|
|
286
|
+
SubsetMatType& m,
|
|
287
|
+
const size_t i,
|
|
288
|
+
const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>* = 0);
|
|
286
289
|
|
|
287
290
|
/**
|
|
288
291
|
* Get the ith training subset from a variable of a row type.
|
|
289
292
|
*/
|
|
290
|
-
template<typename
|
|
291
|
-
inline
|
|
292
|
-
|
|
293
|
+
template<typename SubsetRowType>
|
|
294
|
+
inline SubsetRowType GetTrainingSubset(
|
|
295
|
+
SubsetRowType& r,
|
|
296
|
+
const size_t i,
|
|
297
|
+
const typename std::enable_if_t<IsRow<SubsetRowType>::value>* = 0);
|
|
293
298
|
|
|
294
299
|
/**
|
|
295
300
|
* Get the ith validation subset from a variable of a matrix type.
|
|
296
301
|
*/
|
|
297
|
-
template<typename
|
|
298
|
-
inline
|
|
299
|
-
|
|
302
|
+
template<typename SubsetMatType>
|
|
303
|
+
inline SubsetMatType GetValidationSubset(
|
|
304
|
+
SubsetMatType& m,
|
|
305
|
+
const size_t i,
|
|
306
|
+
const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>* = 0);
|
|
300
307
|
|
|
301
308
|
/**
|
|
302
309
|
* Get the ith validation subset from a variable of a row type.
|
|
303
310
|
*/
|
|
304
|
-
template<typename
|
|
305
|
-
inline
|
|
306
|
-
|
|
311
|
+
template<typename SubsetRowType>
|
|
312
|
+
inline SubsetRowType GetValidationSubset(
|
|
313
|
+
SubsetRowType& r,
|
|
314
|
+
const size_t i,
|
|
315
|
+
const typename std::enable_if_t<IsRow<SubsetRowType>::value>* = 0);
|
|
307
316
|
};
|
|
308
317
|
|
|
309
318
|
} // namespace mlpack
|
|
@@ -375,14 +375,15 @@ template<typename MLAlgorithm,
|
|
|
375
375
|
typename MatType,
|
|
376
376
|
typename PredictionsType,
|
|
377
377
|
typename WeightsType>
|
|
378
|
-
template<typename
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
const size_t i
|
|
378
|
+
template<typename SubsetMatType>
|
|
379
|
+
SubsetMatType KFoldCV<MLAlgorithm,
|
|
380
|
+
Metric,
|
|
381
|
+
MatType,
|
|
382
|
+
PredictionsType,
|
|
383
|
+
WeightsType>::GetTrainingSubset(
|
|
384
|
+
SubsetMatType& m,
|
|
385
|
+
const size_t i,
|
|
386
|
+
const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>*)
|
|
386
387
|
{
|
|
387
388
|
// If this is not the first fold, we have to handle it a little bit
|
|
388
389
|
// differently, since the last fold may contain slightly more than 'binSize'
|
|
@@ -390,8 +391,9 @@ arma::Mat<ElementType> KFoldCV<MLAlgorithm,
|
|
|
390
391
|
const size_t subsetSize = (i != 0) ? lastBinSize + (k - 2) * binSize :
|
|
391
392
|
(k - 1) * binSize;
|
|
392
393
|
|
|
393
|
-
|
|
394
|
-
|
|
394
|
+
SubsetMatType alias;
|
|
395
|
+
MakeAlias(alias, m, m.n_rows, subsetSize, m.n_rows * binSize * i);
|
|
396
|
+
return alias;
|
|
395
397
|
}
|
|
396
398
|
|
|
397
399
|
template<typename MLAlgorithm,
|
|
@@ -399,14 +401,15 @@ template<typename MLAlgorithm,
|
|
|
399
401
|
typename MatType,
|
|
400
402
|
typename PredictionsType,
|
|
401
403
|
typename WeightsType>
|
|
402
|
-
template<typename
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
const size_t i
|
|
404
|
+
template<typename SubsetRowType>
|
|
405
|
+
SubsetRowType KFoldCV<MLAlgorithm,
|
|
406
|
+
Metric,
|
|
407
|
+
MatType,
|
|
408
|
+
PredictionsType,
|
|
409
|
+
WeightsType>::GetTrainingSubset(
|
|
410
|
+
SubsetRowType& r,
|
|
411
|
+
const size_t i,
|
|
412
|
+
const typename std::enable_if_t<IsRow<SubsetRowType>::value>*)
|
|
410
413
|
{
|
|
411
414
|
// If this is not the first fold, we have to handle it a little bit
|
|
412
415
|
// differently, since the last fold may contain slightly more than 'binSize'
|
|
@@ -414,7 +417,9 @@ arma::Row<ElementType> KFoldCV<MLAlgorithm,
|
|
|
414
417
|
const size_t subsetSize = (i != 0) ? lastBinSize + (k - 2) * binSize :
|
|
415
418
|
(k - 1) * binSize;
|
|
416
419
|
|
|
417
|
-
|
|
420
|
+
SubsetRowType alias;
|
|
421
|
+
MakeAlias(alias, r, subsetSize, r.n_rows * binSize * i);
|
|
422
|
+
return alias;
|
|
418
423
|
}
|
|
419
424
|
|
|
420
425
|
template<typename MLAlgorithm,
|
|
@@ -422,18 +427,21 @@ template<typename MLAlgorithm,
|
|
|
422
427
|
typename MatType,
|
|
423
428
|
typename PredictionsType,
|
|
424
429
|
typename WeightsType>
|
|
425
|
-
template<typename
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
const size_t i
|
|
430
|
+
template<typename SubsetMatType>
|
|
431
|
+
SubsetMatType KFoldCV<MLAlgorithm,
|
|
432
|
+
Metric,
|
|
433
|
+
MatType,
|
|
434
|
+
PredictionsType,
|
|
435
|
+
WeightsType>::GetValidationSubset(
|
|
436
|
+
SubsetMatType& m,
|
|
437
|
+
const size_t i,
|
|
438
|
+
const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>*)
|
|
433
439
|
{
|
|
434
440
|
const size_t subsetSize = (i == 0) ? lastBinSize : binSize;
|
|
435
|
-
|
|
436
|
-
|
|
441
|
+
SubsetMatType alias;
|
|
442
|
+
MakeAlias(alias, m, m.n_rows, subsetSize,
|
|
443
|
+
m.n_rows * ValidationSubsetFirstCol(i));
|
|
444
|
+
return alias;
|
|
437
445
|
}
|
|
438
446
|
|
|
439
447
|
template<typename MLAlgorithm,
|
|
@@ -441,18 +449,20 @@ template<typename MLAlgorithm,
|
|
|
441
449
|
typename MatType,
|
|
442
450
|
typename PredictionsType,
|
|
443
451
|
typename WeightsType>
|
|
444
|
-
template<typename
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
const size_t i
|
|
452
|
+
template<typename SubsetRowType>
|
|
453
|
+
SubsetRowType KFoldCV<MLAlgorithm,
|
|
454
|
+
Metric,
|
|
455
|
+
MatType,
|
|
456
|
+
PredictionsType,
|
|
457
|
+
WeightsType>::GetValidationSubset(
|
|
458
|
+
SubsetRowType& r,
|
|
459
|
+
const size_t i,
|
|
460
|
+
const typename std::enable_if_t<IsRow<SubsetRowType>::value>*)
|
|
452
461
|
{
|
|
453
462
|
const size_t subsetSize = (i == 0) ? lastBinSize : binSize;
|
|
454
|
-
|
|
455
|
-
|
|
463
|
+
SubsetRowType alias;
|
|
464
|
+
MakeAlias(alias, r, subsetSize, r.n_rows * ValidationSubsetFirstCol(i));
|
|
465
|
+
return alias;
|
|
456
466
|
}
|
|
457
467
|
|
|
458
468
|
} // namespace mlpack
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
#ifndef MLPACK_CORE_DATA_DATA_HPP
|
|
13
13
|
#define MLPACK_CORE_DATA_DATA_HPP
|
|
14
14
|
|
|
15
|
-
#include "detect_file_type.hpp"
|
|
16
15
|
#include "extension.hpp"
|
|
17
16
|
#include "format.hpp"
|
|
18
17
|
#include "has_serialize.hpp"
|
|
@@ -30,14 +29,19 @@
|
|
|
30
29
|
#include "check_categorical_param.hpp"
|
|
31
30
|
#include "confusion_matrix.hpp"
|
|
32
31
|
#include "dataset_mapper.hpp"
|
|
32
|
+
#include "data_options.hpp"
|
|
33
|
+
#include "detect_file_type.hpp"
|
|
33
34
|
#include "image_info.hpp"
|
|
34
35
|
#include "image_resize_crop.hpp"
|
|
35
36
|
#include "imputer.hpp"
|
|
36
37
|
#include "is_naninf.hpp"
|
|
38
|
+
#include "matrix_options.hpp"
|
|
37
39
|
#include "normalize_labels.hpp"
|
|
38
40
|
#include "one_hot_encoding.hpp"
|
|
39
41
|
#include "split_data.hpp"
|
|
40
42
|
#include "string_algorithms.hpp"
|
|
43
|
+
#include "text_options.hpp"
|
|
41
44
|
#include "types.hpp"
|
|
45
|
+
#include "utilities.hpp"
|
|
42
46
|
|
|
43
47
|
#endif
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file core/data/data_options.hpp
|
|
3
|
+
* @author Ryan Curtin
|
|
4
|
+
* @author Omar Shrit
|
|
5
|
+
*
|
|
6
|
+
* Data options, all possible options to load different data types and format
|
|
7
|
+
* with specific settings into mlpack.
|
|
8
|
+
*
|
|
9
|
+
* mlpack is free software; you may redistribute it and/or modify it under the
|
|
10
|
+
* terms of the 3-clause BSD license. You should have received a copy of the
|
|
11
|
+
* 3-clause BSD license along with mlpack. If not, see
|
|
12
|
+
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
|
|
13
|
+
*/
|
|
14
|
+
#ifndef MLPACK_CORE_DATA_DATA_OPTIONS_HPP
|
|
15
|
+
#define MLPACK_CORE_DATA_DATA_OPTIONS_HPP
|
|
16
|
+
|
|
17
|
+
#include <mlpack/prereqs.hpp>
|
|
18
|
+
|
|
19
|
+
#include "types.hpp"
|
|
20
|
+
#include "dataset_mapper.hpp"
|
|
21
|
+
#include "map_policies/map_policies.hpp"
|
|
22
|
+
#include "format.hpp"
|
|
23
|
+
#include "image_info.hpp"
|
|
24
|
+
|
|
25
|
+
namespace mlpack {
|
|
26
|
+
namespace data {
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* All possible DataOptions grouped under one class.
|
|
30
|
+
* This will allow us to have consistent data API for mlpack. If new data
|
|
31
|
+
* options might be necessary, then they should be added in the following.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
template<typename Derived>
|
|
35
|
+
class DataOptionsBase
|
|
36
|
+
{
|
|
37
|
+
public:
|
|
38
|
+
DataOptionsBase(const bool fatal = defaultFatal,
|
|
39
|
+
const FileType format = defaultFormat) :
|
|
40
|
+
fatal(fatal),
|
|
41
|
+
format(format)
|
|
42
|
+
{
|
|
43
|
+
// Do nothing.
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
template<typename Derived2>
|
|
47
|
+
explicit DataOptionsBase(const DataOptionsBase<Derived2>& opts)
|
|
48
|
+
{
|
|
49
|
+
CopyOptions(opts);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
template<typename Derived2>
|
|
53
|
+
explicit DataOptionsBase(DataOptionsBase<Derived2>&& opts)
|
|
54
|
+
{
|
|
55
|
+
MoveOptions(std::move(opts));
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Convert any other DataOptions type to this DataOptions type, printing
|
|
59
|
+
// warnings for any members that cannot be converted. If this object and
|
|
60
|
+
// `opts` are of the same type, then the constructor for that type will be
|
|
61
|
+
// called instead.
|
|
62
|
+
template<typename Derived2>
|
|
63
|
+
DataOptionsBase& operator=(const DataOptionsBase<Derived2>& other)
|
|
64
|
+
{
|
|
65
|
+
if ((void*) &other == (void*) this)
|
|
66
|
+
return *this;
|
|
67
|
+
|
|
68
|
+
// Print warnings for any members that cannot be converted.
|
|
69
|
+
const char* dataDesc = static_cast<const Derived&>(*this).DataDescription();
|
|
70
|
+
static_cast<const Derived2&>(other).WarnBaseConversion(dataDesc);
|
|
71
|
+
|
|
72
|
+
CopyOptions(other);
|
|
73
|
+
return *this;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Take ownership of the options of another `DataOptionsBase` type.
|
|
77
|
+
template<typename Derived2>
|
|
78
|
+
DataOptionsBase& operator=(DataOptionsBase<Derived2>&& other)
|
|
79
|
+
{
|
|
80
|
+
if ((void*) &other != (void*) this)
|
|
81
|
+
return *this;
|
|
82
|
+
|
|
83
|
+
// Print warnings for any members that cannot be converted.
|
|
84
|
+
const char* dataDesc = static_cast<const Derived&>(*this).DataDescription();
|
|
85
|
+
static_cast<const Derived2&>(other).WarnBaseConversion(dataDesc);
|
|
86
|
+
|
|
87
|
+
MoveOptions(std::move(other));
|
|
88
|
+
return *this;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
template<typename Derived2>
|
|
92
|
+
void CopyOptions(const DataOptionsBase<Derived2>& other)
|
|
93
|
+
{
|
|
94
|
+
// Only copy options that have been set in the other object.
|
|
95
|
+
if (other.fatal.has_value())
|
|
96
|
+
fatal = *other.fatal;
|
|
97
|
+
if (other.format.has_value())
|
|
98
|
+
format = *other.format;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
template<typename Derived2>
|
|
102
|
+
void MoveOptions(DataOptionsBase<Derived2>&& other)
|
|
103
|
+
{
|
|
104
|
+
fatal = std::move(other.fatal);
|
|
105
|
+
format = std::move(other.format);
|
|
106
|
+
|
|
107
|
+
// Reset all of the options in the other object.
|
|
108
|
+
other.Reset();
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
void Reset()
|
|
112
|
+
{
|
|
113
|
+
fatal.reset();
|
|
114
|
+
format.reset();
|
|
115
|
+
|
|
116
|
+
// Reset any child members.
|
|
117
|
+
static_cast<Derived&>(*this).Reset();
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// If true, then exceptions are thrown on failures.
|
|
121
|
+
const bool& Fatal() const { return AccessMember(fatal, defaultFatal); }
|
|
122
|
+
// Modify whether or not exceptions are thrown on failures.
|
|
123
|
+
bool& Fatal() { return ModifyMember(fatal, defaultFatal); }
|
|
124
|
+
|
|
125
|
+
// Get the type of the file that will be loaded.
|
|
126
|
+
const FileType& Format() const { return AccessMember(format, defaultFormat); }
|
|
127
|
+
// Modify the file format to load.
|
|
128
|
+
FileType& Format() { return ModifyMember(format, defaultFormat); }
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Given a file type, return a logical name corresponding to that file type.
|
|
132
|
+
*/
|
|
133
|
+
const std::string FileTypeToString() const
|
|
134
|
+
{
|
|
135
|
+
FileType f = format.has_value() ? *format : defaultFormat;
|
|
136
|
+
switch (f)
|
|
137
|
+
{
|
|
138
|
+
case FileType::CSVASCII: return "CSV data";
|
|
139
|
+
case FileType::RawASCII: return "raw ASCII formatted data";
|
|
140
|
+
case FileType::RawBinary: return "raw binary formatted data";
|
|
141
|
+
case FileType::ArmaASCII: return "Armadillo ASCII formatted data";
|
|
142
|
+
case FileType::ArmaBinary: return "Armadillo binary formatted data";
|
|
143
|
+
case FileType::PGMBinary: return "PGM data";
|
|
144
|
+
case FileType::PPMBinary: return "PGM data";
|
|
145
|
+
case FileType::HDF5Binary: return "HDF5 data";
|
|
146
|
+
case FileType::CoordASCII:
|
|
147
|
+
return "ASCII formatted sparse coordinate data";
|
|
148
|
+
case FileType::AutoDetect: return "Detect automatically data type";
|
|
149
|
+
case FileType::FileTypeUnknown: return "Unknown data type";
|
|
150
|
+
default: return "";
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
protected:
|
|
155
|
+
template<typename T>
|
|
156
|
+
const T& AccessMember(const std::optional<T>& value,
|
|
157
|
+
const T& defaultValue) const
|
|
158
|
+
{
|
|
159
|
+
if (value.has_value())
|
|
160
|
+
return *value;
|
|
161
|
+
else
|
|
162
|
+
return defaultValue;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
template<typename T>
|
|
166
|
+
T& ModifyMember(std::optional<T>& value, const T defaultValue)
|
|
167
|
+
{
|
|
168
|
+
// Set the default value if needed so that (*value) has defined behavior
|
|
169
|
+
// according to the spec.
|
|
170
|
+
if (!value.has_value())
|
|
171
|
+
value = defaultValue;
|
|
172
|
+
|
|
173
|
+
return *value;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
void WarnOptionConversion(const char* optionName, const char* dataType) const
|
|
177
|
+
{
|
|
178
|
+
if (fatal.has_value() && *fatal)
|
|
179
|
+
{
|
|
180
|
+
Log::Fatal << "Option '" << optionName << "' cannot be specified when "
|
|
181
|
+
<< dataType << " is being loaded!" << std::endl;
|
|
182
|
+
}
|
|
183
|
+
else
|
|
184
|
+
{
|
|
185
|
+
Log::Warn << "Option '" << optionName << "' ignored; not applicable when "
|
|
186
|
+
<< dataType << " is being loaded!" << std::endl;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
private:
|
|
191
|
+
std::optional<bool> fatal;
|
|
192
|
+
std::optional<FileType> format;
|
|
193
|
+
|
|
194
|
+
constexpr static const bool defaultFatal = false;
|
|
195
|
+
constexpr static const FileType defaultFormat = FileType::AutoDetect;
|
|
196
|
+
|
|
197
|
+
// For access to internal optional members.
|
|
198
|
+
template<typename Derived2>
|
|
199
|
+
friend class DataOptionsBase;
|
|
200
|
+
};
|
|
201
|
+
|
|
202
|
+
// This utility class is meant to be used as the Derived parameter for an option
|
|
203
|
+
// that is not actually a derived type. It provides the WarnBaseConversion()
|
|
204
|
+
// member, which does nothing.
|
|
205
|
+
class EmptyOptions : public DataOptionsBase<EmptyOptions>
|
|
206
|
+
{
|
|
207
|
+
public:
|
|
208
|
+
void WarnBaseConversion(const char* /* dataDescription */) const { }
|
|
209
|
+
static const char* DataDescription() { return "general data"; }
|
|
210
|
+
void Reset() { }
|
|
211
|
+
};
|
|
212
|
+
|
|
213
|
+
using DataOptions = DataOptionsBase<EmptyOptions>;
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
} // namespace data
|
|
217
|
+
} // namespace mlpack
|
|
218
|
+
|
|
219
|
+
#endif
|
|
@@ -16,17 +16,13 @@
|
|
|
16
16
|
#define MLPACK_CORE_DATA_DETECT_FILE_TYPE_HPP
|
|
17
17
|
|
|
18
18
|
#include "types.hpp"
|
|
19
|
+
#include "extension.hpp"
|
|
20
|
+
#include "string_algorithms.hpp"
|
|
21
|
+
#include "text_options.hpp"
|
|
19
22
|
|
|
20
23
|
namespace mlpack {
|
|
21
24
|
namespace data {
|
|
22
25
|
|
|
23
|
-
/**
|
|
24
|
-
* Given a file type, return a logical name corresponding to that file type.
|
|
25
|
-
*
|
|
26
|
-
* @param type Type to get the logical name of.
|
|
27
|
-
*/
|
|
28
|
-
inline std::string GetStringType(const FileType& type);
|
|
29
|
-
|
|
30
26
|
/**
|
|
31
27
|
* Given an istream, attempt to guess the file type. This is taken originally
|
|
32
28
|
* from Armadillo's function guess_file_type_internal(), but we avoid using
|
|
@@ -62,7 +58,9 @@ inline FileType AutoDetect(std::fstream& stream,
|
|
|
62
58
|
* @param filename Name of the file whose type we should detect.
|
|
63
59
|
* @return Detected type of file. arma::file_type_unknown if unknown.
|
|
64
60
|
*/
|
|
65
|
-
|
|
61
|
+
template<typename MatType, typename DataOptionsType>
|
|
62
|
+
void DetectFromExtension(const std::string& filename,
|
|
63
|
+
DataOptionsType& opts);
|
|
66
64
|
|
|
67
65
|
/**
|
|
68
66
|
* Count the number of columns in the file. The file must be a CSV/TSV/TXT file
|