mlpack 4.6.0__pp38-pypy38_pp73-win_amd64.whl → 4.6.2__pp38-pypy38_pp73-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. mlpack/__init__.py +3 -3
  2. mlpack/adaboost_classify.pypy38-pp73-win_amd64.pyd +0 -0
  3. mlpack/adaboost_probabilities.pypy38-pp73-win_amd64.pyd +0 -0
  4. mlpack/adaboost_train.pypy38-pp73-win_amd64.pyd +0 -0
  5. mlpack/approx_kfn.pypy38-pp73-win_amd64.pyd +0 -0
  6. mlpack/arma_numpy.pypy38-pp73-win_amd64.pyd +0 -0
  7. mlpack/bayesian_linear_regression.pypy38-pp73-win_amd64.pyd +0 -0
  8. mlpack/cf.pypy38-pp73-win_amd64.pyd +0 -0
  9. mlpack/dbscan.pypy38-pp73-win_amd64.pyd +0 -0
  10. mlpack/decision_tree.pypy38-pp73-win_amd64.pyd +0 -0
  11. mlpack/det.pypy38-pp73-win_amd64.pyd +0 -0
  12. mlpack/emst.pypy38-pp73-win_amd64.pyd +0 -0
  13. mlpack/fastmks.pypy38-pp73-win_amd64.pyd +0 -0
  14. mlpack/gmm_generate.pypy38-pp73-win_amd64.pyd +0 -0
  15. mlpack/gmm_probability.pypy38-pp73-win_amd64.pyd +0 -0
  16. mlpack/gmm_train.pypy38-pp73-win_amd64.pyd +0 -0
  17. mlpack/hmm_generate.pypy38-pp73-win_amd64.pyd +0 -0
  18. mlpack/hmm_loglik.pypy38-pp73-win_amd64.pyd +0 -0
  19. mlpack/hmm_train.pypy38-pp73-win_amd64.pyd +0 -0
  20. mlpack/hmm_viterbi.pypy38-pp73-win_amd64.pyd +0 -0
  21. mlpack/hoeffding_tree.pypy38-pp73-win_amd64.pyd +0 -0
  22. mlpack/image_converter.pypy38-pp73-win_amd64.pyd +0 -0
  23. mlpack/include/mlpack/base.hpp +1 -0
  24. mlpack/include/mlpack/core/cv/k_fold_cv.hpp +21 -12
  25. mlpack/include/mlpack/core/cv/k_fold_cv_impl.hpp +49 -39
  26. mlpack/include/mlpack/core/data/data.hpp +5 -1
  27. mlpack/include/mlpack/core/data/data_options.hpp +219 -0
  28. mlpack/include/mlpack/core/data/detect_file_type.hpp +6 -8
  29. mlpack/include/mlpack/core/data/detect_file_type_impl.hpp +30 -76
  30. mlpack/include/mlpack/core/data/load.hpp +41 -3
  31. mlpack/include/mlpack/core/data/load_arff.hpp +4 -3
  32. mlpack/include/mlpack/core/data/load_arff_impl.hpp +68 -20
  33. mlpack/include/mlpack/core/data/{load_csv.hpp → load_categorical.hpp} +44 -80
  34. mlpack/include/mlpack/core/data/{load_categorical_csv.hpp → load_categorical_impl.hpp} +86 -46
  35. mlpack/include/mlpack/core/data/load_impl.hpp +264 -289
  36. mlpack/include/mlpack/core/data/load_model_impl.hpp +2 -1
  37. mlpack/include/mlpack/core/data/load_numeric.hpp +130 -0
  38. mlpack/include/mlpack/core/data/load_vec_impl.hpp +14 -10
  39. mlpack/include/mlpack/core/data/map_policies/missing_policy.hpp +3 -2
  40. mlpack/include/mlpack/core/data/matrix_options.hpp +172 -0
  41. mlpack/include/mlpack/core/data/save.hpp +32 -2
  42. mlpack/include/mlpack/core/data/save_impl.hpp +136 -167
  43. mlpack/include/mlpack/core/data/text_options.hpp +244 -0
  44. mlpack/include/mlpack/core/data/types.hpp +3 -4
  45. mlpack/include/mlpack/core/data/utilities.hpp +158 -0
  46. mlpack/include/mlpack/core/math/ccov.hpp +1 -0
  47. mlpack/include/mlpack/core/math/ccov_impl.hpp +4 -5
  48. mlpack/include/mlpack/core/math/make_alias.hpp +98 -3
  49. mlpack/include/mlpack/core/math/shuffle_data.hpp +68 -0
  50. mlpack/include/mlpack/core/metrics/bleu_impl.hpp +1 -1
  51. mlpack/include/mlpack/core/tree/binary_space_tree/traits.hpp +36 -178
  52. mlpack/include/mlpack/core/tree/space_split/hyperplane.hpp +20 -14
  53. mlpack/include/mlpack/core/tree/space_split/mean_space_split_impl.hpp +2 -2
  54. mlpack/include/mlpack/core/tree/space_split/midpoint_space_split_impl.hpp +1 -1
  55. mlpack/include/mlpack/core/tree/space_split/projection_vector.hpp +6 -5
  56. mlpack/include/mlpack/core/tree/space_split/space_split.hpp +4 -4
  57. mlpack/include/mlpack/core/tree/space_split/space_split_impl.hpp +18 -12
  58. mlpack/include/mlpack/core/tree/spill_tree/is_spill_tree.hpp +1 -1
  59. mlpack/include/mlpack/core/tree/spill_tree/spill_dual_tree_traverser.hpp +2 -1
  60. mlpack/include/mlpack/core/tree/spill_tree/spill_dual_tree_traverser_impl.hpp +4 -2
  61. mlpack/include/mlpack/core/tree/spill_tree/spill_single_tree_traverser.hpp +2 -1
  62. mlpack/include/mlpack/core/tree/spill_tree/spill_single_tree_traverser_impl.hpp +4 -2
  63. mlpack/include/mlpack/core/tree/spill_tree/spill_tree.hpp +13 -16
  64. mlpack/include/mlpack/core/tree/spill_tree/spill_tree_impl.hpp +78 -51
  65. mlpack/include/mlpack/core/tree/spill_tree/traits.hpp +2 -1
  66. mlpack/include/mlpack/core/tree/spill_tree/typedef.hpp +12 -4
  67. mlpack/include/mlpack/core/util/arma_traits.hpp +67 -2
  68. mlpack/include/mlpack/core/util/gitversion.hpp +1 -1
  69. mlpack/include/mlpack/core/util/sfinae_utility.hpp +24 -2
  70. mlpack/include/mlpack/core/util/version.hpp +1 -1
  71. mlpack/include/mlpack/methods/CMakeLists.txt +96 -96
  72. mlpack/include/mlpack/methods/amf/init_rules/no_init.hpp +1 -1
  73. mlpack/include/mlpack/methods/amf/update_rules/svd_batch_learning.hpp +0 -2
  74. mlpack/include/mlpack/methods/ann/dists/bernoulli_distribution_impl.hpp +1 -2
  75. mlpack/include/mlpack/methods/ann/init_rules/network_init.hpp +5 -5
  76. mlpack/include/mlpack/methods/ann/layer/batch_norm.hpp +3 -2
  77. mlpack/include/mlpack/methods/ann/layer/batch_norm_impl.hpp +19 -20
  78. mlpack/include/mlpack/methods/ann/layer/concat.hpp +1 -0
  79. mlpack/include/mlpack/methods/ann/layer/concat_impl.hpp +6 -7
  80. mlpack/include/mlpack/methods/ann/layer/convolution_impl.hpp +3 -3
  81. mlpack/include/mlpack/methods/ann/layer/grouped_convolution_impl.hpp +3 -3
  82. mlpack/include/mlpack/methods/ann/layer/linear3d.hpp +1 -0
  83. mlpack/include/mlpack/methods/ann/layer/linear3d_impl.hpp +11 -14
  84. mlpack/include/mlpack/methods/ann/layer/max_pooling.hpp +5 -4
  85. mlpack/include/mlpack/methods/ann/layer/max_pooling_impl.hpp +15 -14
  86. mlpack/include/mlpack/methods/ann/layer/mean_pooling.hpp +3 -2
  87. mlpack/include/mlpack/methods/ann/layer/mean_pooling_impl.hpp +14 -15
  88. mlpack/include/mlpack/methods/ann/layer/multihead_attention.hpp +6 -5
  89. mlpack/include/mlpack/methods/ann/layer/multihead_attention_impl.hpp +24 -25
  90. mlpack/include/mlpack/methods/ann/layer/nearest_interpolation.hpp +1 -0
  91. mlpack/include/mlpack/methods/ann/layer/nearest_interpolation_impl.hpp +4 -4
  92. mlpack/include/mlpack/methods/ann/layer/padding.hpp +1 -0
  93. mlpack/include/mlpack/methods/ann/layer/padding_impl.hpp +12 -13
  94. mlpack/include/mlpack/methods/ann/layer/recurrent_layer.hpp +3 -2
  95. mlpack/include/mlpack/methods/ann/loss_functions/cosine_embedding_loss_impl.hpp +5 -4
  96. mlpack/include/mlpack/methods/ann/loss_functions/empty_loss.hpp +1 -1
  97. mlpack/include/mlpack/methods/ann/loss_functions/mean_absolute_percentage_error.hpp +1 -1
  98. mlpack/include/mlpack/methods/ann/rnn.hpp +19 -18
  99. mlpack/include/mlpack/methods/ann/rnn_impl.hpp +24 -16
  100. mlpack/include/mlpack/methods/bayesian_linear_regression/bayesian_linear_regression_impl.hpp +3 -8
  101. mlpack/include/mlpack/methods/decision_tree/fitness_functions/gini_gain.hpp +5 -8
  102. mlpack/include/mlpack/methods/decision_tree/fitness_functions/information_gain.hpp +5 -8
  103. mlpack/include/mlpack/methods/gmm/diagonal_gmm_impl.hpp +2 -1
  104. mlpack/include/mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp +3 -3
  105. mlpack/include/mlpack/methods/gmm/gmm_impl.hpp +2 -1
  106. mlpack/include/mlpack/methods/hmm/hmm_impl.hpp +10 -5
  107. mlpack/include/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp +1 -1
  108. mlpack/include/mlpack/methods/lmnn/lmnn_impl.hpp +1 -1
  109. mlpack/include/mlpack/methods/random_forest/random_forest.hpp +57 -37
  110. mlpack/include/mlpack/methods/random_forest/random_forest_impl.hpp +69 -59
  111. mlpack/kde.pypy38-pp73-win_amd64.pyd +0 -0
  112. mlpack/kernel_pca.pypy38-pp73-win_amd64.pyd +0 -0
  113. mlpack/kfn.pypy38-pp73-win_amd64.pyd +0 -0
  114. mlpack/kmeans.pypy38-pp73-win_amd64.pyd +0 -0
  115. mlpack/knn.pypy38-pp73-win_amd64.pyd +0 -0
  116. mlpack/krann.pypy38-pp73-win_amd64.pyd +0 -0
  117. mlpack/lars.pypy38-pp73-win_amd64.pyd +0 -0
  118. mlpack/linear_regression_predict.pypy38-pp73-win_amd64.pyd +0 -0
  119. mlpack/linear_regression_train.pypy38-pp73-win_amd64.pyd +0 -0
  120. mlpack/linear_svm.pypy38-pp73-win_amd64.pyd +0 -0
  121. mlpack/lmnn.pypy38-pp73-win_amd64.pyd +0 -0
  122. mlpack/local_coordinate_coding.pypy38-pp73-win_amd64.pyd +0 -0
  123. mlpack/logistic_regression.pypy38-pp73-win_amd64.pyd +0 -0
  124. mlpack/lsh.pypy38-pp73-win_amd64.pyd +0 -0
  125. mlpack/mean_shift.pypy38-pp73-win_amd64.pyd +0 -0
  126. mlpack/nbc.pypy38-pp73-win_amd64.pyd +0 -0
  127. mlpack/nca.pypy38-pp73-win_amd64.pyd +0 -0
  128. mlpack/nmf.pypy38-pp73-win_amd64.pyd +0 -0
  129. mlpack/pca.pypy38-pp73-win_amd64.pyd +0 -0
  130. mlpack/perceptron.pypy38-pp73-win_amd64.pyd +0 -0
  131. mlpack/preprocess_binarize.pypy38-pp73-win_amd64.pyd +0 -0
  132. mlpack/preprocess_describe.pypy38-pp73-win_amd64.pyd +0 -0
  133. mlpack/preprocess_one_hot_encoding.pypy38-pp73-win_amd64.pyd +0 -0
  134. mlpack/preprocess_scale.pypy38-pp73-win_amd64.pyd +0 -0
  135. mlpack/preprocess_split.pypy38-pp73-win_amd64.pyd +0 -0
  136. mlpack/radical.pypy38-pp73-win_amd64.pyd +0 -0
  137. mlpack/random_forest.pypy38-pp73-win_amd64.pyd +0 -0
  138. mlpack/softmax_regression.pypy38-pp73-win_amd64.pyd +0 -0
  139. mlpack/sparse_coding.pypy38-pp73-win_amd64.pyd +0 -0
  140. mlpack-4.6.2.dist-info/DELVEWHEEL +2 -0
  141. {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/METADATA +5 -2
  142. {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/RECORD +147 -143
  143. mlpack.libs/{.load-order-mlpack-4.6.0 → .load-order-mlpack-4.6.2} +1 -1
  144. mlpack.libs/msvcp140-50208655e42969b9a5ab8a4e0186bbb9.dll +0 -0
  145. mlpack.libs/{vcruntime140_1-d890de56b3b2d87da64381c825eb1e2a.dll → vcruntime140_1-334c6985712ed6bcb8c936e867548687.dll} +0 -0
  146. mlpack/include/mlpack/core/data/load_numeric_csv.hpp +0 -192
  147. mlpack-4.6.0.dist-info/DELVEWHEEL +0 -2
  148. {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/WHEEL +0 -0
  149. {mlpack-4.6.0.dist-info → mlpack-4.6.2.dist-info}/top_level.txt +0 -0
mlpack/__init__.py CHANGED
@@ -22,10 +22,10 @@ def _delvewheel_patch_1_10_0():
22
22
  if os.path.isdir(libs_dir):
23
23
  os.add_dll_directory(libs_dir)
24
24
  else:
25
- load_order_filepath = os.path.join(libs_dir, '.load-order-mlpack-4.6.0')
25
+ load_order_filepath = os.path.join(libs_dir, '.load-order-mlpack-4.6.2')
26
26
  if os.path.isfile(load_order_filepath):
27
27
  import ctypes.wintypes
28
- with open(os.path.join(libs_dir, '.load-order-mlpack-4.6.0')) as file:
28
+ with open(os.path.join(libs_dir, '.load-order-mlpack-4.6.2')) as file:
29
29
  load_order = file.read().split()
30
30
  kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
31
31
  kernel32.LoadLibraryExW.restype = ctypes.wintypes.HMODULE
@@ -93,4 +93,4 @@ from .adaboost import *
93
93
  from .linear_regression_train import linear_regression_train
94
94
  from .linear_regression_predict import linear_regression_predict
95
95
  from .linear_regression import *
96
- __version__='4.6.0'
96
+ __version__='4.6.2'
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -33,6 +33,7 @@
33
33
  #include <numeric>
34
34
  #include <vector>
35
35
  #include <queue>
36
+ #include <string>
36
37
 
37
38
  // But if it's not defined, we'll do it.
38
39
  #ifndef M_PI
@@ -14,6 +14,7 @@
14
14
 
15
15
  #include <mlpack/core/cv/meta_info_extractor.hpp>
16
16
  #include <mlpack/core/cv/cv_base.hpp>
17
+ #include <mlpack/core/util/arma_traits.hpp>
17
18
 
18
19
  namespace mlpack {
19
20
 
@@ -280,30 +281,38 @@ class KFoldCV
280
281
  /**
281
282
  * Get the ith training subset from a variable of a matrix type.
282
283
  */
283
- template<typename ElementType>
284
- inline arma::Mat<ElementType> GetTrainingSubset(arma::Mat<ElementType>& m,
285
- const size_t i);
284
+ template<typename SubsetMatType>
285
+ inline SubsetMatType GetTrainingSubset(
286
+ SubsetMatType& m,
287
+ const size_t i,
288
+ const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>* = 0);
286
289
 
287
290
  /**
288
291
  * Get the ith training subset from a variable of a row type.
289
292
  */
290
- template<typename ElementType>
291
- inline arma::Row<ElementType> GetTrainingSubset(arma::Row<ElementType>& r,
292
- const size_t i);
293
+ template<typename SubsetRowType>
294
+ inline SubsetRowType GetTrainingSubset(
295
+ SubsetRowType& r,
296
+ const size_t i,
297
+ const typename std::enable_if_t<IsRow<SubsetRowType>::value>* = 0);
293
298
 
294
299
  /**
295
300
  * Get the ith validation subset from a variable of a matrix type.
296
301
  */
297
- template<typename ElementType>
298
- inline arma::Mat<ElementType> GetValidationSubset(arma::Mat<ElementType>& m,
299
- const size_t i);
302
+ template<typename SubsetMatType>
303
+ inline SubsetMatType GetValidationSubset(
304
+ SubsetMatType& m,
305
+ const size_t i,
306
+ const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>* = 0);
300
307
 
301
308
  /**
302
309
  * Get the ith validation subset from a variable of a row type.
303
310
  */
304
- template<typename ElementType>
305
- inline arma::Row<ElementType> GetValidationSubset(arma::Row<ElementType>& r,
306
- const size_t i);
311
+ template<typename SubsetRowType>
312
+ inline SubsetRowType GetValidationSubset(
313
+ SubsetRowType& r,
314
+ const size_t i,
315
+ const typename std::enable_if_t<IsRow<SubsetRowType>::value>* = 0);
307
316
  };
308
317
 
309
318
  } // namespace mlpack
@@ -375,14 +375,15 @@ template<typename MLAlgorithm,
375
375
  typename MatType,
376
376
  typename PredictionsType,
377
377
  typename WeightsType>
378
- template<typename ElementType>
379
- arma::Mat<ElementType> KFoldCV<MLAlgorithm,
380
- Metric,
381
- MatType,
382
- PredictionsType,
383
- WeightsType>::GetTrainingSubset(
384
- arma::Mat<ElementType>& m,
385
- const size_t i)
378
+ template<typename SubsetMatType>
379
+ SubsetMatType KFoldCV<MLAlgorithm,
380
+ Metric,
381
+ MatType,
382
+ PredictionsType,
383
+ WeightsType>::GetTrainingSubset(
384
+ SubsetMatType& m,
385
+ const size_t i,
386
+ const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>*)
386
387
  {
387
388
  // If this is not the first fold, we have to handle it a little bit
388
389
  // differently, since the last fold may contain slightly more than 'binSize'
@@ -390,8 +391,9 @@ arma::Mat<ElementType> KFoldCV<MLAlgorithm,
390
391
  const size_t subsetSize = (i != 0) ? lastBinSize + (k - 2) * binSize :
391
392
  (k - 1) * binSize;
392
393
 
393
- return arma::Mat<ElementType>(m.colptr(binSize * i), m.n_rows, subsetSize,
394
- false, true);
394
+ SubsetMatType alias;
395
+ MakeAlias(alias, m, m.n_rows, subsetSize, m.n_rows * binSize * i);
396
+ return alias;
395
397
  }
396
398
 
397
399
  template<typename MLAlgorithm,
@@ -399,14 +401,15 @@ template<typename MLAlgorithm,
399
401
  typename MatType,
400
402
  typename PredictionsType,
401
403
  typename WeightsType>
402
- template<typename ElementType>
403
- arma::Row<ElementType> KFoldCV<MLAlgorithm,
404
- Metric,
405
- MatType,
406
- PredictionsType,
407
- WeightsType>::GetTrainingSubset(
408
- arma::Row<ElementType>& r,
409
- const size_t i)
404
+ template<typename SubsetRowType>
405
+ SubsetRowType KFoldCV<MLAlgorithm,
406
+ Metric,
407
+ MatType,
408
+ PredictionsType,
409
+ WeightsType>::GetTrainingSubset(
410
+ SubsetRowType& r,
411
+ const size_t i,
412
+ const typename std::enable_if_t<IsRow<SubsetRowType>::value>*)
410
413
  {
411
414
  // If this is not the first fold, we have to handle it a little bit
412
415
  // differently, since the last fold may contain slightly more than 'binSize'
@@ -414,7 +417,9 @@ arma::Row<ElementType> KFoldCV<MLAlgorithm,
414
417
  const size_t subsetSize = (i != 0) ? lastBinSize + (k - 2) * binSize :
415
418
  (k - 1) * binSize;
416
419
 
417
- return arma::Row<ElementType>(r.colptr(binSize * i), subsetSize, false, true);
420
+ SubsetRowType alias;
421
+ MakeAlias(alias, r, subsetSize, r.n_rows * binSize * i);
422
+ return alias;
418
423
  }
419
424
 
420
425
  template<typename MLAlgorithm,
@@ -422,18 +427,21 @@ template<typename MLAlgorithm,
422
427
  typename MatType,
423
428
  typename PredictionsType,
424
429
  typename WeightsType>
425
- template<typename ElementType>
426
- arma::Mat<ElementType> KFoldCV<MLAlgorithm,
427
- Metric,
428
- MatType,
429
- PredictionsType,
430
- WeightsType>::GetValidationSubset(
431
- arma::Mat<ElementType>& m,
432
- const size_t i)
430
+ template<typename SubsetMatType>
431
+ SubsetMatType KFoldCV<MLAlgorithm,
432
+ Metric,
433
+ MatType,
434
+ PredictionsType,
435
+ WeightsType>::GetValidationSubset(
436
+ SubsetMatType& m,
437
+ const size_t i,
438
+ const typename std::enable_if_t<IsMatrix<SubsetMatType>::value>*)
433
439
  {
434
440
  const size_t subsetSize = (i == 0) ? lastBinSize : binSize;
435
- return arma::Mat<ElementType>(m.colptr(ValidationSubsetFirstCol(i)), m.n_rows,
436
- subsetSize, false, true);
441
+ SubsetMatType alias;
442
+ MakeAlias(alias, m, m.n_rows, subsetSize,
443
+ m.n_rows * ValidationSubsetFirstCol(i));
444
+ return alias;
437
445
  }
438
446
 
439
447
  template<typename MLAlgorithm,
@@ -441,18 +449,20 @@ template<typename MLAlgorithm,
441
449
  typename MatType,
442
450
  typename PredictionsType,
443
451
  typename WeightsType>
444
- template<typename ElementType>
445
- arma::Row<ElementType> KFoldCV<MLAlgorithm,
446
- Metric,
447
- MatType,
448
- PredictionsType,
449
- WeightsType>::GetValidationSubset(
450
- arma::Row<ElementType>& r,
451
- const size_t i)
452
+ template<typename SubsetRowType>
453
+ SubsetRowType KFoldCV<MLAlgorithm,
454
+ Metric,
455
+ MatType,
456
+ PredictionsType,
457
+ WeightsType>::GetValidationSubset(
458
+ SubsetRowType& r,
459
+ const size_t i,
460
+ const typename std::enable_if_t<IsRow<SubsetRowType>::value>*)
452
461
  {
453
462
  const size_t subsetSize = (i == 0) ? lastBinSize : binSize;
454
- return arma::Row<ElementType>(r.colptr(ValidationSubsetFirstCol(i)),
455
- subsetSize, false, true);
463
+ SubsetRowType alias;
464
+ MakeAlias(alias, r, subsetSize, r.n_rows * ValidationSubsetFirstCol(i));
465
+ return alias;
456
466
  }
457
467
 
458
468
  } // namespace mlpack
@@ -12,7 +12,6 @@
12
12
  #ifndef MLPACK_CORE_DATA_DATA_HPP
13
13
  #define MLPACK_CORE_DATA_DATA_HPP
14
14
 
15
- #include "detect_file_type.hpp"
16
15
  #include "extension.hpp"
17
16
  #include "format.hpp"
18
17
  #include "has_serialize.hpp"
@@ -30,14 +29,19 @@
30
29
  #include "check_categorical_param.hpp"
31
30
  #include "confusion_matrix.hpp"
32
31
  #include "dataset_mapper.hpp"
32
+ #include "data_options.hpp"
33
+ #include "detect_file_type.hpp"
33
34
  #include "image_info.hpp"
34
35
  #include "image_resize_crop.hpp"
35
36
  #include "imputer.hpp"
36
37
  #include "is_naninf.hpp"
38
+ #include "matrix_options.hpp"
37
39
  #include "normalize_labels.hpp"
38
40
  #include "one_hot_encoding.hpp"
39
41
  #include "split_data.hpp"
40
42
  #include "string_algorithms.hpp"
43
+ #include "text_options.hpp"
41
44
  #include "types.hpp"
45
+ #include "utilities.hpp"
42
46
 
43
47
  #endif
@@ -0,0 +1,219 @@
1
+ /**
2
+ * @file core/data/data_options.hpp
3
+ * @author Ryan Curtin
4
+ * @author Omar Shrit
5
+ *
6
+ * Data options, all possible options to load different data types and format
7
+ * with specific settings into mlpack.
8
+ *
9
+ * mlpack is free software; you may redistribute it and/or modify it under the
10
+ * terms of the 3-clause BSD license. You should have received a copy of the
11
+ * 3-clause BSD license along with mlpack. If not, see
12
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
13
+ */
14
+ #ifndef MLPACK_CORE_DATA_DATA_OPTIONS_HPP
15
+ #define MLPACK_CORE_DATA_DATA_OPTIONS_HPP
16
+
17
+ #include <mlpack/prereqs.hpp>
18
+
19
+ #include "types.hpp"
20
+ #include "dataset_mapper.hpp"
21
+ #include "map_policies/map_policies.hpp"
22
+ #include "format.hpp"
23
+ #include "image_info.hpp"
24
+
25
+ namespace mlpack {
26
+ namespace data {
27
+
28
+ /**
29
+ * All possible DataOptions grouped under one class.
30
+ * This will allow us to have consistent data API for mlpack. If new data
31
+ * options might be necessary, then they should be added in the following.
32
+ */
33
+
34
+ template<typename Derived>
35
+ class DataOptionsBase
36
+ {
37
+ public:
38
+ DataOptionsBase(const bool fatal = defaultFatal,
39
+ const FileType format = defaultFormat) :
40
+ fatal(fatal),
41
+ format(format)
42
+ {
43
+ // Do nothing.
44
+ }
45
+
46
+ template<typename Derived2>
47
+ explicit DataOptionsBase(const DataOptionsBase<Derived2>& opts)
48
+ {
49
+ CopyOptions(opts);
50
+ }
51
+
52
+ template<typename Derived2>
53
+ explicit DataOptionsBase(DataOptionsBase<Derived2>&& opts)
54
+ {
55
+ MoveOptions(std::move(opts));
56
+ }
57
+
58
+ // Convert any other DataOptions type to this DataOptions type, printing
59
+ // warnings for any members that cannot be converted. If this object and
60
+ // `opts` are of the same type, then the constructor for that type will be
61
+ // called instead.
62
+ template<typename Derived2>
63
+ DataOptionsBase& operator=(const DataOptionsBase<Derived2>& other)
64
+ {
65
+ if ((void*) &other == (void*) this)
66
+ return *this;
67
+
68
+ // Print warnings for any members that cannot be converted.
69
+ const char* dataDesc = static_cast<const Derived&>(*this).DataDescription();
70
+ static_cast<const Derived2&>(other).WarnBaseConversion(dataDesc);
71
+
72
+ CopyOptions(other);
73
+ return *this;
74
+ }
75
+
76
+ // Take ownership of the options of another `DataOptionsBase` type.
77
+ template<typename Derived2>
78
+ DataOptionsBase& operator=(DataOptionsBase<Derived2>&& other)
79
+ {
80
+ if ((void*) &other != (void*) this)
81
+ return *this;
82
+
83
+ // Print warnings for any members that cannot be converted.
84
+ const char* dataDesc = static_cast<const Derived&>(*this).DataDescription();
85
+ static_cast<const Derived2&>(other).WarnBaseConversion(dataDesc);
86
+
87
+ MoveOptions(std::move(other));
88
+ return *this;
89
+ }
90
+
91
+ template<typename Derived2>
92
+ void CopyOptions(const DataOptionsBase<Derived2>& other)
93
+ {
94
+ // Only copy options that have been set in the other object.
95
+ if (other.fatal.has_value())
96
+ fatal = *other.fatal;
97
+ if (other.format.has_value())
98
+ format = *other.format;
99
+ }
100
+
101
+ template<typename Derived2>
102
+ void MoveOptions(DataOptionsBase<Derived2>&& other)
103
+ {
104
+ fatal = std::move(other.fatal);
105
+ format = std::move(other.format);
106
+
107
+ // Reset all of the options in the other object.
108
+ other.Reset();
109
+ }
110
+
111
+ void Reset()
112
+ {
113
+ fatal.reset();
114
+ format.reset();
115
+
116
+ // Reset any child members.
117
+ static_cast<Derived&>(*this).Reset();
118
+ }
119
+
120
+ // If true, then exceptions are thrown on failures.
121
+ const bool& Fatal() const { return AccessMember(fatal, defaultFatal); }
122
+ // Modify whether or not exceptions are thrown on failures.
123
+ bool& Fatal() { return ModifyMember(fatal, defaultFatal); }
124
+
125
+ // Get the type of the file that will be loaded.
126
+ const FileType& Format() const { return AccessMember(format, defaultFormat); }
127
+ // Modify the file format to load.
128
+ FileType& Format() { return ModifyMember(format, defaultFormat); }
129
+
130
+ /**
131
+ * Given a file type, return a logical name corresponding to that file type.
132
+ */
133
+ const std::string FileTypeToString() const
134
+ {
135
+ FileType f = format.has_value() ? *format : defaultFormat;
136
+ switch (f)
137
+ {
138
+ case FileType::CSVASCII: return "CSV data";
139
+ case FileType::RawASCII: return "raw ASCII formatted data";
140
+ case FileType::RawBinary: return "raw binary formatted data";
141
+ case FileType::ArmaASCII: return "Armadillo ASCII formatted data";
142
+ case FileType::ArmaBinary: return "Armadillo binary formatted data";
143
+ case FileType::PGMBinary: return "PGM data";
144
+ case FileType::PPMBinary: return "PGM data";
145
+ case FileType::HDF5Binary: return "HDF5 data";
146
+ case FileType::CoordASCII:
147
+ return "ASCII formatted sparse coordinate data";
148
+ case FileType::AutoDetect: return "Detect automatically data type";
149
+ case FileType::FileTypeUnknown: return "Unknown data type";
150
+ default: return "";
151
+ }
152
+ }
153
+
154
+ protected:
155
+ template<typename T>
156
+ const T& AccessMember(const std::optional<T>& value,
157
+ const T& defaultValue) const
158
+ {
159
+ if (value.has_value())
160
+ return *value;
161
+ else
162
+ return defaultValue;
163
+ }
164
+
165
+ template<typename T>
166
+ T& ModifyMember(std::optional<T>& value, const T defaultValue)
167
+ {
168
+ // Set the default value if needed so that (*value) has defined behavior
169
+ // according to the spec.
170
+ if (!value.has_value())
171
+ value = defaultValue;
172
+
173
+ return *value;
174
+ }
175
+
176
+ void WarnOptionConversion(const char* optionName, const char* dataType) const
177
+ {
178
+ if (fatal.has_value() && *fatal)
179
+ {
180
+ Log::Fatal << "Option '" << optionName << "' cannot be specified when "
181
+ << dataType << " is being loaded!" << std::endl;
182
+ }
183
+ else
184
+ {
185
+ Log::Warn << "Option '" << optionName << "' ignored; not applicable when "
186
+ << dataType << " is being loaded!" << std::endl;
187
+ }
188
+ }
189
+
190
+ private:
191
+ std::optional<bool> fatal;
192
+ std::optional<FileType> format;
193
+
194
+ constexpr static const bool defaultFatal = false;
195
+ constexpr static const FileType defaultFormat = FileType::AutoDetect;
196
+
197
+ // For access to internal optional members.
198
+ template<typename Derived2>
199
+ friend class DataOptionsBase;
200
+ };
201
+
202
+ // This utility class is meant to be used as the Derived parameter for an option
203
+ // that is not actually a derived type. It provides the WarnBaseConversion()
204
+ // member, which does nothing.
205
+ class EmptyOptions : public DataOptionsBase<EmptyOptions>
206
+ {
207
+ public:
208
+ void WarnBaseConversion(const char* /* dataDescription */) const { }
209
+ static const char* DataDescription() { return "general data"; }
210
+ void Reset() { }
211
+ };
212
+
213
+ using DataOptions = DataOptionsBase<EmptyOptions>;
214
+
215
+
216
+ } // namespace data
217
+ } // namespace mlpack
218
+
219
+ #endif
@@ -16,17 +16,13 @@
16
16
  #define MLPACK_CORE_DATA_DETECT_FILE_TYPE_HPP
17
17
 
18
18
  #include "types.hpp"
19
+ #include "extension.hpp"
20
+ #include "string_algorithms.hpp"
21
+ #include "text_options.hpp"
19
22
 
20
23
  namespace mlpack {
21
24
  namespace data {
22
25
 
23
- /**
24
- * Given a file type, return a logical name corresponding to that file type.
25
- *
26
- * @param type Type to get the logical name of.
27
- */
28
- inline std::string GetStringType(const FileType& type);
29
-
30
26
  /**
31
27
  * Given an istream, attempt to guess the file type. This is taken originally
32
28
  * from Armadillo's function guess_file_type_internal(), but we avoid using
@@ -62,7 +58,9 @@ inline FileType AutoDetect(std::fstream& stream,
62
58
  * @param filename Name of the file whose type we should detect.
63
59
  * @return Detected type of file. arma::file_type_unknown if unknown.
64
60
  */
65
- inline FileType DetectFromExtension(const std::string& filename);
61
+ template<typename MatType, typename DataOptionsType>
62
+ void DetectFromExtension(const std::string& filename,
63
+ DataOptionsType& opts);
66
64
 
67
65
  /**
68
66
  * Count the number of columns in the file. The file must be a CSV/TSV/TXT file