nlptoolkit-classification 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Attribute/Attribute.js +6 -16
- package/dist/Attribute/Attribute.js.map +1 -1
- package/dist/Attribute/AttributeType.js +22 -32
- package/dist/Attribute/AttributeType.js.map +1 -1
- package/dist/Attribute/BinaryAttribute.js +14 -24
- package/dist/Attribute/BinaryAttribute.js.map +1 -1
- package/dist/Attribute/ContinuousAttribute.js +46 -55
- package/dist/Attribute/ContinuousAttribute.js.map +1 -1
- package/dist/Attribute/DiscreteAttribute.js +38 -48
- package/dist/Attribute/DiscreteAttribute.js.map +1 -1
- package/dist/Attribute/DiscreteIndexedAttribute.js +47 -55
- package/dist/Attribute/DiscreteIndexedAttribute.js.map +1 -1
- package/dist/DataSet/DataDefinition.js +115 -123
- package/dist/DataSet/DataDefinition.js.map +1 -1
- package/dist/DataSet/DataSet.js +268 -245
- package/dist/DataSet/DataSet.js.map +1 -1
- package/dist/DistanceMetric/DistanceMetric.js +2 -12
- package/dist/DistanceMetric/EuclidianDistance.js +27 -37
- package/dist/DistanceMetric/EuclidianDistance.js.map +1 -1
- package/dist/DistanceMetric/MahalanobisDistance.js +27 -36
- package/dist/DistanceMetric/MahalanobisDistance.js.map +1 -1
- package/dist/Experiment/BootstrapRun.js +31 -40
- package/dist/Experiment/BootstrapRun.js.map +1 -1
- package/dist/Experiment/Experiment.js +46 -53
- package/dist/Experiment/Experiment.js.map +1 -1
- package/dist/Experiment/KFoldRun.js +45 -54
- package/dist/Experiment/KFoldRun.js.map +1 -1
- package/dist/Experiment/KFoldRunSeparateTest.js +48 -58
- package/dist/Experiment/KFoldRunSeparateTest.js.map +1 -1
- package/dist/Experiment/MultipleRun.js +2 -12
- package/dist/Experiment/MxKFoldRun.js +31 -40
- package/dist/Experiment/MxKFoldRun.js.map +1 -1
- package/dist/Experiment/MxKFoldRunSeparateTest.js +35 -44
- package/dist/Experiment/MxKFoldRunSeparateTest.js.map +1 -1
- package/dist/Experiment/SingleRun.js +2 -12
- package/dist/Experiment/SingleRunWithK.js +39 -48
- package/dist/Experiment/SingleRunWithK.js.map +1 -1
- package/dist/Experiment/StratifiedKFoldRun.js +26 -36
- package/dist/Experiment/StratifiedKFoldRun.js.map +1 -1
- package/dist/Experiment/StratifiedKFoldRunSeparateTest.js +30 -40
- package/dist/Experiment/StratifiedKFoldRunSeparateTest.js.map +1 -1
- package/dist/Experiment/StratifiedMxKFoldRun.js +29 -39
- package/dist/Experiment/StratifiedMxKFoldRun.js.map +1 -1
- package/dist/Experiment/StratifiedMxKFoldRunSeparateTest.js +34 -43
- package/dist/Experiment/StratifiedMxKFoldRunSeparateTest.js.map +1 -1
- package/dist/Experiment/StratifiedSingleRunWithK.js +27 -36
- package/dist/Experiment/StratifiedSingleRunWithK.js.map +1 -1
- package/dist/FeatureSelection/BackwardSelection.js +26 -36
- package/dist/FeatureSelection/BackwardSelection.js.map +1 -1
- package/dist/FeatureSelection/FeatureSubSet.js +72 -82
- package/dist/FeatureSelection/FeatureSubSet.js.map +1 -1
- package/dist/FeatureSelection/FloatingSelection.js +25 -35
- package/dist/FeatureSelection/FloatingSelection.js.map +1 -1
- package/dist/FeatureSelection/ForwardSelection.js +25 -35
- package/dist/FeatureSelection/ForwardSelection.js.map +1 -1
- package/dist/FeatureSelection/SubSetSelection.js +69 -78
- package/dist/FeatureSelection/SubSetSelection.js.map +1 -1
- package/dist/Filter/DiscreteToContinuous.js +50 -60
- package/dist/Filter/DiscreteToContinuous.js.map +1 -1
- package/dist/Filter/DiscreteToIndexed.js +40 -50
- package/dist/Filter/DiscreteToIndexed.js.map +1 -1
- package/dist/Filter/FeatureFilter.js +23 -32
- package/dist/Filter/FeatureFilter.js.map +1 -1
- package/dist/Filter/LaryFilter.js +46 -55
- package/dist/Filter/LaryFilter.js.map +1 -1
- package/dist/Filter/LaryToBinary.js +48 -58
- package/dist/Filter/LaryToBinary.js.map +1 -1
- package/dist/Filter/Normalize.js +33 -41
- package/dist/Filter/Normalize.js.map +1 -1
- package/dist/Filter/Pca.js +86 -96
- package/dist/Filter/Pca.js.map +1 -1
- package/dist/Filter/TrainedFeatureFilter.js +14 -24
- package/dist/Filter/TrainedFeatureFilter.js.map +1 -1
- package/dist/Instance/CompositeInstance.js +46 -56
- package/dist/Instance/CompositeInstance.js.map +1 -1
- package/dist/Instance/Instance.js +145 -154
- package/dist/Instance/Instance.js.map +1 -1
- package/dist/InstanceList/InstanceList.js +466 -443
- package/dist/InstanceList/InstanceList.js.map +1 -1
- package/dist/InstanceList/InstanceListOfSameClass.js +23 -32
- package/dist/InstanceList/InstanceListOfSameClass.js.map +1 -1
- package/dist/InstanceList/Partition.js +167 -177
- package/dist/InstanceList/Partition.js.map +1 -1
- package/dist/Model/DecisionTree/DecisionCondition.js +71 -80
- package/dist/Model/DecisionTree/DecisionCondition.js.map +1 -1
- package/dist/Model/DecisionTree/DecisionNode.js +302 -311
- package/dist/Model/DecisionTree/DecisionNode.js.map +1 -1
- package/dist/Model/DecisionTree/DecisionStump.js +22 -32
- package/dist/Model/DecisionTree/DecisionStump.js.map +1 -1
- package/dist/Model/DecisionTree/DecisionTree.js +89 -98
- package/dist/Model/DecisionTree/DecisionTree.js.map +1 -1
- package/dist/Model/DummyModel.js +64 -73
- package/dist/Model/DummyModel.js.map +1 -1
- package/dist/Model/Ensemble/BaggingModel.js +34 -44
- package/dist/Model/Ensemble/BaggingModel.js.map +1 -1
- package/dist/Model/Ensemble/RandomForestModel.js +31 -41
- package/dist/Model/Ensemble/RandomForestModel.js.map +1 -1
- package/dist/Model/Ensemble/TreeEnsembleModel.js +55 -64
- package/dist/Model/Ensemble/TreeEnsembleModel.js.map +1 -1
- package/dist/Model/Model.js +130 -140
- package/dist/Model/Model.js.map +1 -1
- package/dist/Model/NeuralNetwork/DeepNetworkModel.js +162 -169
- package/dist/Model/NeuralNetwork/DeepNetworkModel.js.map +1 -1
- package/dist/Model/NeuralNetwork/LinearPerceptronModel.js +69 -78
- package/dist/Model/NeuralNetwork/LinearPerceptronModel.js.map +1 -1
- package/dist/Model/NeuralNetwork/MultiLayerPerceptronModel.js +112 -120
- package/dist/Model/NeuralNetwork/MultiLayerPerceptronModel.js.map +1 -1
- package/dist/Model/NeuralNetwork/NeuralNetworkModel.js +197 -201
- package/dist/Model/NeuralNetwork/NeuralNetworkModel.js.map +1 -1
- package/dist/Model/NonParametric/KnnInstance.js +21 -29
- package/dist/Model/NonParametric/KnnInstance.js.map +1 -1
- package/dist/Model/NonParametric/KnnModel.js +101 -108
- package/dist/Model/NonParametric/KnnModel.js.map +1 -1
- package/dist/Model/Parametric/GaussianModel.js +82 -91
- package/dist/Model/Parametric/GaussianModel.js.map +1 -1
- package/dist/Model/Parametric/KMeansModel.js +59 -67
- package/dist/Model/Parametric/KMeansModel.js.map +1 -1
- package/dist/Model/Parametric/LdaModel.js +83 -91
- package/dist/Model/Parametric/LdaModel.js.map +1 -1
- package/dist/Model/Parametric/NaiveBayesModel.js +119 -132
- package/dist/Model/Parametric/NaiveBayesModel.js.map +1 -1
- package/dist/Model/Parametric/QdaModel.js +70 -79
- package/dist/Model/Parametric/QdaModel.js.map +1 -1
- package/dist/Model/RandomModel.js +85 -92
- package/dist/Model/RandomModel.js.map +1 -1
- package/dist/Model/ValidatedModel.js +21 -31
- package/dist/Model/ValidatedModel.js.map +1 -1
- package/dist/Parameter/ActivationFunction.js +9 -19
- package/dist/Parameter/ActivationFunction.js.map +1 -1
- package/dist/Parameter/BaggingParameter.js +24 -33
- package/dist/Parameter/BaggingParameter.js.map +1 -1
- package/dist/Parameter/C45Parameter.js +34 -42
- package/dist/Parameter/C45Parameter.js.map +1 -1
- package/dist/Parameter/DeepNetworkParameter.js +48 -56
- package/dist/Parameter/DeepNetworkParameter.js.map +1 -1
- package/dist/Parameter/KMeansParameter.js +29 -38
- package/dist/Parameter/KMeansParameter.js.map +1 -1
- package/dist/Parameter/KnnParameter.js +25 -34
- package/dist/Parameter/KnnParameter.js.map +1 -1
- package/dist/Parameter/LinearPerceptronParameter.js +56 -62
- package/dist/Parameter/LinearPerceptronParameter.js.map +1 -1
- package/dist/Parameter/MultiLayerPerceptronParameter.js +38 -46
- package/dist/Parameter/MultiLayerPerceptronParameter.js.map +1 -1
- package/dist/Parameter/Parameter.js +21 -30
- package/dist/Parameter/Parameter.js.map +1 -1
- package/dist/Parameter/RandomForestParameter.js +25 -34
- package/dist/Parameter/RandomForestParameter.js.map +1 -1
- package/dist/Performance/ClassificationPerformance.js +24 -33
- package/dist/Performance/ClassificationPerformance.js.map +1 -1
- package/dist/Performance/ConfusionMatrix.js +149 -158
- package/dist/Performance/ConfusionMatrix.js.map +1 -1
- package/dist/Performance/DetailedClassificationPerformance.js +23 -32
- package/dist/Performance/DetailedClassificationPerformance.js.map +1 -1
- package/dist/Performance/ExperimentPerformance.js +184 -161
- package/dist/Performance/ExperimentPerformance.js.map +1 -1
- package/dist/Performance/Performance.js +21 -30
- package/dist/Performance/Performance.js.map +1 -1
- package/dist/StatisticalTest/Combined5x2F.js +41 -51
- package/dist/StatisticalTest/Combined5x2F.js.map +1 -1
- package/dist/StatisticalTest/Combined5x2t.js +42 -52
- package/dist/StatisticalTest/Combined5x2t.js.map +1 -1
- package/dist/StatisticalTest/Paired5x2t.js +40 -50
- package/dist/StatisticalTest/Paired5x2t.js.map +1 -1
- package/dist/StatisticalTest/PairedTest.js +31 -41
- package/dist/StatisticalTest/PairedTest.js.map +1 -1
- package/dist/StatisticalTest/Pairedt.js +38 -48
- package/dist/StatisticalTest/Pairedt.js.map +1 -1
- package/dist/StatisticalTest/Sign.js +50 -60
- package/dist/StatisticalTest/Sign.js.map +1 -1
- package/dist/StatisticalTest/StatisticalTestResult.js +65 -73
- package/dist/StatisticalTest/StatisticalTestResult.js.map +1 -1
- package/dist/StatisticalTest/StatisticalTestResultType.js +11 -21
- package/dist/StatisticalTest/StatisticalTestResultType.js.map +1 -1
- package/dist/index.js +95 -101
- package/dist/index.js.map +1 -1
- package/package.json +7 -6
- package/tsconfig.json +4 -3
- package/source/tsconfig.json +0 -13
|
@@ -1,361 +1,352 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
this.
|
|
50
|
-
this.EPSILON = 0.0000000001;
|
|
51
|
-
this.classLabel = undefined;
|
|
52
|
-
this.leaf = false;
|
|
53
|
-
this.condition = undefined;
|
|
54
|
-
if (data instanceof InstanceList_1.InstanceList && (condition instanceof DecisionCondition_1.DecisionCondition || condition == undefined)) {
|
|
55
|
-
this.constructor1(data, condition, parameter, isStump);
|
|
56
|
-
}
|
|
57
|
-
else {
|
|
58
|
-
if (data instanceof FileContents_1.FileContents) {
|
|
59
|
-
this.constructor2(data);
|
|
60
|
-
}
|
|
61
|
-
}
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DecisionNode = void 0;
|
|
4
|
+
const InstanceList_1 = require("../../InstanceList/InstanceList");
|
|
5
|
+
const DecisionCondition_1 = require("./DecisionCondition");
|
|
6
|
+
const Model_1 = require("../Model");
|
|
7
|
+
const DiscreteIndexedAttribute_1 = require("../../Attribute/DiscreteIndexedAttribute");
|
|
8
|
+
const DiscreteAttribute_1 = require("../../Attribute/DiscreteAttribute");
|
|
9
|
+
const ContinuousAttribute_1 = require("../../Attribute/ContinuousAttribute");
|
|
10
|
+
const DiscreteDistribution_1 = require("nlptoolkit-math/dist/DiscreteDistribution");
|
|
11
|
+
const Partition_1 = require("../../InstanceList/Partition");
|
|
12
|
+
const CompositeInstance_1 = require("../../Instance/CompositeInstance");
|
|
13
|
+
const RandomArray_1 = require("nlptoolkit-util/dist/RandomArray");
|
|
14
|
+
const FileContents_1 = require("nlptoolkit-util/dist/FileContents");
|
|
15
|
+
class DecisionNode {
|
|
16
|
+
children = undefined;
|
|
17
|
+
EPSILON = 0.0000000001;
|
|
18
|
+
classLabel = undefined;
|
|
19
|
+
leaf = false;
|
|
20
|
+
condition = undefined;
|
|
21
|
+
classLabelsDistribution;
|
|
22
|
+
/**
|
|
23
|
+
* The DecisionNode method takes {@link InstanceList} data as input and then it sets the class label parameter by finding
|
|
24
|
+
* the most occurred class label of given data, it then gets distinct class labels as class labels ArrayList. Later, it adds ordered
|
|
25
|
+
* indices to the indexList and shuffles them randomly. Then, it gets the class distribution of given data and finds the best entropy value
|
|
26
|
+
* of this class distribution.
|
|
27
|
+
* <p>
|
|
28
|
+
* If an attribute of given data is {@link DiscreteIndexedAttribute}, it creates a Distribution according to discrete indexed attribute class distribution
|
|
29
|
+
* and finds the entropy. If it is better than the last best entropy it reassigns the best entropy, best attribute and best split value according to
|
|
30
|
+
* the newly found best entropy's index. At the end, it also adds the new distribution to the class distribution.
|
|
31
|
+
* <p>
|
|
32
|
+
* If an attribute of given data is {@link DiscreteAttribute}, it directly finds the entropy. If it is better than the last best entropy it
|
|
33
|
+
* reassigns the best entropy, best attribute and best split value according to the newly found best entropy's index.
|
|
34
|
+
* <p>
|
|
35
|
+
* If an attribute of given data is {@link ContinuousAttribute}, it creates two distributions; left and right according to class distribution
|
|
36
|
+
* and discrete distribution respectively, and finds the entropy. If it is better than the last best entropy it reassigns the best entropy,
|
|
37
|
+
* best attribute and best split value according to the newly found best entropy's index. At the end, it also adds new distribution to
|
|
38
|
+
* the right distribution and removes it from the left distribution.
|
|
39
|
+
*
|
|
40
|
+
* @param data {@link InstanceList} input.
|
|
41
|
+
* @param condition {@link DecisionCondition} to check.
|
|
42
|
+
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
43
|
+
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
44
|
+
*/
|
|
45
|
+
constructor1(data, condition, parameter, isStump) {
|
|
46
|
+
let bestAttribute = -1;
|
|
47
|
+
let bestSplitValue = 0;
|
|
48
|
+
if (condition instanceof DecisionCondition_1.DecisionCondition) {
|
|
49
|
+
this.condition = condition;
|
|
62
50
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
let bestAttribute = -1;
|
|
88
|
-
let bestSplitValue = 0;
|
|
89
|
-
if (condition instanceof DecisionCondition_1.DecisionCondition) {
|
|
90
|
-
this.condition = condition;
|
|
91
|
-
}
|
|
92
|
-
this.classLabelsDistribution = new DiscreteDistribution_1.DiscreteDistribution();
|
|
93
|
-
let labels = data.getClassLabels();
|
|
94
|
-
for (let label of labels) {
|
|
95
|
-
this.classLabelsDistribution.addItem(label);
|
|
96
|
-
}
|
|
97
|
-
this.classLabel = Model_1.Model.getMaximum(labels);
|
|
98
|
-
this.leaf = true;
|
|
99
|
-
let classLabels = data.getDistinctClassLabels();
|
|
100
|
-
if (classLabels.length == 1) {
|
|
101
|
-
return;
|
|
102
|
-
}
|
|
103
|
-
if (isStump && condition != null) {
|
|
104
|
-
return;
|
|
105
|
-
}
|
|
106
|
-
let indexList;
|
|
107
|
-
let size;
|
|
108
|
-
if (parameter != undefined && parameter.getAttributeSubsetSize() < data.get(0).attributeSize()) {
|
|
109
|
-
indexList = RandomArray_1.RandomArray.indexArray(data.get(0).attributeSize(), parameter.getSeed());
|
|
110
|
-
size = parameter.getAttributeSubsetSize();
|
|
111
|
-
}
|
|
112
|
-
else {
|
|
113
|
-
indexList = new Array();
|
|
114
|
-
for (let i = 0; i < data.get(0).attributeSize(); i++) {
|
|
115
|
-
indexList.push(i);
|
|
116
|
-
}
|
|
117
|
-
size = data.get(0).attributeSize();
|
|
51
|
+
this.classLabelsDistribution = new DiscreteDistribution_1.DiscreteDistribution();
|
|
52
|
+
let labels = data.getClassLabels();
|
|
53
|
+
for (let label of labels) {
|
|
54
|
+
this.classLabelsDistribution.addItem(label);
|
|
55
|
+
}
|
|
56
|
+
this.classLabel = Model_1.Model.getMaximum(labels);
|
|
57
|
+
this.leaf = true;
|
|
58
|
+
let classLabels = data.getDistinctClassLabels();
|
|
59
|
+
if (classLabels.length == 1) {
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
if (isStump && condition != null) {
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
let indexList;
|
|
66
|
+
let size;
|
|
67
|
+
if (parameter != undefined && parameter.getAttributeSubsetSize() < data.get(0).attributeSize()) {
|
|
68
|
+
indexList = RandomArray_1.RandomArray.indexArray(data.get(0).attributeSize(), parameter.getSeed());
|
|
69
|
+
size = parameter.getAttributeSubsetSize();
|
|
70
|
+
}
|
|
71
|
+
else {
|
|
72
|
+
indexList = new Array();
|
|
73
|
+
for (let i = 0; i < data.get(0).attributeSize(); i++) {
|
|
74
|
+
indexList.push(i);
|
|
118
75
|
}
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
bestAttribute = index;
|
|
132
|
-
bestSplitValue = k;
|
|
133
|
-
}
|
|
134
|
-
classDistribution.addDistribution(distribution);
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
else {
|
|
139
|
-
if (data.get(0).getAttribute(index) instanceof DiscreteAttribute_1.DiscreteAttribute) {
|
|
140
|
-
let entropy = this.entropyForDiscreteAttribute(data, index);
|
|
76
|
+
size = data.get(0).attributeSize();
|
|
77
|
+
}
|
|
78
|
+
let classDistribution = data.classDistribution();
|
|
79
|
+
let bestEntropy = data.classDistribution().entropy();
|
|
80
|
+
for (let j = 0; j < size; j++) {
|
|
81
|
+
let index = indexList[j];
|
|
82
|
+
if (data.get(0).getAttribute(index) instanceof DiscreteIndexedAttribute_1.DiscreteIndexedAttribute) {
|
|
83
|
+
for (let k = 0; k < data.get(0).getAttribute(index).getMaxIndex(); k++) {
|
|
84
|
+
let distribution = data.discreteIndexedAttributeClassDistribution(index, k);
|
|
85
|
+
if (distribution.getSum() > 0) {
|
|
86
|
+
classDistribution.removeDistribution(distribution);
|
|
87
|
+
let entropy = (classDistribution.entropy() * classDistribution.getSum() + distribution.entropy() * distribution.getSum()) / data.size();
|
|
141
88
|
if (entropy + this.EPSILON < bestEntropy) {
|
|
142
89
|
bestEntropy = entropy;
|
|
143
90
|
bestAttribute = index;
|
|
91
|
+
bestSplitValue = k;
|
|
144
92
|
}
|
|
93
|
+
classDistribution.addDistribution(distribution);
|
|
145
94
|
}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
else {
|
|
98
|
+
if (data.get(0).getAttribute(index) instanceof DiscreteAttribute_1.DiscreteAttribute) {
|
|
99
|
+
let entropy = this.entropyForDiscreteAttribute(data, index);
|
|
100
|
+
if (entropy + this.EPSILON < bestEntropy) {
|
|
101
|
+
bestEntropy = entropy;
|
|
102
|
+
bestAttribute = index;
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
else {
|
|
106
|
+
if (data.get(0).getAttribute(index) instanceof ContinuousAttribute_1.ContinuousAttribute) {
|
|
107
|
+
data.sort(index);
|
|
108
|
+
let previousValue = Number.NEGATIVE_INFINITY;
|
|
109
|
+
let leftDistribution = data.classDistribution();
|
|
110
|
+
let rightDistribution = new DiscreteDistribution_1.DiscreteDistribution();
|
|
111
|
+
for (let k = 0; k < data.size(); k++) {
|
|
112
|
+
let instance = data.get(k);
|
|
113
|
+
if (k == 0) {
|
|
114
|
+
previousValue = instance.getAttribute(index).getValue();
|
|
115
|
+
}
|
|
116
|
+
else {
|
|
117
|
+
if (instance.getAttribute(index).getValue() != previousValue) {
|
|
118
|
+
let splitValue = (previousValue + instance.getAttribute(index).getValue()) / 2;
|
|
155
119
|
previousValue = instance.getAttribute(index).getValue();
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
let entropy = (leftDistribution.getSum() / data.size()) * leftDistribution.entropy() + (rightDistribution.getSum() / data.size()) * rightDistribution.entropy();
|
|
162
|
-
if (entropy + this.EPSILON < bestEntropy) {
|
|
163
|
-
bestEntropy = entropy;
|
|
164
|
-
bestSplitValue = splitValue;
|
|
165
|
-
bestAttribute = index;
|
|
166
|
-
}
|
|
120
|
+
let entropy = (leftDistribution.getSum() / data.size()) * leftDistribution.entropy() + (rightDistribution.getSum() / data.size()) * rightDistribution.entropy();
|
|
121
|
+
if (entropy + this.EPSILON < bestEntropy) {
|
|
122
|
+
bestEntropy = entropy;
|
|
123
|
+
bestSplitValue = splitValue;
|
|
124
|
+
bestAttribute = index;
|
|
167
125
|
}
|
|
168
126
|
}
|
|
169
|
-
leftDistribution.removeItem(instance.getClassLabel());
|
|
170
|
-
rightDistribution.addItem(instance.getClassLabel());
|
|
171
127
|
}
|
|
128
|
+
leftDistribution.removeItem(instance.getClassLabel());
|
|
129
|
+
rightDistribution.addItem(instance.getClassLabel());
|
|
172
130
|
}
|
|
173
131
|
}
|
|
174
132
|
}
|
|
175
133
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
134
|
+
}
|
|
135
|
+
if (bestAttribute != -1) {
|
|
136
|
+
this.leaf = false;
|
|
137
|
+
if (data.get(0).getAttribute(bestAttribute) instanceof DiscreteIndexedAttribute_1.DiscreteIndexedAttribute) {
|
|
138
|
+
this.createChildrenForDiscreteIndexed(data, bestAttribute, bestSplitValue, parameter, isStump);
|
|
139
|
+
}
|
|
140
|
+
else {
|
|
141
|
+
if (data.get(0).getAttribute(bestAttribute) instanceof DiscreteAttribute_1.DiscreteAttribute) {
|
|
142
|
+
this.createChildrenForDiscrete(data, bestAttribute, parameter, isStump);
|
|
180
143
|
}
|
|
181
144
|
else {
|
|
182
|
-
if (data.get(0).getAttribute(bestAttribute) instanceof
|
|
183
|
-
this.
|
|
184
|
-
}
|
|
185
|
-
else {
|
|
186
|
-
if (data.get(0).getAttribute(bestAttribute) instanceof ContinuousAttribute_1.ContinuousAttribute) {
|
|
187
|
-
this.createChildrenForContinuous(data, bestAttribute, bestSplitValue, parameter, isStump);
|
|
188
|
-
}
|
|
145
|
+
if (data.get(0).getAttribute(bestAttribute) instanceof ContinuousAttribute_1.ContinuousAttribute) {
|
|
146
|
+
this.createChildrenForContinuous(data, bestAttribute, bestSplitValue, parameter, isStump);
|
|
189
147
|
}
|
|
190
148
|
}
|
|
191
149
|
}
|
|
192
150
|
}
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
else {
|
|
204
|
-
if (items[1][0] == ':') {
|
|
205
|
-
this.condition = new DecisionCondition_1.DecisionCondition(parseInt(items[0]), new DiscreteIndexedAttribute_1.DiscreteIndexedAttribute("", parseInt(items[2]), parseInt(items[3])), '=');
|
|
206
|
-
}
|
|
207
|
-
else {
|
|
208
|
-
this.condition = new DecisionCondition_1.DecisionCondition(parseInt(items[0]), new ContinuousAttribute_1.ContinuousAttribute(parseFloat(items[2])), items[1][0]);
|
|
209
|
-
}
|
|
210
|
-
}
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* Reads the decision node model (as one line) from model file.
|
|
154
|
+
* @param contents Model file
|
|
155
|
+
*/
|
|
156
|
+
constructor2(contents) {
|
|
157
|
+
let items = contents.readLine().split(" ");
|
|
158
|
+
if (items[0] != "-1") {
|
|
159
|
+
if (items[1][0] == '=') {
|
|
160
|
+
this.condition = new DecisionCondition_1.DecisionCondition(parseInt(items[0]), new DiscreteAttribute_1.DiscreteAttribute(items[2]), items[1][0]);
|
|
211
161
|
}
|
|
212
162
|
else {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
this.children = new Array();
|
|
219
|
-
for (let i = 0; i < numberOfChildren; i++) {
|
|
220
|
-
this.children.push(new DecisionNode(contents));
|
|
163
|
+
if (items[1][0] == ':') {
|
|
164
|
+
this.condition = new DecisionCondition_1.DecisionCondition(parseInt(items[0]), new DiscreteIndexedAttribute_1.DiscreteIndexedAttribute("", parseInt(items[2]), parseInt(items[3])), '=');
|
|
165
|
+
}
|
|
166
|
+
else {
|
|
167
|
+
this.condition = new DecisionCondition_1.DecisionCondition(parseInt(items[0]), new ContinuousAttribute_1.ContinuousAttribute(parseFloat(items[2])), items[1][0]);
|
|
221
168
|
}
|
|
222
|
-
}
|
|
223
|
-
else {
|
|
224
|
-
this.leaf = true;
|
|
225
|
-
this.classLabel = contents.readLine();
|
|
226
|
-
this.classLabelsDistribution = Model_1.Model.loadDiscreteDistribution(contents);
|
|
227
169
|
}
|
|
228
170
|
}
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
* Then loops through the distributions and calculates the total entropy.
|
|
232
|
-
*
|
|
233
|
-
* @param data Instance list.
|
|
234
|
-
* @param attributeIndex Index of the attribute.
|
|
235
|
-
* @return Total entropy for the discrete attribute.
|
|
236
|
-
*/
|
|
237
|
-
entropyForDiscreteAttribute(data, attributeIndex) {
|
|
238
|
-
let sum = 0.0;
|
|
239
|
-
let distributions = data.attributeClassDistribution(attributeIndex);
|
|
240
|
-
for (let distribution of distributions) {
|
|
241
|
-
sum += (distribution.getSum() / data.size()) * distribution.entropy();
|
|
242
|
-
}
|
|
243
|
-
return sum;
|
|
171
|
+
else {
|
|
172
|
+
this.condition = null;
|
|
244
173
|
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
*
|
|
249
|
-
* @param data Instance list.
|
|
250
|
-
* @param attributeIndex Index of the attribute.
|
|
251
|
-
* @param attributeValue Value of the attribute.
|
|
252
|
-
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
253
|
-
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
254
|
-
*/
|
|
255
|
-
createChildrenForDiscreteIndexed(data, attributeIndex, attributeValue, parameter, isStump) {
|
|
256
|
-
let childrenData = new Partition_1.Partition(data, attributeIndex, attributeValue);
|
|
174
|
+
let numberOfChildren = parseInt(contents.readLine());
|
|
175
|
+
if (numberOfChildren != 0) {
|
|
176
|
+
this.leaf = false;
|
|
257
177
|
this.children = new Array();
|
|
258
|
-
|
|
259
|
-
|
|
178
|
+
for (let i = 0; i < numberOfChildren; i++) {
|
|
179
|
+
this.children.push(new DecisionNode(contents));
|
|
180
|
+
}
|
|
260
181
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
182
|
+
else {
|
|
183
|
+
this.leaf = true;
|
|
184
|
+
this.classLabel = contents.readLine();
|
|
185
|
+
this.classLabelsDistribution = Model_1.Model.loadDiscreteDistribution(contents);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
/**
|
|
189
|
+
* The DecisionNode method takes {@link InstanceList} data as input, and then it sets the class label parameter by finding
|
|
190
|
+
* the most occurred class label of given data, it then gets distinct class labels as class labels ArrayList. Later, it adds ordered
|
|
191
|
+
* indices to the indexList and shuffles them randomly. Then, it gets the class distribution of given data and finds the best entropy value
|
|
192
|
+
* of this class distribution.
|
|
193
|
+
* <p>
|
|
194
|
+
* If an attribute of given data is {@link DiscreteIndexedAttribute}, it creates a Distribution according to discrete indexed attribute class distribution
|
|
195
|
+
* and finds the entropy. If it is better than the last best entropy it reassigns the best entropy, best attribute and best split value according to
|
|
196
|
+
* the newly found best entropy's index. At the end, it also adds new distribution to the class distribution.
|
|
197
|
+
* <p>
|
|
198
|
+
* If an attribute of given data is {@link DiscreteAttribute}, it directly finds the entropy. If it is better than the last best entropy it
|
|
199
|
+
* reassigns the best entropy, best attribute and best split value according to the newly found best entropy's index.
|
|
200
|
+
* <p>
|
|
201
|
+
* If an attribute of given data is {@link ContinuousAttribute}, it creates two distributions; left and right according to class distribution
|
|
202
|
+
* and discrete distribution respectively, and finds the entropy. If it is better than the last best entropy it reassigns the best entropy,
|
|
203
|
+
* best attribute and best split value according to the newly found best entropy's index. At the end, it also adds new distribution to
|
|
204
|
+
* the right distribution and removes it from the left distribution.
|
|
205
|
+
*
|
|
206
|
+
* @param data {@link InstanceList} input.
|
|
207
|
+
* @param condition {@link DecisionCondition} to check.
|
|
208
|
+
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
209
|
+
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
210
|
+
*/
|
|
211
|
+
constructor(data, condition, parameter, isStump) {
|
|
212
|
+
if (data instanceof InstanceList_1.InstanceList && (condition instanceof DecisionCondition_1.DecisionCondition || condition == undefined)) {
|
|
213
|
+
this.constructor1(data, condition, parameter, isStump);
|
|
214
|
+
}
|
|
215
|
+
else {
|
|
216
|
+
if (data instanceof FileContents_1.FileContents) {
|
|
217
|
+
this.constructor2(data);
|
|
276
218
|
}
|
|
277
219
|
}
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
this.children.push(new DecisionNode(childrenData.get(1), new DecisionCondition_1.DecisionCondition(attributeIndex, new ContinuousAttribute_1.ContinuousAttribute(splitValue), ">"), parameter, isStump));
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* The entropyForDiscreteAttribute method takes an attributeIndex and creates an ArrayList of DiscreteDistribution.
|
|
223
|
+
* Then loops through the distributions and calculates the total entropy.
|
|
224
|
+
*
|
|
225
|
+
* @param data Instance list.
|
|
226
|
+
* @param attributeIndex Index of the attribute.
|
|
227
|
+
* @return Total entropy for the discrete attribute.
|
|
228
|
+
*/
|
|
229
|
+
entropyForDiscreteAttribute(data, attributeIndex) {
|
|
230
|
+
let sum = 0.0;
|
|
231
|
+
let distributions = data.attributeClassDistribution(attributeIndex);
|
|
232
|
+
for (let distribution of distributions) {
|
|
233
|
+
sum += (distribution.getSum() / data.size()) * distribution.entropy();
|
|
293
234
|
}
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
235
|
+
return sum;
|
|
236
|
+
}
|
|
237
|
+
/**
|
|
238
|
+
* The createChildrenForDiscreteIndexed method creates an ArrayList of DecisionNodes as children and a partition with respect to
|
|
239
|
+
* indexed attribute.
|
|
240
|
+
*
|
|
241
|
+
* @param data Instance list.
|
|
242
|
+
* @param attributeIndex Index of the attribute.
|
|
243
|
+
* @param attributeValue Value of the attribute.
|
|
244
|
+
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
245
|
+
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
246
|
+
*/
|
|
247
|
+
createChildrenForDiscreteIndexed(data, attributeIndex, attributeValue, parameter, isStump) {
|
|
248
|
+
let childrenData = new Partition_1.Partition(data, attributeIndex, attributeValue);
|
|
249
|
+
this.children = new Array();
|
|
250
|
+
this.children.push(new DecisionNode(childrenData.get(0), new DecisionCondition_1.DecisionCondition(attributeIndex, new DiscreteIndexedAttribute_1.DiscreteIndexedAttribute("", attributeValue, data.get(0).getAttribute(attributeIndex).getMaxIndex())), parameter, isStump));
|
|
251
|
+
this.children.push(new DecisionNode(childrenData.get(1), new DecisionCondition_1.DecisionCondition(attributeIndex, new DiscreteIndexedAttribute_1.DiscreteIndexedAttribute("", -1, data.get(0).getAttribute(attributeIndex).getMaxIndex())), parameter, isStump));
|
|
252
|
+
}
|
|
253
|
+
/**
|
|
254
|
+
* The createChildrenForDiscrete method creates an ArrayList of values, a partition with respect to attributes and an ArrayList
|
|
255
|
+
* of DecisionNodes as children.
|
|
256
|
+
*
|
|
257
|
+
* @param data Instance list.
|
|
258
|
+
* @param attributeIndex Index of the attribute.
|
|
259
|
+
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
260
|
+
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
261
|
+
*/
|
|
262
|
+
createChildrenForDiscrete(data, attributeIndex, parameter, isStump) {
|
|
263
|
+
let valueList = data.getAttributeValueList(attributeIndex);
|
|
264
|
+
let childrenData = new Partition_1.Partition(data, attributeIndex);
|
|
265
|
+
this.children = new Array();
|
|
266
|
+
for (let i = 0; i < valueList.length; i++) {
|
|
267
|
+
this.children.push(new DecisionNode(childrenData.get(i), new DecisionCondition_1.DecisionCondition(attributeIndex, new DiscreteAttribute_1.DiscreteAttribute(valueList[i])), parameter, isStump));
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* The createChildrenForContinuous method creates an ArrayList of DecisionNodes as children and a partition with respect to
|
|
272
|
+
* continuous attribute and the given split value.
|
|
273
|
+
*
|
|
274
|
+
* @param data Instance list.
|
|
275
|
+
* @param attributeIndex Index of the attribute.
|
|
276
|
+
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
277
|
+
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
278
|
+
* @param splitValue Split value is used for partitioning.
|
|
279
|
+
*/
|
|
280
|
+
createChildrenForContinuous(data, attributeIndex, splitValue, parameter, isStump) {
|
|
281
|
+
let childrenData = new Partition_1.Partition(data, attributeIndex, splitValue + 0.0000001);
|
|
282
|
+
this.children = new Array();
|
|
283
|
+
this.children.push(new DecisionNode(childrenData.get(0), new DecisionCondition_1.DecisionCondition(attributeIndex, new ContinuousAttribute_1.ContinuousAttribute(splitValue), "<"), parameter, isStump));
|
|
284
|
+
this.children.push(new DecisionNode(childrenData.get(1), new DecisionCondition_1.DecisionCondition(attributeIndex, new ContinuousAttribute_1.ContinuousAttribute(splitValue), ">"), parameter, isStump));
|
|
285
|
+
}
|
|
286
|
+
/**
|
|
287
|
+
* The predict method takes an {@link Instance} as input and performs prediction on the DecisionNodes and returns the prediction
|
|
288
|
+
* for that instance.
|
|
289
|
+
*
|
|
290
|
+
* @param instance Instance to make prediction.
|
|
291
|
+
* @return The prediction for given instance.
|
|
292
|
+
*/
|
|
293
|
+
predict(instance) {
|
|
294
|
+
if (instance instanceof CompositeInstance_1.CompositeInstance) {
|
|
295
|
+
let possibleClassLabels = instance.getPossibleClassLabels();
|
|
296
|
+
let distribution = this.classLabelsDistribution;
|
|
297
|
+
let predictedClass = distribution.getMaxItem(possibleClassLabels);
|
|
298
|
+
if (this.leaf) {
|
|
299
|
+
return predictedClass;
|
|
323
300
|
}
|
|
324
301
|
else {
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
302
|
+
for (let node of this.children) {
|
|
303
|
+
if (node.condition.satisfy(instance)) {
|
|
304
|
+
let childPrediction = node.predict(instance);
|
|
305
|
+
if (childPrediction != undefined) {
|
|
306
|
+
return childPrediction;
|
|
307
|
+
}
|
|
308
|
+
else {
|
|
309
|
+
return predictedClass;
|
|
332
310
|
}
|
|
333
311
|
}
|
|
334
|
-
return this.classLabel;
|
|
335
312
|
}
|
|
313
|
+
return predictedClass;
|
|
336
314
|
}
|
|
337
315
|
}
|
|
338
|
-
|
|
339
|
-
* Recursive method that returns the posterior probability distribution of a given instance. If the node is a leaf
|
|
340
|
-
* node, it returns the class label distribution, otherwise it checks in which direction (child node) this instance
|
|
341
|
-
* is forwarded.
|
|
342
|
-
* @param instance Instance for which the posterior probability distribution is calculated.
|
|
343
|
-
* @return Posterior probability distribution for this instance.
|
|
344
|
-
*/
|
|
345
|
-
predictProbabilityDistribution(instance) {
|
|
316
|
+
else {
|
|
346
317
|
if (this.leaf) {
|
|
347
|
-
return this.
|
|
318
|
+
return this.classLabel;
|
|
348
319
|
}
|
|
349
320
|
else {
|
|
350
321
|
for (let node of this.children) {
|
|
351
322
|
if (node.condition.satisfy(instance)) {
|
|
352
|
-
return node.
|
|
323
|
+
return node.predict(instance);
|
|
353
324
|
}
|
|
354
325
|
}
|
|
355
|
-
return this.
|
|
326
|
+
return this.classLabel;
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
/**
|
|
331
|
+
* Recursive method that returns the posterior probability distribution of a given instance. If the node is a leaf
|
|
332
|
+
* node, it returns the class label distribution, otherwise it checks in which direction (child node) this instance
|
|
333
|
+
* is forwarded.
|
|
334
|
+
* @param instance Instance for which the posterior probability distribution is calculated.
|
|
335
|
+
* @return Posterior probability distribution for this instance.
|
|
336
|
+
*/
|
|
337
|
+
predictProbabilityDistribution(instance) {
|
|
338
|
+
if (this.leaf) {
|
|
339
|
+
return this.classLabelsDistribution.getProbabilityDistribution();
|
|
340
|
+
}
|
|
341
|
+
else {
|
|
342
|
+
for (let node of this.children) {
|
|
343
|
+
if (node.condition.satisfy(instance)) {
|
|
344
|
+
return node.predictProbabilityDistribution(instance);
|
|
345
|
+
}
|
|
356
346
|
}
|
|
347
|
+
return this.classLabelsDistribution.getProbabilityDistribution();
|
|
357
348
|
}
|
|
358
349
|
}
|
|
359
|
-
|
|
360
|
-
|
|
350
|
+
}
|
|
351
|
+
// Attach the DecisionNode class to this module's exports object so other modules can load it.
exports.DecisionNode = DecisionNode;
|
|
361
352
|
//# sourceMappingURL=DecisionNode.js.map
|