nlptoolkit-classification 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Classifier/Bagging.d.ts +1 -0
- package/dist/Classifier/Bagging.js +3 -0
- package/dist/Classifier/Bagging.js.map +1 -1
- package/dist/Classifier/C45.d.ts +1 -0
- package/dist/Classifier/C45.js +3 -0
- package/dist/Classifier/C45.js.map +1 -1
- package/dist/Classifier/C45Stump.d.ts +1 -0
- package/dist/Classifier/C45Stump.js +3 -0
- package/dist/Classifier/C45Stump.js.map +1 -1
- package/dist/Classifier/Classifier.d.ts +1 -0
- package/dist/Classifier/Classifier.js.map +1 -1
- package/dist/Classifier/DeepNetwork.d.ts +1 -0
- package/dist/Classifier/DeepNetwork.js +3 -0
- package/dist/Classifier/DeepNetwork.js.map +1 -1
- package/dist/Classifier/Dummy.d.ts +1 -0
- package/dist/Classifier/Dummy.js +3 -0
- package/dist/Classifier/Dummy.js.map +1 -1
- package/dist/Classifier/KMeans.d.ts +1 -0
- package/dist/Classifier/KMeans.js +3 -0
- package/dist/Classifier/KMeans.js.map +1 -1
- package/dist/Classifier/Knn.d.ts +1 -0
- package/dist/Classifier/Knn.js +3 -0
- package/dist/Classifier/Knn.js.map +1 -1
- package/dist/Classifier/Lda.d.ts +1 -0
- package/dist/Classifier/Lda.js +3 -0
- package/dist/Classifier/Lda.js.map +1 -1
- package/dist/Classifier/LinearPerceptron.d.ts +1 -0
- package/dist/Classifier/LinearPerceptron.js +3 -0
- package/dist/Classifier/LinearPerceptron.js.map +1 -1
- package/dist/Classifier/MultiLayerPerceptron.d.ts +1 -0
- package/dist/Classifier/MultiLayerPerceptron.js +3 -0
- package/dist/Classifier/MultiLayerPerceptron.js.map +1 -1
- package/dist/Classifier/NaiveBayes.d.ts +1 -0
- package/dist/Classifier/NaiveBayes.js +3 -0
- package/dist/Classifier/NaiveBayes.js.map +1 -1
- package/dist/Classifier/Qda.d.ts +1 -0
- package/dist/Classifier/Qda.js +3 -0
- package/dist/Classifier/Qda.js.map +1 -1
- package/dist/Classifier/RandomClassifier.d.ts +1 -0
- package/dist/Classifier/RandomClassifier.js +3 -0
- package/dist/Classifier/RandomClassifier.js.map +1 -1
- package/dist/Classifier/RandomForest.d.ts +1 -0
- package/dist/Classifier/RandomForest.js +3 -0
- package/dist/Classifier/RandomForest.js.map +1 -1
- package/dist/DataSet/DataDefinition.d.ts +5 -1
- package/dist/DataSet/DataDefinition.js +16 -1
- package/dist/DataSet/DataDefinition.js.map +1 -1
- package/dist/DataSet/DataSet.js +1 -1
- package/dist/DataSet/DataSet.js.map +1 -1
- package/dist/InstanceList/InstanceList.js +3 -0
- package/dist/InstanceList/InstanceList.js.map +1 -1
- package/dist/Model/DecisionTree/DecisionNode.d.ts +6 -3
- package/dist/Model/DecisionTree/DecisionNode.js +48 -3
- package/dist/Model/DecisionTree/DecisionNode.js.map +1 -1
- package/dist/Model/DecisionTree/DecisionTree.d.ts +4 -3
- package/dist/Model/DecisionTree/DecisionTree.js +14 -4
- package/dist/Model/DecisionTree/DecisionTree.js.map +1 -1
- package/dist/Model/DeepNetworkModel.d.ts +6 -3
- package/dist/Model/DeepNetworkModel.js +53 -30
- package/dist/Model/DeepNetworkModel.js.map +1 -1
- package/dist/Model/DummyModel.d.ts +2 -1
- package/dist/Model/DummyModel.js +22 -2
- package/dist/Model/DummyModel.js.map +1 -1
- package/dist/Model/GaussianModel.d.ts +4 -0
- package/dist/Model/GaussianModel.js +28 -1
- package/dist/Model/GaussianModel.js.map +1 -1
- package/dist/Model/KMeansModel.d.ts +3 -2
- package/dist/Model/KMeansModel.js +19 -6
- package/dist/Model/KMeansModel.js.map +1 -1
- package/dist/Model/KnnModel.d.ts +3 -2
- package/dist/Model/KnnModel.js +18 -6
- package/dist/Model/KnnModel.js.map +1 -1
- package/dist/Model/LdaModel.d.ts +5 -2
- package/dist/Model/LdaModel.js +28 -6
- package/dist/Model/LdaModel.js.map +1 -1
- package/dist/Model/LinearPerceptronModel.d.ts +5 -2
- package/dist/Model/LinearPerceptronModel.js +38 -10
- package/dist/Model/LinearPerceptronModel.js.map +1 -1
- package/dist/Model/Model.d.ts +7 -0
- package/dist/Model/Model.js +40 -1
- package/dist/Model/Model.js.map +1 -1
- package/dist/Model/MultiLayerPerceptronModel.d.ts +5 -2
- package/dist/Model/MultiLayerPerceptronModel.js +33 -13
- package/dist/Model/MultiLayerPerceptronModel.js.map +1 -1
- package/dist/Model/NaiveBayesModel.d.ts +2 -1
- package/dist/Model/NaiveBayesModel.js +18 -6
- package/dist/Model/NaiveBayesModel.js.map +1 -1
- package/dist/Model/NeuralNetworkModel.d.ts +4 -1
- package/dist/Model/NeuralNetworkModel.js +27 -6
- package/dist/Model/NeuralNetworkModel.js.map +1 -1
- package/dist/Model/QdaModel.d.ts +3 -2
- package/dist/Model/QdaModel.js +23 -5
- package/dist/Model/QdaModel.js.map +1 -1
- package/dist/Model/RandomModel.d.ts +4 -2
- package/dist/Model/RandomModel.js +20 -4
- package/dist/Model/RandomModel.js.map +1 -1
- package/dist/Model/TreeEnsembleModel.d.ts +3 -2
- package/dist/Model/TreeEnsembleModel.js +19 -4
- package/dist/Model/TreeEnsembleModel.js.map +1 -1
- package/models/bagging-bupa.txt +25666 -0
- package/models/bagging-car.txt +78923 -0
- package/models/bagging-dermatology.txt +7276 -0
- package/models/bagging-iris.txt +3131 -0
- package/models/bagging-tictactoe.txt +61186 -0
- package/models/c45-bupa.txt +3 -0
- package/models/c45-car.txt +331 -0
- package/models/c45-carIndexed.txt +188 -0
- package/models/c45-dermatology.txt +43 -0
- package/models/c45-iris.txt +13 -0
- package/models/c45-tictactoe.txt +270 -0
- package/models/c45-tictactoeIndexed.txt +173 -0
- package/models/c45stump-bupa.txt +8 -0
- package/models/c45stump-car.txt +11 -0
- package/models/c45stump-chess.txt +8 -0
- package/models/c45stump-dermatology.txt +8 -0
- package/models/c45stump-iris.txt +8 -0
- package/models/c45stump-nursery.txt +11 -0
- package/models/c45stump-tictactoe.txt +11 -0
- package/models/deepNetwork-bupa.txt +40 -0
- package/models/deepNetwork-dermatology.txt +37 -0
- package/models/deepNetwork-iris.txt +22 -0
- package/models/dummy-bupa.txt +3 -0
- package/models/dummy-car.txt +5 -0
- package/models/dummy-chess.txt +19 -0
- package/models/dummy-dermatology.txt +7 -0
- package/models/dummy-iris.txt +4 -0
- package/models/dummy-nursery.txt +6 -0
- package/models/dummy-tictactoe.txt +3 -0
- package/models/kMeans-bupa.txt +7 -0
- package/models/kMeans-car.txt +11 -0
- package/models/kMeans-chess.txt +39 -0
- package/models/kMeans-dermatology.txt +15 -0
- package/models/kMeans-iris.txt +9 -0
- package/models/kMeans-nursery.txt +13 -0
- package/models/kMeans-tictactoe.txt +7 -0
- package/models/knn-bupa.txt +348 -0
- package/models/knn-car.txt +1731 -0
- package/models/knn-dermatology.txt +369 -0
- package/models/knn-iris.txt +153 -0
- package/models/knn-tictactoe.txt +961 -0
- package/models/lda-bupa.txt +7 -0
- package/models/lda-dermatology.txt +19 -0
- package/models/lda-iris.txt +10 -0
- package/models/linearPerceptron-bupa.txt +6 -0
- package/models/linearPerceptron-dermatology.txt +14 -0
- package/models/linearPerceptron-iris.txt +8 -0
- package/models/multiLayerPerceptron-bupa.txt +38 -0
- package/models/multiLayerPerceptron-dermatology.txt +36 -0
- package/models/multiLayerPerceptron-iris.txt +13 -0
- package/models/naiveBayes-bupa.txt +7 -0
- package/models/naiveBayes-dermatology.txt +19 -0
- package/models/naiveBayes-iris.txt +10 -0
- package/models/qda-bupa.txt +23 -0
- package/models/qda-iris.txt +28 -0
- package/models/random-bupa.txt +4 -0
- package/models/random-car.txt +6 -0
- package/models/random-chess.txt +20 -0
- package/models/random-dermatology.txt +8 -0
- package/models/random-iris.txt +5 -0
- package/models/random-nursery.txt +7 -0
- package/models/random-tictactoe.txt +4 -0
- package/models/randomforest-bupa.txt +25666 -0
- package/models/randomforest-car.txt +78923 -0
- package/models/randomforest-carIndexed.txt +38786 -0
- package/models/randomforest-dermatology.txt +7276 -0
- package/models/randomforest-iris.txt +3131 -0
- package/models/randomforest-tictactoe.txt +61186 -0
- package/package.json +3 -3
- package/source/Classifier/Bagging.ts +3 -0
- package/source/Classifier/C45.ts +4 -0
- package/source/Classifier/C45Stump.ts +4 -0
- package/source/Classifier/Classifier.ts +1 -0
- package/source/Classifier/DeepNetwork.ts +4 -0
- package/source/Classifier/Dummy.ts +4 -0
- package/source/Classifier/KMeans.ts +4 -0
- package/source/Classifier/Knn.ts +5 -1
- package/source/Classifier/Lda.ts +4 -0
- package/source/Classifier/LinearPerceptron.ts +4 -0
- package/source/Classifier/MultiLayerPerceptron.ts +4 -0
- package/source/Classifier/NaiveBayes.ts +4 -0
- package/source/Classifier/Qda.ts +4 -0
- package/source/Classifier/RandomClassifier.ts +4 -0
- package/source/Classifier/RandomForest.ts +4 -0
- package/source/DataSet/DataDefinition.ts +19 -1
- package/source/DataSet/DataSet.ts +1 -1
- package/source/InstanceList/InstanceList.ts +5 -0
- package/source/Model/DecisionTree/DecisionNode.ts +69 -29
- package/source/Model/DecisionTree/DecisionTree.ts +14 -4
- package/source/Model/DeepNetworkModel.ts +40 -16
- package/source/Model/DummyModel.ts +20 -2
- package/source/Model/GaussianModel.ts +29 -0
- package/source/Model/KMeansModel.ts +18 -6
- package/source/Model/KnnModel.ts +19 -6
- package/source/Model/LdaModel.ts +29 -6
- package/source/Model/LinearPerceptronModel.ts +52 -25
- package/source/Model/Model.ts +43 -0
- package/source/Model/MultiLayerPerceptronModel.ts +34 -12
- package/source/Model/NaiveBayesModel.ts +19 -7
- package/source/Model/NeuralNetworkModel.ts +31 -7
- package/source/Model/QdaModel.ts +22 -4
- package/source/Model/RandomModel.ts +21 -4
- package/source/Model/TreeEnsembleModel.ts +17 -3
- package/tests/Classifier/C45Test.ts +98 -0
- package/tests/Classifier/DeepNetworkTest.ts +52 -0
- package/tests/Classifier/DummyTest.ts +89 -0
- package/tests/Classifier/KMeansTest.ts +89 -0
- package/tests/Classifier/KnnTest.ts +68 -0
- package/tests/Classifier/LdaTest.ts +45 -0
- package/tests/Classifier/LinearPerceptronTest.ts +49 -0
- package/tests/Classifier/MultiLayerPerceptronTest.ts +51 -0
- package/tests/Classifier/NaiveBayesTest.ts +45 -0
- package/tests/Classifier/QdaTest.ts +35 -0
- package/tests/Classifier/RandomForestTest.ts +85 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nlptoolkit-classification",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "Classification Library",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.js",
|
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"nlptoolkit-datastructure": "^1.0.4",
|
|
28
|
-
"nlptoolkit-math": "^1.0.
|
|
28
|
+
"nlptoolkit-math": "^1.0.3",
|
|
29
29
|
"nlptoolkit-sampling": "^1.0.2",
|
|
30
|
-
"nlptoolkit-util": "^1.0.
|
|
30
|
+
"nlptoolkit-util": "^1.0.8"
|
|
31
31
|
}
|
|
32
32
|
}
|
package/source/Classifier/C45.ts
CHANGED
|
@@ -13,6 +13,7 @@ export abstract class Classifier {
|
|
|
13
13
|
protected model: Model
|
|
14
14
|
|
|
15
15
|
abstract train(trainSet: InstanceList, parameters: Parameter):void
|
|
16
|
+
abstract loadModel(fileName: string): void
|
|
16
17
|
|
|
17
18
|
/**
|
|
18
19
|
* Checks given instance's attribute and returns true if it is a discrete indexed attribute, false otherwise.
|
package/source/Classifier/Knn.ts
CHANGED
|
@@ -17,5 +17,9 @@ export class Knn extends Classifier{
|
|
|
17
17
|
train(trainSet: InstanceList, parameters: Parameter): void {
|
|
18
18
|
this.model = new KnnModel(trainSet, (<KnnParameter> parameters).getK(), (<KnnParameter> parameters).getDistanceMetric());
|
|
19
19
|
}
|
|
20
|
-
|
|
20
|
+
|
|
21
|
+
loadModel(fileName: string): void{
|
|
22
|
+
this.model = new KnnModel(fileName)
|
|
23
|
+
}
|
|
24
|
+
|
|
21
25
|
}
|
package/source/Classifier/Lda.ts
CHANGED
|
@@ -20,4 +20,8 @@ export class LinearPerceptron extends Classifier{
|
|
|
20
20
|
this.model = new LinearPerceptronModel(partition.get(1), partition.get(0), <LinearPerceptronParameter> parameters);
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
loadModel(fileName: string): void{
|
|
24
|
+
this.model = new LinearPerceptronModel(fileName)
|
|
25
|
+
}
|
|
26
|
+
|
|
23
27
|
}
|
|
@@ -20,4 +20,8 @@ export class MultiLayerPerceptron extends Classifier{
|
|
|
20
20
|
this.model = new MultiLayerPerceptronModel(partition.get(1), partition.get(0), <MultiLayerPerceptronParameter> parameters);
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
loadModel(fileName: string): void{
|
|
24
|
+
this.model = new MultiLayerPerceptronModel(fileName)
|
|
25
|
+
}
|
|
26
|
+
|
|
23
27
|
}
|
package/source/Classifier/Qda.ts
CHANGED
|
@@ -4,16 +4,34 @@ import {FeatureSubSet} from "../FeatureSelection/FeatureSubSet";
|
|
|
4
4
|
export class DataDefinition {
|
|
5
5
|
|
|
6
6
|
private attributeTypes: Array<AttributeType>
|
|
7
|
+
private readonly attributeValueList: Array<Array<String>>
|
|
7
8
|
|
|
8
9
|
/**
|
|
9
10
|
* Constructor for creating a new {@link DataDefinition} with given attribute types.
|
|
10
11
|
*
|
|
11
12
|
* @param attributeTypes Attribute types of the data definition.
|
|
13
|
+
* @param attributeValueList Array of array of strings to represent all possible values of discrete features.
|
|
12
14
|
*/
|
|
13
|
-
constructor(attributeTypes?: Array<AttributeType>) {
|
|
15
|
+
constructor(attributeTypes?: Array<AttributeType>, attributeValueList?: Array<Array<String>>) {
|
|
14
16
|
if (attributeTypes != undefined){
|
|
15
17
|
this.attributeTypes = attributeTypes
|
|
18
|
+
if (attributeValueList != undefined){
|
|
19
|
+
this.attributeValueList = attributeValueList;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
numberOfValues(attributeIndex: number): number{
|
|
25
|
+
return this.attributeValueList[attributeIndex].length
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
featureValueIndex(attributeIndex: number, value: String): number{
|
|
29
|
+
for (let i = 0; i < this.attributeValueList[attributeIndex].length; i++){
|
|
30
|
+
if (this.attributeValueList[attributeIndex][i] == value){
|
|
31
|
+
return i
|
|
32
|
+
}
|
|
16
33
|
}
|
|
34
|
+
return -1
|
|
17
35
|
}
|
|
18
36
|
|
|
19
37
|
/**
|
|
@@ -25,7 +25,7 @@ export class DataSet {
|
|
|
25
25
|
*/
|
|
26
26
|
constructor(definition?: any, separator?: string, fileName?: string) {
|
|
27
27
|
if (definition != undefined){
|
|
28
|
-
if (definition instanceof DataDefinition){
|
|
28
|
+
if (definition instanceof DataDefinition && fileName == undefined){
|
|
29
29
|
this.definition = definition
|
|
30
30
|
} else {
|
|
31
31
|
if (separator == undefined){
|
|
@@ -50,6 +50,11 @@ export class InstanceList {
|
|
|
50
50
|
case AttributeType.DISCRETE:
|
|
51
51
|
current.addAttribute(new DiscreteAttribute(attributeList[i]));
|
|
52
52
|
break;
|
|
53
|
+
case AttributeType.DISCRETE_INDEXED:
|
|
54
|
+
current.addAttribute(new DiscreteIndexedAttribute(attributeList[i],
|
|
55
|
+
definition.featureValueIndex(i, attributeList[i]),
|
|
56
|
+
definition.numberOfValues(i)))
|
|
57
|
+
break;
|
|
53
58
|
case AttributeType.BINARY:
|
|
54
59
|
if (attributeList[i].toLowerCase() == "yes" || attributeList[i] == "1"){
|
|
55
60
|
current.addAttribute(new BinaryAttribute(true));
|
|
@@ -9,45 +9,24 @@ import {DiscreteDistribution} from "nlptoolkit-math/dist/DiscreteDistribution";
|
|
|
9
9
|
import {Partition} from "../../InstanceList/Partition";
|
|
10
10
|
import {Instance} from "../../Instance/Instance";
|
|
11
11
|
import {CompositeInstance} from "../../Instance/CompositeInstance";
|
|
12
|
-
import {Random} from "nlptoolkit-util/dist/Random";
|
|
13
12
|
import {RandomArray} from "nlptoolkit-util/dist/RandomArray";
|
|
13
|
+
import {FileContents} from "nlptoolkit-util/dist/FileContents";
|
|
14
14
|
|
|
15
15
|
export class DecisionNode {
|
|
16
16
|
|
|
17
17
|
children: Array<DecisionNode> = undefined
|
|
18
18
|
private EPSILON: number = 0.0000000001;
|
|
19
|
-
private
|
|
20
|
-
private
|
|
19
|
+
private data : InstanceList = undefined
|
|
20
|
+
private classLabel : string = undefined
|
|
21
21
|
leaf: boolean = false
|
|
22
22
|
private condition: DecisionCondition = undefined
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
* The DecisionNode method takes {@link InstanceList} data as input, and then it sets the class label parameter by finding
|
|
26
|
-
* the most occurred class label of given data, it then gets distinct class labels as class labels ArrayList. Later, it adds ordered
|
|
27
|
-
* indices to the indexList and shuffles them randomly. Then, it gets the class distribution of given data and finds the best entropy value
|
|
28
|
-
* of these class distribution.
|
|
29
|
-
* <p>
|
|
30
|
-
* If an attribute of given data is {@link DiscreteIndexedAttribute}, it creates a Distribution according to discrete indexed attribute class distribution
|
|
31
|
-
* and finds the entropy. If it is better than the last best entropy it reassigns the best entropy, best attribute and best split value according to
|
|
32
|
-
* the newly founded best entropy's index. At the end, it also adds new distribution to the class distribution .
|
|
33
|
-
* <p>
|
|
34
|
-
* If an attribute of given data is {@link DiscreteAttribute}, it directly finds the entropy. If it is better than the last best entropy it
|
|
35
|
-
* reassigns the best entropy, best attribute and best split value according to the newly founded best entropy's index.
|
|
36
|
-
* <p>
|
|
37
|
-
* If an attribute of given data is {@link ContinuousAttribute}, it creates two distributions; left and right according to class distribution
|
|
38
|
-
* and discrete distribution respectively, and finds the entropy. If it is better than the last best entropy it reassigns the best entropy,
|
|
39
|
-
* best attribute and best split value according to the newly founded best entropy's index. At the end, it also adds new distribution to
|
|
40
|
-
* the right distribution and removes from left distribution .
|
|
41
|
-
*
|
|
42
|
-
* @param data {@link InstanceList} input.
|
|
43
|
-
* @param condition {@link DecisionCondition} to check.
|
|
44
|
-
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
45
|
-
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
46
|
-
*/
|
|
47
|
-
constructor(data: InstanceList, condition: DecisionCondition, parameter: RandomForestParameter, isStump: boolean) {
|
|
24
|
+
constructor1(data: InstanceList, condition?: DecisionCondition | number, parameter?: RandomForestParameter, isStump?: boolean){
|
|
48
25
|
let bestAttribute = -1
|
|
49
26
|
let bestSplitValue = 0
|
|
50
|
-
|
|
27
|
+
if (condition instanceof DecisionCondition){
|
|
28
|
+
this.condition = condition;
|
|
29
|
+
}
|
|
51
30
|
this.data = data;
|
|
52
31
|
this.classLabel = Model.getMaximum(data.getClassLabels());
|
|
53
32
|
this.leaf = true;
|
|
@@ -140,6 +119,67 @@ export class DecisionNode {
|
|
|
140
119
|
}
|
|
141
120
|
}
|
|
142
121
|
|
|
122
|
+
constructor2(contents: FileContents){
|
|
123
|
+
let items = contents.readLine().split(" ")
|
|
124
|
+
if (items[0] != "-1"){
|
|
125
|
+
if (items[1][0] == '='){
|
|
126
|
+
this.condition = new DecisionCondition(parseInt(items[0]), new DiscreteAttribute(items[2]), items[1][0])
|
|
127
|
+
} else {
|
|
128
|
+
if (items[1][0] == ':'){
|
|
129
|
+
this.condition = new DecisionCondition(parseInt(items[0]), new DiscreteIndexedAttribute("", parseInt(items[2]), parseInt(items[3])), '=')
|
|
130
|
+
} else {
|
|
131
|
+
this.condition = new DecisionCondition(parseInt(items[0]), new ContinuousAttribute(parseFloat(items[2])), items[1][0])
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
} else {
|
|
135
|
+
this.condition = null
|
|
136
|
+
}
|
|
137
|
+
let numberOfChildren = parseInt(contents.readLine())
|
|
138
|
+
if (numberOfChildren != 0){
|
|
139
|
+
this.leaf = false
|
|
140
|
+
this.children = new Array<DecisionNode>()
|
|
141
|
+
for (let i = 0; i < numberOfChildren; i++){
|
|
142
|
+
this.children.push(new DecisionNode(contents))
|
|
143
|
+
}
|
|
144
|
+
} else {
|
|
145
|
+
this.leaf = true
|
|
146
|
+
this.classLabel = contents.readLine()
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* The DecisionNode method takes {@link InstanceList} data as input, and then it sets the class label parameter by finding
|
|
152
|
+
* the most occurred class label of given data, it then gets distinct class labels as class labels ArrayList. Later, it adds ordered
|
|
153
|
+
* indices to the indexList and shuffles them randomly. Then, it gets the class distribution of given data and finds the best entropy value
|
|
154
|
+
* of these class distribution.
|
|
155
|
+
* <p>
|
|
156
|
+
* If an attribute of given data is {@link DiscreteIndexedAttribute}, it creates a Distribution according to discrete indexed attribute class distribution
|
|
157
|
+
* and finds the entropy. If it is better than the last best entropy it reassigns the best entropy, best attribute and best split value according to
|
|
158
|
+
* the newly founded best entropy's index. At the end, it also adds new distribution to the class distribution .
|
|
159
|
+
* <p>
|
|
160
|
+
* If an attribute of given data is {@link DiscreteAttribute}, it directly finds the entropy. If it is better than the last best entropy it
|
|
161
|
+
* reassigns the best entropy, best attribute and best split value according to the newly founded best entropy's index.
|
|
162
|
+
* <p>
|
|
163
|
+
* If an attribute of given data is {@link ContinuousAttribute}, it creates two distributions; left and right according to class distribution
|
|
164
|
+
* and discrete distribution respectively, and finds the entropy. If it is better than the last best entropy it reassigns the best entropy,
|
|
165
|
+
* best attribute and best split value according to the newly founded best entropy's index. At the end, it also adds new distribution to
|
|
166
|
+
* the right distribution and removes from left distribution .
|
|
167
|
+
*
|
|
168
|
+
* @param data {@link InstanceList} input.
|
|
169
|
+
* @param condition {@link DecisionCondition} to check.
|
|
170
|
+
* @param parameter RandomForestParameter like seed, ensembleSize, attributeSubsetSize.
|
|
171
|
+
* @param isStump Refers to decision trees with only 1 splitting rule.
|
|
172
|
+
*/
|
|
173
|
+
constructor(data: InstanceList | FileContents, condition?: DecisionCondition, parameter?: RandomForestParameter, isStump?: boolean) {
|
|
174
|
+
if (data instanceof InstanceList && (condition instanceof DecisionCondition || condition == undefined)){
|
|
175
|
+
this.constructor1(data, condition, parameter, isStump)
|
|
176
|
+
} else {
|
|
177
|
+
if (data instanceof FileContents){
|
|
178
|
+
this.constructor2(data)
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
143
183
|
/**
|
|
144
184
|
* The entropyForDiscreteAttribute method takes an attributeIndex and creates an ArrayList of DiscreteDistribution.
|
|
145
185
|
* Then loops through the distributions and calculates the total entropy.
|
|
@@ -199,7 +239,7 @@ export class DecisionNode {
|
|
|
199
239
|
* @param splitValue Split value is used for partitioning.
|
|
200
240
|
*/
|
|
201
241
|
private createChildrenForContinuous(attributeIndex: number, splitValue: number, parameter: RandomForestParameter, isStump: boolean){
|
|
202
|
-
let childrenData = new Partition(this.data, attributeIndex, splitValue);
|
|
242
|
+
let childrenData = new Partition(this.data, attributeIndex, splitValue + 0.0000001);
|
|
203
243
|
this.children = new Array<DecisionNode>();
|
|
204
244
|
this.children.push(new DecisionNode(childrenData.get(0), new DecisionCondition(attributeIndex, new ContinuousAttribute(splitValue), "<"), parameter, isStump));
|
|
205
245
|
this.children.push(new DecisionNode(childrenData.get(1), new DecisionCondition(attributeIndex, new ContinuousAttribute(splitValue), ">"), parameter, isStump));
|
|
@@ -3,19 +3,26 @@ import {DecisionNode} from "./DecisionNode";
|
|
|
3
3
|
import {Instance} from "../../Instance/Instance";
|
|
4
4
|
import {CompositeInstance} from "../../Instance/CompositeInstance";
|
|
5
5
|
import {InstanceList} from "../../InstanceList/InstanceList";
|
|
6
|
+
import * as fs from "fs";
|
|
7
|
+
import {FileContents} from "nlptoolkit-util/dist/FileContents";
|
|
6
8
|
|
|
7
9
|
export class DecisionTree extends ValidatedModel{
|
|
8
10
|
|
|
9
|
-
private root: DecisionNode
|
|
11
|
+
private readonly root: DecisionNode
|
|
10
12
|
|
|
11
13
|
/**
|
|
12
14
|
* Constructor that sets root node of the decision tree.
|
|
13
15
|
*
|
|
14
|
-
* @param root DecisionNode type input.
|
|
16
|
+
* @param rootOrFileName DecisionNode type input or fileName
|
|
15
17
|
*/
|
|
16
|
-
constructor(root: DecisionNode) {
|
|
18
|
+
constructor(rootOrFileName: DecisionNode | string) {
|
|
17
19
|
super();
|
|
18
|
-
|
|
20
|
+
if (rootOrFileName instanceof DecisionNode){
|
|
21
|
+
this.root = rootOrFileName
|
|
22
|
+
} else {
|
|
23
|
+
let contents = new FileContents(rootOrFileName)
|
|
24
|
+
this.root = new DecisionNode(contents)
|
|
25
|
+
}
|
|
19
26
|
}
|
|
20
27
|
|
|
21
28
|
/**
|
|
@@ -37,6 +44,9 @@ export class DecisionTree extends ValidatedModel{
|
|
|
37
44
|
return this.root.predictProbabilityDistribution(instance)
|
|
38
45
|
}
|
|
39
46
|
|
|
47
|
+
saveTxt(fileName: string){
|
|
48
|
+
}
|
|
49
|
+
|
|
40
50
|
/**
|
|
41
51
|
* The prune method takes a {@link DecisionNode} and an {@link InstanceList} as inputs. It checks the classification performance
|
|
42
52
|
* of given InstanceList before pruning, i.e making a node leaf, and after pruning. If the after performance is better than the
|
|
@@ -6,12 +6,13 @@ import {InstanceList} from "../InstanceList/InstanceList";
|
|
|
6
6
|
import {Vector} from "nlptoolkit-math/dist/Vector";
|
|
7
7
|
import {ClassificationPerformance} from "../Performance/ClassificationPerformance";
|
|
8
8
|
import {Random} from "nlptoolkit-util/dist/Random";
|
|
9
|
+
import {FileContents} from "nlptoolkit-util/dist/FileContents";
|
|
9
10
|
|
|
10
11
|
export class DeepNetworkModel extends NeuralNetworkModel{
|
|
11
12
|
|
|
12
13
|
private weights: Array<Matrix>
|
|
13
14
|
private hiddenLayerSize: number
|
|
14
|
-
private
|
|
15
|
+
private activationFunction: ActivationFunction
|
|
15
16
|
|
|
16
17
|
/**
|
|
17
18
|
* The allocateWeights method takes {@link DeepNetworkParameter}s as an input. First it adds random weights to the {@link Array}
|
|
@@ -44,21 +45,7 @@ export class DeepNetworkModel extends NeuralNetworkModel{
|
|
|
44
45
|
return bestWeights;
|
|
45
46
|
}
|
|
46
47
|
|
|
47
|
-
|
|
48
|
-
* Constructor that takes two {@link InstanceList} train set and validation set and {@link DeepNetworkParameter} as inputs.
|
|
49
|
-
* First it sets the class labels, their sizes as K and the size of the continuous attributes as d of given train set and
|
|
50
|
-
* allocates weights and sets the best weights. At each epoch, it shuffles the train set and loops through the each item of that train set,
|
|
51
|
-
* it multiplies the weights Matrix with input Vector than applies the sigmoid function and stores the result as hidden and add bias.
|
|
52
|
-
* Then updates weights and at the end it compares the performance of these weights with validation set. It updates the bestClassificationPerformance and
|
|
53
|
-
* bestWeights according to the current situation. At the end it updates the learning rate via etaDecrease value and finishes
|
|
54
|
-
* with clearing the weights.
|
|
55
|
-
*
|
|
56
|
-
* @param trainSet {@link InstanceList} to be used as trainSet.
|
|
57
|
-
* @param validationSet {@link InstanceList} to be used as validationSet.
|
|
58
|
-
* @param parameters {@link DeepNetworkParameter} input.
|
|
59
|
-
*/
|
|
60
|
-
constructor(trainSet: InstanceList, validationSet: InstanceList, parameters: DeepNetworkParameter) {
|
|
61
|
-
super(trainSet);
|
|
48
|
+
constructor1(trainSet: InstanceList, validationSet: InstanceList, parameters: DeepNetworkParameter){
|
|
62
49
|
let tmpHidden = new Vector(0, 0);
|
|
63
50
|
let deltaWeights = new Array<Matrix>();
|
|
64
51
|
let hidden = new Array<Vector>();
|
|
@@ -136,6 +123,40 @@ export class DeepNetworkModel extends NeuralNetworkModel{
|
|
|
136
123
|
}
|
|
137
124
|
}
|
|
138
125
|
|
|
126
|
+
constructor2(fileName: string){
|
|
127
|
+
let input = new FileContents(fileName)
|
|
128
|
+
this.activationFunction = this.loadActivationFunction(input)
|
|
129
|
+
this.loadClassLabels(input)
|
|
130
|
+
this.hiddenLayerSize = parseInt(input.readLine())
|
|
131
|
+
this.weights = new Array<Matrix>()
|
|
132
|
+
for (let i = 0; i < this.hiddenLayerSize + 1; i++){
|
|
133
|
+
this.weights.push(this.loadMatrix(input))
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Constructor that takes two {@link InstanceList} train set and validation set and {@link DeepNetworkParameter} as inputs.
|
|
139
|
+
* First it sets the class labels, their sizes as K and the size of the continuous attributes as d of given train set and
|
|
140
|
+
* allocates weights and sets the best weights. At each epoch, it shuffles the train set and loops through the each item of that train set,
|
|
141
|
+
* it multiplies the weights Matrix with input Vector than applies the sigmoid function and stores the result as hidden and add bias.
|
|
142
|
+
* Then updates weights and at the end it compares the performance of these weights with validation set. It updates the bestClassificationPerformance and
|
|
143
|
+
* bestWeights according to the current situation. At the end it updates the learning rate via etaDecrease value and finishes
|
|
144
|
+
* with clearing the weights.
|
|
145
|
+
*
|
|
146
|
+
* @param trainSetOrFileName {@link InstanceList} to be used as trainSet.
|
|
147
|
+
* @param validationSet {@link InstanceList} to be used as validationSet.
|
|
148
|
+
* @param parameters {@link DeepNetworkParameter} input.
|
|
149
|
+
*/
|
|
150
|
+
constructor(trainSetOrFileName: InstanceList | string, validationSet?: InstanceList, parameters?: DeepNetworkParameter) {
|
|
151
|
+
if (trainSetOrFileName instanceof InstanceList){
|
|
152
|
+
super(trainSetOrFileName)
|
|
153
|
+
this.constructor1(trainSetOrFileName, validationSet, parameters)
|
|
154
|
+
} else {
|
|
155
|
+
super()
|
|
156
|
+
this.constructor2(trainSetOrFileName)
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
139
160
|
/**
|
|
140
161
|
* The calculateOutput method loops size of the weights times and calculate one hidden layer at a time and adds bias term.
|
|
141
162
|
* At the end it updates the output y value.
|
|
@@ -154,4 +175,7 @@ export class DeepNetworkModel extends NeuralNetworkModel{
|
|
|
154
175
|
this.y = this.weights[this.weights.length - 1].multiplyWithVectorFromRight(hiddenBiased);
|
|
155
176
|
}
|
|
156
177
|
|
|
178
|
+
saveTxt(fileName: string){
|
|
179
|
+
}
|
|
180
|
+
|
|
157
181
|
}
|
|
@@ -3,6 +3,7 @@ import {Instance} from "../Instance/Instance";
|
|
|
3
3
|
import {DiscreteDistribution} from "nlptoolkit-math/dist/DiscreteDistribution";
|
|
4
4
|
import {InstanceList} from "../InstanceList/InstanceList";
|
|
5
5
|
import {CompositeInstance} from "../Instance/CompositeInstance";
|
|
6
|
+
import {FileContents} from "nlptoolkit-util/dist/FileContents";
|
|
6
7
|
|
|
7
8
|
export class DummyModel extends Model{
|
|
8
9
|
|
|
@@ -13,9 +14,23 @@ export class DummyModel extends Model{
|
|
|
13
14
|
*
|
|
14
15
|
* @param trainSet {@link InstanceList} which is used to get the class distribution.
|
|
15
16
|
*/
|
|
16
|
-
constructor(trainSet: InstanceList) {
|
|
17
|
+
constructor(trainSet: InstanceList | string) {
|
|
17
18
|
super();
|
|
18
|
-
|
|
19
|
+
if (trainSet instanceof InstanceList){
|
|
20
|
+
this.distribution = trainSet.classDistribution();
|
|
21
|
+
} else {
|
|
22
|
+
let input = new FileContents(trainSet)
|
|
23
|
+
this.distribution = new DiscreteDistribution()
|
|
24
|
+
let size = parseInt(input.readLine())
|
|
25
|
+
for (let i = 0; i < size; i++){
|
|
26
|
+
let line = input.readLine()
|
|
27
|
+
let items = line.split(" ")
|
|
28
|
+
let count = parseInt(items[1])
|
|
29
|
+
for (let j = 0; j < count; j++){
|
|
30
|
+
this.distribution.addItem(items[0])
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
19
34
|
}
|
|
20
35
|
|
|
21
36
|
/**
|
|
@@ -37,4 +52,7 @@ export class DummyModel extends Model{
|
|
|
37
52
|
return this.distribution.getProbabilityDistribution();
|
|
38
53
|
}
|
|
39
54
|
|
|
55
|
+
saveTxt(fileName: string){
|
|
56
|
+
}
|
|
57
|
+
|
|
40
58
|
}
|
|
@@ -2,6 +2,8 @@ import {DiscreteDistribution} from "nlptoolkit-math/dist/DiscreteDistribution";
|
|
|
2
2
|
import {Instance} from "../Instance/Instance";
|
|
3
3
|
import {ValidatedModel} from "./ValidatedModel";
|
|
4
4
|
import {CompositeInstance} from "../Instance/CompositeInstance";
|
|
5
|
+
import {FileContents} from "nlptoolkit-util/dist/FileContents";
|
|
6
|
+
import {Vector} from "nlptoolkit-math/dist/Vector";
|
|
5
7
|
|
|
6
8
|
export abstract class GaussianModel extends ValidatedModel{
|
|
7
9
|
|
|
@@ -52,6 +54,33 @@ export abstract class GaussianModel extends ValidatedModel{
|
|
|
52
54
|
return predictedClass;
|
|
53
55
|
}
|
|
54
56
|
|
|
57
|
+
loadPriorDistribution(input: FileContents): number{
|
|
58
|
+
let size = parseInt(input.readLine())
|
|
59
|
+
this.priorDistribution = new DiscreteDistribution()
|
|
60
|
+
for (let i = 0; i < size; i++){
|
|
61
|
+
let line = input.readLine()
|
|
62
|
+
let items = line.split(" ");
|
|
63
|
+
for (let j = 0; j < parseInt(items[1]); j++){
|
|
64
|
+
this.priorDistribution.addItem(items[0])
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
return size
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
loadVectors(input: FileContents, size: number): Map<string, Vector>{
|
|
71
|
+
let map = new Map<string, Vector>()
|
|
72
|
+
for (let i = 0; i < size; i++){
|
|
73
|
+
let line = input.readLine()
|
|
74
|
+
let items = line.split(" ")
|
|
75
|
+
let vector = new Vector(parseInt(items[1]), 0)
|
|
76
|
+
for (let j = 2; j < items.length; j++){
|
|
77
|
+
vector.setValue(j - 2, parseFloat(items[j]))
|
|
78
|
+
}
|
|
79
|
+
map.set(items[0], vector)
|
|
80
|
+
}
|
|
81
|
+
return map
|
|
82
|
+
}
|
|
83
|
+
|
|
55
84
|
predictProbability(instance: Instance): Map<string, number> {
|
|
56
85
|
return undefined;
|
|
57
86
|
}
|
|
@@ -3,6 +3,8 @@ import {InstanceList} from "../InstanceList/InstanceList";
|
|
|
3
3
|
import {DistanceMetric} from "../DistanceMetric/DistanceMetric";
|
|
4
4
|
import {Instance} from "../Instance/Instance";
|
|
5
5
|
import {DiscreteDistribution} from "nlptoolkit-math/dist/DiscreteDistribution";
|
|
6
|
+
import {FileContents} from "nlptoolkit-util/dist/FileContents";
|
|
7
|
+
import {EuclidianDistance} from "../DistanceMetric/EuclidianDistance";
|
|
6
8
|
|
|
7
9
|
export class KMeansModel extends GaussianModel{
|
|
8
10
|
|
|
@@ -12,15 +14,22 @@ export class KMeansModel extends GaussianModel{
|
|
|
12
14
|
/**
|
|
13
15
|
* The constructor that sets the classMeans, priorDistribution and distanceMetric according to given inputs.
|
|
14
16
|
*
|
|
15
|
-
* @param
|
|
17
|
+
* @param priorDistributionOrFileName {@link DiscreteDistribution} input.
|
|
16
18
|
* @param classMeans {@link InstanceList} of class means.
|
|
17
19
|
* @param distanceMetric {@link DistanceMetric} input.
|
|
18
20
|
*/
|
|
19
|
-
constructor(
|
|
20
|
-
super()
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
constructor(priorDistributionOrFileName: DiscreteDistribution | string, classMeans?: InstanceList, distanceMetric?: DistanceMetric) {
|
|
22
|
+
super()
|
|
23
|
+
if (priorDistributionOrFileName instanceof DiscreteDistribution){
|
|
24
|
+
this.classMeans = classMeans
|
|
25
|
+
this.priorDistribution = priorDistributionOrFileName
|
|
26
|
+
this.distanceMetric = distanceMetric
|
|
27
|
+
} else {
|
|
28
|
+
this.distanceMetric = new EuclidianDistance()
|
|
29
|
+
let input = new FileContents(priorDistributionOrFileName)
|
|
30
|
+
this.loadPriorDistribution(input)
|
|
31
|
+
this.classMeans = this.loadInstanceList(input)
|
|
32
|
+
}
|
|
24
33
|
}
|
|
25
34
|
|
|
26
35
|
/**
|
|
@@ -41,4 +50,7 @@ export class KMeansModel extends GaussianModel{
|
|
|
41
50
|
return Number.NEGATIVE_INFINITY;
|
|
42
51
|
}
|
|
43
52
|
|
|
53
|
+
saveTxt(fileName: string){
|
|
54
|
+
}
|
|
55
|
+
|
|
44
56
|
}
|