isotree 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE.txt +2 -1
- data/README.md +57 -6
- data/ext/isotree/ext.cpp +170 -39
- data/ext/isotree/extconf.rb +3 -3
- data/lib/isotree.rb +2 -0
- data/lib/isotree/dataset.rb +73 -0
- data/lib/isotree/isolation_forest.rb +182 -29
- data/lib/isotree/version.rb +1 -1
- data/vendor/cereal/LICENSE +24 -0
- data/vendor/cereal/README.md +85 -0
- data/vendor/cereal/include/cereal/access.hpp +351 -0
- data/vendor/cereal/include/cereal/archives/adapters.hpp +163 -0
- data/vendor/cereal/include/cereal/archives/binary.hpp +169 -0
- data/vendor/cereal/include/cereal/archives/json.hpp +1019 -0
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +334 -0
- data/vendor/cereal/include/cereal/archives/xml.hpp +956 -0
- data/vendor/cereal/include/cereal/cereal.hpp +1089 -0
- data/vendor/cereal/include/cereal/details/helpers.hpp +422 -0
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +796 -0
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +65 -0
- data/vendor/cereal/include/cereal/details/static_object.hpp +127 -0
- data/vendor/cereal/include/cereal/details/traits.hpp +1411 -0
- data/vendor/cereal/include/cereal/details/util.hpp +84 -0
- data/vendor/cereal/include/cereal/external/base64.hpp +134 -0
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +284 -0
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +78 -0
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +2652 -0
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +299 -0
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +716 -0
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +74 -0
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +161 -0
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +99 -0
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +104 -0
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +151 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +290 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +271 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +245 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +78 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +308 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +186 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +55 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +740 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +232 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +69 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +290 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +46 -0
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +128 -0
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +70 -0
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +71 -0
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +316 -0
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +300 -0
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +81 -0
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +1414 -0
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +277 -0
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +656 -0
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +2230 -0
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +2497 -0
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +223 -0
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +121 -0
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +709 -0
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +52 -0
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +406 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +2624 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +175 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +428 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +123 -0
- data/vendor/cereal/include/cereal/macros.hpp +154 -0
- data/vendor/cereal/include/cereal/specialize.hpp +139 -0
- data/vendor/cereal/include/cereal/types/array.hpp +79 -0
- data/vendor/cereal/include/cereal/types/atomic.hpp +55 -0
- data/vendor/cereal/include/cereal/types/base_class.hpp +203 -0
- data/vendor/cereal/include/cereal/types/bitset.hpp +176 -0
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +164 -0
- data/vendor/cereal/include/cereal/types/chrono.hpp +72 -0
- data/vendor/cereal/include/cereal/types/common.hpp +129 -0
- data/vendor/cereal/include/cereal/types/complex.hpp +56 -0
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +73 -0
- data/vendor/cereal/include/cereal/types/deque.hpp +62 -0
- data/vendor/cereal/include/cereal/types/forward_list.hpp +68 -0
- data/vendor/cereal/include/cereal/types/functional.hpp +43 -0
- data/vendor/cereal/include/cereal/types/list.hpp +62 -0
- data/vendor/cereal/include/cereal/types/map.hpp +36 -0
- data/vendor/cereal/include/cereal/types/memory.hpp +425 -0
- data/vendor/cereal/include/cereal/types/optional.hpp +66 -0
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +483 -0
- data/vendor/cereal/include/cereal/types/queue.hpp +132 -0
- data/vendor/cereal/include/cereal/types/set.hpp +103 -0
- data/vendor/cereal/include/cereal/types/stack.hpp +76 -0
- data/vendor/cereal/include/cereal/types/string.hpp +61 -0
- data/vendor/cereal/include/cereal/types/tuple.hpp +123 -0
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +36 -0
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +99 -0
- data/vendor/cereal/include/cereal/types/utility.hpp +47 -0
- data/vendor/cereal/include/cereal/types/valarray.hpp +89 -0
- data/vendor/cereal/include/cereal/types/variant.hpp +109 -0
- data/vendor/cereal/include/cereal/types/vector.hpp +112 -0
- data/vendor/cereal/include/cereal/version.hpp +52 -0
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +7 -2
- data/vendor/isotree/src/RcppExports.cpp +44 -4
- data/vendor/isotree/src/Rwrapper.cpp +141 -51
- data/vendor/isotree/src/crit.cpp +1 -1
- data/vendor/isotree/src/dealloc.cpp +1 -1
- data/vendor/isotree/src/dist.cpp +6 -6
- data/vendor/isotree/src/extended.cpp +5 -5
- data/vendor/isotree/src/fit_model.cpp +27 -5
- data/vendor/isotree/src/helpers_iforest.cpp +26 -11
- data/vendor/isotree/src/impute.cpp +7 -7
- data/vendor/isotree/src/isoforest.cpp +7 -7
- data/vendor/isotree/src/isotree.hpp +27 -5
- data/vendor/isotree/src/merge_models.cpp +1 -1
- data/vendor/isotree/src/mult.cpp +1 -1
- data/vendor/isotree/src/predict.cpp +20 -16
- data/vendor/isotree/src/serialize.cpp +1 -1
- data/vendor/isotree/src/sql.cpp +545 -0
- data/vendor/isotree/src/utils.cpp +36 -44
- metadata +102 -81
data/ext/isotree/extconf.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require "mkmf-rice"
|
2
2
|
|
3
|
-
|
4
|
-
$CXXFLAGS += " -std=c++11 -D_USE_MERSENNE_TWISTER"
|
3
|
+
$CXXFLAGS += " -std=c++17 -D_USE_MERSENNE_TWISTER -D_ENABLE_CEREAL"
|
5
4
|
|
6
5
|
apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
7
6
|
|
@@ -13,10 +12,11 @@ end
|
|
13
12
|
|
14
13
|
ext = File.expand_path(".", __dir__)
|
15
14
|
isotree = File.expand_path("../../vendor/isotree/src", __dir__)
|
15
|
+
cereal = File.expand_path("../../vendor/cereal/include", __dir__)
|
16
16
|
|
17
17
|
exclude = %w(Rwrapper.cpp RcppExports.cpp)
|
18
18
|
$srcs = Dir["{#{ext},#{isotree}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
|
19
|
-
$INCFLAGS << " -I#{isotree}"
|
19
|
+
$INCFLAGS << " -I#{isotree} -I#{cereal}"
|
20
20
|
$VPATH << isotree
|
21
21
|
|
22
22
|
create_makefile("isotree/ext")
|
data/lib/isotree.rb
CHANGED
data/lib/isotree/dataset.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
module IsoTree
|
2
|
+
class Dataset
|
3
|
+
attr_reader :numeric_columns, :categorical_columns, :array_type
|
4
|
+
|
5
|
+
def initialize(data)
|
6
|
+
@data = data
|
7
|
+
|
8
|
+
if defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
|
9
|
+
@vectors = data.vectors
|
10
|
+
@numeric_columns, @categorical_columns = data.keys.partition { |k, v| ![:object, :bool].include?(data[k].type) }
|
11
|
+
@array_type = false
|
12
|
+
elsif defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
13
|
+
raise ArgumentError, "Input must have 2 dimensions" if data.ndim != 2
|
14
|
+
|
15
|
+
data = data.cast_to(Numo::DFloat)
|
16
|
+
ncols = data.shape[1]
|
17
|
+
|
18
|
+
@numeric_columns = ncols.times.to_a
|
19
|
+
@categorical_columns = []
|
20
|
+
|
21
|
+
@vectors = {}
|
22
|
+
@numeric_columns.each do |k|
|
23
|
+
@vectors[k] = data[true, k]
|
24
|
+
end
|
25
|
+
@array_type = true
|
26
|
+
else
|
27
|
+
data = data.to_a
|
28
|
+
|
29
|
+
hashes = data.all? { |d| d.is_a?(Hash) }
|
30
|
+
arrays = !hashes && data.all? { |d| d.is_a?(Array) }
|
31
|
+
unless hashes || arrays
|
32
|
+
raise ArgumentError, "Array elements must be all hashes or arrays"
|
33
|
+
end
|
34
|
+
|
35
|
+
nrows = data.size
|
36
|
+
ncols = data.first ? data.first.size : 0
|
37
|
+
if data.any? { |r| r.size != ncols }
|
38
|
+
raise ArgumentError, "All rows must have the same number of columns"
|
39
|
+
end
|
40
|
+
|
41
|
+
keys =
|
42
|
+
if hashes
|
43
|
+
data.flat_map(&:keys).uniq
|
44
|
+
else
|
45
|
+
ncols.times.to_a
|
46
|
+
end
|
47
|
+
|
48
|
+
@vectors = {}
|
49
|
+
keys.each do |k|
|
50
|
+
@vectors[k] = []
|
51
|
+
end
|
52
|
+
data.each do |d|
|
53
|
+
keys.each do |k|
|
54
|
+
@vectors[k] << d[k]
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
@numeric_columns, @categorical_columns = keys.partition { |k| @vectors[k].all? { |v| v.nil? || v.is_a?(Numeric) } }
|
59
|
+
@array_type = arrays
|
60
|
+
end
|
61
|
+
|
62
|
+
raise ArgumentError, "No data" if size == 0
|
63
|
+
end
|
64
|
+
|
65
|
+
def [](k)
|
66
|
+
@vectors[k]
|
67
|
+
end
|
68
|
+
|
69
|
+
def size
|
70
|
+
@vectors.any? ? @vectors.values.first.size : 0
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/lib/isotree/isolation_forest.rb
CHANGED
@@ -4,9 +4,11 @@ module IsoTree
|
|
4
4
|
sample_size: nil, ntrees: 500, ndim: 3, ntry: 3,
|
5
5
|
prob_pick_avg_gain: 0, prob_pick_pooled_gain: 0,
|
6
6
|
prob_split_avg_gain: 0, prob_split_pooled_gain: 0,
|
7
|
-
min_gain: 0,
|
7
|
+
min_gain: 0, missing_action: "impute", new_categ_action: "smallest",
|
8
|
+
categ_split_type: "subset", all_perm: false, coef_by_prop: false,
|
8
9
|
sample_with_replacement: false, penalize_range: true,
|
9
|
-
weigh_by_kurtosis: false,
|
10
|
+
weigh_by_kurtosis: false, coefs: "normal", min_imp_obs: 3, depth_imp: "higher",
|
11
|
+
weigh_imp_rows: "inverse", random_seed: 1, nthreads: -1
|
10
12
|
)
|
11
13
|
|
12
14
|
@sample_size = sample_size
|
@@ -18,12 +20,18 @@ module IsoTree
|
|
18
20
|
@prob_split_avg_gain = prob_split_avg_gain
|
19
21
|
@prob_split_pooled_gain = prob_split_pooled_gain
|
20
22
|
@min_gain = min_gain
|
23
|
+
@missing_action = missing_action
|
24
|
+
@new_categ_action = new_categ_action
|
25
|
+
@categ_split_type = categ_split_type
|
21
26
|
@all_perm = all_perm
|
22
27
|
@coef_by_prop = coef_by_prop
|
23
28
|
@sample_with_replacement = sample_with_replacement
|
24
29
|
@penalize_range = penalize_range
|
25
30
|
@weigh_by_kurtosis = weigh_by_kurtosis
|
31
|
+
@coefs = coefs
|
26
32
|
@min_imp_obs = min_imp_obs
|
33
|
+
@depth_imp = depth_imp
|
34
|
+
@weigh_imp_rows = weigh_imp_rows
|
27
35
|
@random_seed = random_seed
|
28
36
|
|
29
37
|
# etc module returns virtual cores
|
@@ -32,57 +40,202 @@ module IsoTree
|
|
32
40
|
end
|
33
41
|
|
34
42
|
def fit(x)
|
43
|
+
x = Dataset.new(x)
|
44
|
+
prep_fit(x)
|
35
45
|
options = data_options(x).merge(fit_options)
|
36
46
|
options[:sample_size] ||= options[:nrows]
|
37
|
-
@ncols = options[:ncols]
|
38
47
|
@ext_iso_forest = Ext.fit_iforest(options)
|
39
48
|
end
|
40
49
|
|
41
|
-
def predict(x)
|
42
|
-
|
50
|
+
def predict(x, output: "score")
|
51
|
+
check_fit
|
52
|
+
|
53
|
+
x = Dataset.new(x)
|
54
|
+
prep_predict(x)
|
55
|
+
|
43
56
|
options = data_options(x).merge(nthreads: @nthreads)
|
44
|
-
|
45
|
-
|
57
|
+
case output
|
58
|
+
when "score"
|
59
|
+
options[:standardize] = true
|
60
|
+
when "avg_depth"
|
61
|
+
options[:standardize] = false
|
62
|
+
else
|
63
|
+
raise ArgumentError, "Unknown output"
|
46
64
|
end
|
65
|
+
|
47
66
|
Ext.predict_iforest(@ext_iso_forest, options)
|
48
67
|
end
|
49
68
|
|
69
|
+
# same format as Python so models are compatible
|
70
|
+
def export_model(path)
|
71
|
+
check_fit
|
72
|
+
|
73
|
+
File.write("#{path}.metadata", JSON.pretty_generate(export_metadata))
|
74
|
+
Ext.serialize_ext_isoforest(@ext_iso_forest, path)
|
75
|
+
end
|
76
|
+
|
77
|
+
def self.import_model(path)
|
78
|
+
model = new
|
79
|
+
metadata = JSON.parse(File.read("#{path}.metadata"))
|
80
|
+
model.send(:import_metadata, metadata)
|
81
|
+
model.instance_variable_set(:@ext_iso_forest, Ext.deserialize_ext_isoforest(path))
|
82
|
+
model
|
83
|
+
end
|
84
|
+
|
50
85
|
private
|
51
86
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
87
|
+
def export_metadata
|
88
|
+
data_info = {
|
89
|
+
ncols_numeric: @numeric_columns.size,
|
90
|
+
ncols_categ: @categorical_columns.size,
|
91
|
+
cols_numeric: @numeric_columns,
|
92
|
+
cols_categ: @categorical_columns,
|
93
|
+
cat_levels: @categorical_columns.map { |v| @categories[v].keys }
|
94
|
+
}
|
95
|
+
|
96
|
+
# Ruby-specific
|
97
|
+
data_info[:sym_numeric] = @numeric_columns.map { |v| v.is_a?(Symbol) }
|
98
|
+
data_info[:sym_categ] = @categorical_columns.map { |v| v.is_a?(Symbol) }
|
99
|
+
|
100
|
+
model_info = {
|
101
|
+
ndim: @ndim,
|
102
|
+
nthreads: @nthreads,
|
103
|
+
build_imputer: false
|
104
|
+
}
|
105
|
+
|
106
|
+
params = {}
|
107
|
+
PARAM_KEYS.each do |k|
|
108
|
+
params[k] = instance_variable_get("@#{k}")
|
67
109
|
end
|
68
|
-
raise ArgumentError, "No data" if nrows == 0
|
69
110
|
|
70
111
|
{
|
71
|
-
|
72
|
-
|
73
|
-
|
112
|
+
data_info: data_info,
|
113
|
+
model_info: model_info,
|
114
|
+
params: params
|
74
115
|
}
|
75
116
|
end
|
76
117
|
|
118
|
+
def import_metadata(metadata)
|
119
|
+
data_info = metadata["data_info"]
|
120
|
+
model_info = metadata["model_info"]
|
121
|
+
params = metadata["params"]
|
122
|
+
|
123
|
+
# Ruby-specific
|
124
|
+
sym_numeric = data_info["sym_numeric"].to_a
|
125
|
+
sym_categ = data_info["sym_categ"].to_a
|
126
|
+
|
127
|
+
@numeric_columns = data_info["cols_numeric"].map.with_index { |v, i| sym_numeric[i] ? v.to_sym : v }
|
128
|
+
@categorical_columns = data_info["cols_categ"].map.with_index { |v, i| sym_categ[i] ? v.to_sym : v }
|
129
|
+
@categories = {}
|
130
|
+
@categorical_columns.zip(data_info["cat_levels"]) do |col, levels|
|
131
|
+
@categories[col] = levels.map.with_index.to_h
|
132
|
+
end
|
133
|
+
|
134
|
+
@ndim = model_info["ndim"]
|
135
|
+
@nthreads = model_info["nthreads"]
|
136
|
+
|
137
|
+
PARAM_KEYS.each do |k|
|
138
|
+
instance_variable_set("@#{k}", params[k.to_s])
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def check_fit
|
143
|
+
raise "Not fit" unless @ext_iso_forest
|
144
|
+
end
|
145
|
+
|
146
|
+
def prep_fit(df)
|
147
|
+
@numeric_columns = df.numeric_columns
|
148
|
+
@categorical_columns = df.categorical_columns
|
149
|
+
@categories = {}
|
150
|
+
@categorical_columns.each do |k|
|
151
|
+
@categories[k] = df[k].uniq.to_a.compact.map.with_index.to_h
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
# TODO handle column type mismatches
|
156
|
+
def prep_predict(df)
|
157
|
+
expected_columns = @numeric_columns + @categorical_columns
|
158
|
+
if df.array_type
|
159
|
+
if df.numeric_columns.size + df.categorical_columns.size != expected_columns.size
|
160
|
+
raise ArgumentError, "Input must have #{expected_columns.size} columns for this model"
|
161
|
+
end
|
162
|
+
end
|
163
|
+
expected_columns.each do |k|
|
164
|
+
raise ArgumentError, "Missing column: #{k}" unless df[k]
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def data_options(df)
|
169
|
+
options = {}
|
170
|
+
|
171
|
+
# numeric
|
172
|
+
numeric_data = String.new
|
173
|
+
@numeric_columns.each do |k|
|
174
|
+
v = df[k]
|
175
|
+
v = v.to_numo if v.respond_to?(:to_numo) # Rover
|
176
|
+
binary_str =
|
177
|
+
if v.respond_to?(:to_binary) # Rover and Numo
|
178
|
+
v.cast_to(Numo::DFloat).to_binary
|
179
|
+
else
|
180
|
+
v.pack("d*")
|
181
|
+
end
|
182
|
+
numeric_data << binary_str
|
183
|
+
end
|
184
|
+
options[:numeric_data] = numeric_data
|
185
|
+
options[:ncols_numeric] = @numeric_columns.size
|
186
|
+
|
187
|
+
# categorical
|
188
|
+
categorical_data = String.new
|
189
|
+
ncat = String.new
|
190
|
+
@categorical_columns.each do |k|
|
191
|
+
categories = @categories[k]
|
192
|
+
# for unseen values, set to categories.size
|
193
|
+
categories_size = categories.size
|
194
|
+
values = df[k].map { |v| v.nil? ? -1 : (categories[v] || categories_size) }
|
195
|
+
# TODO make more efficient
|
196
|
+
if values.any? { |v| v == categories_size }
|
197
|
+
warn "[isotree] Unseen values in column: #{k}"
|
198
|
+
end
|
199
|
+
|
200
|
+
v = values
|
201
|
+
v = v.to_numo if v.respond_to?(:to_numo) # Rover
|
202
|
+
binary_str =
|
203
|
+
if v.respond_to?(:to_binary) # Rover and Numo
|
204
|
+
v.cast_to(Numo::Int32).to_binary
|
205
|
+
else
|
206
|
+
v.pack("i*")
|
207
|
+
end
|
208
|
+
categorical_data << binary_str
|
209
|
+
ncat << [categories.size].pack("i")
|
210
|
+
end
|
211
|
+
options[:categorical_data] = categorical_data
|
212
|
+
options[:ncols_categ] = @categorical_columns.size
|
213
|
+
options[:ncat] = ncat
|
214
|
+
|
215
|
+
options[:nrows] = df.size
|
216
|
+
options
|
217
|
+
end
|
218
|
+
|
219
|
+
PARAM_KEYS = %i(
|
220
|
+
sample_size ntrees ntry max_depth
|
221
|
+
prob_pick_avg_gain prob_pick_pooled_gain
|
222
|
+
prob_split_avg_gain prob_split_pooled_gain min_gain
|
223
|
+
missing_action new_categ_action categ_split_type coefs depth_imp
|
224
|
+
weigh_imp_rows min_imp_obs random_seed all_perm coef_by_prop
|
225
|
+
weights_as_sample_prob sample_with_replacement penalize_range
|
226
|
+
weigh_by_kurtosis assume_full_distr
|
227
|
+
)
|
228
|
+
|
77
229
|
def fit_options
|
78
230
|
keys = %i(
|
79
231
|
sample_size ntrees ndim ntry
|
80
232
|
prob_pick_avg_gain prob_pick_pooled_gain
|
81
233
|
prob_split_avg_gain prob_split_pooled_gain
|
82
|
-
min_gain
|
234
|
+
min_gain missing_action new_categ_action
|
235
|
+
categ_split_type all_perm coef_by_prop
|
83
236
|
sample_with_replacement penalize_range
|
84
|
-
weigh_by_kurtosis min_imp_obs
|
85
|
-
random_seed nthreads
|
237
|
+
weigh_by_kurtosis coefs min_imp_obs depth_imp
|
238
|
+
weigh_imp_rows random_seed nthreads
|
86
239
|
)
|
87
240
|
options = {}
|
88
241
|
keys.each do |k|
|
data/lib/isotree/version.rb
CHANGED
data/vendor/cereal/LICENSE
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
Copyright (c) 2014, Randolph Voorhies, Shane Grant
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
* Redistributions of source code must retain the above copyright
|
7
|
+
notice, this list of conditions and the following disclaimer.
|
8
|
+
* Redistributions in binary form must reproduce the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer in the
|
10
|
+
documentation and/or other materials provided with the distribution.
|
11
|
+
* Neither the name of cereal nor the
|
12
|
+
names of its contributors may be used to endorse or promote products
|
13
|
+
derived from this software without specific prior written permission.
|
14
|
+
|
15
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
16
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
17
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
18
|
+
DISCLAIMED. IN NO EVENT SHALL RANDOLPH VOORHIES OR SHANE GRANT BE LIABLE FOR ANY
|
19
|
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
20
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
21
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
22
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
23
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
24
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/vendor/cereal/README.md
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
cereal - A C++11 library for serialization
|
2
|
+
==========================================
|
3
|
+
|
4
|
+
<img src="http://uscilab.github.io/cereal/assets/img/cerealboxside.png" align="right"/><p>cereal is a header-only C++11 serialization library. cereal takes arbitrary data types and reversibly turns them into different representations, such as compact binary encodings, XML, or JSON. cereal was designed to be fast, light-weight, and easy to extend - it has no external dependencies and can be easily bundled with other code or used standalone.</p>
|
5
|
+
|
6
|
+
### cereal has great documentation
|
7
|
+
|
8
|
+
Looking for more information on how cereal works and its documentation? Visit [cereal's web page](http://USCiLab.github.com/cereal) to get the latest information.
|
9
|
+
|
10
|
+
### cereal is easy to use
|
11
|
+
|
12
|
+
Installation and use of cereal is fully documented on the [main web page](http://USCiLab.github.com/cereal), but this is a quick and dirty version:
|
13
|
+
|
14
|
+
* Download cereal and place the headers somewhere your code can see them
|
15
|
+
* Write serialization functions for your custom types or use the built-in support for the standard library cereal provides
|
16
|
+
* Use the serialization archives to load and save data
|
17
|
+
|
18
|
+
```cpp
|
19
|
+
#include <cereal/types/unordered_map.hpp>
|
20
|
+
#include <cereal/types/memory.hpp>
|
21
|
+
#include <cereal/archives/binary.hpp>
|
22
|
+
#include <fstream>
|
23
|
+
|
24
|
+
struct MyRecord
|
25
|
+
{
|
26
|
+
uint8_t x, y;
|
27
|
+
float z;
|
28
|
+
|
29
|
+
template <class Archive>
|
30
|
+
void serialize( Archive & ar )
|
31
|
+
{
|
32
|
+
ar( x, y, z );
|
33
|
+
}
|
34
|
+
};
|
35
|
+
|
36
|
+
struct SomeData
|
37
|
+
{
|
38
|
+
int32_t id;
|
39
|
+
std::shared_ptr<std::unordered_map<uint32_t, MyRecord>> data;
|
40
|
+
|
41
|
+
template <class Archive>
|
42
|
+
void save( Archive & ar ) const
|
43
|
+
{
|
44
|
+
ar( data );
|
45
|
+
}
|
46
|
+
|
47
|
+
template <class Archive>
|
48
|
+
void load( Archive & ar )
|
49
|
+
{
|
50
|
+
static int32_t idGen = 0;
|
51
|
+
id = idGen++;
|
52
|
+
ar( data );
|
53
|
+
}
|
54
|
+
};
|
55
|
+
|
56
|
+
int main()
|
57
|
+
{
|
58
|
+
std::ofstream os("out.cereal", std::ios::binary);
|
59
|
+
cereal::BinaryOutputArchive archive( os );
|
60
|
+
|
61
|
+
SomeData myData;
|
62
|
+
archive( myData );
|
63
|
+
|
64
|
+
return 0;
|
65
|
+
}
|
66
|
+
```
|
67
|
+
|
68
|
+
### cereal has a mailing list
|
69
|
+
|
70
|
+
Either get in touch over <a href="mailto:cerealcpp@googlegroups.com">email</a> or [on the web](https://groups.google.com/forum/#!forum/cerealcpp).
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
## cereal has a permissive license
|
75
|
+
|
76
|
+
cereal is licensed under the [BSD license](http://opensource.org/licenses/BSD-3-Clause).
|
77
|
+
|
78
|
+
## cereal build status
|
79
|
+
|
80
|
+
* master : [![Build Status](https://travis-ci.com/USCiLab/cereal.svg?branch=master)](https://travis-ci.com/USCiLab/cereal)
|
81
|
+
[![Build status](https://ci.appveyor.com/api/projects/status/91aou6smj36or0vb/branch/master?svg=true)](https://ci.appveyor.com/project/AzothAmmo/cereal/branch/master)
|
82
|
+
|
83
|
+
---
|
84
|
+
|
85
|
+
Were you looking for the Haskell cereal? Go <a href="https://github.com/GaloisInc/cereal">here</a>.
|