isotree 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8752a98461516e8bffba379295408c157c7bc4b6764364c8246b6138d8080c4
4
- data.tar.gz: 9981293449bbf7b46b6129b6c812164a986dd9af7f8cb0e549fa89080ed7da7e
3
+ metadata.gz: fc614bbd4c6751f60bb22751881a739965950eb4bc8fe225521d6d0caa67e0c1
4
+ data.tar.gz: ef7305b765fc240173173f9926057bef019347a2769f3f91cafd048c9493957b
5
5
  SHA512:
6
- metadata.gz: a65fe9959da2ccd0489a377eecfcd2de535f094f23b2a363ad4768767f62e10af236a7c2b5163c7a37926192ce720c15034695aec4b3adc4868e9c4b89be1a8d
7
- data.tar.gz: f878d2ae9bb5e189e07be86d13aa5efeb2c0cab76857e728eadcb14d485becf62cc5d8526f4e1064f99fe3c0dea5404e61af8c9a51e7780a347966edfd9d5a7c
6
+ metadata.gz: 641b9f59f1ec699dfdcbc19f882173306f677156ddfd9f8f6b49754c4fc703ef1d873ec30db152a5a252937f01b9a233d849645406346bed780e2f87ba827763
7
+ data.tar.gz: 8be8a5643ecd5dfde93aef878d643e3e47cf84943062137c66dbb27c7fd1501a3df746520764b420568bc6e8d162cbd8f3b8d37a94aa8f2e64292a35d5a8e6fa
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.5.0 (2026-04-07)
2
+
3
+ - Dropped support for Ruby < 3.3
4
+
1
5
  ## 0.4.1 (2025-10-26)
2
6
 
3
7
  - Fixed error with Rice 4.7
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  BSD 2-Clause License
2
2
 
3
3
  Copyright (c) 2019-2023, David Cortes
4
- Copyright (c) 2020-2023, Andrew Kane
4
+ Copyright (c) 2020-2026, Andrew Kane
5
5
  All rights reserved.
6
6
 
7
7
  Redistribution and use in source and binary forms, with or without
data/ext/isotree/ext.cpp CHANGED
@@ -1,8 +1,12 @@
1
1
  // stdlib
2
2
  #include <cmath>
3
+ #include <cstddef>
4
+ #include <cstdint>
3
5
  #include <fstream>
4
6
  #include <iostream>
7
+ #include <stdexcept>
5
8
  #include <string>
9
+ #include <string_view>
6
10
  #include <vector>
7
11
 
8
12
  // isotree
@@ -120,77 +124,77 @@ void Init_ext() {
120
124
  ExtIsoForest iso;
121
125
 
122
126
  // data
123
- size_t nrows = options.get<size_t, Symbol>("nrows");
124
- size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
125
- size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
127
+ auto nrows = options.get<size_t, Symbol>("nrows");
128
+ auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
129
+ auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
126
130
 
127
- real_t* numeric_data = NULL;
131
+ real_t* numeric_data = nullptr;
128
132
  if (ncols_numeric > 0) {
129
- numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
133
+ numeric_data = reinterpret_cast<real_t*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
130
134
  }
131
135
 
132
- int* categorical_data = NULL;
133
- int* ncat = NULL;
136
+ int* categorical_data = nullptr;
137
+ int* ncat = nullptr;
134
138
  if (ncols_categ > 0) {
135
- categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
136
- ncat = (int*) options.get<String, Symbol>("ncat").c_str();
139
+ categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
140
+ ncat = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat").c_str()));
137
141
  }
138
142
 
139
143
  // not used (sparse matrices)
140
- real_t* Xc = NULL;
141
- sparse_ix* Xc_ind = NULL;
142
- sparse_ix* Xc_indptr = NULL;
144
+ real_t* Xc = nullptr;
145
+ sparse_ix* Xc_ind = nullptr;
146
+ sparse_ix* Xc_indptr = nullptr;
143
147
 
144
148
  // options
145
149
  // Rice has limit of 14 arguments, so use hash
146
- size_t sample_size = options.get<size_t, Symbol>("sample_size");
147
- size_t ndim = options.get<size_t, Symbol>("ndim");
148
- size_t ntrees = options.get<size_t, Symbol>("ntrees");
149
- size_t ntry = options.get<size_t, Symbol>("ntry");
150
- double prob_pick_by_gain_avg = options.get<double, Symbol>("prob_pick_avg_gain");
151
- double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
152
- double min_gain = options.get<double, Symbol>("min_gain");
153
- MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
154
- CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
155
- NewCategAction new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
156
- bool all_perm = options.get<bool, Symbol>("all_perm");
157
- bool coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
158
- bool with_replacement = options.get<bool, Symbol>("sample_with_replacement");
159
- bool penalize_range = options.get<bool, Symbol>("penalize_range");
160
- bool weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
161
- CoefType coef_type = options.get<CoefType, Symbol>("coefs");
162
- size_t min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
163
- UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
164
- WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
165
- uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
166
- bool use_long_double = options.get<bool, Symbol>("use_long_double");
167
- int nthreads = options.get<int, Symbol>("nthreads");
150
+ auto sample_size = options.get<size_t, Symbol>("sample_size");
151
+ auto ndim = options.get<size_t, Symbol>("ndim");
152
+ auto ntrees = options.get<size_t, Symbol>("ntrees");
153
+ auto ntry = options.get<size_t, Symbol>("ntry");
154
+ auto prob_pick_by_gain_avg = options.get<double, Symbol>("prob_pick_avg_gain");
155
+ auto prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
156
+ auto min_gain = options.get<double, Symbol>("min_gain");
157
+ auto missing_action = options.get<MissingAction, Symbol>("missing_action");
158
+ auto cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
159
+ auto new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
160
+ auto all_perm = options.get<bool, Symbol>("all_perm");
161
+ auto coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
162
+ auto with_replacement = options.get<bool, Symbol>("sample_with_replacement");
163
+ auto penalize_range = options.get<bool, Symbol>("penalize_range");
164
+ auto weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
165
+ auto coef_type = options.get<CoefType, Symbol>("coefs");
166
+ auto min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
167
+ auto depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
168
+ auto weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
169
+ auto random_seed = options.get<uint64_t, Symbol>("random_seed");
170
+ auto use_long_double = options.get<bool, Symbol>("use_long_double");
171
+ auto nthreads = options.get<int, Symbol>("nthreads");
168
172
 
169
173
  // TODO options
170
- double* sample_weights = NULL;
171
- bool weight_as_sample = options.get<bool, Symbol>("weights_as_sample_prob");
172
- size_t max_depth = options.get<size_t, Symbol>("max_depth");
173
- bool limit_depth = options.get<bool, Symbol>("limit_depth");
174
+ double* sample_weights = nullptr;
175
+ auto weight_as_sample = options.get<bool, Symbol>("weights_as_sample_prob");
176
+ auto max_depth = options.get<size_t, Symbol>("max_depth");
177
+ auto limit_depth = options.get<bool, Symbol>("limit_depth");
174
178
  bool standardize_dist = false;
175
- double* tmat = NULL;
176
- double* output_depths = NULL;
179
+ double* tmat = nullptr;
180
+ double* output_depths = nullptr;
177
181
  bool standardize_depth = false;
178
- real_t* col_weights = NULL;
179
- Imputer* imputer = NULL;
182
+ real_t* col_weights = nullptr;
183
+ Imputer* imputer = nullptr;
180
184
  bool impute_at_fit = false;
181
185
 
182
- int ncols_per_tree = options.get<int, Symbol>("ncols_per_tree");
183
- bool standardize_data = options.get<bool, Symbol>("standardize_data");
184
- ScoringMetric scoring_metric = options.get<ScoringMetric, Symbol>("scoring_metric");
185
- bool fast_bratio = options.get<bool, Symbol>("fast_bratio");
186
- double prob_pick_by_full_gain = options.get<double, Symbol>("prob_pick_full_gain");
187
- double prob_pick_by_dens = options.get<double, Symbol>("prob_pick_dens");
188
- double prob_pick_col_by_range = options.get<double, Symbol>("prob_pick_col_by_range");
189
- double prob_pick_col_by_var = options.get<double, Symbol>("prob_pick_col_by_var");
190
- double prob_pick_col_by_kurt = options.get<double, Symbol>("prob_pick_col_by_kurt");
186
+ auto ncols_per_tree = options.get<int, Symbol>("ncols_per_tree");
187
+ auto standardize_data = options.get<bool, Symbol>("standardize_data");
188
+ auto scoring_metric = options.get<ScoringMetric, Symbol>("scoring_metric");
189
+ auto fast_bratio = options.get<bool, Symbol>("fast_bratio");
190
+ auto prob_pick_by_full_gain = options.get<double, Symbol>("prob_pick_full_gain");
191
+ auto prob_pick_by_dens = options.get<double, Symbol>("prob_pick_dens");
192
+ auto prob_pick_col_by_range = options.get<double, Symbol>("prob_pick_col_by_range");
193
+ auto prob_pick_col_by_var = options.get<double, Symbol>("prob_pick_col_by_var");
194
+ auto prob_pick_col_by_kurt = options.get<double, Symbol>("prob_pick_col_by_kurt");
191
195
 
192
196
  fit_iforest(
193
- NULL,
197
+ nullptr,
194
198
  &iso,
195
199
  numeric_data,
196
200
  ncols_numeric,
@@ -251,37 +255,37 @@ void Init_ext() {
251
255
  "predict_iforest",
252
256
  [](ExtIsoForest& iso, Hash options) {
253
257
  // data
254
- size_t nrows = options.get<size_t, Symbol>("nrows");
255
- size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
256
- size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
258
+ auto nrows = options.get<size_t, Symbol>("nrows");
259
+ auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
260
+ auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
257
261
 
258
- real_t* numeric_data = NULL;
262
+ real_t* numeric_data = nullptr;
259
263
  if (ncols_numeric > 0) {
260
- numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
264
+ numeric_data = reinterpret_cast<real_t*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
261
265
  }
262
266
 
263
- int* categorical_data = NULL;
267
+ int* categorical_data = nullptr;
264
268
  if (ncols_categ > 0) {
265
- categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
269
+ categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
266
270
  }
267
271
 
268
272
  // not used (sparse matrices)
269
- real_t* Xc = NULL;
270
- sparse_ix* Xc_ind = NULL;
271
- sparse_ix* Xc_indptr = NULL;
272
- real_t* Xr = NULL;
273
- sparse_ix* Xr_ind = NULL;
274
- sparse_ix* Xr_indptr = NULL;
273
+ real_t* Xc = nullptr;
274
+ sparse_ix* Xc_ind = nullptr;
275
+ sparse_ix* Xc_indptr = nullptr;
276
+ real_t* Xr = nullptr;
277
+ sparse_ix* Xr_ind = nullptr;
278
+ sparse_ix* Xr_indptr = nullptr;
275
279
 
276
280
  // options
277
- int nthreads = options.get<int, Symbol>("nthreads");
278
- bool standardize = options.get<bool, Symbol>("standardize");
281
+ auto nthreads = options.get<int, Symbol>("nthreads");
282
+ auto standardize = options.get<bool, Symbol>("standardize");
279
283
  std::vector<double> outlier_scores(nrows);
280
- sparse_ix* tree_num = NULL;
284
+ sparse_ix* tree_num = nullptr;
281
285
  bool is_col_major = true;
282
286
  size_t ld_numeric = 0;
283
287
  size_t ld_categ = 0;
284
- double* per_tree_depths = NULL;
288
+ double* per_tree_depths = nullptr;
285
289
 
286
290
  predict_iforest(
287
291
  numeric_data,
@@ -298,49 +302,49 @@ void Init_ext() {
298
302
  nrows,
299
303
  nthreads,
300
304
  standardize,
301
- NULL,
305
+ nullptr,
302
306
  &iso,
303
307
  outlier_scores.data(),
304
308
  tree_num,
305
309
  per_tree_depths,
306
- NULL
310
+ nullptr
307
311
  );
308
312
 
309
313
  Array ret;
310
- for (size_t i = 0; i < outlier_scores.size(); i++) {
311
- ret.push(outlier_scores[i], false);
314
+ for (auto v : outlier_scores) {
315
+ ret.push(v, false);
312
316
  }
313
317
  return ret;
314
318
  })
315
319
  .define_singleton_function(
316
320
  "serialize_combined",
317
321
  [](ExtIsoForest& iso, String path, String metadata) {
318
- #ifdef _MSC_VER
322
+ #ifdef _MSC_VER
319
323
  // TODO convert to wchar_t
320
324
  throw std::runtime_error("Not supported on Windows yet");
321
- #else
325
+ #else
322
326
  std::ofstream file;
323
327
  file.open(path.c_str());
324
328
  serialize_combined(
325
- NULL,
329
+ nullptr,
326
330
  &iso,
327
- NULL,
328
- NULL,
331
+ nullptr,
332
+ nullptr,
329
333
  metadata.c_str(),
330
334
  // returns bytesize (RSTRING_LEN)
331
335
  metadata.length(),
332
336
  file
333
337
  );
334
338
  file.close();
335
- #endif
339
+ #endif
336
340
  })
337
341
  .define_singleton_function(
338
342
  "deserialize_combined",
339
343
  [](String path) {
340
- #ifdef _MSC_VER
344
+ #ifdef _MSC_VER
341
345
  // TODO convert to wchar_t
342
346
  throw std::runtime_error("Not supported on Windows yet");
343
- #else
347
+ #else
344
348
  Array ret;
345
349
 
346
350
  std::ifstream file;
@@ -386,20 +390,15 @@ void Init_ext() {
386
390
  ExtIsoForest model_ext = ExtIsoForest();
387
391
  Imputer imputer = Imputer();
388
392
  TreesIndexer indexer = TreesIndexer();
389
- char *optional_metadata = (char*) calloc(size_metadata, sizeof(char));
390
- if (optional_metadata == NULL) {
391
- throw std::runtime_error("Cannot allocate memory");
392
- }
393
+ std::vector<char> optional_metadata(size_metadata, 0);
393
394
 
394
- deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata);
395
+ deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata.data());
395
396
  file.close();
396
397
 
397
398
  ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)), false);
398
- ret.push(String(std::string(optional_metadata, size_metadata)), false);
399
-
400
- free(optional_metadata);
399
+ ret.push(String(std::string_view{optional_metadata.data(), optional_metadata.size()}), false);
401
400
 
402
401
  return ret;
403
- #endif
402
+ #endif
404
403
  });
405
404
  }
@@ -1,3 +1,3 @@
1
1
  module IsoTree
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isotree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -93,14 +93,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: '3.1'
96
+ version: '3.3'
97
97
  required_rubygems_version: !ruby/object:Gem::Requirement
98
98
  requirements:
99
99
  - - ">="
100
100
  - !ruby/object:Gem::Version
101
101
  version: '0'
102
102
  requirements: []
103
- rubygems_version: 3.6.9
103
+ rubygems_version: 4.0.6
104
104
  specification_version: 4
105
105
  summary: Outlier/anomaly detection for Ruby using Isolation Forest
106
106
  test_files: []