isotree 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -40,7 +40,7 @@
40
40
  * International Conference on Knowledge Discovery & Data Mining. 2018.
41
41
  *
42
42
  * BSD 2-Clause License
43
- * Copyright (c) 2019-2022, David Cortes
43
+ * Copyright (c) 2019-2023, David Cortes
44
44
  * All rights reserved.
45
45
  * Redistribution and use in source and binary forms, with or without
46
46
  * modification, are permitted provided that the following conditions are met:
@@ -64,12 +64,10 @@
64
64
 
65
65
  #include <Rcpp.h>
66
66
  #include <Rcpp/unwindProtect.h>
67
- // [[Rcpp::plugins(unwindProtect)]]
68
67
  #include <Rinternals.h>
68
+ #include <R_ext/Altrep.h>
69
69
 
70
- #ifndef _FOR_R
71
- #define FOR_R
72
- #endif
70
+ #include <type_traits>
73
71
 
74
72
  /* This is the package's header */
75
73
  #include "isotree.hpp"
@@ -87,7 +85,7 @@
87
85
  so it's not enough to just include 'isotree_exportable.hpp' and let
88
86
  the templates be instantiated elsewhere. */
89
87
 
90
- #define throw_mem_err() Rcpp::stop("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n")
88
+ #define throw_mem_err() throw Rcpp::exception("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n")
91
89
 
92
90
  SEXP alloc_RawVec(void *data)
93
91
  {
@@ -148,9 +146,9 @@ Rcpp::RawVector serialize_cpp_obj(const Model *model_outputs)
148
146
  {
149
147
  size_t serialized_size = determine_serialized_size(*model_outputs);
150
148
  if (unlikely(!serialized_size))
151
- Rcpp::stop("Unexpected error.");
149
+ throw Rcpp::exception("Unexpected error.");
152
150
  if (unlikely(serialized_size > (size_t)std::numeric_limits<R_xlen_t>::max()))
153
- Rcpp::stop("Resulting model is too large for R to handle.");
151
+ throw Rcpp::exception("Resulting model is too large for R to handle.");
154
152
  Rcpp::RawVector out = Rcpp::unwindProtect(alloc_RawVec, (void*)&serialized_size);
155
153
  char *out_ = (char*)RAW(out);
156
154
  serialize_isotree(*model_outputs, out_);
@@ -195,11 +193,279 @@ SEXP deserialize_Indexer(Rcpp::RawVector src)
195
193
  }
196
194
 
197
195
  // [[Rcpp::export(rng = false)]]
198
- Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
196
+ SEXP serialize_IsoForest_from_ptr(SEXP R_ptr)
197
+ {
198
+ const IsoForest* model = (const IsoForest*)R_ExternalPtrAddr(R_ptr);
199
+ return serialize_cpp_obj<IsoForest>(model);
200
+ }
201
+
202
+ // [[Rcpp::export(rng = false)]]
203
+ SEXP serialize_ExtIsoForest_from_ptr(SEXP R_ptr)
204
+ {
205
+ const ExtIsoForest* model = (const ExtIsoForest*)R_ExternalPtrAddr(R_ptr);
206
+ return serialize_cpp_obj<ExtIsoForest>(model);
207
+ }
208
+
209
+ // [[Rcpp::export(rng = false)]]
210
+ SEXP serialize_Imputer_from_ptr(SEXP R_ptr)
211
+ {
212
+ const Imputer* model = (const Imputer*)R_ExternalPtrAddr(R_ptr);
213
+ return serialize_cpp_obj<Imputer>(model);
214
+ }
215
+
216
+ // [[Rcpp::export(rng = false)]]
217
+ SEXP serialize_Indexer_from_ptr(SEXP R_ptr)
218
+ {
219
+ const TreesIndexer* model = (const TreesIndexer*)R_ExternalPtrAddr(R_ptr);
220
+ return serialize_cpp_obj<TreesIndexer>(model);
221
+ }
222
+
223
+ // [[Rcpp::export(rng = false)]]
224
+ Rcpp::LogicalVector check_null_ptr_model_internal(SEXP ptr_model)
199
225
  {
200
226
  return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
201
227
  }
202
228
 
229
+ static R_altrep_class_t altrepped_pointer_IsoForest;
230
+ static R_altrep_class_t altrepped_pointer_ExtIsoForest;
231
+ static R_altrep_class_t altrepped_pointer_Imputer;
232
+ static R_altrep_class_t altrepped_pointer_TreesIndexer;
233
+ static R_altrep_class_t altrepped_pointer_NullPointer;
234
+
235
+ template <class Model>
236
+ R_altrep_class_t get_altrep_obj_class()
237
+ {
238
+ if (std::is_same<Model, IsoForest>::value) return altrepped_pointer_IsoForest;
239
+
240
+ if (std::is_same<Model, ExtIsoForest>::value) return altrepped_pointer_ExtIsoForest;
241
+
242
+ if (std::is_same<Model, Imputer>::value) return altrepped_pointer_Imputer;
243
+
244
+ if (std::is_same<Model, TreesIndexer>::value) return altrepped_pointer_TreesIndexer;
245
+
246
+ throw Rcpp::exception("Internal error. Please open a bug report.");
247
+ }
248
+
249
+ R_xlen_t altrepped_pointer_length(SEXP obj)
250
+ {
251
+ return 1;
252
+ }
253
+
254
+ SEXP get_element_from_altrepped_obj(SEXP R_altrepped_obj, R_xlen_t idx)
255
+ {
256
+ return R_altrep_data1(R_altrepped_obj);
257
+ }
258
+
259
+ template <class Model>
260
+ void delete_model_from_R_ptr(SEXP R_ptr)
261
+ {
262
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_ptr);
263
+ delete cpp_ptr;
264
+ R_SetExternalPtrAddr(R_ptr, nullptr);
265
+ R_ClearExternalPtr(R_ptr);
266
+ }
267
+
268
+ template <class Model>
269
+ SEXP get_altrepped_pointer(void *void_ptr)
270
+ {
271
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
272
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
273
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
274
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
275
+
276
+ std::unique_ptr<Model> *ptr = (std::unique_ptr<Model>*)void_ptr;
277
+ R_SetExternalPtrAddr(R_ptr, ptr->get());
278
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
279
+ ptr->release();
280
+
281
+ R_set_altrep_data1(out, R_ptr);
282
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
283
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
284
+
285
+ UNPROTECT(4);
286
+ return out;
287
+ }
288
+
289
+ template <class Model>
290
+ SEXP serialize_altrepped_pointer(SEXP altrepped_obj)
291
+ {
292
+ try {
293
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
294
+ R_xlen_t state_size = determine_serialized_size(*cpp_ptr);
295
+ SEXP R_state = PROTECT(Rf_allocVector(RAWSXP, state_size));
296
+ serialize_isotree(*cpp_ptr, (char*)RAW(R_state));
297
+ UNPROTECT(1);
298
+ return R_state;
299
+ }
300
+ catch (const std::exception &ex) {
301
+ Rf_error("%s\n", ex.what());
302
+ }
303
+
304
+ return R_NilValue; /* <- won't be reached */
305
+ }
306
+
307
+ template <class Model>
308
+ SEXP deserialize_altrepped_pointer(SEXP cls, SEXP R_state)
309
+ {
310
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
311
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
312
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
313
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
314
+
315
+ try {
316
+ std::unique_ptr<Model> model(new Model());
317
+ const char *inp = (const char*)RAW(R_state);
318
+ deserialize_isotree(*model, inp);
319
+
320
+ R_SetExternalPtrAddr(R_ptr, model.get());
321
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
322
+ model.release();
323
+ }
324
+ catch (const std::exception &ex) {
325
+ Rf_error("%s\n", ex.what());
326
+ }
327
+
328
+ R_set_altrep_data1(out, R_ptr);
329
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
330
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
331
+
332
+ UNPROTECT(4);
333
+ return out;
334
+ }
335
+
336
+ template <class Model>
337
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
338
+ {
339
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
340
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
341
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
342
+
343
+ if (!deep) {
344
+ R_set_altrep_data1(out, R_altrep_data1(altrepped_obj));
345
+ }
346
+
347
+ else {
348
+
349
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
350
+
351
+ try {
352
+ std::unique_ptr<Model> new_obj(new Model());
353
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
354
+ *new_obj = *cpp_ptr;
355
+
356
+ R_SetExternalPtrAddr(R_ptr, new_obj.get());
357
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
358
+ new_obj.release();
359
+ }
360
+
361
+ catch (const std::exception &ex) {
362
+ Rf_error("%s\n", ex.what());
363
+ }
364
+
365
+ R_set_altrep_data1(out, R_ptr);
366
+ UNPROTECT(1);
367
+ }
368
+
369
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
370
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_class);
371
+ UNPROTECT(3);
372
+ return out;
373
+ }
374
+
375
+ SEXP get_altrepped_null_pointer()
376
+ {
377
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
378
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
379
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
380
+ SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue));
381
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
382
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
383
+ UNPROTECT(4);
384
+ return out;
385
+ }
386
+
387
+ SEXP safe_get_altrepped_null_pointer(void *unused)
388
+ {
389
+ return get_altrepped_null_pointer();
390
+ }
391
+
392
+ SEXP serialize_altrepped_null(SEXP altrepped_obj)
393
+ {
394
+ return Rf_allocVector(RAWSXP, 0);
395
+ }
396
+
397
+ SEXP deserialize_altrepped_null(SEXP cls, SEXP R_state)
398
+ {
399
+ return get_altrepped_null_pointer();
400
+ }
401
+
402
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
403
+ {
404
+ return get_altrepped_null_pointer();
405
+ }
406
+
407
+ Rboolean inspect_altrepped_pointer(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int))
408
+ {
409
+ Rcpp::Rcout << "Altrepped pointer [address:" << R_ExternalPtrAddr(R_altrep_data1(x)) << "]\n";
410
+ return TRUE;
411
+ }
412
+
413
+ template <class Model>
414
+ Model* get_pointer_from_altrep(SEXP altrepped_obj)
415
+ {
416
+ return (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
417
+ }
418
+
419
+ template <class Model>
420
+ Model* get_pointer_from_xptr(SEXP R_ptr)
421
+ {
422
+ return (Model*)R_ExternalPtrAddr(R_ptr);
423
+ }
424
+
425
+ // [[Rcpp::init]]
426
+ void init_altrepped_vectors(DllInfo* dll)
427
+ {
428
+ altrepped_pointer_IsoForest = R_make_altlist_class("altrepped_pointer_IsoForest", "isotree", dll);
429
+ R_set_altrep_Length_method(altrepped_pointer_IsoForest, altrepped_pointer_length);
430
+ R_set_altrep_Inspect_method(altrepped_pointer_IsoForest, inspect_altrepped_pointer);
431
+ R_set_altrep_Serialized_state_method(altrepped_pointer_IsoForest, serialize_altrepped_pointer<IsoForest>);
432
+ R_set_altrep_Unserialize_method(altrepped_pointer_IsoForest, deserialize_altrepped_pointer<IsoForest>);
433
+ R_set_altrep_Duplicate_method(altrepped_pointer_IsoForest, duplicate_altrepped_pointer<IsoForest>);
434
+ R_set_altlist_Elt_method(altrepped_pointer_IsoForest, get_element_from_altrepped_obj);
435
+
436
+ altrepped_pointer_ExtIsoForest = R_make_altlist_class("altrepped_pointer_ExtIsoForest", "isotree", dll);
437
+ R_set_altrep_Length_method(altrepped_pointer_ExtIsoForest, altrepped_pointer_length);
438
+ R_set_altrep_Inspect_method(altrepped_pointer_ExtIsoForest, inspect_altrepped_pointer);
439
+ R_set_altrep_Serialized_state_method(altrepped_pointer_ExtIsoForest, serialize_altrepped_pointer<ExtIsoForest>);
440
+ R_set_altrep_Unserialize_method(altrepped_pointer_ExtIsoForest, deserialize_altrepped_pointer<ExtIsoForest>);
441
+ R_set_altrep_Duplicate_method(altrepped_pointer_ExtIsoForest, duplicate_altrepped_pointer<ExtIsoForest>);
442
+ R_set_altlist_Elt_method(altrepped_pointer_ExtIsoForest, get_element_from_altrepped_obj);
443
+
444
+ altrepped_pointer_Imputer = R_make_altlist_class("altrepped_pointer_Imputer", "isotree", dll);
445
+ R_set_altrep_Length_method(altrepped_pointer_Imputer, altrepped_pointer_length);
446
+ R_set_altrep_Inspect_method(altrepped_pointer_Imputer, inspect_altrepped_pointer);
447
+ R_set_altrep_Serialized_state_method(altrepped_pointer_Imputer, serialize_altrepped_pointer<Imputer>);
448
+ R_set_altrep_Unserialize_method(altrepped_pointer_Imputer, deserialize_altrepped_pointer<Imputer>);
449
+ R_set_altrep_Duplicate_method(altrepped_pointer_Imputer, duplicate_altrepped_pointer<Imputer>);
450
+ R_set_altlist_Elt_method(altrepped_pointer_Imputer, get_element_from_altrepped_obj);
451
+
452
+ altrepped_pointer_TreesIndexer = R_make_altlist_class("altrepped_pointer_TreesIndexer", "isotree", dll);
453
+ R_set_altrep_Length_method(altrepped_pointer_TreesIndexer, altrepped_pointer_length);
454
+ R_set_altrep_Inspect_method(altrepped_pointer_TreesIndexer, inspect_altrepped_pointer);
455
+ R_set_altrep_Serialized_state_method(altrepped_pointer_TreesIndexer, serialize_altrepped_pointer<TreesIndexer>);
456
+ R_set_altrep_Unserialize_method(altrepped_pointer_TreesIndexer, deserialize_altrepped_pointer<TreesIndexer>);
457
+ R_set_altrep_Duplicate_method(altrepped_pointer_TreesIndexer, duplicate_altrepped_pointer<TreesIndexer>);
458
+ R_set_altlist_Elt_method(altrepped_pointer_TreesIndexer, get_element_from_altrepped_obj);
459
+
460
+ altrepped_pointer_NullPointer = R_make_altlist_class("altrepped_pointer_NullPointer", "isotree", dll);
461
+ R_set_altrep_Length_method(altrepped_pointer_NullPointer, altrepped_pointer_length);
462
+ R_set_altrep_Inspect_method(altrepped_pointer_NullPointer, inspect_altrepped_pointer);
463
+ R_set_altrep_Serialized_state_method(altrepped_pointer_NullPointer, serialize_altrepped_null);
464
+ R_set_altrep_Unserialize_method(altrepped_pointer_NullPointer, deserialize_altrepped_null);
465
+ R_set_altrep_Duplicate_method(altrepped_pointer_NullPointer, duplicate_altrepped_pointer);
466
+ R_set_altlist_Elt_method(altrepped_pointer_NullPointer, get_element_from_altrepped_obj);
467
+ }
468
+
203
469
  double* set_R_nan_as_C_nan(double *x, size_t n, std::vector<double> &v, int nthreads)
204
470
  {
205
471
  v.assign(x, x + n);
@@ -223,6 +489,13 @@ double* set_R_nan_as_C_nan(double *x, size_t n, int nthreads)
223
489
  return x;
224
490
  }
225
491
 
492
+ TreesIndexer* get_indexer_ptr_from_R_obj(SEXP indexer_R_ptr)
493
+ {
494
+ TreesIndexer *out = get_pointer_from_xptr<TreesIndexer>(indexer_R_ptr);
495
+ if (out && out->indices.empty()) out = nullptr;
496
+ return out;
497
+ }
498
+
226
499
  // [[Rcpp::export(rng = false)]]
227
500
  Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat,
228
501
  Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
@@ -242,7 +515,7 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
242
515
  Rcpp::CharacterVector missing_action, bool all_perm,
243
516
  bool build_imputer, bool output_imputations, size_t min_imp_obs,
244
517
  Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows,
245
- int random_seed, bool use_long_double, int nthreads)
518
+ int random_seed, bool use_long_double, int nthreads, bool lazy_serialization)
246
519
  {
247
520
  double* numeric_data_ptr = NULL;
248
521
  int* categ_data_ptr = NULL;
@@ -384,18 +657,37 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
384
657
  }
385
658
 
386
659
  Rcpp::List outp = Rcpp::List::create(
387
- Rcpp::_["depths"] = depths,
388
- Rcpp::_["tmat"] = tmat,
389
- Rcpp::_["dmat"] = dmat,
390
- Rcpp::_["ptr"] = R_NilValue,
391
- Rcpp::_["serialized"] = R_NilValue,
392
- Rcpp::_["imp_ptr"] = R_NilValue,
393
- Rcpp::_["imp_ser"] = R_NilValue,
394
- Rcpp::_["imputed_num"] = R_NilValue,
395
- Rcpp::_["imputed_cat"] = R_NilValue,
396
- Rcpp::_["err"] = Rcpp::LogicalVector::create(1)
660
+ Rcpp::_["depths"] = depths,
661
+ Rcpp::_["tmat"] = tmat,
662
+ Rcpp::_["dmat"] = dmat,
663
+ Rcpp::_["model"] = R_NilValue,
664
+ Rcpp::_["imputer"] = R_NilValue,
665
+ Rcpp::_["indexer"] = R_NilValue,
666
+ Rcpp::_["imputed_num"] = R_NilValue,
667
+ Rcpp::_["imputed_cat"] = R_NilValue,
668
+ Rcpp::_["err"] = Rcpp::LogicalVector::create(1)
397
669
  );
398
670
 
671
+ Rcpp::List model_lst_nonlazy = Rcpp::List::create(
672
+ Rcpp::_["ptr"] = R_NilValue,
673
+ Rcpp::_["ser"] = R_NilValue
674
+ );
675
+
676
+ Rcpp::List imputer_lst_nonlazy = Rcpp::List::create(
677
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
678
+ Rcpp::_["ser"] = R_NilValue
679
+ );
680
+
681
+ if (lazy_serialization) {
682
+ outp["indexer"] = get_altrepped_null_pointer();
683
+ }
684
+ else {
685
+ outp["indexer"] = Rcpp::List::create(
686
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
687
+ Rcpp::_["ser"] = R_NilValue
688
+ );
689
+ }
690
+
399
691
  std::unique_ptr<IsoForest> model_ptr(nullptr);
400
692
  std::unique_ptr<ExtIsoForest> ext_model_ptr(nullptr);
401
693
  std::unique_ptr<Imputer> imputer_ptr(nullptr);
@@ -408,9 +700,7 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
408
700
  if (build_imputer)
409
701
  imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
410
702
 
411
- int ret_val;
412
- try {
413
- ret_val =
703
+ int ret_val =
414
704
  fit_iforest(model_ptr.get(), ext_model_ptr.get(),
415
705
  numeric_data_ptr, ncols_numeric,
416
706
  categ_data_ptr, ncols_categ, ncat_ptr,
@@ -432,14 +722,21 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
432
722
  all_perm, imputer_ptr.get(), min_imp_obs,
433
723
  depth_imp_C, weigh_imp_rows_C, output_imputations,
434
724
  (uint64_t) random_seed, use_long_double, nthreads);
435
- }
436
- catch (std::bad_alloc &e) {
437
- throw_mem_err();
438
- }
439
- Rcpp::checkUserInterrupt();
440
725
 
726
+ Rcpp::checkUserInterrupt(); /* <- nothing is returned in this case */
727
+ /* Note to self: the procedure has its own interrupt checker, so when an interrupt
728
+ signal is triggered, first it will print a message about it, then re-issue the
729
+ signal, then check for interrupt through Rcpp's, which will return nothing to
730
+ the outside and will not raise any error. In this case, at least the user will
731
+ see the error message. Note that Rcpp's interrupt non-return, unlike R's, triggers
732
+ stack unwinding for C++ objects. */
733
+
734
+ /* Note to self: since the function for fitting the model uses the C++ exception system,
735
+ and the stop signals are translated into Rcpp stops, this section below should not
736
+ be reachable anyhow. */
441
737
  if (ret_val == EXIT_FAILURE)
442
738
  {
739
+ Rcpp::Rcerr << "Unexpected error" << std::endl;
443
740
  return Rcpp::unwindProtect(safe_errlist, nullptr);
444
741
  }
445
742
 
@@ -447,63 +744,66 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
447
744
  tmat_to_dense(tmat_ptr, dmat_ptr, nrows, standardize_dist? 0. : std::numeric_limits<double>::infinity());
448
745
 
449
746
  bool serialization_failed = false;
450
- Rcpp::RawVector serialized_obj;
451
- try {
747
+
748
+ if (lazy_serialization)
749
+ {
750
+ if (ndim == 1) {
751
+ outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&model_ptr);
752
+ }
753
+ else {
754
+ outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&ext_model_ptr);
755
+ }
756
+
757
+ if (build_imputer) {
758
+ outp["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&imputer_ptr);
759
+ }
760
+ else {
761
+ outp["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
762
+ }
763
+ }
764
+
765
+ else
766
+ {
767
+ Rcpp::RawVector serialized_obj;
768
+ /* Note to self: the serialization functions use unwind protection internally. */
452
769
  if (ndim == 1)
453
770
  serialized_obj = serialize_cpp_obj(model_ptr.get());
454
771
  else
455
772
  serialized_obj = serialize_cpp_obj(ext_model_ptr.get());
456
- }
457
- catch (std::bad_alloc &e) {
458
- throw_mem_err();
459
- }
460
- if (unlikely(!serialized_obj.size())) serialization_failed = true;
461
- if (unlikely(serialization_failed)) {
462
- if (ndim == 1)
463
- model_ptr.reset();
464
- else
465
- ext_model_ptr.reset();
466
- }
467
773
 
468
- if (!serialization_failed)
469
- {
470
- outp["serialized"] = serialized_obj;
774
+ if (unlikely(!serialized_obj.size())) serialization_failed = true;
775
+ if (unlikely(serialization_failed)) {
776
+ throw Rcpp::exception("Error: insufficient memory\n");
777
+ }
778
+
779
+ model_lst_nonlazy["ser"] = serialized_obj;
471
780
  if (ndim == 1) {
472
- outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get());
781
+ model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get());
473
782
  model_ptr.release();
474
783
  }
475
784
  else {
476
- outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get());
785
+ model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get());
477
786
  ext_model_ptr.release();
478
787
  }
479
- } else
480
- outp["ptr"] = R_NilValue;
481
788
 
482
- if (build_imputer && !serialization_failed)
483
- {
484
- try {
485
- outp["imp_ser"] = serialize_cpp_obj(imputer_ptr.get());
486
- }
487
- catch (std::bad_alloc &e) {
488
- throw_mem_err();
489
- }
490
- if (!Rf_xlength(outp["imp_ser"]))
789
+ outp["model"] = model_lst_nonlazy;
790
+
791
+ if (build_imputer)
491
792
  {
492
- serialization_failed = true;
493
- imputer_ptr.reset();
494
- if (ndim == 1)
495
- model_ptr.reset();
496
- else
497
- ext_model_ptr.reset();
498
- outp["imp_ptr"] = R_NilValue;
499
- outp["ptr"] = R_NilValue;
500
- } else {
501
- outp["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get());
793
+ imputer_lst_nonlazy["ser"] = serialize_cpp_obj(imputer_ptr.get());
794
+ if (!Rf_xlength(imputer_lst_nonlazy["ser"]))
795
+ {
796
+ throw Rcpp::exception("Error: insufficient memory\n");
797
+ }
798
+
799
+ imputer_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get());
502
800
  imputer_ptr.release();
503
801
  }
802
+
803
+ outp["imputer"] = imputer_lst_nonlazy;
504
804
  }
505
805
 
506
- if (output_imputations && !serialization_failed)
806
+ if (output_imputations)
507
807
  {
508
808
  outp["imputed_num"] = Xcpp;
509
809
  outp["imputed_cat"] = X_cat;
@@ -534,12 +834,13 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
534
834
  Rcpp::NumericVector ref_X_num, Rcpp::IntegerVector ref_X_cat,
535
835
  Rcpp::NumericVector ref_Xc, Rcpp::IntegerVector ref_Xc_ind, Rcpp::IntegerVector ref_Xc_indptr,
536
836
  uint64_t random_seed, bool use_long_double,
537
- Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update)
837
+ Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update,
838
+ bool is_altrepped)
538
839
  {
539
840
  Rcpp::List out = Rcpp::List::create(
540
- Rcpp::_["serialized"] = R_NilValue,
541
- Rcpp::_["imp_ser"] = R_NilValue,
542
- Rcpp::_["ind_ser"] = R_NilValue
841
+ Rcpp::_["model_ser"] = R_NilValue,
842
+ Rcpp::_["imputer_ser"] = R_NilValue,
843
+ Rcpp::_["indexer_ser"] = R_NilValue
543
844
  );
544
845
 
545
846
  Rcpp::IntegerVector ntrees_plus1 = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]) + 1);
@@ -674,10 +975,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
674
975
  if (build_imputer)
675
976
  imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
676
977
 
677
- if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
678
- indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
679
- if (indexer_ptr != NULL && indexer_ptr->indices.empty())
680
- indexer_ptr = NULL;
978
+ indexer_ptr = get_indexer_ptr_from_R_obj(indexer_R_ptr);
681
979
 
682
980
  size_t old_ntrees = (ndim == 1)? (model_ptr->trees.size()) : (ext_model_ptr->hplanes.size());
683
981
 
@@ -706,6 +1004,9 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
706
1004
 
707
1005
  Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
708
1006
  size_t new_size;
1007
+
1008
+ if (is_altrepped) goto dont_serialize;
1009
+
709
1010
  try
710
1011
  {
711
1012
  if (ndim == 1)
@@ -719,7 +1020,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
719
1020
  new_serialized = resize_vec(serialized_obj, new_size);
720
1021
  char *temp = (char*)RAW(new_serialized);
721
1022
  incremental_serialize_isotree(*model_ptr, temp);
722
- out["serialized"] = new_serialized;
1023
+ out["model_ser"] = new_serialized;
723
1024
  }
724
1025
 
725
1026
  catch (std::runtime_error &e) {
@@ -729,7 +1030,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
729
1030
 
730
1031
  else {
731
1032
  serialize_anew_singlevar:
732
- out["serialized"] = serialize_cpp_obj(model_ptr);
1033
+ out["model_ser"] = serialize_cpp_obj(model_ptr);
733
1034
  }
734
1035
  }
735
1036
 
@@ -744,7 +1045,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
744
1045
  new_serialized = resize_vec(serialized_obj, new_size);
745
1046
  char *temp = (char*)RAW(new_serialized);
746
1047
  incremental_serialize_isotree(*ext_model_ptr, temp);
747
- out["serialized"] = new_serialized;
1048
+ out["model_ser"] = new_serialized;
748
1049
  }
749
1050
 
750
1051
  catch (std::runtime_error &e) {
@@ -754,7 +1055,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
754
1055
 
755
1056
  else {
756
1057
  serialize_anew_ext:
757
- out["serialized"] = serialize_cpp_obj(ext_model_ptr);
1058
+ out["model_ser"] = serialize_cpp_obj(ext_model_ptr);
758
1059
  }
759
1060
  }
760
1061
 
@@ -769,7 +1070,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
769
1070
  new_imp_serialized = resize_vec(serialized_imputer, new_size);
770
1071
  char *temp = (char*)RAW(new_imp_serialized);
771
1072
  incremental_serialize_isotree(*imputer_ptr, temp);
772
- out["imp_ser"] = new_imp_serialized;
1073
+ out["imputer_ser"] = new_imp_serialized;
773
1074
  }
774
1075
 
775
1076
  catch (std::runtime_error &e) {
@@ -779,7 +1080,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
779
1080
 
780
1081
  else {
781
1082
  serialize_anew_imp:
782
- out["imp_ser"] = serialize_cpp_obj(imputer_ptr);
1083
+ out["imputer_ser"] = serialize_cpp_obj(imputer_ptr);
783
1084
  }
784
1085
  }
785
1086
 
@@ -794,7 +1095,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
794
1095
  new_ind_serialized = resize_vec(serialized_indexer, new_size);
795
1096
  char *temp = (char*)RAW(new_ind_serialized);
796
1097
  incremental_serialize_isotree(*indexer_ptr, temp);
797
- out["ind_ser"] = new_ind_serialized;
1098
+ out["indexer_ser"] = new_ind_serialized;
798
1099
  }
799
1100
 
800
1101
  catch (std::runtime_error &e) {
@@ -804,7 +1105,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
804
1105
 
805
1106
  else {
806
1107
  serialize_anew_ind:
807
- out["ind_ser"] = serialize_cpp_obj(indexer_ptr);
1108
+ out["indexer_ser"] = serialize_cpp_obj(indexer_ptr);
808
1109
  }
809
1110
  }
810
1111
  }
@@ -822,11 +1123,27 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
822
1123
  throw;
823
1124
  }
824
1125
 
825
- model_cpp_obj_update["serialized"] = out["serialized"];
826
- if (build_imputer)
827
- model_cpp_obj_update["imp_ser"] = out["imp_ser"];
828
- if (indexer_ptr != NULL)
829
- model_cpp_obj_update["ind_ser"] = out["ind_ser"];
1126
+ {
1127
+ Rcpp::List model_lst = model_cpp_obj_update["model"];
1128
+ model_lst["ser"] = out["model_ser"];
1129
+ model_cpp_obj_update["model"] = model_lst;
1130
+
1131
+ if (build_imputer)
1132
+ {
1133
+ Rcpp::List imputer_lst = model_cpp_obj_update["imputer"];
1134
+ imputer_lst["ser"] = out["imputer_ser"];
1135
+ model_cpp_obj_update["imputer"] = imputer_lst;
1136
+ }
1137
+
1138
+ if (indexer_ptr)
1139
+ {
1140
+ Rcpp::List indexer_lst = model_cpp_obj_update["indexer"];
1141
+ indexer_lst["ser"] = out["indexer_ser"];
1142
+ model_cpp_obj_update["indexer"] = indexer_lst;
1143
+ }
1144
+ }
1145
+
1146
+ dont_serialize:
830
1147
  model_params_update["ntrees"] = ntrees_plus1;
831
1148
  }
832
1149
 
@@ -880,14 +1197,10 @@ void predict_iso(SEXP model_R_ptr, bool is_extended,
880
1197
  IsoForest* model_ptr = NULL;
881
1198
  ExtIsoForest* ext_model_ptr = NULL;
882
1199
  if (is_extended)
883
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1200
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
884
1201
  else
885
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
886
- TreesIndexer* indexer = NULL;
887
- if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
888
- indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
889
- if (indexer != NULL && indexer->indices.empty())
890
- indexer = NULL;
1202
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1203
+ TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr);
891
1204
 
892
1205
  MissingAction missing_action = is_extended?
893
1206
  ext_model_ptr->missing_action
@@ -951,17 +1264,13 @@ void dist_iso(SEXP model_R_ptr, SEXP indexer_R_ptr,
951
1264
 
952
1265
  IsoForest* model_ptr = NULL;
953
1266
  ExtIsoForest* ext_model_ptr = NULL;
954
- TreesIndexer* indexer = NULL;
1267
+ TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr);
955
1268
  if (is_extended)
956
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1269
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
957
1270
  else
958
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
959
- if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
960
- indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
961
- if (indexer != NULL && (indexer->indices.empty() || (!as_kernel && indexer->indices.front().node_distances.empty())))
962
- indexer = NULL;
1271
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
963
1272
 
964
- if (use_reference_points && indexer != NULL && !indexer->indices.front().reference_points.empty()) {
1273
+ if (use_reference_points && indexer && !indexer->indices.front().reference_points.empty()) {
965
1274
  tmat_ptr = NULL;
966
1275
  dmat_ptr = NULL;
967
1276
  rmat_ptr = REAL(rmat);
@@ -1044,11 +1353,13 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
1044
1353
  IsoForest* model_ptr = NULL;
1045
1354
  ExtIsoForest* ext_model_ptr = NULL;
1046
1355
  if (is_extended)
1047
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1356
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
1048
1357
  else
1049
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1358
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1359
+
1360
+ Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imputer_R_ptr);
1050
1361
 
1051
- Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1362
+ if (!imputer_ptr) throw Rcpp::exception("Error: requested missing value imputation, but model was built without imputer.\n");
1052
1363
 
1053
1364
 
1054
1365
  impute_missing_values(numeric_data_ptr, categ_data_ptr, true,
@@ -1064,85 +1375,154 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
1064
1375
  }
1065
1376
 
1066
1377
  // [[Rcpp::export(rng = false)]]
1067
- void drop_imputer(Rcpp::List lst_modify, Rcpp::List lst_modify2)
1378
+ void drop_imputer(bool is_altrepped, bool free_cpp,
1379
+ SEXP lst_imputer, Rcpp::List lst_cpp_objects, Rcpp::List lst_params)
1068
1380
  {
1069
- Rcpp::RawVector empty_ser = Rcpp::RawVector();
1070
- Rcpp::LogicalVector FalseObj = Rcpp::LogicalVector::create(false);
1071
- Rcpp::XPtr<Imputer> imp_ptr = lst_modify["imp_ptr"];
1072
- imp_ptr.release();
1381
+ SEXP FalseObj = PROTECT(Rf_ScalarLogical(0));
1382
+ SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
1383
+ SEXP altrepped_null = PROTECT(get_altrepped_null_pointer());
1384
+
1385
+ if (is_altrepped) {
1386
+
1387
+ if (free_cpp) {
1388
+ SEXP imp_R_ptr = R_altrep_data1(lst_imputer);
1389
+ Imputer* imputer_ptr = (Imputer*)R_ExternalPtrAddr(imp_R_ptr);
1390
+ delete imputer_ptr;
1391
+ R_SetExternalPtrAddr(imp_R_ptr, nullptr);
1392
+ R_ClearExternalPtr(imp_R_ptr);
1393
+ }
1394
+
1395
+ lst_cpp_objects["imputer"] = altrepped_null;
1396
+
1397
+ }
1398
+
1399
+ else {
1400
+
1401
+ if (free_cpp) {
1402
+ SEXP imp_R_ptr = VECTOR_ELT(lst_imputer, 0);
1403
+ Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imp_R_ptr);
1404
+ delete imputer_ptr;
1405
+ R_SetExternalPtrAddr(imp_R_ptr, nullptr);
1406
+ R_ClearExternalPtr(imp_R_ptr);
1407
+ SET_VECTOR_ELT(lst_imputer, 0, imp_R_ptr);
1408
+ }
1073
1409
 
1074
- lst_modify["imp_ser"] = empty_ser;
1075
- lst_modify2["build_imputer"] = FalseObj;
1410
+ SET_VECTOR_ELT(lst_imputer, 0, blank_ptr);
1411
+ SET_VECTOR_ELT(lst_imputer, 1, R_NilValue);
1412
+ }
1413
+
1414
+ lst_params["build_imputer"] = FalseObj;
1415
+ UNPROTECT(3);
1076
1416
  }
1077
1417
 
1078
1418
  // [[Rcpp::export(rng = false)]]
1079
- void drop_indexer(Rcpp::List lst_modify, Rcpp::List lst_modify2)
1419
+ void drop_indexer(bool is_altrepped, bool free_cpp,
1420
+ SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata)
1080
1421
  {
1081
- Rcpp::XPtr<TreesIndexer> empty_ptr = Rcpp::XPtr<TreesIndexer>(nullptr, false);
1082
- Rcpp::RawVector empty_ser = Rcpp::RawVector();
1083
- Rcpp::CharacterVector empty_char = Rcpp::CharacterVector();
1084
- Rcpp::XPtr<TreesIndexer> indexer = lst_modify["indexer"];
1085
- indexer.release();
1422
+ SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0));
1423
+ SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
1424
+ SEXP altrepped_null = PROTECT(get_altrepped_null_pointer());
1425
+
1426
+ if (is_altrepped) {
1427
+
1428
+ if (free_cpp) {
1429
+ SEXP ind_R_ptr = R_altrep_data1(lst_indexer);
1430
+ TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr);
1431
+ delete indexer_ptr;
1432
+ R_SetExternalPtrAddr(ind_R_ptr, nullptr);
1433
+ R_ClearExternalPtr(ind_R_ptr);
1434
+ }
1435
+
1436
+ lst_cpp_objects["indexer"] = altrepped_null;
1437
+ }
1438
+
1439
+ else {
1440
+
1441
+ if (free_cpp) {
1442
+ SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0);
1443
+ TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr);
1444
+ delete indexer_ptr;
1445
+ R_SetExternalPtrAddr(ind_R_ptr, nullptr);
1446
+ R_ClearExternalPtr(ind_R_ptr);
1447
+ SET_VECTOR_ELT(lst_indexer, 0, ind_R_ptr);
1448
+ }
1449
+
1450
+ SET_VECTOR_ELT(lst_indexer, 0, blank_ptr);
1451
+ SET_VECTOR_ELT(lst_indexer, 1, R_NilValue);
1452
+ }
1086
1453
 
1087
- lst_modify["ind_ser"] = empty_ser;
1088
- lst_modify2["reference_names"] = empty_char;
1454
+ lst_metadata["reference_names"] = empty_str;
1455
+ UNPROTECT(3);
1089
1456
  }
1090
1457
 
1091
1458
  // [[Rcpp::export(rng = false)]]
1092
- void drop_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2)
1459
+ void drop_reference_points(bool is_altrepped, SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata)
1093
1460
  {
1094
- Rcpp::CharacterVector empty_char = Rcpp::CharacterVector();
1095
- Rcpp::RawVector empty_ser = Rcpp::RawVector();
1096
- Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
1097
- TreesIndexer *indexer_ptr = indexer_R_ptr.get();
1098
- if (indexer_ptr == NULL) {
1099
- lst_modify["ind_ser"] = empty_ser;
1100
- lst_modify2["reference_names"] = empty_char;
1101
- return;
1102
- }
1103
- if (indexer_ptr->indices.empty()) {
1104
- indexer_R_ptr.release();
1105
- lst_modify["ind_ser"] = empty_ser;
1106
- lst_modify2["reference_names"] = empty_char;
1107
- return;
1108
- }
1109
- if (indexer_ptr->indices.front().reference_points.empty()) {
1110
- lst_modify2["reference_names"] = empty_char;
1111
- return;
1461
+ SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0));
1462
+
1463
+ if (is_altrepped)
1464
+ {
1465
+ SEXP ind_R_ptr = R_altrep_data1(lst_indexer);
1466
+ TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr);
1467
+ if (!indexer_ptr) return;
1468
+
1469
+ for (auto &tree : indexer_ptr->indices)
1470
+ {
1471
+ tree.reference_points.clear();
1472
+ tree.reference_indptr.clear();
1473
+ tree.reference_mapping.clear();
1474
+ }
1112
1475
  }
1113
1476
 
1114
- std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr));
1115
- for (auto &tree : new_indexer->indices)
1477
+ else
1116
1478
  {
1117
- tree.reference_points.clear();
1118
- tree.reference_indptr.clear();
1119
- tree.reference_mapping.clear();
1479
+ SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0);
1480
+ TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr);
1481
+ if (!indexer_ptr) return;
1482
+
1483
+ std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr));
1484
+ for (auto &tree : new_indexer->indices)
1485
+ {
1486
+ tree.reference_points.clear();
1487
+ tree.reference_indptr.clear();
1488
+ tree.reference_mapping.clear();
1489
+ }
1490
+
1491
+ SET_VECTOR_ELT(lst_indexer, 1, serialize_cpp_obj(new_indexer.get()));
1492
+ *indexer_ptr = std::move(*new_indexer);
1493
+ new_indexer.release();
1120
1494
  }
1121
- Rcpp::RawVector ind_ser = serialize_cpp_obj(new_indexer.get());
1122
- *indexer_ptr = std::move(*new_indexer);
1123
- new_indexer.release();
1124
- lst_modify["ind_ser"] = ind_ser;
1125
- lst_modify2["reference_names"] = empty_char;
1495
+
1496
+ lst_metadata["reference_names"] = empty_str;
1497
+ UNPROTECT(1);
1126
1498
  }
1127
1499
 
1128
1500
  // [[Rcpp::export(rng = false)]]
1129
1501
  Rcpp::List subset_trees
1130
1502
  (
1131
1503
  SEXP model_R_ptr, SEXP imputer_R_ptr, SEXP indexer_R_ptr,
1132
- bool is_extended, bool has_imputer,
1504
+ bool is_extended, bool is_altrepped,
1133
1505
  Rcpp::IntegerVector trees_take
1134
1506
  )
1135
1507
  {
1136
- bool has_indexer = !Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL;
1137
-
1138
1508
  Rcpp::List out = Rcpp::List::create(
1509
+ Rcpp::_["model"] = R_NilValue,
1510
+ Rcpp::_["imputer"] = R_NilValue,
1511
+ Rcpp::_["indexer"] = R_NilValue
1512
+ );
1513
+ Rcpp::List lst_model = Rcpp::List::create(
1139
1514
  Rcpp::_["ptr"] = R_NilValue,
1140
- Rcpp::_["serialized"] = R_NilValue,
1141
- Rcpp::_["imp_ptr"] = R_NilValue,
1142
- Rcpp::_["imp_ser"] = R_NilValue,
1143
- Rcpp::_["indexer"] = R_NilValue,
1144
- Rcpp::_["ind_ser"] = R_NilValue
1515
+ Rcpp::_["ser"] = R_NilValue
1516
+ );
1517
+ Rcpp::List lst_imputer = Rcpp::List::create(
1518
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
1519
+ Rcpp::_["ser"] = R_NilValue
1145
1520
  );
1521
+ Rcpp::List lst_indexer = Rcpp::List::create(
1522
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
1523
+ Rcpp::_["ser"] = R_NilValue
1524
+ );
1525
+
1146
1526
 
1147
1527
  IsoForest* model_ptr = NULL;
1148
1528
  ExtIsoForest* ext_model_ptr = NULL;
@@ -1161,16 +1541,15 @@ Rcpp::List subset_trees
1161
1541
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1162
1542
  new_model_ptr = std::unique_ptr<IsoForest>(new IsoForest());
1163
1543
  }
1164
-
1165
1544
 
1166
- if (has_imputer) {
1167
- imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1168
- new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
1545
+ imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1546
+ if (imputer_ptr) {
1547
+ new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
1169
1548
  }
1170
1549
 
1171
- if (has_indexer) {
1172
- indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1173
- new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer());
1550
+ indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1551
+ if (indexer_ptr) {
1552
+ new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer());
1174
1553
  }
1175
1554
 
1176
1555
  std::unique_ptr<size_t[]> trees_take_(new size_t[trees_take.size()]);
@@ -1184,31 +1563,49 @@ Rcpp::List subset_trees
1184
1563
  trees_take_.get(), trees_take.size());
1185
1564
  trees_take_.reset();
1186
1565
 
1187
- if (!is_extended)
1188
- out["serialized"] = serialize_cpp_obj(new_model_ptr.get());
1566
+ if (is_altrepped)
1567
+ {
1568
+ out["model"] = is_extended?
1569
+ Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&new_ext_model_ptr)
1570
+ :
1571
+ Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&new_model_ptr);
1572
+ out["imputer"] = imputer_ptr?
1573
+ Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&new_imputer_ptr)
1574
+ :
1575
+ Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1576
+ out["indexer"] = indexer_ptr?
1577
+ Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&new_indexer_ptr)
1578
+ :
1579
+ Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1580
+ }
1581
+
1189
1582
  else
1190
- out["serialized"] = serialize_cpp_obj(new_ext_model_ptr.get());
1191
- if (has_imputer)
1192
- out["imp_ser"] = serialize_cpp_obj(new_imputer_ptr.get());
1193
- if (has_indexer)
1194
- out["ind_ser"] = serialize_cpp_obj(new_indexer_ptr.get());
1583
+ {
1584
+ lst_model["ser"] = is_extended? serialize_cpp_obj(new_ext_model_ptr.get()) : serialize_cpp_obj(new_model_ptr.get());
1585
+ if (imputer_ptr) lst_imputer["ser"] = serialize_cpp_obj(new_imputer_ptr.get());
1586
+ if (indexer_ptr) lst_indexer["ser"] = serialize_cpp_obj(new_indexer_ptr.get());
1195
1587
 
1196
- if (!is_extended) {
1197
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get());
1588
+ lst_model["ptr"] = is_extended?
1589
+ Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get())
1590
+ :
1591
+ Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get());
1198
1592
  new_model_ptr.release();
1593
+
1594
+ if (imputer_ptr) {
1595
+ lst_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get());
1596
+ new_imputer_ptr.release();
1597
+ }
1598
+
1599
+ if (indexer_ptr) {
1600
+ lst_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get());
1601
+ new_indexer_ptr.release();
1602
+ }
1603
+
1604
+ out["model"] = lst_model;
1605
+ out["imputer"] = lst_imputer;
1606
+ out["indexer"] = lst_indexer;
1199
1607
  }
1200
- else {
1201
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get());
1202
- new_ext_model_ptr.release();
1203
- }
1204
- if (has_imputer) {
1205
- out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get());
1206
- new_imputer_ptr.release();
1207
- }
1208
- if (has_indexer) {
1209
- out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get());
1210
- new_indexer_ptr.release();
1211
- }
1608
+
1212
1609
  return out;
1213
1610
  }
1214
1611
 
@@ -1251,12 +1648,12 @@ Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
1251
1648
  ExtIsoForest* ext_model_ptr = NULL;
1252
1649
  if (is_extended)
1253
1650
  {
1254
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1651
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
1255
1652
  ntrees = ext_model_ptr->hplanes.size();
1256
1653
  }
1257
1654
  else
1258
1655
  {
1259
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1656
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1260
1657
  ntrees = model_ptr->trees.size();
1261
1658
  }
1262
1659
 
@@ -1282,65 +1679,52 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1282
1679
  Rcpp::RawVector serialized_imputer,
1283
1680
  Rcpp::RawVector serialized_indexer,
1284
1681
  Rcpp::List &model_cpp_obj_update,
1285
- Rcpp::List &model_params_update)
1682
+ Rcpp::List &model_params_update,
1683
+ bool is_altrepped)
1286
1684
  {
1287
- if ((!Rf_isNull(imp_R_ptr) && R_ExternalPtrAddr(imp_R_ptr) != NULL)
1288
- &&
1289
- !(!Rf_isNull(oimp_R_ptr) && R_ExternalPtrAddr(oimp_R_ptr) != NULL))
1290
- {
1291
- Rcpp::stop("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n");
1292
- }
1293
- if ((!Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL)
1294
- &&
1295
- !(!Rf_isNull(oind_R_ptr) && R_ExternalPtrAddr(oind_R_ptr) != NULL))
1296
- {
1297
- Rcpp::stop("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n");
1298
- }
1299
-
1300
1685
  Rcpp::List out = Rcpp::List::create(
1301
- Rcpp::_["serialized"] = R_NilValue,
1302
- Rcpp::_["imp_ser"] = R_NilValue,
1303
- Rcpp::_["ind_ser"] = R_NilValue
1686
+ Rcpp::_["model_ser"] = R_NilValue,
1687
+ Rcpp::_["imputer_ser"] = R_NilValue,
1688
+ Rcpp::_["indexer_ser"] = R_NilValue
1304
1689
  );
1305
1690
 
1306
1691
  Rcpp::IntegerVector ntrees_new = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]));
1307
1692
 
1308
- IsoForest* model_ptr = NULL;
1309
- IsoForest* other_ptr = NULL;
1310
- ExtIsoForest* ext_model_ptr = NULL;
1311
- ExtIsoForest* ext_other_ptr = NULL;
1312
- Imputer* imputer_ptr = NULL;
1313
- Imputer* oimputer_ptr = NULL;
1314
- TreesIndexer* indexer_ptr = NULL;
1315
- TreesIndexer* oindexer_ptr = NULL;
1693
+ IsoForest* model_ptr = nullptr;
1694
+ IsoForest* other_ptr = nullptr;
1695
+ ExtIsoForest* ext_model_ptr = nullptr;
1696
+ ExtIsoForest* ext_other_ptr = nullptr;
1697
+ Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1698
+ Imputer* oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr));
1699
+ TreesIndexer* indexer_ptr = get_indexer_ptr_from_R_obj(ind_R_ptr);
1700
+ TreesIndexer* oindexer_ptr = get_indexer_ptr_from_R_obj(oind_R_ptr);
1316
1701
  size_t old_ntrees;
1317
1702
 
1318
1703
  if (is_extended) {
1319
1704
  ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1320
1705
  ext_other_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(other_R_ptr));
1321
1706
  old_ntrees = ext_model_ptr->hplanes.size();
1707
+ if (ext_model_ptr == ext_other_ptr) {
1708
+ throw Rcpp::exception("Error: attempting to append trees from one model to itself.");
1709
+ }
1322
1710
  } else {
1323
1711
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1324
1712
  other_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(other_R_ptr));
1325
1713
  old_ntrees = model_ptr->trees.size();
1714
+ if (model_ptr == other_ptr) {
1715
+ throw Rcpp::exception("Error: attempting to append trees from one model to itself.");
1716
+ }
1326
1717
  }
1327
1718
 
1328
- if (!Rf_isNull(imp_R_ptr) && !Rf_isNull(oimp_R_ptr) &&
1329
- R_ExternalPtrAddr(imp_R_ptr) != NULL &&
1330
- R_ExternalPtrAddr(oimp_R_ptr) != NULL)
1331
- {
1332
- imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1333
- oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr));
1719
+ if (imputer_ptr && !oimputer_ptr) {
1720
+ throw Rcpp::exception("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n");
1334
1721
  }
1335
1722
 
1336
- if (!Rf_isNull(ind_R_ptr) && !Rf_isNull(oind_R_ptr) &&
1337
- R_ExternalPtrAddr(ind_R_ptr) != NULL &&
1338
- R_ExternalPtrAddr(oind_R_ptr) != NULL)
1339
- {
1340
- indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
1341
- oindexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(oind_R_ptr));
1723
+ if (indexer_ptr && !oindexer_ptr) {
1724
+ throw Rcpp::exception("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n");
1342
1725
  }
1343
1726
 
1727
+
1344
1728
  merge_models(model_ptr, other_ptr,
1345
1729
  ext_model_ptr, ext_other_ptr,
1346
1730
  imputer_ptr, oimputer_ptr,
@@ -1348,6 +1732,9 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1348
1732
 
1349
1733
  Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
1350
1734
  size_t new_size;
1735
+
1736
+ if (is_altrepped) goto dont_serialize;
1737
+
1351
1738
  try
1352
1739
  {
1353
1740
  if (!is_extended)
@@ -1361,7 +1748,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1361
1748
  new_serialized = resize_vec(serialized_obj, new_size);
1362
1749
  char *temp = (char*)RAW(new_serialized);
1363
1750
  incremental_serialize_isotree(*model_ptr, temp);
1364
- out["serialized"] = new_serialized;
1751
+ out["model_ser"] = new_serialized;
1365
1752
  }
1366
1753
 
1367
1754
  catch (std::runtime_error &e) {
@@ -1371,7 +1758,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1371
1758
 
1372
1759
  else {
1373
1760
  serialize_anew_singlevar:
1374
- out["serialized"] = serialize_cpp_obj(model_ptr);
1761
+ out["model_ser"] = serialize_cpp_obj(model_ptr);
1375
1762
  }
1376
1763
  }
1377
1764
 
@@ -1386,7 +1773,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1386
1773
  new_serialized = resize_vec(serialized_obj, new_size);
1387
1774
  char *temp = (char*)RAW(new_serialized);
1388
1775
  incremental_serialize_isotree(*ext_model_ptr, temp);
1389
- out["serialized"] = new_serialized;
1776
+ out["model_ser"] = new_serialized;
1390
1777
  }
1391
1778
 
1392
1779
  catch (std::runtime_error &e) {
@@ -1396,11 +1783,11 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1396
1783
 
1397
1784
  else {
1398
1785
  serialize_anew_ext:
1399
- out["serialized"] = serialize_cpp_obj(ext_model_ptr);
1786
+ out["model_ser"] = serialize_cpp_obj(ext_model_ptr);
1400
1787
  }
1401
1788
  }
1402
1789
 
1403
- if (imputer_ptr != NULL)
1790
+ if (imputer_ptr)
1404
1791
  {
1405
1792
  if (serialized_imputer.size() &&
1406
1793
  check_can_undergo_incremental_serialization(*imputer_ptr, (char*)RAW(serialized_imputer)))
@@ -1411,7 +1798,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1411
1798
  new_imp_serialized = resize_vec(serialized_imputer, new_size);
1412
1799
  char *temp = (char*)RAW(new_imp_serialized);
1413
1800
  incremental_serialize_isotree(*imputer_ptr, temp);
1414
- out["imp_ser"] = new_imp_serialized;
1801
+ out["imputer_ser"] = new_imp_serialized;
1415
1802
  }
1416
1803
 
1417
1804
  catch (std::runtime_error &e) {
@@ -1421,11 +1808,11 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1421
1808
 
1422
1809
  else {
1423
1810
  serialize_anew_imp:
1424
- out["imp_ser"] = serialize_cpp_obj(imputer_ptr);
1811
+ out["imputer_ser"] = serialize_cpp_obj(imputer_ptr);
1425
1812
  }
1426
1813
  }
1427
1814
 
1428
- if (indexer_ptr != NULL)
1815
+ if (indexer_ptr)
1429
1816
  {
1430
1817
  if (serialized_indexer.size() &&
1431
1818
  check_can_undergo_incremental_serialization(*indexer_ptr, (char*)RAW(serialized_indexer)))
@@ -1436,7 +1823,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1436
1823
  new_ind_serialized = resize_vec(serialized_indexer, new_size);
1437
1824
  char *temp = (char*)RAW(new_ind_serialized);
1438
1825
  incremental_serialize_isotree(*indexer_ptr, temp);
1439
- out["ind_ser"] = new_ind_serialized;
1826
+ out["indexer_ser"] = new_ind_serialized;
1440
1827
  }
1441
1828
 
1442
1829
  catch (std::runtime_error &e) {
@@ -1446,7 +1833,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1446
1833
 
1447
1834
  else {
1448
1835
  serialize_anew_ind:
1449
- out["ind_ser"] = serialize_cpp_obj(indexer_ptr);
1836
+ out["indexer_ser"] = serialize_cpp_obj(indexer_ptr);
1450
1837
  }
1451
1838
  }
1452
1839
  }
@@ -1458,18 +1845,34 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1458
1845
  else
1459
1846
  ext_model_ptr->hplanes.resize(old_ntrees);
1460
1847
 
1461
- if (imputer_ptr != NULL)
1848
+ if (imputer_ptr)
1462
1849
  imputer_ptr->imputer_tree.resize(old_ntrees);
1463
- if (indexer_ptr != NULL)
1850
+ if (indexer_ptr)
1464
1851
  indexer_ptr->indices.resize(old_ntrees);
1465
1852
  throw;
1466
1853
  }
1467
1854
 
1468
- model_cpp_obj_update["serialized"] = out["serialized"];
1469
- if (imputer_ptr)
1470
- model_cpp_obj_update["imp_ser"] = out["imp_ser"];
1471
- if (indexer_ptr)
1472
- model_cpp_obj_update["ind_ser"] = out["ind_ser"];
1855
+ {
1856
+ Rcpp::List model_lst = model_cpp_obj_update["model"];
1857
+ model_lst["ser"] = out["model_ser"];
1858
+ model_cpp_obj_update["model"] = model_lst;
1859
+
1860
+ if (imputer_ptr)
1861
+ {
1862
+ Rcpp::List imputer_lst = model_cpp_obj_update["imputer"];
1863
+ imputer_lst["ser"] = out["imputer_ser"];
1864
+ model_cpp_obj_update["imputer"] = imputer_lst;
1865
+ }
1866
+
1867
+ if (indexer_ptr)
1868
+ {
1869
+ Rcpp::List indexer_lst = model_cpp_obj_update["indexer"];
1870
+ indexer_lst["ser"] = out["indexer_ser"];
1871
+ model_cpp_obj_update["indexer"] = indexer_lst;
1872
+ }
1873
+ }
1874
+
1875
+ dont_serialize:
1473
1876
  *(INTEGER(ntrees_new)) = is_extended? ext_model_ptr->hplanes.size() : model_ptr->trees.size();
1474
1877
  model_params_update["ntrees"] = ntrees_new;
1475
1878
  }
@@ -1551,14 +1954,12 @@ Rcpp::CharacterVector model_to_sql_with_select_from(SEXP model_R_ptr, bool is_ex
1551
1954
  }
1552
1955
 
1553
1956
  // [[Rcpp::export(rng = false)]]
1554
- Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, bool has_imputer, SEXP ind_R_ptr)
1957
+ Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, SEXP ind_R_ptr, bool lazy_serialization)
1555
1958
  {
1556
- bool has_indexer = !Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL;
1557
-
1558
1959
  Rcpp::List out = Rcpp::List::create(
1559
- Rcpp::_["ptr"] = R_NilValue,
1560
- Rcpp::_["imp_ptr"] = R_NilValue,
1561
- Rcpp::_["indexer"] = R_NilValue
1960
+ Rcpp::_["model"] = Rcpp::XPtr<void*>(nullptr, false),
1961
+ Rcpp::_["imputer"] = Rcpp::XPtr<void*>(nullptr, false),
1962
+ Rcpp::_["indexer"] = Rcpp::XPtr<void*>(nullptr, false)
1562
1963
  );
1563
1964
 
1564
1965
  IsoForest* model_ptr = NULL;
@@ -1569,9 +1970,9 @@ Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr,
1569
1970
  ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1570
1971
  else
1571
1972
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1572
- if (has_imputer)
1973
+ if (R_ExternalPtrAddr(imp_R_ptr))
1573
1974
  imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1574
- if (has_indexer)
1975
+ if (R_ExternalPtrAddr(ind_R_ptr))
1575
1976
  indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
1576
1977
 
1577
1978
  std::unique_ptr<IsoForest> copy_model(new IsoForest());
@@ -1579,84 +1980,115 @@ Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr,
1579
1980
  std::unique_ptr<Imputer> copy_imputer(new Imputer());
1580
1981
  std::unique_ptr<TreesIndexer> copy_indexer(new TreesIndexer());
1581
1982
 
1582
- if (model_ptr != NULL)
1983
+ if (model_ptr)
1583
1984
  *copy_model = *model_ptr;
1584
- if (ext_model_ptr != NULL)
1985
+ if (ext_model_ptr)
1585
1986
  *copy_ext_model = *ext_model_ptr;
1586
- if (imputer_ptr != NULL)
1987
+ if (imputer_ptr)
1587
1988
  *copy_imputer = *imputer_ptr;
1588
- if (indexer_ptr != NULL)
1989
+ if (indexer_ptr)
1589
1990
  *copy_indexer = *indexer_ptr;
1590
1991
 
1591
- if (is_extended) {
1592
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get());
1593
- copy_ext_model.release();
1594
- }
1595
- else {
1596
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get());
1597
- copy_model.release();
1598
- }
1599
- if (has_imputer) {
1600
- out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get());
1601
- copy_imputer.release();
1992
+ if (lazy_serialization)
1993
+ {
1994
+ if (is_extended) {
1995
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&copy_ext_model);
1996
+ }
1997
+ else {
1998
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&copy_model);
1999
+ }
2000
+
2001
+ if (imputer_ptr) {
2002
+ out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&copy_imputer);
2003
+ }
2004
+ else {
2005
+ out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2006
+ }
2007
+
2008
+ if (indexer_ptr) {
2009
+ out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&copy_indexer);
2010
+ }
2011
+ else {
2012
+ out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2013
+ }
1602
2014
  }
1603
- if (has_indexer) {
1604
- out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get());
1605
- copy_indexer.release();
2015
+
2016
+ else
2017
+ {
2018
+ if (is_extended) {
2019
+ out["model"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get());
2020
+ copy_ext_model.release();
2021
+ }
2022
+ else {
2023
+ out["model"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get());
2024
+ copy_model.release();
2025
+ }
2026
+ if (imputer_ptr) {
2027
+ out["imputer"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get());
2028
+ copy_imputer.release();
2029
+ }
2030
+ if (indexer_ptr) {
2031
+ out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get());
2032
+ copy_indexer.release();
2033
+ }
1606
2034
  }
2035
+
1607
2036
  return out;
1608
2037
  }
1609
2038
 
1610
2039
  // [[Rcpp::export(rng = false)]]
1611
- void build_tree_indices(Rcpp::List lst_modify, bool is_extended, bool with_distances, int nthreads)
2040
+ void build_tree_indices(Rcpp::List lst_cpp_objects, SEXP ptr_model, bool is_altrepped, bool is_extended, bool with_distances, int nthreads)
1612
2041
  {
1613
- Rcpp::RawVector ind_ser = Rcpp::RawVector();
1614
- Rcpp::List empty_lst = Rcpp::List::create(Rcpp::_["indexer"] = R_NilValue);
2042
+ Rcpp::List lst_out = Rcpp::List::create(
2043
+ Rcpp::_["ptr"] = R_NilValue,
2044
+ Rcpp::_["ser"] = R_NilValue
2045
+ );
1615
2046
  std::unique_ptr<TreesIndexer> indexer(new TreesIndexer());
1616
2047
 
1617
2048
  if (!is_extended) {
1618
2049
  build_tree_indices(*indexer,
1619
- *static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])),
2050
+ *static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model)),
1620
2051
  nthreads,
1621
2052
  with_distances);
1622
2053
  }
1623
2054
  else {
1624
2055
  build_tree_indices(*indexer,
1625
- *static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])),
2056
+ *static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model)),
1626
2057
  nthreads,
1627
2058
  with_distances);
1628
2059
  }
1629
2060
 
1630
- ind_ser = serialize_cpp_obj(indexer.get());
1631
- empty_lst["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
1632
- if (!Rf_isNull(lst_modify["indexer"])) {
1633
- Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
1634
- indexer_R_ptr.release();
2061
+ if (is_altrepped) {
2062
+ lst_cpp_objects["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&indexer);
2063
+ }
2064
+
2065
+ else {
2066
+ lst_out["ser"] = serialize_cpp_obj(indexer.get());
2067
+ lst_out["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
2068
+ indexer.release();
2069
+ lst_cpp_objects["indexer"] = lst_out;
1635
2070
  }
1636
-
1637
- lst_modify["ind_ser"] = ind_ser;
1638
- lst_modify["indexer"] = empty_lst["indexer"];
1639
- indexer.release();
1640
2071
  }
1641
2072
 
1642
2073
  // [[Rcpp::export(rng = false)]]
1643
2074
  bool check_node_indexer_has_distances(SEXP indexer_R_ptr)
1644
2075
  {
1645
- if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL)
1646
- return false;
1647
- TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1648
- if (indexer->indices.empty()) return false;
2076
+ const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr);
2077
+ if (!indexer) return false;
1649
2078
  return !indexer->indices.front().node_distances.empty();
1650
2079
  }
1651
2080
 
1652
2081
  // [[Rcpp::export(rng = false)]]
1653
- void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rnames, bool is_extended,
2082
+ void set_reference_points(Rcpp::List lst_cpp_objects, SEXP ptr_model, SEXP ind_R_ptr, bool is_altrepped,
2083
+ Rcpp::List lst_metadata, SEXP rnames, bool is_extended,
1654
2084
  Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
1655
2085
  Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
1656
2086
  size_t nrows, int nthreads, bool with_distances)
1657
2087
  {
1658
- Rcpp::RawVector ind_ser = Rcpp::RawVector();
1659
- Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
2088
+ Rcpp::List lst_out = Rcpp::List::create(
2089
+ Rcpp::_["ptr"] = R_NilValue,
2090
+ Rcpp::_["ser"] = R_NilValue
2091
+ );
1660
2092
 
1661
2093
  double* numeric_data_ptr = NULL;
1662
2094
  int* categ_data_ptr = NULL;
@@ -1682,14 +2114,13 @@ void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rn
1682
2114
  Xc_indptr_ptr = INTEGER(Xc_indptr);
1683
2115
  }
1684
2116
 
1685
- IsoForest* model_ptr = NULL;
1686
- ExtIsoForest* ext_model_ptr = NULL;
1687
- TreesIndexer* indexer = NULL;
2117
+ IsoForest* model_ptr = nullptr;
2118
+ ExtIsoForest* ext_model_ptr = nullptr;
2119
+ TreesIndexer* indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
1688
2120
  if (is_extended)
1689
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"]));
2121
+ ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model));
1690
2122
  else
1691
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"]));
1692
- indexer = indexer_R_ptr.get();
2123
+ model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model));
1693
2124
 
1694
2125
  MissingAction missing_action = is_extended?
1695
2126
  ext_model_ptr->missing_action
@@ -1701,9 +2132,18 @@ void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rn
1701
2132
  if (Xc.size()) Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, nthreads);
1702
2133
  }
1703
2134
 
1704
- std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer));
2135
+ std::unique_ptr<TreesIndexer> new_indexer(is_altrepped? nullptr : (new TreesIndexer(*indexer)));
2136
+ TreesIndexer *indexer_use = is_altrepped? indexer : new_indexer.get();
1705
2137
 
1706
- set_reference_points(model_ptr, ext_model_ptr, new_indexer.get(),
2138
+ /* Note: if using an altrepped pointer, the indexer is modified in-place. If that fails,
2139
+ it will end up overwitten, with the previous references taken away. OTOH, if using
2140
+ a pointer + serialized, and it fails, it should not overwrite anything, and thus
2141
+ should not re-assign here immediately. */
2142
+ if (is_altrepped) {
2143
+ lst_metadata["reference_names"] = rnames;
2144
+ }
2145
+
2146
+ set_reference_points(model_ptr, ext_model_ptr, indexer_use,
1707
2147
  with_distances,
1708
2148
  numeric_data_ptr, categ_data_ptr,
1709
2149
  true, (size_t)0, (size_t)0,
@@ -1711,39 +2151,41 @@ void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rn
1711
2151
  (double*)NULL, (int*)NULL, (int*)NULL,
1712
2152
  nrows, nthreads);
1713
2153
 
1714
- ind_ser = serialize_cpp_obj(new_indexer.get());
1715
- *indexer = std::move(*new_indexer);
1716
- new_indexer.release();
1717
- lst_modify["ind_ser"] = ind_ser;
1718
- lst_modify2["reference_names"] = rnames;
2154
+ if (!is_altrepped) {
2155
+ lst_out["ser"] = serialize_cpp_obj(new_indexer.get());
2156
+ *indexer = std::move(*new_indexer);
2157
+ lst_metadata["reference_names"] = rnames;
2158
+ }
1719
2159
  }
1720
2160
 
1721
2161
  // [[Rcpp::export(rng = false)]]
1722
2162
  bool check_node_indexer_has_references(SEXP indexer_R_ptr)
1723
2163
  {
1724
- if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL)
1725
- return false;
1726
- TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1727
- if (indexer->indices.empty())
1728
- return false;
1729
- if (indexer->indices.front().reference_points.empty())
1730
- return false;
1731
- else
1732
- return true;
2164
+ const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr);
2165
+ if (!indexer) return false;
2166
+ return !(indexer->indices.front().reference_points.empty());
1733
2167
  }
1734
2168
 
1735
2169
  // [[Rcpp::export(rng = false)]]
1736
2170
  int get_num_references(SEXP indexer_R_ptr)
1737
2171
  {
1738
- TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1739
- if (indexer == NULL || indexer->indices.empty()) return 0;
2172
+ const TreesIndexer *indexer = static_cast<const TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
2173
+ if (!indexer || indexer->indices.empty()) return 0;
1740
2174
  return indexer->indices.front().reference_points.size();
1741
2175
  }
1742
2176
 
1743
2177
  // [[Rcpp::export(rng = false)]]
1744
- SEXP get_null_R_pointer()
2178
+ SEXP get_null_R_pointer_internal(bool altrepped)
1745
2179
  {
1746
- return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue);
2180
+ if (!altrepped) {
2181
+ return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue);
2182
+ }
2183
+ else {
2184
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
2185
+ SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue));
2186
+ UNPROTECT(2);
2187
+ return out;
2188
+ }
1747
2189
  }
1748
2190
 
1749
2191
  /* This library will use different code paths for opening a file path
@@ -1852,18 +2294,30 @@ void serialize_to_file
1852
2294
  }
1853
2295
 
1854
2296
  // [[Rcpp::export]]
1855
- Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname)
2297
+ Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname, bool lazy_serialization)
1856
2298
  {
1857
2299
  Rcpp::List out = Rcpp::List::create(
1858
- Rcpp::_["ptr"] = R_NilValue,
1859
- Rcpp::_["serialized"] = R_NilValue,
1860
- Rcpp::_["imp_ptr"] = R_NilValue,
1861
- Rcpp::_["imp_ser"] = R_NilValue,
2300
+ Rcpp::_["model"] = R_NilValue,
2301
+ Rcpp::_["imputer"] = R_NilValue,
1862
2302
  Rcpp::_["indexer"] = R_NilValue,
1863
- Rcpp::_["ind_ser"] = R_NilValue,
1864
2303
  Rcpp::_["metadata"] = R_NilValue
1865
2304
  );
1866
2305
 
2306
+ if (!lazy_serialization) {
2307
+ out["model"] = Rcpp::List::create(
2308
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2309
+ Rcpp::_["ser"] = R_NilValue
2310
+ );
2311
+ out["imputer"] = Rcpp::List::create(
2312
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2313
+ Rcpp::_["ser"] = R_NilValue
2314
+ );
2315
+ out["indexer"] = Rcpp::List::create(
2316
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2317
+ Rcpp::_["ser"] = R_NilValue
2318
+ );
2319
+ }
2320
+
1867
2321
  FileOpener file_(fname[0], "rb");
1868
2322
  FILE *input_file = file_.get_handle();
1869
2323
 
@@ -1928,30 +2382,61 @@ Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname)
1928
2382
  ptr_metadata
1929
2383
  );
1930
2384
 
1931
- if (has_IsoForest)
1932
- out["serialized"] = serialize_cpp_obj(model.get());
1933
- else
1934
- out["serialized"] = serialize_cpp_obj(model_ext.get());
1935
- if (has_Imputer)
1936
- out["imp_ser"] = serialize_cpp_obj(imputer.get());
1937
- if (has_Indexer)
1938
- out["ind_ser"] = serialize_cpp_obj(indexer.get());
2385
+ if (lazy_serialization)
2386
+ {
2387
+ if (has_IsoForest)
2388
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, &model);
2389
+ else
2390
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, &model_ext);
2391
+
2392
+ if (has_Imputer)
2393
+ out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, &imputer);
2394
+ else
2395
+ out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1939
2396
 
1940
- if (has_IsoForest) {
1941
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get());
1942
- model.release();
1943
- }
1944
- else {
1945
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get());
1946
- model_ext.release();
1947
- }
1948
- if (has_Imputer) {
1949
- out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get());
1950
- imputer.release();
2397
+ if (has_Imputer)
2398
+ out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, &indexer);
2399
+ else
2400
+ out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1951
2401
  }
1952
- if (has_Indexer) {
1953
- out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
1954
- indexer.release();
2402
+
2403
+ else
2404
+ {
2405
+ Rcpp::List tmp_model = out["model"];
2406
+ Rcpp::List tmp_imputer = out["imputer"];
2407
+ Rcpp::List tmp_indexer = out["indexer"];
2408
+
2409
+ if (has_IsoForest)
2410
+ tmp_model["ser"] = serialize_cpp_obj(model.get());
2411
+ else
2412
+ tmp_model["ser"] = serialize_cpp_obj(model_ext.get());
2413
+
2414
+ if (has_Imputer)
2415
+ tmp_imputer["ser"] = serialize_cpp_obj(imputer.get());
2416
+
2417
+ if (has_Indexer)
2418
+ tmp_indexer["ser"] = serialize_cpp_obj(indexer.get());
2419
+
2420
+ if (has_IsoForest) {
2421
+ tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get());
2422
+ model.release();
2423
+ }
2424
+ else {
2425
+ tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get());
2426
+ model_ext.release();
2427
+ }
2428
+ if (has_Imputer) {
2429
+ tmp_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get());
2430
+ imputer.release();
2431
+ }
2432
+ if (has_Indexer) {
2433
+ tmp_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
2434
+ indexer.release();
2435
+ }
2436
+
2437
+ out["model"] = tmp_model;
2438
+ out["imputer"] = tmp_imputer;
2439
+ out["indexer"] = tmp_indexer;
1955
2440
  }
1956
2441
 
1957
2442
  return out;
@@ -2473,12 +2958,12 @@ Rcpp::IntegerMatrix get_null_int_mat()
2473
2958
  int get_ntrees(SEXP model_R_ptr, bool is_extended)
2474
2959
  {
2475
2960
  if (is_extended) {
2476
- ExtIsoForest* ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2961
+ const ExtIsoForest* ext_model_ptr = static_cast<const ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2477
2962
  return ext_model_ptr->hplanes.size();
2478
2963
  }
2479
2964
 
2480
2965
  else {
2481
- IsoForest* model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2966
+ const IsoForest* model_ptr = static_cast<const IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2482
2967
  return model_ptr->trees.size();
2483
2968
  }
2484
2969
  }