isotree 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,7 +40,7 @@
40
40
  * International Conference on Knowledge Discovery & Data Mining. 2018.
41
41
  *
42
42
  * BSD 2-Clause License
43
- * Copyright (c) 2019-2022, David Cortes
43
+ * Copyright (c) 2019-2023, David Cortes
44
44
  * All rights reserved.
45
45
  * Redistribution and use in source and binary forms, with or without
46
46
  * modification, are permitted provided that the following conditions are met:
@@ -64,12 +64,10 @@
64
64
 
65
65
  #include <Rcpp.h>
66
66
  #include <Rcpp/unwindProtect.h>
67
- // [[Rcpp::plugins(unwindProtect)]]
68
67
  #include <Rinternals.h>
68
+ #include <R_ext/Altrep.h>
69
69
 
70
- #ifndef _FOR_R
71
- #define FOR_R
72
- #endif
70
+ #include <type_traits>
73
71
 
74
72
  /* This is the package's header */
75
73
  #include "isotree.hpp"
@@ -87,7 +85,7 @@
87
85
  so it's not enough to just include 'isotree_exportable.hpp' and let
88
86
  the templates be instantiated elsewhere. */
89
87
 
90
- #define throw_mem_err() Rcpp::stop("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n")
88
+ #define throw_mem_err() throw Rcpp::exception("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n")
91
89
 
92
90
  SEXP alloc_RawVec(void *data)
93
91
  {
@@ -148,9 +146,9 @@ Rcpp::RawVector serialize_cpp_obj(const Model *model_outputs)
148
146
  {
149
147
  size_t serialized_size = determine_serialized_size(*model_outputs);
150
148
  if (unlikely(!serialized_size))
151
- Rcpp::stop("Unexpected error.");
149
+ throw Rcpp::exception("Unexpected error.");
152
150
  if (unlikely(serialized_size > (size_t)std::numeric_limits<R_xlen_t>::max()))
153
- Rcpp::stop("Resulting model is too large for R to handle.");
151
+ throw Rcpp::exception("Resulting model is too large for R to handle.");
154
152
  Rcpp::RawVector out = Rcpp::unwindProtect(alloc_RawVec, (void*)&serialized_size);
155
153
  char *out_ = (char*)RAW(out);
156
154
  serialize_isotree(*model_outputs, out_);
@@ -195,11 +193,279 @@ SEXP deserialize_Indexer(Rcpp::RawVector src)
195
193
  }
196
194
 
197
195
  // [[Rcpp::export(rng = false)]]
198
- Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
196
+ SEXP serialize_IsoForest_from_ptr(SEXP R_ptr)
197
+ {
198
+ const IsoForest* model = (const IsoForest*)R_ExternalPtrAddr(R_ptr);
199
+ return serialize_cpp_obj<IsoForest>(model);
200
+ }
201
+
202
+ // [[Rcpp::export(rng = false)]]
203
+ SEXP serialize_ExtIsoForest_from_ptr(SEXP R_ptr)
204
+ {
205
+ const ExtIsoForest* model = (const ExtIsoForest*)R_ExternalPtrAddr(R_ptr);
206
+ return serialize_cpp_obj<ExtIsoForest>(model);
207
+ }
208
+
209
+ // [[Rcpp::export(rng = false)]]
210
+ SEXP serialize_Imputer_from_ptr(SEXP R_ptr)
211
+ {
212
+ const Imputer* model = (const Imputer*)R_ExternalPtrAddr(R_ptr);
213
+ return serialize_cpp_obj<Imputer>(model);
214
+ }
215
+
216
+ // [[Rcpp::export(rng = false)]]
217
+ SEXP serialize_Indexer_from_ptr(SEXP R_ptr)
218
+ {
219
+ const TreesIndexer* model = (const TreesIndexer*)R_ExternalPtrAddr(R_ptr);
220
+ return serialize_cpp_obj<TreesIndexer>(model);
221
+ }
222
+
223
+ // [[Rcpp::export(rng = false)]]
224
+ Rcpp::LogicalVector check_null_ptr_model_internal(SEXP ptr_model)
199
225
  {
200
226
  return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
201
227
  }
202
228
 
229
+ static R_altrep_class_t altrepped_pointer_IsoForest;
230
+ static R_altrep_class_t altrepped_pointer_ExtIsoForest;
231
+ static R_altrep_class_t altrepped_pointer_Imputer;
232
+ static R_altrep_class_t altrepped_pointer_TreesIndexer;
233
+ static R_altrep_class_t altrepped_pointer_NullPointer;
234
+
235
+ template <class Model>
236
+ R_altrep_class_t get_altrep_obj_class()
237
+ {
238
+ if (std::is_same<Model, IsoForest>::value) return altrepped_pointer_IsoForest;
239
+
240
+ if (std::is_same<Model, ExtIsoForest>::value) return altrepped_pointer_ExtIsoForest;
241
+
242
+ if (std::is_same<Model, Imputer>::value) return altrepped_pointer_Imputer;
243
+
244
+ if (std::is_same<Model, TreesIndexer>::value) return altrepped_pointer_TreesIndexer;
245
+
246
+ throw Rcpp::exception("Internal error. Please open a bug report.");
247
+ }
248
+
249
+ R_xlen_t altrepped_pointer_length(SEXP obj)
250
+ {
251
+ return 1;
252
+ }
253
+
254
+ SEXP get_element_from_altrepped_obj(SEXP R_altrepped_obj, R_xlen_t idx)
255
+ {
256
+ return R_altrep_data1(R_altrepped_obj);
257
+ }
258
+
259
+ template <class Model>
260
+ void delete_model_from_R_ptr(SEXP R_ptr)
261
+ {
262
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_ptr);
263
+ delete cpp_ptr;
264
+ R_SetExternalPtrAddr(R_ptr, nullptr);
265
+ R_ClearExternalPtr(R_ptr);
266
+ }
267
+
268
+ template <class Model>
269
+ SEXP get_altrepped_pointer(void *void_ptr)
270
+ {
271
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
272
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
273
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
274
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
275
+
276
+ std::unique_ptr<Model> *ptr = (std::unique_ptr<Model>*)void_ptr;
277
+ R_SetExternalPtrAddr(R_ptr, ptr->get());
278
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
279
+ ptr->release();
280
+
281
+ R_set_altrep_data1(out, R_ptr);
282
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
283
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
284
+
285
+ UNPROTECT(4);
286
+ return out;
287
+ }
288
+
289
+ template <class Model>
290
+ SEXP serialize_altrepped_pointer(SEXP altrepped_obj)
291
+ {
292
+ try {
293
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
294
+ R_xlen_t state_size = determine_serialized_size(*cpp_ptr);
295
+ SEXP R_state = PROTECT(Rf_allocVector(RAWSXP, state_size));
296
+ serialize_isotree(*cpp_ptr, (char*)RAW(R_state));
297
+ UNPROTECT(1);
298
+ return R_state;
299
+ }
300
+ catch (const std::exception &ex) {
301
+ Rf_error("%s\n", ex.what());
302
+ }
303
+
304
+ return R_NilValue; /* <- won't be reached */
305
+ }
306
+
307
+ template <class Model>
308
+ SEXP deserialize_altrepped_pointer(SEXP cls, SEXP R_state)
309
+ {
310
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
311
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
312
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
313
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
314
+
315
+ try {
316
+ std::unique_ptr<Model> model(new Model());
317
+ const char *inp = (const char*)RAW(R_state);
318
+ deserialize_isotree(*model, inp);
319
+
320
+ R_SetExternalPtrAddr(R_ptr, model.get());
321
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
322
+ model.release();
323
+ }
324
+ catch (const std::exception &ex) {
325
+ Rf_error("%s\n", ex.what());
326
+ }
327
+
328
+ R_set_altrep_data1(out, R_ptr);
329
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
330
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
331
+
332
+ UNPROTECT(4);
333
+ return out;
334
+ }
335
+
336
+ template <class Model>
337
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
338
+ {
339
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
340
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
341
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
342
+
343
+ if (!deep) {
344
+ R_set_altrep_data1(out, R_altrep_data1(altrepped_obj));
345
+ }
346
+
347
+ else {
348
+
349
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
350
+
351
+ try {
352
+ std::unique_ptr<Model> new_obj(new Model());
353
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
354
+ *new_obj = *cpp_ptr;
355
+
356
+ R_SetExternalPtrAddr(R_ptr, new_obj.get());
357
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
358
+ new_obj.release();
359
+ }
360
+
361
+ catch (const std::exception &ex) {
362
+ Rf_error("%s\n", ex.what());
363
+ }
364
+
365
+ R_set_altrep_data1(out, R_ptr);
366
+ UNPROTECT(1);
367
+ }
368
+
369
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
370
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_class);
371
+ UNPROTECT(3);
372
+ return out;
373
+ }
374
+
375
+ SEXP get_altrepped_null_pointer()
376
+ {
377
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
378
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
379
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
380
+ SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue));
381
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
382
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
383
+ UNPROTECT(4);
384
+ return out;
385
+ }
386
+
387
+ SEXP safe_get_altrepped_null_pointer(void *unused)
388
+ {
389
+ return get_altrepped_null_pointer();
390
+ }
391
+
392
+ SEXP serialize_altrepped_null(SEXP altrepped_obj)
393
+ {
394
+ return Rf_allocVector(RAWSXP, 0);
395
+ }
396
+
397
+ SEXP deserialize_altrepped_null(SEXP cls, SEXP R_state)
398
+ {
399
+ return get_altrepped_null_pointer();
400
+ }
401
+
402
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
403
+ {
404
+ return get_altrepped_null_pointer();
405
+ }
406
+
407
+ Rboolean inspect_altrepped_pointer(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int))
408
+ {
409
+ Rcpp::Rcout << "Altrepped pointer [address:" << R_ExternalPtrAddr(R_altrep_data1(x)) << "]\n";
410
+ return TRUE;
411
+ }
412
+
413
+ template <class Model>
414
+ Model* get_pointer_from_altrep(SEXP altrepped_obj)
415
+ {
416
+ return (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
417
+ }
418
+
419
+ template <class Model>
420
+ Model* get_pointer_from_xptr(SEXP R_ptr)
421
+ {
422
+ return (Model*)R_ExternalPtrAddr(R_ptr);
423
+ }
424
+
425
+ // [[Rcpp::init]]
426
+ void init_altrepped_vectors(DllInfo* dll)
427
+ {
428
+ altrepped_pointer_IsoForest = R_make_altlist_class("altrepped_pointer_IsoForest", "isotree", dll);
429
+ R_set_altrep_Length_method(altrepped_pointer_IsoForest, altrepped_pointer_length);
430
+ R_set_altrep_Inspect_method(altrepped_pointer_IsoForest, inspect_altrepped_pointer);
431
+ R_set_altrep_Serialized_state_method(altrepped_pointer_IsoForest, serialize_altrepped_pointer<IsoForest>);
432
+ R_set_altrep_Unserialize_method(altrepped_pointer_IsoForest, deserialize_altrepped_pointer<IsoForest>);
433
+ R_set_altrep_Duplicate_method(altrepped_pointer_IsoForest, duplicate_altrepped_pointer<IsoForest>);
434
+ R_set_altlist_Elt_method(altrepped_pointer_IsoForest, get_element_from_altrepped_obj);
435
+
436
+ altrepped_pointer_ExtIsoForest = R_make_altlist_class("altrepped_pointer_ExtIsoForest", "isotree", dll);
437
+ R_set_altrep_Length_method(altrepped_pointer_ExtIsoForest, altrepped_pointer_length);
438
+ R_set_altrep_Inspect_method(altrepped_pointer_ExtIsoForest, inspect_altrepped_pointer);
439
+ R_set_altrep_Serialized_state_method(altrepped_pointer_ExtIsoForest, serialize_altrepped_pointer<ExtIsoForest>);
440
+ R_set_altrep_Unserialize_method(altrepped_pointer_ExtIsoForest, deserialize_altrepped_pointer<ExtIsoForest>);
441
+ R_set_altrep_Duplicate_method(altrepped_pointer_ExtIsoForest, duplicate_altrepped_pointer<ExtIsoForest>);
442
+ R_set_altlist_Elt_method(altrepped_pointer_ExtIsoForest, get_element_from_altrepped_obj);
443
+
444
+ altrepped_pointer_Imputer = R_make_altlist_class("altrepped_pointer_Imputer", "isotree", dll);
445
+ R_set_altrep_Length_method(altrepped_pointer_Imputer, altrepped_pointer_length);
446
+ R_set_altrep_Inspect_method(altrepped_pointer_Imputer, inspect_altrepped_pointer);
447
+ R_set_altrep_Serialized_state_method(altrepped_pointer_Imputer, serialize_altrepped_pointer<Imputer>);
448
+ R_set_altrep_Unserialize_method(altrepped_pointer_Imputer, deserialize_altrepped_pointer<Imputer>);
449
+ R_set_altrep_Duplicate_method(altrepped_pointer_Imputer, duplicate_altrepped_pointer<Imputer>);
450
+ R_set_altlist_Elt_method(altrepped_pointer_Imputer, get_element_from_altrepped_obj);
451
+
452
+ altrepped_pointer_TreesIndexer = R_make_altlist_class("altrepped_pointer_TreesIndexer", "isotree", dll);
453
+ R_set_altrep_Length_method(altrepped_pointer_TreesIndexer, altrepped_pointer_length);
454
+ R_set_altrep_Inspect_method(altrepped_pointer_TreesIndexer, inspect_altrepped_pointer);
455
+ R_set_altrep_Serialized_state_method(altrepped_pointer_TreesIndexer, serialize_altrepped_pointer<TreesIndexer>);
456
+ R_set_altrep_Unserialize_method(altrepped_pointer_TreesIndexer, deserialize_altrepped_pointer<TreesIndexer>);
457
+ R_set_altrep_Duplicate_method(altrepped_pointer_TreesIndexer, duplicate_altrepped_pointer<TreesIndexer>);
458
+ R_set_altlist_Elt_method(altrepped_pointer_TreesIndexer, get_element_from_altrepped_obj);
459
+
460
+ altrepped_pointer_NullPointer = R_make_altlist_class("altrepped_pointer_NullPointer", "isotree", dll);
461
+ R_set_altrep_Length_method(altrepped_pointer_NullPointer, altrepped_pointer_length);
462
+ R_set_altrep_Inspect_method(altrepped_pointer_NullPointer, inspect_altrepped_pointer);
463
+ R_set_altrep_Serialized_state_method(altrepped_pointer_NullPointer, serialize_altrepped_null);
464
+ R_set_altrep_Unserialize_method(altrepped_pointer_NullPointer, deserialize_altrepped_null);
465
+ R_set_altrep_Duplicate_method(altrepped_pointer_NullPointer, duplicate_altrepped_pointer);
466
+ R_set_altlist_Elt_method(altrepped_pointer_NullPointer, get_element_from_altrepped_obj);
467
+ }
468
+
203
469
  double* set_R_nan_as_C_nan(double *x, size_t n, std::vector<double> &v, int nthreads)
204
470
  {
205
471
  v.assign(x, x + n);
@@ -223,6 +489,13 @@ double* set_R_nan_as_C_nan(double *x, size_t n, int nthreads)
223
489
  return x;
224
490
  }
225
491
 
492
+ TreesIndexer* get_indexer_ptr_from_R_obj(SEXP indexer_R_ptr)
493
+ {
494
+ TreesIndexer *out = get_pointer_from_xptr<TreesIndexer>(indexer_R_ptr);
495
+ if (out && out->indices.empty()) out = nullptr;
496
+ return out;
497
+ }
498
+
226
499
  // [[Rcpp::export(rng = false)]]
227
500
  Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat,
228
501
  Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
@@ -242,7 +515,7 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
242
515
  Rcpp::CharacterVector missing_action, bool all_perm,
243
516
  bool build_imputer, bool output_imputations, size_t min_imp_obs,
244
517
  Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows,
245
- int random_seed, bool use_long_double, int nthreads)
518
+ int random_seed, bool use_long_double, int nthreads, bool lazy_serialization)
246
519
  {
247
520
  double* numeric_data_ptr = NULL;
248
521
  int* categ_data_ptr = NULL;
@@ -384,18 +657,37 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
384
657
  }
385
658
 
386
659
  Rcpp::List outp = Rcpp::List::create(
387
- Rcpp::_["depths"] = depths,
388
- Rcpp::_["tmat"] = tmat,
389
- Rcpp::_["dmat"] = dmat,
390
- Rcpp::_["ptr"] = R_NilValue,
391
- Rcpp::_["serialized"] = R_NilValue,
392
- Rcpp::_["imp_ptr"] = R_NilValue,
393
- Rcpp::_["imp_ser"] = R_NilValue,
394
- Rcpp::_["imputed_num"] = R_NilValue,
395
- Rcpp::_["imputed_cat"] = R_NilValue,
396
- Rcpp::_["err"] = Rcpp::LogicalVector::create(1)
660
+ Rcpp::_["depths"] = depths,
661
+ Rcpp::_["tmat"] = tmat,
662
+ Rcpp::_["dmat"] = dmat,
663
+ Rcpp::_["model"] = R_NilValue,
664
+ Rcpp::_["imputer"] = R_NilValue,
665
+ Rcpp::_["indexer"] = R_NilValue,
666
+ Rcpp::_["imputed_num"] = R_NilValue,
667
+ Rcpp::_["imputed_cat"] = R_NilValue,
668
+ Rcpp::_["err"] = Rcpp::LogicalVector::create(1)
397
669
  );
398
670
 
671
+ Rcpp::List model_lst_nonlazy = Rcpp::List::create(
672
+ Rcpp::_["ptr"] = R_NilValue,
673
+ Rcpp::_["ser"] = R_NilValue
674
+ );
675
+
676
+ Rcpp::List imputer_lst_nonlazy = Rcpp::List::create(
677
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
678
+ Rcpp::_["ser"] = R_NilValue
679
+ );
680
+
681
+ if (lazy_serialization) {
682
+ outp["indexer"] = get_altrepped_null_pointer();
683
+ }
684
+ else {
685
+ outp["indexer"] = Rcpp::List::create(
686
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
687
+ Rcpp::_["ser"] = R_NilValue
688
+ );
689
+ }
690
+
399
691
  std::unique_ptr<IsoForest> model_ptr(nullptr);
400
692
  std::unique_ptr<ExtIsoForest> ext_model_ptr(nullptr);
401
693
  std::unique_ptr<Imputer> imputer_ptr(nullptr);
@@ -408,9 +700,7 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
408
700
  if (build_imputer)
409
701
  imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
410
702
 
411
- int ret_val;
412
- try {
413
- ret_val =
703
+ int ret_val =
414
704
  fit_iforest(model_ptr.get(), ext_model_ptr.get(),
415
705
  numeric_data_ptr, ncols_numeric,
416
706
  categ_data_ptr, ncols_categ, ncat_ptr,
@@ -432,14 +722,21 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
432
722
  all_perm, imputer_ptr.get(), min_imp_obs,
433
723
  depth_imp_C, weigh_imp_rows_C, output_imputations,
434
724
  (uint64_t) random_seed, use_long_double, nthreads);
435
- }
436
- catch (std::bad_alloc &e) {
437
- throw_mem_err();
438
- }
439
- Rcpp::checkUserInterrupt();
440
725
 
726
+ Rcpp::checkUserInterrupt(); /* <- nothing is returned in this case */
727
+ /* Note to self: the procedure has its own interrupt checker, so when an interrupt
728
+ signal is triggered, first it will print a message about it, then re-issue the
729
+ signal, then check for interrupt through Rcpp's, which will return nothing to
730
+ the outside and will not raise any error. In this case, at least the user will
731
+ see the error message. Note that Rcpp's interrupt non-return, unlike R's, triggers
732
+ stack unwinding for C++ objects. */
733
+
734
+ /* Note to self: since the function for fitting the model uses the C++ exception system,
735
+ and the stop signals are translated into Rcpp stops, this section below should not
736
+ be reachable anyhow. */
441
737
  if (ret_val == EXIT_FAILURE)
442
738
  {
739
+ Rcpp::Rcerr << "Unexpected error" << std::endl;
443
740
  return Rcpp::unwindProtect(safe_errlist, nullptr);
444
741
  }
445
742
 
@@ -447,63 +744,66 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
447
744
  tmat_to_dense(tmat_ptr, dmat_ptr, nrows, standardize_dist? 0. : std::numeric_limits<double>::infinity());
448
745
 
449
746
  bool serialization_failed = false;
450
- Rcpp::RawVector serialized_obj;
451
- try {
747
+
748
+ if (lazy_serialization)
749
+ {
750
+ if (ndim == 1) {
751
+ outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&model_ptr);
752
+ }
753
+ else {
754
+ outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&ext_model_ptr);
755
+ }
756
+
757
+ if (build_imputer) {
758
+ outp["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&imputer_ptr);
759
+ }
760
+ else {
761
+ outp["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
762
+ }
763
+ }
764
+
765
+ else
766
+ {
767
+ Rcpp::RawVector serialized_obj;
768
+ /* Note to self: the serialization functions use unwind protection internally. */
452
769
  if (ndim == 1)
453
770
  serialized_obj = serialize_cpp_obj(model_ptr.get());
454
771
  else
455
772
  serialized_obj = serialize_cpp_obj(ext_model_ptr.get());
456
- }
457
- catch (std::bad_alloc &e) {
458
- throw_mem_err();
459
- }
460
- if (unlikely(!serialized_obj.size())) serialization_failed = true;
461
- if (unlikely(serialization_failed)) {
462
- if (ndim == 1)
463
- model_ptr.reset();
464
- else
465
- ext_model_ptr.reset();
466
- }
467
773
 
468
- if (!serialization_failed)
469
- {
470
- outp["serialized"] = serialized_obj;
774
+ if (unlikely(!serialized_obj.size())) serialization_failed = true;
775
+ if (unlikely(serialization_failed)) {
776
+ throw Rcpp::exception("Error: insufficient memory\n");
777
+ }
778
+
779
+ model_lst_nonlazy["ser"] = serialized_obj;
471
780
  if (ndim == 1) {
472
- outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get());
781
+ model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get());
473
782
  model_ptr.release();
474
783
  }
475
784
  else {
476
- outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get());
785
+ model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get());
477
786
  ext_model_ptr.release();
478
787
  }
479
- } else
480
- outp["ptr"] = R_NilValue;
481
788
 
482
- if (build_imputer && !serialization_failed)
483
- {
484
- try {
485
- outp["imp_ser"] = serialize_cpp_obj(imputer_ptr.get());
486
- }
487
- catch (std::bad_alloc &e) {
488
- throw_mem_err();
489
- }
490
- if (!Rf_xlength(outp["imp_ser"]))
789
+ outp["model"] = model_lst_nonlazy;
790
+
791
+ if (build_imputer)
491
792
  {
492
- serialization_failed = true;
493
- imputer_ptr.reset();
494
- if (ndim == 1)
495
- model_ptr.reset();
496
- else
497
- ext_model_ptr.reset();
498
- outp["imp_ptr"] = R_NilValue;
499
- outp["ptr"] = R_NilValue;
500
- } else {
501
- outp["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get());
793
+ imputer_lst_nonlazy["ser"] = serialize_cpp_obj(imputer_ptr.get());
794
+ if (!Rf_xlength(imputer_lst_nonlazy["ser"]))
795
+ {
796
+ throw Rcpp::exception("Error: insufficient memory\n");
797
+ }
798
+
799
+ imputer_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get());
502
800
  imputer_ptr.release();
503
801
  }
802
+
803
+ outp["imputer"] = imputer_lst_nonlazy;
504
804
  }
505
805
 
506
- if (output_imputations && !serialization_failed)
806
+ if (output_imputations)
507
807
  {
508
808
  outp["imputed_num"] = Xcpp;
509
809
  outp["imputed_cat"] = X_cat;
@@ -534,12 +834,13 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
534
834
  Rcpp::NumericVector ref_X_num, Rcpp::IntegerVector ref_X_cat,
535
835
  Rcpp::NumericVector ref_Xc, Rcpp::IntegerVector ref_Xc_ind, Rcpp::IntegerVector ref_Xc_indptr,
536
836
  uint64_t random_seed, bool use_long_double,
537
- Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update)
837
+ Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update,
838
+ bool is_altrepped)
538
839
  {
539
840
  Rcpp::List out = Rcpp::List::create(
540
- Rcpp::_["serialized"] = R_NilValue,
541
- Rcpp::_["imp_ser"] = R_NilValue,
542
- Rcpp::_["ind_ser"] = R_NilValue
841
+ Rcpp::_["model_ser"] = R_NilValue,
842
+ Rcpp::_["imputer_ser"] = R_NilValue,
843
+ Rcpp::_["indexer_ser"] = R_NilValue
543
844
  );
544
845
 
545
846
  Rcpp::IntegerVector ntrees_plus1 = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]) + 1);
@@ -674,10 +975,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
674
975
  if (build_imputer)
675
976
  imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
676
977
 
677
- if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
678
- indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
679
- if (indexer_ptr != NULL && indexer_ptr->indices.empty())
680
- indexer_ptr = NULL;
978
+ indexer_ptr = get_indexer_ptr_from_R_obj(indexer_R_ptr);
681
979
 
682
980
  size_t old_ntrees = (ndim == 1)? (model_ptr->trees.size()) : (ext_model_ptr->hplanes.size());
683
981
 
@@ -706,6 +1004,9 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
706
1004
 
707
1005
  Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
708
1006
  size_t new_size;
1007
+
1008
+ if (is_altrepped) goto dont_serialize;
1009
+
709
1010
  try
710
1011
  {
711
1012
  if (ndim == 1)
@@ -719,7 +1020,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
719
1020
  new_serialized = resize_vec(serialized_obj, new_size);
720
1021
  char *temp = (char*)RAW(new_serialized);
721
1022
  incremental_serialize_isotree(*model_ptr, temp);
722
- out["serialized"] = new_serialized;
1023
+ out["model_ser"] = new_serialized;
723
1024
  }
724
1025
 
725
1026
  catch (std::runtime_error &e) {
@@ -729,7 +1030,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
729
1030
 
730
1031
  else {
731
1032
  serialize_anew_singlevar:
732
- out["serialized"] = serialize_cpp_obj(model_ptr);
1033
+ out["model_ser"] = serialize_cpp_obj(model_ptr);
733
1034
  }
734
1035
  }
735
1036
 
@@ -744,7 +1045,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
744
1045
  new_serialized = resize_vec(serialized_obj, new_size);
745
1046
  char *temp = (char*)RAW(new_serialized);
746
1047
  incremental_serialize_isotree(*ext_model_ptr, temp);
747
- out["serialized"] = new_serialized;
1048
+ out["model_ser"] = new_serialized;
748
1049
  }
749
1050
 
750
1051
  catch (std::runtime_error &e) {
@@ -754,7 +1055,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
754
1055
 
755
1056
  else {
756
1057
  serialize_anew_ext:
757
- out["serialized"] = serialize_cpp_obj(ext_model_ptr);
1058
+ out["model_ser"] = serialize_cpp_obj(ext_model_ptr);
758
1059
  }
759
1060
  }
760
1061
 
@@ -769,7 +1070,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
769
1070
  new_imp_serialized = resize_vec(serialized_imputer, new_size);
770
1071
  char *temp = (char*)RAW(new_imp_serialized);
771
1072
  incremental_serialize_isotree(*imputer_ptr, temp);
772
- out["imp_ser"] = new_imp_serialized;
1073
+ out["imputer_ser"] = new_imp_serialized;
773
1074
  }
774
1075
 
775
1076
  catch (std::runtime_error &e) {
@@ -779,7 +1080,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
779
1080
 
780
1081
  else {
781
1082
  serialize_anew_imp:
782
- out["imp_ser"] = serialize_cpp_obj(imputer_ptr);
1083
+ out["imputer_ser"] = serialize_cpp_obj(imputer_ptr);
783
1084
  }
784
1085
  }
785
1086
 
@@ -794,7 +1095,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
794
1095
  new_ind_serialized = resize_vec(serialized_indexer, new_size);
795
1096
  char *temp = (char*)RAW(new_ind_serialized);
796
1097
  incremental_serialize_isotree(*indexer_ptr, temp);
797
- out["ind_ser"] = new_ind_serialized;
1098
+ out["indexer_ser"] = new_ind_serialized;
798
1099
  }
799
1100
 
800
1101
  catch (std::runtime_error &e) {
@@ -804,7 +1105,7 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
804
1105
 
805
1106
  else {
806
1107
  serialize_anew_ind:
807
- out["ind_ser"] = serialize_cpp_obj(indexer_ptr);
1108
+ out["indexer_ser"] = serialize_cpp_obj(indexer_ptr);
808
1109
  }
809
1110
  }
810
1111
  }
@@ -822,11 +1123,27 @@ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector
822
1123
  throw;
823
1124
  }
824
1125
 
825
- model_cpp_obj_update["serialized"] = out["serialized"];
826
- if (build_imputer)
827
- model_cpp_obj_update["imp_ser"] = out["imp_ser"];
828
- if (indexer_ptr != NULL)
829
- model_cpp_obj_update["ind_ser"] = out["ind_ser"];
1126
+ {
1127
+ Rcpp::List model_lst = model_cpp_obj_update["model"];
1128
+ model_lst["ser"] = out["model_ser"];
1129
+ model_cpp_obj_update["model"] = model_lst;
1130
+
1131
+ if (build_imputer)
1132
+ {
1133
+ Rcpp::List imputer_lst = model_cpp_obj_update["imputer"];
1134
+ imputer_lst["ser"] = out["imputer_ser"];
1135
+ model_cpp_obj_update["imputer"] = imputer_lst;
1136
+ }
1137
+
1138
+ if (indexer_ptr)
1139
+ {
1140
+ Rcpp::List indexer_lst = model_cpp_obj_update["indexer"];
1141
+ indexer_lst["ser"] = out["indexer_ser"];
1142
+ model_cpp_obj_update["indexer"] = indexer_lst;
1143
+ }
1144
+ }
1145
+
1146
+ dont_serialize:
830
1147
  model_params_update["ntrees"] = ntrees_plus1;
831
1148
  }
832
1149
 
@@ -880,14 +1197,10 @@ void predict_iso(SEXP model_R_ptr, bool is_extended,
880
1197
  IsoForest* model_ptr = NULL;
881
1198
  ExtIsoForest* ext_model_ptr = NULL;
882
1199
  if (is_extended)
883
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1200
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
884
1201
  else
885
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
886
- TreesIndexer* indexer = NULL;
887
- if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
888
- indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
889
- if (indexer != NULL && indexer->indices.empty())
890
- indexer = NULL;
1202
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1203
+ TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr);
891
1204
 
892
1205
  MissingAction missing_action = is_extended?
893
1206
  ext_model_ptr->missing_action
@@ -951,17 +1264,13 @@ void dist_iso(SEXP model_R_ptr, SEXP indexer_R_ptr,
951
1264
 
952
1265
  IsoForest* model_ptr = NULL;
953
1266
  ExtIsoForest* ext_model_ptr = NULL;
954
- TreesIndexer* indexer = NULL;
1267
+ TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr);
955
1268
  if (is_extended)
956
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1269
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
957
1270
  else
958
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
959
- if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
960
- indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
961
- if (indexer != NULL && (indexer->indices.empty() || (!as_kernel && indexer->indices.front().node_distances.empty())))
962
- indexer = NULL;
1271
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
963
1272
 
964
- if (use_reference_points && indexer != NULL && !indexer->indices.front().reference_points.empty()) {
1273
+ if (use_reference_points && indexer && !indexer->indices.front().reference_points.empty()) {
965
1274
  tmat_ptr = NULL;
966
1275
  dmat_ptr = NULL;
967
1276
  rmat_ptr = REAL(rmat);
@@ -1044,11 +1353,13 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
1044
1353
  IsoForest* model_ptr = NULL;
1045
1354
  ExtIsoForest* ext_model_ptr = NULL;
1046
1355
  if (is_extended)
1047
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1356
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
1048
1357
  else
1049
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1358
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1359
+
1360
+ Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imputer_R_ptr);
1050
1361
 
1051
- Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1362
+ if (!imputer_ptr) throw Rcpp::exception("Error: requested missing value imputation, but model was built without imputer.\n");
1052
1363
 
1053
1364
 
1054
1365
  impute_missing_values(numeric_data_ptr, categ_data_ptr, true,
@@ -1064,85 +1375,154 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
1064
1375
  }
1065
1376
 
1066
1377
  // [[Rcpp::export(rng = false)]]
1067
- void drop_imputer(Rcpp::List lst_modify, Rcpp::List lst_modify2)
1378
+ void drop_imputer(bool is_altrepped, bool free_cpp,
1379
+ SEXP lst_imputer, Rcpp::List lst_cpp_objects, Rcpp::List lst_params)
1068
1380
  {
1069
- Rcpp::RawVector empty_ser = Rcpp::RawVector();
1070
- Rcpp::LogicalVector FalseObj = Rcpp::LogicalVector::create(false);
1071
- Rcpp::XPtr<Imputer> imp_ptr = lst_modify["imp_ptr"];
1072
- imp_ptr.release();
1381
+ SEXP FalseObj = PROTECT(Rf_ScalarLogical(0));
1382
+ SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
1383
+ SEXP altrepped_null = PROTECT(get_altrepped_null_pointer());
1384
+
1385
+ if (is_altrepped) {
1386
+
1387
+ if (free_cpp) {
1388
+ SEXP imp_R_ptr = R_altrep_data1(lst_imputer);
1389
+ Imputer* imputer_ptr = (Imputer*)R_ExternalPtrAddr(imp_R_ptr);
1390
+ delete imputer_ptr;
1391
+ R_SetExternalPtrAddr(imp_R_ptr, nullptr);
1392
+ R_ClearExternalPtr(imp_R_ptr);
1393
+ }
1394
+
1395
+ lst_cpp_objects["imputer"] = altrepped_null;
1396
+
1397
+ }
1398
+
1399
+ else {
1400
+
1401
+ if (free_cpp) {
1402
+ SEXP imp_R_ptr = VECTOR_ELT(lst_imputer, 0);
1403
+ Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imp_R_ptr);
1404
+ delete imputer_ptr;
1405
+ R_SetExternalPtrAddr(imp_R_ptr, nullptr);
1406
+ R_ClearExternalPtr(imp_R_ptr);
1407
+ SET_VECTOR_ELT(lst_imputer, 0, imp_R_ptr);
1408
+ }
1073
1409
 
1074
- lst_modify["imp_ser"] = empty_ser;
1075
- lst_modify2["build_imputer"] = FalseObj;
1410
+ SET_VECTOR_ELT(lst_imputer, 0, blank_ptr);
1411
+ SET_VECTOR_ELT(lst_imputer, 1, R_NilValue);
1412
+ }
1413
+
1414
+ lst_params["build_imputer"] = FalseObj;
1415
+ UNPROTECT(3);
1076
1416
  }
1077
1417
 
1078
1418
  // [[Rcpp::export(rng = false)]]
1079
- void drop_indexer(Rcpp::List lst_modify, Rcpp::List lst_modify2)
1419
+ void drop_indexer(bool is_altrepped, bool free_cpp,
1420
+ SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata)
1080
1421
  {
1081
- Rcpp::XPtr<TreesIndexer> empty_ptr = Rcpp::XPtr<TreesIndexer>(nullptr, false);
1082
- Rcpp::RawVector empty_ser = Rcpp::RawVector();
1083
- Rcpp::CharacterVector empty_char = Rcpp::CharacterVector();
1084
- Rcpp::XPtr<TreesIndexer> indexer = lst_modify["indexer"];
1085
- indexer.release();
1422
+ SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0));
1423
+ SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
1424
+ SEXP altrepped_null = PROTECT(get_altrepped_null_pointer());
1425
+
1426
+ if (is_altrepped) {
1427
+
1428
+ if (free_cpp) {
1429
+ SEXP ind_R_ptr = R_altrep_data1(lst_indexer);
1430
+ TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr);
1431
+ delete indexer_ptr;
1432
+ R_SetExternalPtrAddr(ind_R_ptr, nullptr);
1433
+ R_ClearExternalPtr(ind_R_ptr);
1434
+ }
1435
+
1436
+ lst_cpp_objects["indexer"] = altrepped_null;
1437
+ }
1438
+
1439
+ else {
1440
+
1441
+ if (free_cpp) {
1442
+ SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0);
1443
+ TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr);
1444
+ delete indexer_ptr;
1445
+ R_SetExternalPtrAddr(ind_R_ptr, nullptr);
1446
+ R_ClearExternalPtr(ind_R_ptr);
1447
+ SET_VECTOR_ELT(lst_indexer, 0, ind_R_ptr);
1448
+ }
1449
+
1450
+ SET_VECTOR_ELT(lst_indexer, 0, blank_ptr);
1451
+ SET_VECTOR_ELT(lst_indexer, 1, R_NilValue);
1452
+ }
1086
1453
 
1087
- lst_modify["ind_ser"] = empty_ser;
1088
- lst_modify2["reference_names"] = empty_char;
1454
+ lst_metadata["reference_names"] = empty_str;
1455
+ UNPROTECT(3);
1089
1456
  }
1090
1457
 
1091
1458
  // [[Rcpp::export(rng = false)]]
1092
- void drop_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2)
1459
+ void drop_reference_points(bool is_altrepped, SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata)
1093
1460
  {
1094
- Rcpp::CharacterVector empty_char = Rcpp::CharacterVector();
1095
- Rcpp::RawVector empty_ser = Rcpp::RawVector();
1096
- Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
1097
- TreesIndexer *indexer_ptr = indexer_R_ptr.get();
1098
- if (indexer_ptr == NULL) {
1099
- lst_modify["ind_ser"] = empty_ser;
1100
- lst_modify2["reference_names"] = empty_char;
1101
- return;
1102
- }
1103
- if (indexer_ptr->indices.empty()) {
1104
- indexer_R_ptr.release();
1105
- lst_modify["ind_ser"] = empty_ser;
1106
- lst_modify2["reference_names"] = empty_char;
1107
- return;
1108
- }
1109
- if (indexer_ptr->indices.front().reference_points.empty()) {
1110
- lst_modify2["reference_names"] = empty_char;
1111
- return;
1461
+ SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0));
1462
+
1463
+ if (is_altrepped)
1464
+ {
1465
+ SEXP ind_R_ptr = R_altrep_data1(lst_indexer);
1466
+ TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr);
1467
+ if (!indexer_ptr) return;
1468
+
1469
+ for (auto &tree : indexer_ptr->indices)
1470
+ {
1471
+ tree.reference_points.clear();
1472
+ tree.reference_indptr.clear();
1473
+ tree.reference_mapping.clear();
1474
+ }
1112
1475
  }
1113
1476
 
1114
- std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr));
1115
- for (auto &tree : new_indexer->indices)
1477
+ else
1116
1478
  {
1117
- tree.reference_points.clear();
1118
- tree.reference_indptr.clear();
1119
- tree.reference_mapping.clear();
1479
+ SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0);
1480
+ TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr);
1481
+ if (!indexer_ptr) return;
1482
+
1483
+ std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr));
1484
+ for (auto &tree : new_indexer->indices)
1485
+ {
1486
+ tree.reference_points.clear();
1487
+ tree.reference_indptr.clear();
1488
+ tree.reference_mapping.clear();
1489
+ }
1490
+
1491
+ SET_VECTOR_ELT(lst_indexer, 1, serialize_cpp_obj(new_indexer.get()));
1492
+ *indexer_ptr = std::move(*new_indexer);
1493
+ new_indexer.release();
1120
1494
  }
1121
- Rcpp::RawVector ind_ser = serialize_cpp_obj(new_indexer.get());
1122
- *indexer_ptr = std::move(*new_indexer);
1123
- new_indexer.release();
1124
- lst_modify["ind_ser"] = ind_ser;
1125
- lst_modify2["reference_names"] = empty_char;
1495
+
1496
+ lst_metadata["reference_names"] = empty_str;
1497
+ UNPROTECT(1);
1126
1498
  }
1127
1499
 
1128
1500
  // [[Rcpp::export(rng = false)]]
1129
1501
  Rcpp::List subset_trees
1130
1502
  (
1131
1503
  SEXP model_R_ptr, SEXP imputer_R_ptr, SEXP indexer_R_ptr,
1132
- bool is_extended, bool has_imputer,
1504
+ bool is_extended, bool is_altrepped,
1133
1505
  Rcpp::IntegerVector trees_take
1134
1506
  )
1135
1507
  {
1136
- bool has_indexer = !Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL;
1137
-
1138
1508
  Rcpp::List out = Rcpp::List::create(
1509
+ Rcpp::_["model"] = R_NilValue,
1510
+ Rcpp::_["imputer"] = R_NilValue,
1511
+ Rcpp::_["indexer"] = R_NilValue
1512
+ );
1513
+ Rcpp::List lst_model = Rcpp::List::create(
1139
1514
  Rcpp::_["ptr"] = R_NilValue,
1140
- Rcpp::_["serialized"] = R_NilValue,
1141
- Rcpp::_["imp_ptr"] = R_NilValue,
1142
- Rcpp::_["imp_ser"] = R_NilValue,
1143
- Rcpp::_["indexer"] = R_NilValue,
1144
- Rcpp::_["ind_ser"] = R_NilValue
1515
+ Rcpp::_["ser"] = R_NilValue
1516
+ );
1517
+ Rcpp::List lst_imputer = Rcpp::List::create(
1518
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
1519
+ Rcpp::_["ser"] = R_NilValue
1145
1520
  );
1521
+ Rcpp::List lst_indexer = Rcpp::List::create(
1522
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
1523
+ Rcpp::_["ser"] = R_NilValue
1524
+ );
1525
+
1146
1526
 
1147
1527
  IsoForest* model_ptr = NULL;
1148
1528
  ExtIsoForest* ext_model_ptr = NULL;
@@ -1161,16 +1541,15 @@ Rcpp::List subset_trees
1161
1541
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1162
1542
  new_model_ptr = std::unique_ptr<IsoForest>(new IsoForest());
1163
1543
  }
1164
-
1165
1544
 
1166
- if (has_imputer) {
1167
- imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1168
- new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
1545
+ imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1546
+ if (imputer_ptr) {
1547
+ new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
1169
1548
  }
1170
1549
 
1171
- if (has_indexer) {
1172
- indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1173
- new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer());
1550
+ indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1551
+ if (indexer_ptr) {
1552
+ new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer());
1174
1553
  }
1175
1554
 
1176
1555
  std::unique_ptr<size_t[]> trees_take_(new size_t[trees_take.size()]);
@@ -1184,31 +1563,49 @@ Rcpp::List subset_trees
1184
1563
  trees_take_.get(), trees_take.size());
1185
1564
  trees_take_.reset();
1186
1565
 
1187
- if (!is_extended)
1188
- out["serialized"] = serialize_cpp_obj(new_model_ptr.get());
1566
+ if (is_altrepped)
1567
+ {
1568
+ out["model"] = is_extended?
1569
+ Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&new_ext_model_ptr)
1570
+ :
1571
+ Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&new_model_ptr);
1572
+ out["imputer"] = imputer_ptr?
1573
+ Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&new_imputer_ptr)
1574
+ :
1575
+ Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1576
+ out["indexer"] = indexer_ptr?
1577
+ Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&new_indexer_ptr)
1578
+ :
1579
+ Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1580
+ }
1581
+
1189
1582
  else
1190
- out["serialized"] = serialize_cpp_obj(new_ext_model_ptr.get());
1191
- if (has_imputer)
1192
- out["imp_ser"] = serialize_cpp_obj(new_imputer_ptr.get());
1193
- if (has_indexer)
1194
- out["ind_ser"] = serialize_cpp_obj(new_indexer_ptr.get());
1583
+ {
1584
+ lst_model["ser"] = is_extended? serialize_cpp_obj(new_ext_model_ptr.get()) : serialize_cpp_obj(new_model_ptr.get());
1585
+ if (imputer_ptr) lst_imputer["ser"] = serialize_cpp_obj(new_imputer_ptr.get());
1586
+ if (indexer_ptr) lst_indexer["ser"] = serialize_cpp_obj(new_indexer_ptr.get());
1195
1587
 
1196
- if (!is_extended) {
1197
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get());
1588
+ lst_model["ptr"] = is_extended?
1589
+ Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get())
1590
+ :
1591
+ Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get());
1198
1592
  new_model_ptr.release();
1593
+
1594
+ if (imputer_ptr) {
1595
+ lst_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get());
1596
+ new_imputer_ptr.release();
1597
+ }
1598
+
1599
+ if (indexer_ptr) {
1600
+ lst_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get());
1601
+ new_indexer_ptr.release();
1602
+ }
1603
+
1604
+ out["model"] = lst_model;
1605
+ out["imputer"] = lst_imputer;
1606
+ out["indexer"] = lst_indexer;
1199
1607
  }
1200
- else {
1201
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get());
1202
- new_ext_model_ptr.release();
1203
- }
1204
- if (has_imputer) {
1205
- out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get());
1206
- new_imputer_ptr.release();
1207
- }
1208
- if (has_indexer) {
1209
- out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get());
1210
- new_indexer_ptr.release();
1211
- }
1608
+
1212
1609
  return out;
1213
1610
  }
1214
1611
 
@@ -1251,12 +1648,12 @@ Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
1251
1648
  ExtIsoForest* ext_model_ptr = NULL;
1252
1649
  if (is_extended)
1253
1650
  {
1254
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1651
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
1255
1652
  ntrees = ext_model_ptr->hplanes.size();
1256
1653
  }
1257
1654
  else
1258
1655
  {
1259
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1656
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1260
1657
  ntrees = model_ptr->trees.size();
1261
1658
  }
1262
1659
 
@@ -1282,65 +1679,52 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1282
1679
  Rcpp::RawVector serialized_imputer,
1283
1680
  Rcpp::RawVector serialized_indexer,
1284
1681
  Rcpp::List &model_cpp_obj_update,
1285
- Rcpp::List &model_params_update)
1682
+ Rcpp::List &model_params_update,
1683
+ bool is_altrepped)
1286
1684
  {
1287
- if ((!Rf_isNull(imp_R_ptr) && R_ExternalPtrAddr(imp_R_ptr) != NULL)
1288
- &&
1289
- !(!Rf_isNull(oimp_R_ptr) && R_ExternalPtrAddr(oimp_R_ptr) != NULL))
1290
- {
1291
- Rcpp::stop("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n");
1292
- }
1293
- if ((!Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL)
1294
- &&
1295
- !(!Rf_isNull(oind_R_ptr) && R_ExternalPtrAddr(oind_R_ptr) != NULL))
1296
- {
1297
- Rcpp::stop("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n");
1298
- }
1299
-
1300
1685
  Rcpp::List out = Rcpp::List::create(
1301
- Rcpp::_["serialized"] = R_NilValue,
1302
- Rcpp::_["imp_ser"] = R_NilValue,
1303
- Rcpp::_["ind_ser"] = R_NilValue
1686
+ Rcpp::_["model_ser"] = R_NilValue,
1687
+ Rcpp::_["imputer_ser"] = R_NilValue,
1688
+ Rcpp::_["indexer_ser"] = R_NilValue
1304
1689
  );
1305
1690
 
1306
1691
  Rcpp::IntegerVector ntrees_new = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]));
1307
1692
 
1308
- IsoForest* model_ptr = NULL;
1309
- IsoForest* other_ptr = NULL;
1310
- ExtIsoForest* ext_model_ptr = NULL;
1311
- ExtIsoForest* ext_other_ptr = NULL;
1312
- Imputer* imputer_ptr = NULL;
1313
- Imputer* oimputer_ptr = NULL;
1314
- TreesIndexer* indexer_ptr = NULL;
1315
- TreesIndexer* oindexer_ptr = NULL;
1693
+ IsoForest* model_ptr = nullptr;
1694
+ IsoForest* other_ptr = nullptr;
1695
+ ExtIsoForest* ext_model_ptr = nullptr;
1696
+ ExtIsoForest* ext_other_ptr = nullptr;
1697
+ Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1698
+ Imputer* oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr));
1699
+ TreesIndexer* indexer_ptr = get_indexer_ptr_from_R_obj(ind_R_ptr);
1700
+ TreesIndexer* oindexer_ptr = get_indexer_ptr_from_R_obj(oind_R_ptr);
1316
1701
  size_t old_ntrees;
1317
1702
 
1318
1703
  if (is_extended) {
1319
1704
  ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1320
1705
  ext_other_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(other_R_ptr));
1321
1706
  old_ntrees = ext_model_ptr->hplanes.size();
1707
+ if (ext_model_ptr == ext_other_ptr) {
1708
+ throw Rcpp::exception("Error: attempting to append trees from one model to itself.");
1709
+ }
1322
1710
  } else {
1323
1711
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1324
1712
  other_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(other_R_ptr));
1325
1713
  old_ntrees = model_ptr->trees.size();
1714
+ if (model_ptr == other_ptr) {
1715
+ throw Rcpp::exception("Error: attempting to append trees from one model to itself.");
1716
+ }
1326
1717
  }
1327
1718
 
1328
- if (!Rf_isNull(imp_R_ptr) && !Rf_isNull(oimp_R_ptr) &&
1329
- R_ExternalPtrAddr(imp_R_ptr) != NULL &&
1330
- R_ExternalPtrAddr(oimp_R_ptr) != NULL)
1331
- {
1332
- imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1333
- oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr));
1719
+ if (imputer_ptr && !oimputer_ptr) {
1720
+ throw Rcpp::exception("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n");
1334
1721
  }
1335
1722
 
1336
- if (!Rf_isNull(ind_R_ptr) && !Rf_isNull(oind_R_ptr) &&
1337
- R_ExternalPtrAddr(ind_R_ptr) != NULL &&
1338
- R_ExternalPtrAddr(oind_R_ptr) != NULL)
1339
- {
1340
- indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
1341
- oindexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(oind_R_ptr));
1723
+ if (indexer_ptr && !oindexer_ptr) {
1724
+ throw Rcpp::exception("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n");
1342
1725
  }
1343
1726
 
1727
+
1344
1728
  merge_models(model_ptr, other_ptr,
1345
1729
  ext_model_ptr, ext_other_ptr,
1346
1730
  imputer_ptr, oimputer_ptr,
@@ -1348,6 +1732,9 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1348
1732
 
1349
1733
  Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
1350
1734
  size_t new_size;
1735
+
1736
+ if (is_altrepped) goto dont_serialize;
1737
+
1351
1738
  try
1352
1739
  {
1353
1740
  if (!is_extended)
@@ -1361,7 +1748,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1361
1748
  new_serialized = resize_vec(serialized_obj, new_size);
1362
1749
  char *temp = (char*)RAW(new_serialized);
1363
1750
  incremental_serialize_isotree(*model_ptr, temp);
1364
- out["serialized"] = new_serialized;
1751
+ out["model_ser"] = new_serialized;
1365
1752
  }
1366
1753
 
1367
1754
  catch (std::runtime_error &e) {
@@ -1371,7 +1758,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1371
1758
 
1372
1759
  else {
1373
1760
  serialize_anew_singlevar:
1374
- out["serialized"] = serialize_cpp_obj(model_ptr);
1761
+ out["model_ser"] = serialize_cpp_obj(model_ptr);
1375
1762
  }
1376
1763
  }
1377
1764
 
@@ -1386,7 +1773,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1386
1773
  new_serialized = resize_vec(serialized_obj, new_size);
1387
1774
  char *temp = (char*)RAW(new_serialized);
1388
1775
  incremental_serialize_isotree(*ext_model_ptr, temp);
1389
- out["serialized"] = new_serialized;
1776
+ out["model_ser"] = new_serialized;
1390
1777
  }
1391
1778
 
1392
1779
  catch (std::runtime_error &e) {
@@ -1396,11 +1783,11 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1396
1783
 
1397
1784
  else {
1398
1785
  serialize_anew_ext:
1399
- out["serialized"] = serialize_cpp_obj(ext_model_ptr);
1786
+ out["model_ser"] = serialize_cpp_obj(ext_model_ptr);
1400
1787
  }
1401
1788
  }
1402
1789
 
1403
- if (imputer_ptr != NULL)
1790
+ if (imputer_ptr)
1404
1791
  {
1405
1792
  if (serialized_imputer.size() &&
1406
1793
  check_can_undergo_incremental_serialization(*imputer_ptr, (char*)RAW(serialized_imputer)))
@@ -1411,7 +1798,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1411
1798
  new_imp_serialized = resize_vec(serialized_imputer, new_size);
1412
1799
  char *temp = (char*)RAW(new_imp_serialized);
1413
1800
  incremental_serialize_isotree(*imputer_ptr, temp);
1414
- out["imp_ser"] = new_imp_serialized;
1801
+ out["imputer_ser"] = new_imp_serialized;
1415
1802
  }
1416
1803
 
1417
1804
  catch (std::runtime_error &e) {
@@ -1421,11 +1808,11 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1421
1808
 
1422
1809
  else {
1423
1810
  serialize_anew_imp:
1424
- out["imp_ser"] = serialize_cpp_obj(imputer_ptr);
1811
+ out["imputer_ser"] = serialize_cpp_obj(imputer_ptr);
1425
1812
  }
1426
1813
  }
1427
1814
 
1428
- if (indexer_ptr != NULL)
1815
+ if (indexer_ptr)
1429
1816
  {
1430
1817
  if (serialized_indexer.size() &&
1431
1818
  check_can_undergo_incremental_serialization(*indexer_ptr, (char*)RAW(serialized_indexer)))
@@ -1436,7 +1823,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1436
1823
  new_ind_serialized = resize_vec(serialized_indexer, new_size);
1437
1824
  char *temp = (char*)RAW(new_ind_serialized);
1438
1825
  incremental_serialize_isotree(*indexer_ptr, temp);
1439
- out["ind_ser"] = new_ind_serialized;
1826
+ out["indexer_ser"] = new_ind_serialized;
1440
1827
  }
1441
1828
 
1442
1829
  catch (std::runtime_error &e) {
@@ -1446,7 +1833,7 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1446
1833
 
1447
1834
  else {
1448
1835
  serialize_anew_ind:
1449
- out["ind_ser"] = serialize_cpp_obj(indexer_ptr);
1836
+ out["indexer_ser"] = serialize_cpp_obj(indexer_ptr);
1450
1837
  }
1451
1838
  }
1452
1839
  }
@@ -1458,18 +1845,34 @@ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1458
1845
  else
1459
1846
  ext_model_ptr->hplanes.resize(old_ntrees);
1460
1847
 
1461
- if (imputer_ptr != NULL)
1848
+ if (imputer_ptr)
1462
1849
  imputer_ptr->imputer_tree.resize(old_ntrees);
1463
- if (indexer_ptr != NULL)
1850
+ if (indexer_ptr)
1464
1851
  indexer_ptr->indices.resize(old_ntrees);
1465
1852
  throw;
1466
1853
  }
1467
1854
 
1468
- model_cpp_obj_update["serialized"] = out["serialized"];
1469
- if (imputer_ptr)
1470
- model_cpp_obj_update["imp_ser"] = out["imp_ser"];
1471
- if (indexer_ptr)
1472
- model_cpp_obj_update["ind_ser"] = out["ind_ser"];
1855
+ {
1856
+ Rcpp::List model_lst = model_cpp_obj_update["model"];
1857
+ model_lst["ser"] = out["model_ser"];
1858
+ model_cpp_obj_update["model"] = model_lst;
1859
+
1860
+ if (imputer_ptr)
1861
+ {
1862
+ Rcpp::List imputer_lst = model_cpp_obj_update["imputer"];
1863
+ imputer_lst["ser"] = out["imputer_ser"];
1864
+ model_cpp_obj_update["imputer"] = imputer_lst;
1865
+ }
1866
+
1867
+ if (indexer_ptr)
1868
+ {
1869
+ Rcpp::List indexer_lst = model_cpp_obj_update["indexer"];
1870
+ indexer_lst["ser"] = out["indexer_ser"];
1871
+ model_cpp_obj_update["indexer"] = indexer_lst;
1872
+ }
1873
+ }
1874
+
1875
+ dont_serialize:
1473
1876
  *(INTEGER(ntrees_new)) = is_extended? ext_model_ptr->hplanes.size() : model_ptr->trees.size();
1474
1877
  model_params_update["ntrees"] = ntrees_new;
1475
1878
  }
@@ -1551,14 +1954,12 @@ Rcpp::CharacterVector model_to_sql_with_select_from(SEXP model_R_ptr, bool is_ex
1551
1954
  }
1552
1955
 
1553
1956
  // [[Rcpp::export(rng = false)]]
1554
- Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, bool has_imputer, SEXP ind_R_ptr)
1957
+ Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, SEXP ind_R_ptr, bool lazy_serialization)
1555
1958
  {
1556
- bool has_indexer = !Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL;
1557
-
1558
1959
  Rcpp::List out = Rcpp::List::create(
1559
- Rcpp::_["ptr"] = R_NilValue,
1560
- Rcpp::_["imp_ptr"] = R_NilValue,
1561
- Rcpp::_["indexer"] = R_NilValue
1960
+ Rcpp::_["model"] = Rcpp::XPtr<void*>(nullptr, false),
1961
+ Rcpp::_["imputer"] = Rcpp::XPtr<void*>(nullptr, false),
1962
+ Rcpp::_["indexer"] = Rcpp::XPtr<void*>(nullptr, false)
1562
1963
  );
1563
1964
 
1564
1965
  IsoForest* model_ptr = NULL;
@@ -1569,9 +1970,9 @@ Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr,
1569
1970
  ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1570
1971
  else
1571
1972
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1572
- if (has_imputer)
1973
+ if (R_ExternalPtrAddr(imp_R_ptr))
1573
1974
  imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1574
- if (has_indexer)
1975
+ if (R_ExternalPtrAddr(ind_R_ptr))
1575
1976
  indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
1576
1977
 
1577
1978
  std::unique_ptr<IsoForest> copy_model(new IsoForest());
@@ -1579,84 +1980,115 @@ Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr,
1579
1980
  std::unique_ptr<Imputer> copy_imputer(new Imputer());
1580
1981
  std::unique_ptr<TreesIndexer> copy_indexer(new TreesIndexer());
1581
1982
 
1582
- if (model_ptr != NULL)
1983
+ if (model_ptr)
1583
1984
  *copy_model = *model_ptr;
1584
- if (ext_model_ptr != NULL)
1985
+ if (ext_model_ptr)
1585
1986
  *copy_ext_model = *ext_model_ptr;
1586
- if (imputer_ptr != NULL)
1987
+ if (imputer_ptr)
1587
1988
  *copy_imputer = *imputer_ptr;
1588
- if (indexer_ptr != NULL)
1989
+ if (indexer_ptr)
1589
1990
  *copy_indexer = *indexer_ptr;
1590
1991
 
1591
- if (is_extended) {
1592
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get());
1593
- copy_ext_model.release();
1594
- }
1595
- else {
1596
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get());
1597
- copy_model.release();
1598
- }
1599
- if (has_imputer) {
1600
- out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get());
1601
- copy_imputer.release();
1992
+ if (lazy_serialization)
1993
+ {
1994
+ if (is_extended) {
1995
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&copy_ext_model);
1996
+ }
1997
+ else {
1998
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&copy_model);
1999
+ }
2000
+
2001
+ if (imputer_ptr) {
2002
+ out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&copy_imputer);
2003
+ }
2004
+ else {
2005
+ out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2006
+ }
2007
+
2008
+ if (indexer_ptr) {
2009
+ out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&copy_indexer);
2010
+ }
2011
+ else {
2012
+ out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2013
+ }
1602
2014
  }
1603
- if (has_indexer) {
1604
- out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get());
1605
- copy_indexer.release();
2015
+
2016
+ else
2017
+ {
2018
+ if (is_extended) {
2019
+ out["model"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get());
2020
+ copy_ext_model.release();
2021
+ }
2022
+ else {
2023
+ out["model"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get());
2024
+ copy_model.release();
2025
+ }
2026
+ if (imputer_ptr) {
2027
+ out["imputer"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get());
2028
+ copy_imputer.release();
2029
+ }
2030
+ if (indexer_ptr) {
2031
+ out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get());
2032
+ copy_indexer.release();
2033
+ }
1606
2034
  }
2035
+
1607
2036
  return out;
1608
2037
  }
1609
2038
 
1610
2039
  // [[Rcpp::export(rng = false)]]
1611
- void build_tree_indices(Rcpp::List lst_modify, bool is_extended, bool with_distances, int nthreads)
2040
+ void build_tree_indices(Rcpp::List lst_cpp_objects, SEXP ptr_model, bool is_altrepped, bool is_extended, bool with_distances, int nthreads)
1612
2041
  {
1613
- Rcpp::RawVector ind_ser = Rcpp::RawVector();
1614
- Rcpp::List empty_lst = Rcpp::List::create(Rcpp::_["indexer"] = R_NilValue);
2042
+ Rcpp::List lst_out = Rcpp::List::create(
2043
+ Rcpp::_["ptr"] = R_NilValue,
2044
+ Rcpp::_["ser"] = R_NilValue
2045
+ );
1615
2046
  std::unique_ptr<TreesIndexer> indexer(new TreesIndexer());
1616
2047
 
1617
2048
  if (!is_extended) {
1618
2049
  build_tree_indices(*indexer,
1619
- *static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])),
2050
+ *static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model)),
1620
2051
  nthreads,
1621
2052
  with_distances);
1622
2053
  }
1623
2054
  else {
1624
2055
  build_tree_indices(*indexer,
1625
- *static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])),
2056
+ *static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model)),
1626
2057
  nthreads,
1627
2058
  with_distances);
1628
2059
  }
1629
2060
 
1630
- ind_ser = serialize_cpp_obj(indexer.get());
1631
- empty_lst["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
1632
- if (!Rf_isNull(lst_modify["indexer"])) {
1633
- Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
1634
- indexer_R_ptr.release();
2061
+ if (is_altrepped) {
2062
+ lst_cpp_objects["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&indexer);
2063
+ }
2064
+
2065
+ else {
2066
+ lst_out["ser"] = serialize_cpp_obj(indexer.get());
2067
+ lst_out["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
2068
+ indexer.release();
2069
+ lst_cpp_objects["indexer"] = lst_out;
1635
2070
  }
1636
-
1637
- lst_modify["ind_ser"] = ind_ser;
1638
- lst_modify["indexer"] = empty_lst["indexer"];
1639
- indexer.release();
1640
2071
  }
1641
2072
 
1642
2073
  // [[Rcpp::export(rng = false)]]
1643
2074
  bool check_node_indexer_has_distances(SEXP indexer_R_ptr)
1644
2075
  {
1645
- if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL)
1646
- return false;
1647
- TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1648
- if (indexer->indices.empty()) return false;
2076
+ const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr);
2077
+ if (!indexer) return false;
1649
2078
  return !indexer->indices.front().node_distances.empty();
1650
2079
  }
1651
2080
 
1652
2081
  // [[Rcpp::export(rng = false)]]
1653
- void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rnames, bool is_extended,
2082
+ void set_reference_points(Rcpp::List lst_cpp_objects, SEXP ptr_model, SEXP ind_R_ptr, bool is_altrepped,
2083
+ Rcpp::List lst_metadata, SEXP rnames, bool is_extended,
1654
2084
  Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
1655
2085
  Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
1656
2086
  size_t nrows, int nthreads, bool with_distances)
1657
2087
  {
1658
- Rcpp::RawVector ind_ser = Rcpp::RawVector();
1659
- Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
2088
+ Rcpp::List lst_out = Rcpp::List::create(
2089
+ Rcpp::_["ptr"] = R_NilValue,
2090
+ Rcpp::_["ser"] = R_NilValue
2091
+ );
1660
2092
 
1661
2093
  double* numeric_data_ptr = NULL;
1662
2094
  int* categ_data_ptr = NULL;
@@ -1682,14 +2114,13 @@ void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rn
1682
2114
  Xc_indptr_ptr = INTEGER(Xc_indptr);
1683
2115
  }
1684
2116
 
1685
- IsoForest* model_ptr = NULL;
1686
- ExtIsoForest* ext_model_ptr = NULL;
1687
- TreesIndexer* indexer = NULL;
2117
+ IsoForest* model_ptr = nullptr;
2118
+ ExtIsoForest* ext_model_ptr = nullptr;
2119
+ TreesIndexer* indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
1688
2120
  if (is_extended)
1689
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"]));
2121
+ ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model));
1690
2122
  else
1691
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"]));
1692
- indexer = indexer_R_ptr.get();
2123
+ model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model));
1693
2124
 
1694
2125
  MissingAction missing_action = is_extended?
1695
2126
  ext_model_ptr->missing_action
@@ -1701,9 +2132,18 @@ void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rn
1701
2132
  if (Xc.size()) Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, nthreads);
1702
2133
  }
1703
2134
 
1704
- std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer));
2135
+ std::unique_ptr<TreesIndexer> new_indexer(is_altrepped? nullptr : (new TreesIndexer(*indexer)));
2136
+ TreesIndexer *indexer_use = is_altrepped? indexer : new_indexer.get();
1705
2137
 
1706
- set_reference_points(model_ptr, ext_model_ptr, new_indexer.get(),
2138
+ /* Note: if using an altrepped pointer, the indexer is modified in-place. If that fails,
2139
+ it will end up overwitten, with the previous references taken away. OTOH, if using
2140
+ a pointer + serialized, and it fails, it should not overwrite anything, and thus
2141
+ should not re-assign here immediately. */
2142
+ if (is_altrepped) {
2143
+ lst_metadata["reference_names"] = rnames;
2144
+ }
2145
+
2146
+ set_reference_points(model_ptr, ext_model_ptr, indexer_use,
1707
2147
  with_distances,
1708
2148
  numeric_data_ptr, categ_data_ptr,
1709
2149
  true, (size_t)0, (size_t)0,
@@ -1711,39 +2151,41 @@ void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rn
1711
2151
  (double*)NULL, (int*)NULL, (int*)NULL,
1712
2152
  nrows, nthreads);
1713
2153
 
1714
- ind_ser = serialize_cpp_obj(new_indexer.get());
1715
- *indexer = std::move(*new_indexer);
1716
- new_indexer.release();
1717
- lst_modify["ind_ser"] = ind_ser;
1718
- lst_modify2["reference_names"] = rnames;
2154
+ if (!is_altrepped) {
2155
+ lst_out["ser"] = serialize_cpp_obj(new_indexer.get());
2156
+ *indexer = std::move(*new_indexer);
2157
+ lst_metadata["reference_names"] = rnames;
2158
+ }
1719
2159
  }
1720
2160
 
1721
2161
  // [[Rcpp::export(rng = false)]]
1722
2162
  bool check_node_indexer_has_references(SEXP indexer_R_ptr)
1723
2163
  {
1724
- if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL)
1725
- return false;
1726
- TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1727
- if (indexer->indices.empty())
1728
- return false;
1729
- if (indexer->indices.front().reference_points.empty())
1730
- return false;
1731
- else
1732
- return true;
2164
+ const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr);
2165
+ if (!indexer) return false;
2166
+ return !(indexer->indices.front().reference_points.empty());
1733
2167
  }
1734
2168
 
1735
2169
  // [[Rcpp::export(rng = false)]]
1736
2170
  int get_num_references(SEXP indexer_R_ptr)
1737
2171
  {
1738
- TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1739
- if (indexer == NULL || indexer->indices.empty()) return 0;
2172
+ const TreesIndexer *indexer = static_cast<const TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
2173
+ if (!indexer || indexer->indices.empty()) return 0;
1740
2174
  return indexer->indices.front().reference_points.size();
1741
2175
  }
1742
2176
 
1743
2177
  // [[Rcpp::export(rng = false)]]
1744
- SEXP get_null_R_pointer()
2178
+ SEXP get_null_R_pointer_internal(bool altrepped)
1745
2179
  {
1746
- return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue);
2180
+ if (!altrepped) {
2181
+ return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue);
2182
+ }
2183
+ else {
2184
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
2185
+ SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue));
2186
+ UNPROTECT(2);
2187
+ return out;
2188
+ }
1747
2189
  }
1748
2190
 
1749
2191
  /* This library will use different code paths for opening a file path
@@ -1852,18 +2294,30 @@ void serialize_to_file
1852
2294
  }
1853
2295
 
1854
2296
  // [[Rcpp::export]]
1855
- Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname)
2297
+ Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname, bool lazy_serialization)
1856
2298
  {
1857
2299
  Rcpp::List out = Rcpp::List::create(
1858
- Rcpp::_["ptr"] = R_NilValue,
1859
- Rcpp::_["serialized"] = R_NilValue,
1860
- Rcpp::_["imp_ptr"] = R_NilValue,
1861
- Rcpp::_["imp_ser"] = R_NilValue,
2300
+ Rcpp::_["model"] = R_NilValue,
2301
+ Rcpp::_["imputer"] = R_NilValue,
1862
2302
  Rcpp::_["indexer"] = R_NilValue,
1863
- Rcpp::_["ind_ser"] = R_NilValue,
1864
2303
  Rcpp::_["metadata"] = R_NilValue
1865
2304
  );
1866
2305
 
2306
+ if (!lazy_serialization) {
2307
+ out["model"] = Rcpp::List::create(
2308
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2309
+ Rcpp::_["ser"] = R_NilValue
2310
+ );
2311
+ out["imputer"] = Rcpp::List::create(
2312
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2313
+ Rcpp::_["ser"] = R_NilValue
2314
+ );
2315
+ out["indexer"] = Rcpp::List::create(
2316
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2317
+ Rcpp::_["ser"] = R_NilValue
2318
+ );
2319
+ }
2320
+
1867
2321
  FileOpener file_(fname[0], "rb");
1868
2322
  FILE *input_file = file_.get_handle();
1869
2323
 
@@ -1928,30 +2382,61 @@ Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname)
1928
2382
  ptr_metadata
1929
2383
  );
1930
2384
 
1931
- if (has_IsoForest)
1932
- out["serialized"] = serialize_cpp_obj(model.get());
1933
- else
1934
- out["serialized"] = serialize_cpp_obj(model_ext.get());
1935
- if (has_Imputer)
1936
- out["imp_ser"] = serialize_cpp_obj(imputer.get());
1937
- if (has_Indexer)
1938
- out["ind_ser"] = serialize_cpp_obj(indexer.get());
2385
+ if (lazy_serialization)
2386
+ {
2387
+ if (has_IsoForest)
2388
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, &model);
2389
+ else
2390
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, &model_ext);
2391
+
2392
+ if (has_Imputer)
2393
+ out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, &imputer);
2394
+ else
2395
+ out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1939
2396
 
1940
- if (has_IsoForest) {
1941
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get());
1942
- model.release();
1943
- }
1944
- else {
1945
- out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get());
1946
- model_ext.release();
1947
- }
1948
- if (has_Imputer) {
1949
- out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get());
1950
- imputer.release();
2397
+ if (has_Indexer)
2398
+ out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, &indexer);
2399
+ else
2400
+ out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1951
2401
  }
1952
- if (has_Indexer) {
1953
- out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
1954
- indexer.release();
2402
+
2403
+ else
2404
+ {
2405
+ Rcpp::List tmp_model = out["model"];
2406
+ Rcpp::List tmp_imputer = out["imputer"];
2407
+ Rcpp::List tmp_indexer = out["indexer"];
2408
+
2409
+ if (has_IsoForest)
2410
+ tmp_model["ser"] = serialize_cpp_obj(model.get());
2411
+ else
2412
+ tmp_model["ser"] = serialize_cpp_obj(model_ext.get());
2413
+
2414
+ if (has_Imputer)
2415
+ tmp_imputer["ser"] = serialize_cpp_obj(imputer.get());
2416
+
2417
+ if (has_Indexer)
2418
+ tmp_indexer["ser"] = serialize_cpp_obj(indexer.get());
2419
+
2420
+ if (has_IsoForest) {
2421
+ tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get());
2422
+ model.release();
2423
+ }
2424
+ else {
2425
+ tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get());
2426
+ model_ext.release();
2427
+ }
2428
+ if (has_Imputer) {
2429
+ tmp_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get());
2430
+ imputer.release();
2431
+ }
2432
+ if (has_Indexer) {
2433
+ tmp_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
2434
+ indexer.release();
2435
+ }
2436
+
2437
+ out["model"] = tmp_model;
2438
+ out["imputer"] = tmp_imputer;
2439
+ out["indexer"] = tmp_indexer;
1955
2440
  }
1956
2441
 
1957
2442
  return out;
@@ -2473,12 +2958,12 @@ Rcpp::IntegerMatrix get_null_int_mat()
2473
2958
  int get_ntrees(SEXP model_R_ptr, bool is_extended)
2474
2959
  {
2475
2960
  if (is_extended) {
2476
- ExtIsoForest* ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2961
+ const ExtIsoForest* ext_model_ptr = static_cast<const ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2477
2962
  return ext_model_ptr->hplanes.size();
2478
2963
  }
2479
2964
 
2480
2965
  else {
2481
- IsoForest* model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2966
+ const IsoForest* model_ptr = static_cast<const IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2482
2967
  return model_ptr->trees.size();
2483
2968
  }
2484
2969
  }