bigdecimal 4.0.1 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,12 +29,16 @@
29
29
  #endif
30
30
 
31
31
  #include "bits.h"
32
+ #include "ntt.h"
33
+ #include "div.h"
32
34
  #include "static_assert.h"
33
35
 
34
- #define BIGDECIMAL_VERSION "4.0.1"
35
-
36
- /* #define ENABLE_NUMERIC_STRING */
36
+ #define BIGDECIMAL_VERSION "4.1.1"
37
37
 
38
+ /* Make sure VPMULT_BATCH_SIZE*BASE*BASE does not overflow DECDIG_DBL */
39
+ #define VPMULT_BATCH_SIZE 16
40
+ #define NTT_MULTIPLICATION_THRESHOLD 450
41
+ #define NEWTON_RAPHSON_DIVISION_THRESHOLD 100
38
42
  #define SIGNED_VALUE_MAX INTPTR_MAX
39
43
  #define SIGNED_VALUE_MIN INTPTR_MIN
40
44
  #define MUL_OVERFLOW_SIGNED_VALUE_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, SIGNED_VALUE_MIN, SIGNED_VALUE_MAX)
@@ -75,16 +79,6 @@ static struct {
75
79
  uint8_t mode;
76
80
  } rbd_rounding_modes[RBD_NUM_ROUNDING_MODES];
77
81
 
78
- typedef struct {
79
- VALUE bigdecimal;
80
- Real *real;
81
- } BDVALUE;
82
-
83
- typedef struct {
84
- VALUE bigdecimal_or_nil;
85
- Real *real_or_null;
86
- } NULLABLE_BDVALUE;
87
-
88
82
  static inline BDVALUE
89
83
  bdvalue_nonnullable(NULLABLE_BDVALUE v)
90
84
  {
@@ -160,42 +154,6 @@ rbd_struct_size(size_t const internal_digits)
160
154
  return offsetof(Real, frac) + frac_len * sizeof(DECDIG);
161
155
  }
162
156
 
163
- static inline Real *
164
- rbd_allocate_struct(size_t const internal_digits)
165
- {
166
- size_t const size = rbd_struct_size(internal_digits);
167
- Real *real = ruby_xcalloc(1, size);
168
- atomic_allocation_count_inc();
169
- real->MaxPrec = internal_digits;
170
- return real;
171
- }
172
-
173
- static inline Real *
174
- rbd_allocate_struct_decimal_digits(size_t const decimal_digits)
175
- {
176
- return rbd_allocate_struct(roomof(decimal_digits, BASE_FIG));
177
- }
178
-
179
- static void
180
- rbd_free_struct(Real *real)
181
- {
182
- if (real != NULL) {
183
- check_allocation_count_nonzero();
184
- ruby_xfree(real);
185
- atomic_allocation_count_dec_nounderflow();
186
- }
187
- }
188
-
189
- MAYBE_UNUSED(static inline Real * rbd_allocate_struct_zero(int sign, size_t const digits));
190
- #define NewZero rbd_allocate_struct_zero
191
- static inline Real *
192
- rbd_allocate_struct_zero(int sign, size_t const digits)
193
- {
194
- Real *real = rbd_allocate_struct_decimal_digits(digits);
195
- VpSetZero(real, sign);
196
- return real;
197
- }
198
-
199
157
  /*
200
158
  * ================== Ruby Interface part ==========================
201
159
  */
@@ -207,11 +165,9 @@ rbd_allocate_struct_zero(int sign, size_t const digits)
207
165
  static unsigned short VpGetException(void);
208
166
  static void VpSetException(unsigned short f);
209
167
  static void VpCheckException(Real *p, bool always);
210
- static int AddExponent(Real *a, SIGNED_VALUE n);
211
168
  static VALUE CheckGetValue(BDVALUE v);
212
169
  static void VpInternalRound(Real *c, size_t ixDigit, DECDIG vPrev, DECDIG v);
213
170
  static int VpLimitRound(Real *c, size_t ixDigit);
214
- static Real *VpCopy(Real *pv, Real const* const x);
215
171
  static int VPrint(FILE *fp,const char *cntl_chr,Real *a);
216
172
 
217
173
  /*
@@ -226,49 +182,67 @@ static VALUE BigDecimal_negative_zero(void);
226
182
  static VALUE BigDecimal_addsub_with_coerce(VALUE self, VALUE r, size_t prec, int operation);
227
183
  static VALUE BigDecimal_mult_with_coerce(VALUE self, VALUE r, size_t prec);
228
184
 
229
- static void
230
- BigDecimal_delete(void *pv)
231
- {
232
- rbd_free_struct(pv);
233
- }
185
+ #ifndef HAVE_RB_EXT_RACTOR_SAFE
186
+ # undef RUBY_TYPED_FROZEN_SHAREABLE
187
+ # define RUBY_TYPED_FROZEN_SHAREABLE 0
188
+ #endif
189
+
190
+ #ifdef RUBY_TYPED_EMBEDDABLE
191
+ # define HAVE_RUBY_TYPED_EMBEDDABLE 1
192
+ #else
193
+ # ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE
194
+ # define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE
195
+ # define HAVE_RUBY_TYPED_EMBEDDABLE 1
196
+ # else
197
+ # define RUBY_TYPED_EMBEDDABLE 0
198
+ # endif
199
+ #endif
234
200
 
235
201
  static size_t
236
202
  BigDecimal_memsize(const void *ptr)
237
203
  {
204
+ #ifdef HAVE_RUBY_TYPED_EMBEDDABLE
205
+ return 0; // Entirely embedded
206
+ #else
238
207
  const Real *pv = ptr;
239
208
  return (sizeof(*pv) + pv->MaxPrec * sizeof(DECDIG));
240
- }
241
-
242
- #ifndef HAVE_RB_EXT_RACTOR_SAFE
243
- # undef RUBY_TYPED_FROZEN_SHAREABLE
244
- # define RUBY_TYPED_FROZEN_SHAREABLE 0
245
209
  #endif
210
+ }
246
211
 
247
212
  static const rb_data_type_t BigDecimal_data_type = {
248
- "BigDecimal",
249
- { 0, BigDecimal_delete, BigDecimal_memsize, },
250
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
251
- 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED
252
- #endif
213
+ .wrap_struct_name = "BigDecimal",
214
+ .function = {
215
+ .dmark = 0,
216
+ .dfree = RUBY_DEFAULT_FREE,
217
+ .dsize = BigDecimal_memsize,
218
+ },
219
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE,
253
220
  };
254
221
 
255
- // TypedData_Wrap_Struct may fail if there is no memory, or GC.add_stress_to_class(BigDecimal) is set.
256
- // We need to first allocate empty struct, allocate Real struct, and then set the data pointer.
257
- typedef struct { VALUE _obj; } NULL_WRAPPED_VALUE;
258
- static NULL_WRAPPED_VALUE
259
- BigDecimal_alloc_empty_struct(VALUE klass)
222
+ static VALUE
223
+ BigDecimal_allocate(size_t const internal_digits)
260
224
  {
261
- return (NULL_WRAPPED_VALUE) { TypedData_Wrap_Struct(klass, &BigDecimal_data_type, NULL) };
225
+ const size_t size = rbd_struct_size(internal_digits);
226
+ VALUE bd = rb_data_typed_object_zalloc(rb_cBigDecimal, size, &BigDecimal_data_type);
227
+ Real *vp;
228
+ TypedData_Get_Struct(bd, Real, &BigDecimal_data_type, vp);
229
+ vp->MaxPrec = internal_digits;
230
+ RB_OBJ_FREEZE(bd);
231
+ return bd;
262
232
  }
263
233
 
264
234
  static VALUE
265
- BigDecimal_wrap_struct(NULL_WRAPPED_VALUE v, Real *real)
235
+ BigDecimal_allocate_decimal_digits(size_t const decimal_digits)
266
236
  {
267
- VALUE obj = v._obj;
268
- assert(RTYPEDDATA_DATA(obj) == NULL);
269
- RTYPEDDATA_DATA(obj) = real;
270
- RB_OBJ_FREEZE(obj);
271
- return obj;
237
+ return BigDecimal_allocate(roomof(decimal_digits, BASE_FIG));
238
+ }
239
+
240
+ static Real *
241
+ VpPtr(VALUE obj)
242
+ {
243
+ Real *vp;
244
+ TypedData_Get_Struct(obj, Real, &BigDecimal_data_type, vp);
245
+ return vp;
272
246
  }
273
247
 
274
248
  MAYBE_UNUSED(static inline BDVALUE rbd_allocate_struct_zero_wrap(int sign, size_t const digits));
@@ -276,9 +250,10 @@ MAYBE_UNUSED(static inline BDVALUE rbd_allocate_struct_zero_wrap(int sign, size_
276
250
  static BDVALUE
277
251
  rbd_allocate_struct_zero_wrap(int sign, size_t const digits)
278
252
  {
279
- NULL_WRAPPED_VALUE null_wrapped = BigDecimal_alloc_empty_struct(rb_cBigDecimal);
280
- Real *real = rbd_allocate_struct_zero(sign, digits);
281
- return (BDVALUE) { BigDecimal_wrap_struct(null_wrapped, real), real };
253
+ VALUE obj = BigDecimal_allocate_decimal_digits(digits);
254
+ Real *real = VpPtr(obj);
255
+ VpSetZero(real, sign);
256
+ return (BDVALUE) { obj, real };
282
257
  }
283
258
 
284
259
  static inline int
@@ -336,20 +311,11 @@ GetBDValueWithPrecInternal(VALUE v, size_t prec, int must)
336
311
  break;
337
312
  }
338
313
 
339
- #ifdef ENABLE_NUMERIC_STRING
340
- case T_STRING: {
341
- const char *c_str = StringValueCStr(v);
342
- v = rb_cstr_convert_to_BigDecimal(c_str, must);
343
- break;
344
- }
345
- #endif /* ENABLE_NUMERIC_STRING */
346
-
347
314
  default:
348
315
  goto SomeOneMayDoIt;
349
316
  }
350
317
 
351
- Real *vp;
352
- TypedData_Get_Struct(v, Real, &BigDecimal_data_type, vp);
318
+ Real *vp = VpPtr(v);
353
319
  return (NULLABLE_BDVALUE) { v, vp };
354
320
 
355
321
  SomeOneMayDoIt:
@@ -996,7 +962,7 @@ BigDecimal_mode(int argc, VALUE *argv, VALUE self)
996
962
  static size_t
997
963
  GetAddSubPrec(Real *a, Real *b)
998
964
  {
999
- if (!VpIsDef(a) || !VpIsDef(b)) return (size_t)-1L;
965
+ if (VpIsZero(a) || VpIsZero(b)) return Max(a->Prec, b->Prec);
1000
966
  ssize_t min_a = a->exponent - a->Prec;
1001
967
  ssize_t min_b = b->exponent - b->Prec;
1002
968
  return Max(a->exponent, b->exponent) - Min(min_a, min_b);
@@ -1022,26 +988,18 @@ check_int_precision(VALUE v)
1022
988
  static NULLABLE_BDVALUE
1023
989
  CreateFromString(const char *str, VALUE klass, bool strict_p, bool raise_exception)
1024
990
  {
1025
- NULL_WRAPPED_VALUE null_wrapped = BigDecimal_alloc_empty_struct(klass);
1026
- Real *pv = VpAlloc(str, strict_p, raise_exception);
1027
- if (!pv) return (NULLABLE_BDVALUE) { Qnil, NULL };
1028
- return (NULLABLE_BDVALUE) { BigDecimal_wrap_struct(null_wrapped, pv), pv };
991
+ return VpAlloc(str, strict_p, raise_exception);
1029
992
  }
1030
993
 
1031
- static Real *
1032
- VpCopy(Real *pv, Real const* const x)
994
+ void
995
+ VpMemCopy(Real *pv, Real const* const x)
1033
996
  {
1034
- assert(x != NULL);
1035
-
1036
- pv = (Real *)ruby_xrealloc(pv, rbd_struct_size(x->MaxPrec));
1037
997
  pv->MaxPrec = x->MaxPrec;
1038
998
  pv->Prec = x->Prec;
1039
999
  pv->exponent = x->exponent;
1040
1000
  pv->sign = x->sign;
1041
1001
  pv->flag = x->flag;
1042
1002
  MEMCPY(pv->frac, x->frac, DECDIG, pv->MaxPrec);
1043
-
1044
- return pv;
1045
1003
  }
1046
1004
 
1047
1005
  /* Returns True if the value is Not a Number. */
@@ -1081,9 +1039,6 @@ BigDecimal_check_num(Real *p)
1081
1039
  VpCheckException(p, true);
1082
1040
  }
1083
1041
 
1084
- static VALUE BigDecimal_fix(VALUE self);
1085
- static VALUE BigDecimal_split(VALUE self);
1086
-
1087
1042
  /* Returns the value as an Integer.
1088
1043
  *
1089
1044
  * If the BigDecimal is infinity or NaN, raises FloatDomainError.
@@ -1234,7 +1189,7 @@ GetCoercePrec(Real *a, size_t prec)
1234
1189
  static VALUE
1235
1190
  BigDecimal_coerce(VALUE self, VALUE other)
1236
1191
  {
1237
- Real* pv = DATA_PTR(self);
1192
+ Real* pv = VpPtr(self);
1238
1193
  BDVALUE b = GetBDValueWithPrecMust(other, GetCoercePrec(pv, 0));
1239
1194
  return rb_assoc_new(CheckGetValue(b), self);
1240
1195
  }
@@ -1300,13 +1255,32 @@ BigDecimal_addsub_with_coerce(VALUE self, VALUE r, size_t prec, int operation)
1300
1255
  if (VpIsNaN(a.real)) return CheckGetValue(a);
1301
1256
  if (VpIsNaN(b.real)) return CheckGetValue(b);
1302
1257
 
1303
- mx = GetAddSubPrec(a.real, b.real);
1304
- if (mx == (size_t)-1L) {
1305
- /* a or b is inf */
1258
+ if (VpIsInf(a.real) || VpIsInf(b.real)) {
1306
1259
  c = NewZeroWrap(1, BASE_FIG);
1307
1260
  VpAddSub(c.real, a.real, b.real, operation);
1308
1261
  }
1309
1262
  else {
1263
+
1264
+ // Optimization when exponent difference is large
1265
+ // (1.234e+1000).add(5.678e-1000, 10) == (1.234e+1000).add(0.1e+990, 10) in every rounding mode
1266
+ if (prec && !VpIsZero(a.real) && !VpIsZero(b.real)) {
1267
+ size_t precRoom = roomof(prec, BASE_FIG);
1268
+ if (a.real->exponent - (ssize_t)Max(a.real->Prec, precRoom) - 1 > b.real->exponent) {
1269
+ BDVALUE b2 = NewZeroWrap(1, BASE_FIG);
1270
+ VpSetOne(b2.real);
1271
+ VpSetSign(b2.real, b.real->sign);
1272
+ b2.real->exponent = a.real->exponent - (ssize_t)Max(a.real->Prec, precRoom) - 1;
1273
+ b = b2;
1274
+ } else if (b.real->exponent - (ssize_t)Max(b.real->Prec, precRoom) - 1 > a.real->exponent) {
1275
+ BDVALUE a2 = NewZeroWrap(1, BASE_FIG);
1276
+ VpSetOne(a2.real);
1277
+ VpSetSign(a2.real, a.real->sign);
1278
+ a2.real->exponent = b.real->exponent - (ssize_t)Max(b.real->Prec, precRoom) - 1;
1279
+ a = a2;
1280
+ }
1281
+ }
1282
+
1283
+ mx = GetAddSubPrec(a.real, b.real);
1310
1284
  c = NewZeroWrap(1, (mx + 1) * BASE_FIG);
1311
1285
  size_t pl = VpGetPrecLimit();
1312
1286
  if (prec) VpSetPrecLimit(prec);
@@ -1683,7 +1657,7 @@ BigDecimal_DoDivmod(VALUE self, VALUE r, NULLABLE_BDVALUE *div, NULLABLE_BDVALUE
1683
1657
 
1684
1658
  if (VpIsNaN(a.real) || VpIsNaN(b.real) || (VpIsInf(a.real) && VpIsInf(b.real))) {
1685
1659
  VALUE nan = BigDecimal_nan();
1686
- *div = *mod = (NULLABLE_BDVALUE) { nan, DATA_PTR(nan) };
1660
+ *div = *mod = (NULLABLE_BDVALUE) { nan, VpPtr(nan) };
1687
1661
  goto Done;
1688
1662
  }
1689
1663
  if (VpIsZero(b.real)) {
@@ -1692,19 +1666,19 @@ BigDecimal_DoDivmod(VALUE self, VALUE r, NULLABLE_BDVALUE *div, NULLABLE_BDVALUE
1692
1666
  if (VpIsInf(a.real)) {
1693
1667
  if (VpGetSign(a.real) == VpGetSign(b.real)) {
1694
1668
  VALUE inf = BigDecimal_positive_infinity();
1695
- *div = (NULLABLE_BDVALUE) { inf, DATA_PTR(inf) };
1669
+ *div = (NULLABLE_BDVALUE) { inf, VpPtr(inf) };
1696
1670
  }
1697
1671
  else {
1698
1672
  VALUE inf = BigDecimal_negative_infinity();
1699
- *div = (NULLABLE_BDVALUE) { inf, DATA_PTR(inf) };
1673
+ *div = (NULLABLE_BDVALUE) { inf, VpPtr(inf) };
1700
1674
  }
1701
1675
  VALUE nan = BigDecimal_nan();
1702
- *mod = (NULLABLE_BDVALUE) { nan, DATA_PTR(nan) };
1676
+ *mod = (NULLABLE_BDVALUE) { nan, VpPtr(nan) };
1703
1677
  goto Done;
1704
1678
  }
1705
1679
  if (VpIsZero(a.real)) {
1706
1680
  VALUE zero = BigDecimal_positive_zero();
1707
- *div = (NULLABLE_BDVALUE) { zero, DATA_PTR(zero) };
1681
+ *div = (NULLABLE_BDVALUE) { zero, VpPtr(zero) };
1708
1682
  *mod = bdvalue_nullable(a);
1709
1683
  goto Done;
1710
1684
  }
@@ -1718,7 +1692,7 @@ BigDecimal_DoDivmod(VALUE self, VALUE r, NULLABLE_BDVALUE *div, NULLABLE_BDVALUE
1718
1692
  *mod = bdvalue_nullable(b);
1719
1693
  } else {
1720
1694
  VALUE zero = BigDecimal_positive_zero();
1721
- *div = (NULLABLE_BDVALUE) { zero, DATA_PTR(zero) };
1695
+ *div = (NULLABLE_BDVALUE) { zero, VpPtr(zero) };
1722
1696
  *mod = bdvalue_nullable(a);
1723
1697
  }
1724
1698
  goto Done;
@@ -2562,9 +2536,7 @@ check_exception(VALUE bd)
2562
2536
  {
2563
2537
  assert(is_kind_of_BigDecimal(bd));
2564
2538
 
2565
- Real *vp;
2566
- TypedData_Get_Struct(bd, Real, &BigDecimal_data_type, vp);
2567
- VpCheckException(vp, false);
2539
+ VpCheckException(VpPtr(bd), false);
2568
2540
 
2569
2541
  return bd;
2570
2542
  }
@@ -2572,17 +2544,19 @@ check_exception(VALUE bd)
2572
2544
  static VALUE
2573
2545
  rb_uint64_convert_to_BigDecimal(uint64_t uval)
2574
2546
  {
2575
- NULL_WRAPPED_VALUE null_wrapped = BigDecimal_alloc_empty_struct(rb_cBigDecimal);
2547
+ VALUE bd;
2576
2548
  Real *vp;
2577
2549
  if (uval == 0) {
2578
- vp = rbd_allocate_struct(1);
2550
+ bd = BigDecimal_allocate(1);
2551
+ vp = VpPtr(bd);
2579
2552
  vp->Prec = 1;
2580
2553
  vp->exponent = 1;
2581
2554
  VpSetZero(vp, 1);
2582
2555
  vp->frac[0] = 0;
2583
2556
  }
2584
2557
  else if (uval < BASE) {
2585
- vp = rbd_allocate_struct(1);
2558
+ bd = BigDecimal_allocate(1);
2559
+ vp = VpPtr(bd);
2586
2560
  vp->Prec = 1;
2587
2561
  vp->exponent = 1;
2588
2562
  VpSetSign(vp, 1);
@@ -2607,14 +2581,15 @@ rb_uint64_convert_to_BigDecimal(uint64_t uval)
2607
2581
  }
2608
2582
 
2609
2583
  const size_t exp = len + ntz;
2610
- vp = rbd_allocate_struct(len);
2584
+ bd = BigDecimal_allocate(len);
2585
+ vp = VpPtr(bd);
2611
2586
  vp->Prec = len;
2612
2587
  vp->exponent = exp;
2613
2588
  VpSetSign(vp, 1);
2614
2589
  MEMCPY(vp->frac, buf + BIGDECIMAL_INT64_MAX_LENGTH - len, DECDIG, len);
2615
2590
  }
2616
2591
 
2617
- return BigDecimal_wrap_struct(null_wrapped, vp);
2592
+ return bd;
2618
2593
  }
2619
2594
 
2620
2595
  static VALUE
@@ -2623,8 +2598,7 @@ rb_int64_convert_to_BigDecimal(int64_t ival)
2623
2598
  const uint64_t uval = (ival < 0) ? (((uint64_t)-(ival+1))+1) : (uint64_t)ival;
2624
2599
  VALUE bd = rb_uint64_convert_to_BigDecimal(uval);
2625
2600
  if (ival < 0) {
2626
- Real *vp;
2627
- TypedData_Get_Struct(bd, Real, &BigDecimal_data_type, vp);
2601
+ Real *vp = VpPtr(bd);
2628
2602
  VpSetSign(vp, -1);
2629
2603
  }
2630
2604
  return bd;
@@ -2831,8 +2805,7 @@ rb_float_convert_to_BigDecimal(VALUE val, size_t digs, int raise_exception)
2831
2805
  }
2832
2806
 
2833
2807
  VALUE bd = rb_inum_convert_to_BigDecimal(inum);
2834
- Real *vp;
2835
- TypedData_Get_Struct(bd, Real, &BigDecimal_data_type, vp);
2808
+ Real *vp = VpPtr(bd);
2836
2809
  assert(vp->Prec == prec);
2837
2810
  vp->exponent = exp;
2838
2811
 
@@ -2898,13 +2871,15 @@ rb_convert_to_BigDecimal(VALUE val, size_t digs, int raise_exception)
2898
2871
  if (digs == SIZE_MAX)
2899
2872
  return check_exception(val);
2900
2873
 
2901
- NULL_WRAPPED_VALUE null_wrapped = BigDecimal_alloc_empty_struct(rb_cBigDecimal);
2902
- Real *vp;
2903
- TypedData_Get_Struct(val, Real, &BigDecimal_data_type, vp);
2904
- vp = VpCopy(NULL, vp);
2874
+ Real *vp = VpPtr(val);
2875
+
2876
+ VALUE copy = BigDecimal_allocate(vp->MaxPrec);
2877
+ Real *vp_copy = VpPtr(copy);
2878
+
2879
+ VpMemCopy(vp_copy, vp);
2880
+
2905
2881
  RB_GC_GUARD(val);
2906
2882
 
2907
- VALUE copy = BigDecimal_wrap_struct(null_wrapped, vp);
2908
2883
  /* TODO: rounding */
2909
2884
  return check_exception(copy);
2910
2885
  }
@@ -3226,19 +3201,39 @@ BigDecimal_literal(const char *str)
3226
3201
 
3227
3202
  #ifdef BIGDECIMAL_USE_VP_TEST_METHODS
3228
3203
  VALUE
3229
- BigDecimal_vpdivd(VALUE self, VALUE r, VALUE cprec) {
3230
- BDVALUE a,b,c,d;
3204
+ BigDecimal_vpdivd_generic(VALUE self, VALUE r, VALUE cprec, void (*vpdivd_func)(Real*, Real*, Real*, Real*)) {
3205
+ BDVALUE a, b, c, d;
3231
3206
  size_t cn = NUM2INT(cprec);
3232
3207
  a = GetBDValueMust(self);
3233
3208
  b = GetBDValueMust(r);
3234
3209
  c = NewZeroWrap(1, cn * BASE_FIG);
3235
3210
  d = NewZeroWrap(1, VPDIVD_REM_PREC(a.real, b.real, c.real) * BASE_FIG);
3236
- VpDivd(c.real, d.real, a.real, b.real);
3211
+ vpdivd_func(c.real, d.real, a.real, b.real);
3237
3212
  RB_GC_GUARD(a.bigdecimal);
3238
3213
  RB_GC_GUARD(b.bigdecimal);
3239
3214
  return rb_assoc_new(c.bigdecimal, d.bigdecimal);
3240
3215
  }
3241
3216
 
3217
+ void
3218
+ VpDivdNormal(Real *c, Real *r, Real *a, Real *b) {
3219
+ VpDivd(c, r, a, b);
3220
+ }
3221
+
3222
+ VALUE
3223
+ BigDecimal_vpdivd(VALUE self, VALUE r, VALUE cprec) {
3224
+ return BigDecimal_vpdivd_generic(self, r, cprec, VpDivdNormal);
3225
+ }
3226
+
3227
+ VALUE
3228
+ BigDecimal_vpdivd_newton(VALUE self, VALUE r, VALUE cprec) {
3229
+ return BigDecimal_vpdivd_generic(self, r, cprec, VpDivdNewton);
3230
+ }
3231
+
3232
+ VALUE
3233
+ BigDecimal_newton_raphson_inverse(VALUE self, VALUE prec) {
3234
+ return newton_raphson_inverse(self, NUM2SIZET(prec));
3235
+ }
3236
+
3242
3237
  VALUE
3243
3238
  BigDecimal_vpmult(VALUE self, VALUE v) {
3244
3239
  BDVALUE a,b,c;
@@ -3250,6 +3245,23 @@ BigDecimal_vpmult(VALUE self, VALUE v) {
3250
3245
  RB_GC_GUARD(b.bigdecimal);
3251
3246
  return c.bigdecimal;
3252
3247
  }
3248
+
3249
+ VALUE
3250
+ BigDecimal_nttmult(VALUE self, VALUE v) {
3251
+ BDVALUE a,b,c;
3252
+ a = GetBDValueMust(self);
3253
+ b = GetBDValueMust(v);
3254
+ c = NewZeroWrap(1, VPMULT_RESULT_PREC(a.real, b.real) * BASE_FIG);
3255
+ ntt_multiply(a.real->Prec, b.real->Prec, a.real->frac, b.real->frac, c.real->frac);
3256
+ VpSetSign(c.real, a.real->sign * b.real->sign);
3257
+ c.real->exponent = a.real->exponent + b.real->exponent;
3258
+ c.real->Prec = a.real->Prec + b.real->Prec;
3259
+ VpNmlz(c.real);
3260
+ RB_GC_GUARD(a.bigdecimal);
3261
+ RB_GC_GUARD(b.bigdecimal);
3262
+ return c.bigdecimal;
3263
+ }
3264
+
3253
3265
  #endif /* BIGDECIMAL_USE_VP_TEST_METHODS */
3254
3266
 
3255
3267
  /* Document-class: BigDecimal
@@ -3620,7 +3632,10 @@ Init_bigdecimal(void)
3620
3632
 
3621
3633
  #ifdef BIGDECIMAL_USE_VP_TEST_METHODS
3622
3634
  rb_define_method(rb_cBigDecimal, "vpdivd", BigDecimal_vpdivd, 2);
3635
+ rb_define_method(rb_cBigDecimal, "vpdivd_newton", BigDecimal_vpdivd_newton, 2);
3636
+ rb_define_method(rb_cBigDecimal, "newton_raphson_inverse", BigDecimal_newton_raphson_inverse, 1);
3623
3637
  rb_define_method(rb_cBigDecimal, "vpmult", BigDecimal_vpmult, 1);
3638
+ rb_define_method(rb_cBigDecimal, "nttmult", BigDecimal_nttmult, 1);
3624
3639
  #endif /* BIGDECIMAL_USE_VP_TEST_METHODS */
3625
3640
 
3626
3641
  #define ROUNDING_MODE(i, name, value) \
@@ -3663,7 +3678,7 @@ Init_bigdecimal(void)
3663
3678
  static int gfDebug = 1; /* Debug switch */
3664
3679
  #endif /* BIGDECIMAL_DEBUG */
3665
3680
 
3666
- static Real *VpConstOne; /* constant 1.0 */
3681
+ static VALUE VpConstOne; /* constant 1.0 */
3667
3682
 
3668
3683
  enum op_sw {
3669
3684
  OP_SW_ADD = 1, /* + */
@@ -4064,8 +4079,9 @@ VpInit(DECDIG BaseVal)
4064
4079
  VpGetDoubleNegZero();
4065
4080
 
4066
4081
  /* Const 1.0 */
4067
- VpConstOne = NewZero(1, 1);
4068
- VpSetOne(VpConstOne);
4082
+ rb_global_variable(&VpConstOne);
4083
+ VpConstOne = NewZeroWrap(1, 1).bigdecimal;
4084
+ VpSetOne(VpPtr(VpConstOne));
4069
4085
 
4070
4086
  #ifdef BIGDECIMAL_DEBUG
4071
4087
  gnAlloc = 0;
@@ -4077,7 +4093,7 @@ VpInit(DECDIG BaseVal)
4077
4093
  VP_EXPORT Real *
4078
4094
  VpOne(void)
4079
4095
  {
4080
- return VpConstOne;
4096
+ return VpPtr(VpConstOne);
4081
4097
  }
4082
4098
 
4083
4099
  /* If exponent overflows,then raise exception or returns 0 */
@@ -4108,7 +4124,7 @@ overflow:
4108
4124
  return VpException(VP_EXCEPTION_OVERFLOW, "Exponent overflow", 0);
4109
4125
  }
4110
4126
 
4111
- Real *
4127
+ NULLABLE_BDVALUE
4112
4128
  bigdecimal_parse_special_string(const char *str)
4113
4129
  {
4114
4130
  static const struct {
@@ -4133,66 +4149,27 @@ bigdecimal_parse_special_string(const char *str)
4133
4149
  p = str + table[i].len;
4134
4150
  while (*p && ISSPACE(*p)) ++p;
4135
4151
  if (*p == '\0') {
4136
- Real *vp = rbd_allocate_struct(1);
4152
+ VALUE obj = BigDecimal_allocate(1);
4153
+ Real *vp = VpPtr(obj);
4137
4154
  switch (table[i].sign) {
4138
4155
  default:
4139
- UNREACHABLE; break;
4156
+ UNREACHABLE;
4157
+ return (NULLABLE_BDVALUE) { Qnil, NULL };
4140
4158
  case VP_SIGN_POSITIVE_INFINITE:
4141
4159
  VpSetPosInf(vp);
4142
- return vp;
4160
+ break;
4143
4161
  case VP_SIGN_NEGATIVE_INFINITE:
4144
4162
  VpSetNegInf(vp);
4145
- return vp;
4163
+ break;
4146
4164
  case VP_SIGN_NaN:
4147
4165
  VpSetNaN(vp);
4148
- return vp;
4166
+ break;
4149
4167
  }
4168
+ return (NULLABLE_BDVALUE) { obj, vp };
4150
4169
  }
4151
4170
  }
4152
4171
 
4153
- return NULL;
4154
- }
4155
-
4156
- struct VpCtoV_args {
4157
- Real *a;
4158
- const char *int_chr;
4159
- size_t ni;
4160
- const char *frac;
4161
- size_t nf;
4162
- const char *exp_chr;
4163
- size_t ne;
4164
- };
4165
-
4166
- static VALUE
4167
- call_VpCtoV(VALUE arg)
4168
- {
4169
- struct VpCtoV_args *x = (struct VpCtoV_args *)arg;
4170
- return (VALUE)VpCtoV(x->a, x->int_chr, x->ni, x->frac, x->nf, x->exp_chr, x->ne);
4171
- }
4172
-
4173
- static int
4174
- protected_VpCtoV(Real *a, const char *int_chr, size_t ni, const char *frac, size_t nf, const char *exp_chr, size_t ne, int free_on_error)
4175
- {
4176
- struct VpCtoV_args args;
4177
- int state = 0;
4178
-
4179
- args.a = a;
4180
- args.int_chr = int_chr;
4181
- args.ni = ni;
4182
- args.frac = frac;
4183
- args.nf = nf;
4184
- args.exp_chr = exp_chr;
4185
- args.ne = ne;
4186
-
4187
- VALUE result = rb_protect(call_VpCtoV, (VALUE)&args, &state);
4188
- if (state) {
4189
- if (free_on_error) {
4190
- rbd_free_struct(a);
4191
- }
4192
- rb_jump_tag(state);
4193
- }
4194
-
4195
- return (int)result;
4172
+ return (NULLABLE_BDVALUE) { Qnil, NULL };
4196
4173
  }
4197
4174
 
4198
4175
  /*
@@ -4201,25 +4178,25 @@ protected_VpCtoV(Real *a, const char *int_chr, size_t ni, const char *frac, size
4201
4178
  * szVal ... The value assigned(char).
4202
4179
  *
4203
4180
  * [Returns]
4204
- * Pointer to the newly allocated variable, or
4205
- * NULL be returned if memory allocation is failed,or any error.
4181
+ * NULLABLE_BDVALUE to the newly allocated variable.
4182
+ * Null is returned if memory allocation failed, or any error occurred.
4206
4183
  */
4207
- VP_EXPORT Real *
4184
+ VP_EXPORT NULLABLE_BDVALUE
4208
4185
  VpAlloc(const char *szVal, int strict_p, int exc)
4209
4186
  {
4210
4187
  const char *orig_szVal = szVal;
4211
4188
  size_t i, j, ni, ipf, nf, ipe, ne, exp_seen, nalloc;
4212
4189
  char v, *psz;
4213
4190
  int sign=1;
4214
- Real *vp = NULL;
4215
4191
  VALUE buf;
4216
4192
 
4217
4193
  /* Skipping leading spaces */
4218
4194
  while (ISSPACE(*szVal)) szVal++;
4219
4195
 
4220
4196
  /* Check on Inf & NaN */
4221
- if ((vp = bigdecimal_parse_special_string(szVal)) != NULL) {
4222
- return vp;
4197
+ NULLABLE_BDVALUE special_bd = bigdecimal_parse_special_string(szVal);
4198
+ if (special_bd.real_or_null != NULL) {
4199
+ return special_bd;
4223
4200
  }
4224
4201
 
4225
4202
  /* Skip leading `#`.
@@ -4373,10 +4350,11 @@ VpAlloc(const char *szVal, int strict_p, int exc)
4373
4350
  VALUE str;
4374
4351
  invalid_value:
4375
4352
  if (!strict_p) {
4376
- return NewZero(1, 1);
4353
+ BDVALUE res = rbd_allocate_struct_zero_wrap(1, 1);
4354
+ return (NULLABLE_BDVALUE) { res.bigdecimal, res.real };
4377
4355
  }
4378
4356
  if (!exc) {
4379
- return NULL;
4357
+ return (NULLABLE_BDVALUE) { Qnil, NULL };
4380
4358
  }
4381
4359
  str = rb_str_new2(orig_szVal);
4382
4360
  rb_raise(rb_eArgError, "invalid value for BigDecimal(): \"%"PRIsVALUE"\"", str);
@@ -4384,11 +4362,12 @@ VpAlloc(const char *szVal, int strict_p, int exc)
4384
4362
 
4385
4363
  nalloc = (ni + nf + BASE_FIG - 1) / BASE_FIG + 1; /* set effective allocation */
4386
4364
  /* units for szVal[] */
4387
- vp = rbd_allocate_struct(nalloc);
4365
+ VALUE obj = BigDecimal_allocate(nalloc);
4366
+ Real *vp = VpPtr(obj);
4388
4367
  VpSetZero(vp, sign);
4389
- protected_VpCtoV(vp, psz, ni, psz + ipf, nf, psz + ipe, ne, true);
4368
+ VpCtoV(vp, psz, ni, psz + ipf, nf, psz + ipe, ne);
4390
4369
  rb_str_resize(buf, 0);
4391
- return vp;
4370
+ return (NULLABLE_BDVALUE) { obj, vp };
4392
4371
  }
4393
4372
 
4394
4373
  /*
@@ -4860,17 +4839,12 @@ VpSetPTR(Real *a, Real *b, Real *c, size_t *a_pos, size_t *b_pos, size_t *c_pos,
4860
4839
  * a0 a1 .... an * b0
4861
4840
  * +_____________________________
4862
4841
  * c0 c1 c2 ...... cl
4863
- * nc <---|
4864
- * MaxAB |--------------------|
4865
4842
  */
4866
4843
  VP_EXPORT size_t
4867
4844
  VpMult(Real *c, Real *a, Real *b)
4868
4845
  {
4869
- size_t MxIndA, MxIndB, MxIndAB;
4870
- size_t ind_c, i, ii, nc;
4871
- size_t ind_as, ind_ae, ind_bs;
4872
- DECDIG carry;
4873
- DECDIG_DBL s;
4846
+ ssize_t a_batch_max, b_batch_max;
4847
+ DECDIG_DBL batch[VPMULT_BATCH_SIZE * 2 - 1];
4874
4848
 
4875
4849
  if (!VpIsDefOP(c, a, b, OP_SW_MULT)) return 0; /* No significant digit */
4876
4850
 
@@ -4894,61 +4868,57 @@ VpMult(Real *c, Real *a, Real *b)
4894
4868
  a = b;
4895
4869
  b = w;
4896
4870
  }
4897
- MxIndA = a->Prec - 1;
4898
- MxIndB = b->Prec - 1;
4899
- MxIndAB = a->Prec + b->Prec - 1;
4900
4871
 
4901
4872
  /* set LHSV c info */
4902
4873
 
4903
4874
  c->exponent = a->exponent; /* set exponent */
4904
4875
  VpSetSign(c, VpGetSign(a) * VpGetSign(b)); /* set sign */
4905
4876
  if (!AddExponent(c, b->exponent)) return 0;
4906
- carry = 0;
4907
- nc = ind_c = MxIndAB;
4908
- memset(c->frac, 0, (nc + 1) * sizeof(DECDIG)); /* Initialize c */
4909
- c->Prec = nc + 1; /* set precision */
4910
- for (nc = 0; nc < MxIndAB; ++nc, --ind_c) {
4911
- if (nc < MxIndB) { /* The left triangle of the Fig. */
4912
- ind_as = MxIndA - nc;
4913
- ind_ae = MxIndA;
4914
- ind_bs = MxIndB;
4915
- }
4916
- else if (nc <= MxIndA) { /* The middle rectangular of the Fig. */
4917
- ind_as = MxIndA - nc;
4918
- ind_ae = MxIndA - (nc - MxIndB);
4919
- ind_bs = MxIndB;
4920
- }
4921
- else /* if (nc > MxIndA) */ { /* The right triangle of the Fig. */
4922
- ind_as = 0;
4923
- ind_ae = MxIndAB - nc - 1;
4924
- ind_bs = MxIndB - (nc - MxIndA);
4925
- }
4926
4877
 
4927
- for (i = ind_as; i <= ind_ae; ++i) {
4928
- s = (DECDIG_DBL)a->frac[i] * b->frac[ind_bs--];
4929
- carry = (DECDIG)(s / BASE);
4930
- s -= (DECDIG_DBL)carry * BASE;
4931
- c->frac[ind_c] += (DECDIG)s;
4932
- if (c->frac[ind_c] >= BASE) {
4933
- s = c->frac[ind_c] / BASE;
4934
- carry += (DECDIG)s;
4935
- c->frac[ind_c] -= (DECDIG)(s * BASE);
4878
+ if (b->Prec >= NTT_MULTIPLICATION_THRESHOLD) {
4879
+ ntt_multiply(a->Prec, b->Prec, a->frac, b->frac, c->frac);
4880
+ c->Prec = a->Prec + b->Prec;
4881
+ goto Cleanup;
4882
+ }
4883
+
4884
+ c->Prec = a->Prec + b->Prec; /* set precision */
4885
+ memset(c->frac, 0, c->Prec * sizeof(DECDIG)); /* Initialize c */
4886
+
4887
+ // Process VPMULT_BATCH_SIZE decdigits at a time to reduce the number of carry operations.
4888
+ a_batch_max = (a->Prec - 1) / VPMULT_BATCH_SIZE;
4889
+ b_batch_max = (b->Prec - 1) / VPMULT_BATCH_SIZE;
4890
+ for (ssize_t ibatch = a_batch_max; ibatch >= 0; ibatch--) {
4891
+ int isize = ibatch == a_batch_max ? (a->Prec - 1) % VPMULT_BATCH_SIZE + 1 : VPMULT_BATCH_SIZE;
4892
+ for (ssize_t jbatch = b_batch_max; jbatch >= 0; jbatch--) {
4893
+ int jsize = jbatch == b_batch_max ? (b->Prec - 1) % VPMULT_BATCH_SIZE + 1 : VPMULT_BATCH_SIZE;
4894
+ memset(batch, 0, (isize + jsize - 1) * sizeof(DECDIG_DBL));
4895
+
4896
+ // Perform multiplication without carry calculation.
4897
+ // BASE * BASE * VPMULT_BATCH_SIZE < 2**64 should be satisfied so that
4898
+ // DECDIG_DBL can hold the intermediate sum without overflow.
4899
+ for (int i = 0; i < isize; i++) {
4900
+ for (int j = 0; j < jsize; j++) {
4901
+ batch[i + j] += (DECDIG_DBL)a->frac[ibatch * VPMULT_BATCH_SIZE + i] * b->frac[jbatch * VPMULT_BATCH_SIZE + j];
4902
+ }
4936
4903
  }
4937
- if (carry) {
4938
- ii = ind_c;
4939
- while (ii-- > 0) {
4940
- c->frac[ii] += carry;
4941
- if (c->frac[ii] >= BASE) {
4942
- carry = c->frac[ii] / BASE;
4943
- c->frac[ii] -= (carry * BASE);
4944
- }
4945
- else {
4946
- break;
4947
- }
4948
- }
4949
- }
4950
- }
4904
+
4905
+ // Add the batch result to c with carry calculation.
4906
+ DECDIG_DBL carry = 0;
4907
+ for (int k = isize + jsize - 2; k >= 0; k--) {
4908
+ size_t l = (ibatch + jbatch) * VPMULT_BATCH_SIZE + k + 1;
4909
+ DECDIG_DBL s = c->frac[l] + batch[k] + carry;
4910
+ c->frac[l] = (DECDIG)(s % BASE);
4911
+ carry = (DECDIG_DBL)(s / BASE);
4912
+ }
4913
+
4914
+ // Adding carry may exceed BASE, but it won't cause overflow of DECDIG.
4915
+ // Exceeded value will be resolved in the carry operation of next (ibatch + jbatch - 1) batch.
4916
+ // WARNING: This safety strongly relies on the current nested loop execution order.
4917
+ c->frac[(ibatch + jbatch) * VPMULT_BATCH_SIZE] += (DECDIG)carry;
4918
+ }
4951
4919
  }
4920
+
4921
+ Cleanup:
4952
4922
  VpNmlz(c);
4953
4923
 
4954
4924
  Exit:
@@ -4996,6 +4966,11 @@ VpDivd(Real *c, Real *r, Real *a, Real *b)
4996
4966
 
4997
4967
  if (word_a > word_r || word_b + word_c - 2 >= word_r) goto space_error;
4998
4968
 
4969
+ if (word_c >= NEWTON_RAPHSON_DIVISION_THRESHOLD && word_b >= NEWTON_RAPHSON_DIVISION_THRESHOLD) {
4970
+ VpDivdNewton(c, r, a, b);
4971
+ goto Exit;
4972
+ }
4973
+
4999
4974
  for (i = 0; i < word_a; ++i) r->frac[i] = a->frac[i];
5000
4975
  for (i = word_a; i < word_r; ++i) r->frac[i] = 0;
5001
4976
  for (i = 0; i < word_c; ++i) c->frac[i] = 0;