cumo 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +16 -36
  3. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +7 -0
  4. data/CHANGELOG.md +16 -0
  5. data/Dockerfile +34 -0
  6. data/cumo.gemspec +1 -1
  7. data/docker-build.sh +4 -0
  8. data/docker-launch.sh +4 -0
  9. data/docs/src-tree.md +1 -1
  10. data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
  11. data/ext/cumo/cuda/driver.c +8 -0
  12. data/ext/cumo/depend.erb +1 -1
  13. data/ext/cumo/extconf.rb +1 -1
  14. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
  15. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
  16. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
  17. data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
  18. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
  19. data/ext/cumo/include/cumo.h +2 -2
  20. data/ext/cumo/narray/array.c +5 -3
  21. data/ext/cumo/narray/data.c +25 -26
  22. data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
  23. data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
  24. data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
  25. data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
  26. data/ext/cumo/narray/gen/tmpl/batch_norm.c +4 -1
  27. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +4 -1
  28. data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
  29. data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
  30. data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
  31. data/ext/cumo/narray/gen/tmpl/each.c +4 -2
  32. data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
  33. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +4 -1
  34. data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
  35. data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
  36. data/ext/cumo/narray/gen/tmpl/median.c +2 -2
  37. data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
  38. data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
  39. data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
  40. data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
  41. data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
  42. data/ext/cumo/narray/gen/tmpl/sort.c +2 -2
  43. data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
  44. data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
  45. data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
  46. data/ext/cumo/narray/index.c +1 -1
  47. data/ext/cumo/narray/narray.c +19 -18
  48. data/lib/cumo/narray/extra.rb +160 -156
  49. data/test/cuda/device_test.rb +2 -1
  50. data/test/cudnn_test.rb +2 -2
  51. metadata +5 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c2daf87323aec6ca6a8770825e09c5b6bb57d2ab1b6f10b7405d8c1ea1aa07e
4
- data.tar.gz: cf8359dbc51fdc479fddf3eb6204db7b4f531757d0da445c50b2e34b8a817a4c
3
+ metadata.gz: 93c1ecf4d6098da90d957600dc7254e02072999fa33374951809cb8c4f5645ee
4
+ data.tar.gz: f8961f11f4b8feed097fbfbe3fe0603e270f8f1b44121c112c506e42cefc2bf1
5
5
  SHA512:
6
- metadata.gz: c6a3eca253db3d854c8f7ced3ddc2f54836189ccf2065885e14de5acbaa1aced18e4341cf55e7b348f78db11afbd1be1558c3ab94881c3162ca6241b6e91c511
7
- data.tar.gz: 5696875ce0d4e3d9d20e483892167b71696bfff6a2dc5594b87628e517e3fa29528f828bc63d8adbf6f0c4b43c47efe6beac31d0344320cf7c2f847d6c0d5d1b
6
+ metadata.gz: df0b42ff21e2158657e0d8a86872f9e85a6af7ab6ae09c7dfc4368d71001846c7f087633bfa2f6071bdd43f910da041470a43694c2aa2c37c74b5ff684e85c88
7
+ data.tar.gz: 95572510fbc31633f423db010c9135271c5ded4bfda28c5f07734b90d76e9fd36fa8c2af0bdd1d03151df2eba93aa3f07c61d6d39aa2f8c7d011364a7ee99615
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-10-05 08:00:13 UTC using RuboCop version 1.81.1.
3
+ # on 2025-12-29 17:31:25 UTC using RuboCop version 1.82.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -25,16 +25,8 @@ Bundler/OrderedGems:
25
25
  Exclude:
26
26
  - 'Gemfile'
27
27
 
28
- # Offense count: 1
29
- # Configuration parameters: EnforcedStyle, AllowedGems.
30
- # SupportedStyles: required, forbidden
31
- Gemspec/DependencyVersion:
32
- Exclude:
33
- - 'cumo.gemspec'
34
-
35
28
  # Offense count: 2
36
29
  # This cop supports safe autocorrection (--autocorrect).
37
- # Configuration parameters: Severity.
38
30
  Gemspec/DeprecatedAttributeAssignment:
39
31
  Exclude:
40
32
  - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
@@ -49,14 +41,12 @@ Gemspec/DevelopmentDependencies:
49
41
 
50
42
  # Offense count: 2
51
43
  # This cop supports safe autocorrection (--autocorrect).
52
- # Configuration parameters: Severity.
53
44
  Gemspec/RequireMFA:
54
45
  Exclude:
55
46
  - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
56
47
  - 'cumo.gemspec'
57
48
 
58
49
  # Offense count: 1
59
- # Configuration parameters: Severity.
60
50
  Gemspec/RequiredRubyVersion:
61
51
  Exclude:
62
52
  - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
@@ -275,13 +265,15 @@ Layout/MultilineOperationIndentation:
275
265
  - 'lib/cumo/narray/extra.rb'
276
266
  - 'test/narray_test.rb'
277
267
 
278
- # Offense count: 16
268
+ # Offense count: 27
279
269
  # This cop supports safe autocorrection (--autocorrect).
280
270
  # Configuration parameters: InspectBlocks.
281
271
  Layout/RedundantLineBreak:
282
272
  Exclude:
283
273
  - '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
274
+ - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
284
275
  - '3rd_party/mkmf-cu/test/test_mkmf-cu.rb'
276
+ - 'ext/cumo/extconf.rb'
285
277
  - 'ext/cumo/narray/gen/narray_def.rb'
286
278
  - 'test/bit_test.rb'
287
279
  - 'test/cudnn_test.rb'
@@ -379,7 +371,7 @@ Lint/ConstantDefinitionInBlock:
379
371
  Exclude:
380
372
  - 'test/cuda/compiler_test.rb'
381
373
 
382
- # Offense count: 655
374
+ # Offense count: 650
383
375
  # Configuration parameters: Only, Ignore.
384
376
  Lint/ConstantResolution:
385
377
  Enabled: false
@@ -419,12 +411,13 @@ Lint/NonAtomicFileOperation:
419
411
  Exclude:
420
412
  - 'lib/cumo/cuda/compiler.rb'
421
413
 
422
- # Offense count: 24
414
+ # Offense count: 26
423
415
  # This cop supports unsafe autocorrection (--autocorrect-all).
424
416
  # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredClasses.
425
417
  # IgnoredClasses: Time, DateTime
426
418
  Lint/NumberConversion:
427
419
  Exclude:
420
+ - '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
428
421
  - 'bench/cumo_bench.rb'
429
422
  - 'bench/numo_bench.rb'
430
423
  - 'ext/cumo/narray/gen/cogen_kernel.rb'
@@ -522,17 +515,6 @@ Naming/MethodParameterName:
522
515
  - 'lib/cumo/narray/extra.rb'
523
516
  - 'test/ractor_test.rb'
524
517
 
525
- # Offense count: 1
526
- # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
527
- # NamePrefix: is_, has_, have_, does_
528
- # ForbiddenPrefixes: is_, has_, have_, does_
529
- # AllowedMethods: is_a?
530
- # MethodDefinitionMacros: define_method, define_singleton_method
531
- Naming/PredicatePrefix:
532
- Exclude:
533
- - 'spec/**/*'
534
- - 'ext/cumo/extconf.rb'
535
-
536
518
  # Offense count: 1
537
519
  # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
538
520
  # SupportedStyles: snake_case, normalcase, non_integer
@@ -719,7 +701,7 @@ Style/Documentation:
719
701
  - 'lib/cumo/linalg.rb'
720
702
  - 'lib/cumo/narray/extra.rb'
721
703
 
722
- # Offense count: 203
704
+ # Offense count: 202
723
705
  # Configuration parameters: AllowedMethods, RequireForNonPublicMethods.
724
706
  Style/DocumentationMethod:
725
707
  Enabled: false
@@ -767,7 +749,7 @@ Style/FileWrite:
767
749
  Exclude:
768
750
  - 'lib/cumo/cuda/compiler.rb'
769
751
 
770
- # Offense count: 29
752
+ # Offense count: 27
771
753
  # Configuration parameters: AllowedVariables.
772
754
  Style/GlobalVars:
773
755
  Exclude:
@@ -775,12 +757,11 @@ Style/GlobalVars:
775
757
  - 'ext/cumo/narray/gen/cogen.rb'
776
758
  - 'ext/cumo/narray/gen/cogen_kernel.rb'
777
759
 
778
- # Offense count: 12
760
+ # Offense count: 10
779
761
  # This cop supports safe autocorrection (--autocorrect).
780
762
  # Configuration parameters: MinBodyLength, AllowConsecutiveConditionals.
781
763
  Style/GuardClause:
782
764
  Exclude:
783
- - 'ext/cumo/extconf.rb'
784
765
  - 'ext/cumo/narray/gen/erbpp2.rb'
785
766
  - 'lib/cumo/cuda/link_state.rb'
786
767
  - 'lib/cumo/cuda/module.rb'
@@ -865,9 +846,9 @@ Style/InvertibleUnlessCondition:
865
846
  - 'lib/cumo/cuda/compiler.rb'
866
847
  - 'lib/cumo/cuda/device.rb'
867
848
 
868
- # Offense count: 122
849
+ # Offense count: 119
869
850
  # This cop supports safe autocorrection (--autocorrect).
870
- # Configuration parameters: IgnoreMacros, AllowedMethods, AllowedPatterns, IncludedMacros, AllowParenthesesInMultilineCall, AllowParenthesesInChaining, AllowParenthesesInCamelCaseMethod, AllowParenthesesInStringInterpolation, EnforcedStyle.
851
+ # Configuration parameters: IgnoreMacros, AllowedMethods, AllowedPatterns, IncludedMacros, IncludedMacroPatterns, AllowParenthesesInMultilineCall, AllowParenthesesInChaining, AllowParenthesesInCamelCaseMethod, AllowParenthesesInStringInterpolation, EnforcedStyle.
871
852
  # SupportedStyles: require_parentheses, omit_parentheses
872
853
  Style/MethodCallWithArgsParentheses:
873
854
  Enabled: false
@@ -888,7 +869,7 @@ Style/MethodCalledOnDoEndBlock:
888
869
  - 'ext/cumo/narray/gen/cogen_kernel.rb'
889
870
  - 'lib/cumo/narray/extra.rb'
890
871
 
891
- # Offense count: 106
872
+ # Offense count: 105
892
873
  # This cop supports safe autocorrection (--autocorrect).
893
874
  # Configuration parameters: EnforcedStyle.
894
875
  # SupportedStyles: if, case, both
@@ -923,13 +904,12 @@ Style/MutableConstant:
923
904
  - '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
924
905
  - 'test/test_helper.rb'
925
906
 
926
- # Offense count: 14
907
+ # Offense count: 13
927
908
  # This cop supports safe autocorrection (--autocorrect).
928
909
  # Configuration parameters: EnforcedStyle.
929
910
  # SupportedStyles: both, prefix, postfix
930
911
  Style/NegatedIf:
931
912
  Exclude:
932
- - 'ext/cumo/extconf.rb'
933
913
  - 'ext/cumo/narray/gen/erbpp2.rb'
934
914
  - 'ext/cumo/narray/gen/spec.rb'
935
915
  - 'lib/cumo/narray/extra.rb'
@@ -1190,7 +1170,7 @@ Style/StringHashKeys:
1190
1170
  Exclude:
1191
1171
  - '3rd_party/mkmf-cu/test/test_mkmf-cu.rb'
1192
1172
 
1193
- # Offense count: 1376
1173
+ # Offense count: 1369
1194
1174
  # This cop supports safe autocorrection (--autocorrect).
1195
1175
  # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
1196
1176
  # SupportedStyles: single_quotes, double_quotes
@@ -1229,7 +1209,7 @@ Style/TernaryParentheses:
1229
1209
  - 'ext/cumo/narray/gen/narray_def.rb'
1230
1210
  - 'lib/cumo/narray/extra.rb'
1231
1211
 
1232
- # Offense count: 9
1212
+ # Offense count: 8
1233
1213
  Style/TopLevelMethodDefinition:
1234
1214
  Exclude:
1235
1215
  - 'bench/cumo_bench.rb'
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "mkmf"
3
4
  require "open3"
4
5
  require_relative "nvcc"
5
6
 
@@ -60,6 +61,12 @@ module MakeMakefileCuda
60
61
  # CUDA 11.0
61
62
  capability = [35, 50, 60, 70, 75, 80]
62
63
  end
64
+
65
+ if find_executable('nvidia-smi')
66
+ arch_version = `nvidia-smi --query-gpu=compute_cap --format=csv,noheader`.strip
67
+ capability << (arch_version.to_f * 10).to_i unless arch_version.empty?
68
+ end
69
+
63
70
  capability.each do |arch|
64
71
  cmd << " --generate-code=arch=compute_#{arch},code=sm_#{arch}"
65
72
  end
data/CHANGELOG.md CHANGED
@@ -1,3 +1,19 @@
1
+ # 0.5.1 (2025/12/30)
2
+
3
+ Enhancements:
4
+
5
+ * Add CUDA 13 support (#153)
6
+ * Add cuDNN 9 support
7
+
8
+ Fixes:
9
+
10
+ * Backport: fix example code
11
+ * Backport: fix example code
12
+ * Backport: fix doc
13
+ * Backport: fix documents
14
+ * Backport: fix document of logseq
15
+ * Backport: trim comment out
16
+
1
17
  # 0.5.0 (2025/11/01)
2
18
 
3
19
  Fixes:
data/Dockerfile ADDED
@@ -0,0 +1,34 @@
1
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
2
+
3
+ ARG RUBY_VERSION=3.4.7
4
+
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV RBENV_ROOT="/root/.rbenv"
7
+ ENV PATH="${RBENV_ROOT}/bin:${RBENV_ROOT}/shims:${PATH}"
8
+
9
+ ENV CUDA_PATH=/usr/local/cuda
10
+ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
11
+ ENV CPATH=/usr/local/cuda/include:${CPATH}
12
+ ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
13
+
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ git \
16
+ build-essential \
17
+ wget \
18
+ curl \
19
+ vim \
20
+ ca-certificates \
21
+ libssl-dev \
22
+ libreadline-dev \
23
+ zlib1g-dev \
24
+ libyaml-dev \
25
+ libffi-dev \
26
+ && rm -rf /var/lib/apt/lists/*
27
+
28
+ RUN git clone --depth 1 https://github.com/rbenv/ruby-build.git && \
29
+ cd ruby-build/bin && ./ruby-build ${RUBY_VERSION} /usr && \
30
+ git config --global --add safe.directory /workspace
31
+
32
+ WORKDIR /workspace
33
+
34
+ CMD ["/bin/bash"]
data/cumo.gemspec CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.required_ruby_version = ">= 3.0.0"
20
20
 
21
21
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
22
- f.match(%r{^(test|spec|features)/})
22
+ f.match(%r{^(test|spec|features|docker)/})
23
23
  end
24
24
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
25
25
  spec.bindir = "exe"
data/docker-build.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+
3
+ script_dir=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd)
4
+ docker build -t cumo-dev ${script_dir}
data/docker-launch.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+
3
+ project_dir="$(cd $(dirname ${BASH_SOURCE:-$0}); pwd)/"
4
+ docker run --gpus all -v $project_dir:/workspace -it cumo-dev bash
data/docs/src-tree.md CHANGED
@@ -6,7 +6,7 @@
6
6
  * Technically, it is not possible to use CRuby API such as `VALUE` in .cu files.
7
7
  * CRuby API is not callable from CUDA kernel because they do not have `__device__` modifier.
8
8
  * nvcc does not support `#include RUBY_EXTCONF_H`, so can not include `ruby.h`.
9
- * (RULE) It is allowed to use C++14 codes in .cu files.
9
+ * (RULE) It is allowed to use C++17 codes in .cu files.
10
10
  * Rest of `*.{h,c}` files are for host (CPU).
11
11
  * Call C wrapper functions defined in .cu files.
12
12
  * It can use CRuby API.
data/ext/cumo/cuda/cudnn_impl.cpp CHANGED
@@ -74,6 +74,25 @@ cumo_cuda_cudnn_CreateTensorDescriptor(
74
74
  status = cudnnSetTensor4dDescriptor(
75
75
  *desc, CUDNN_TENSOR_NCHW, cudnn_dtype, shape[0], shape[1], shape[2], shape[3]);
76
76
  }
77
+ else if (ndim < 4) {
78
+ // cuDNN 9 fix: Force 4D (N, C, H, W)
79
+ int pad_shape[4] = {1, 1, 1, 1};
80
+
81
+ if (ndim == 1) {
82
+ // 1D: arrays are treated as "Channel" (1, C, 1, 1)
83
+ pad_shape[1] = (int)(shape[0]);
84
+ } else {
85
+ // 2D: [N, C] -> [N, C, 1, 1]
86
+ // 3D: [N, C, H] -> [N, C, H, 1]
87
+ for (int idim = 0; idim < ndim; ++idim) {
88
+ pad_shape[idim] = (int)(shape[idim]);
89
+ }
90
+ }
91
+
92
+ status = cudnnSetTensor4dDescriptor(
93
+ *desc, CUDNN_TENSOR_NCHW, cudnn_dtype,
94
+ pad_shape[0], pad_shape[1], pad_shape[2], pad_shape[3]);
95
+ }
77
96
  else {
78
97
  int int_shape[CUMO_NA_MAX_DIMENSION];
79
98
  for (int idim = 0; idim < ndim; ++idim) {
@@ -514,8 +533,11 @@ cumo_cuda_cudnn_FindConvolutionBackwardFilterAlgorithm(
514
533
  // TODO(sonots): Support other than 4, 5 dimensional arrays by reshaping into 4-dimensional arrays as Chainer does.
515
534
  cudnnBatchNormMode_t
516
535
  cumo_cuda_cudnn_GetBatchNormMode(size_t ndim, int* axis) {
517
- if (ndim == 1 && axis[0] == 0) { // (1, channels, (depth, )height, width)
518
- return CUDNN_BATCHNORM_PER_ACTIVATION;
536
+ if (ndim == 1) {
537
+ return CUDNN_BATCHNORM_SPATIAL;
538
+ }
539
+ if (ndim == 2) {
540
+ return CUDNN_BATCHNORM_SPATIAL;
519
541
  }
520
542
  if ((ndim == 3 && axis[0] == 0 && axis[1] == 2 && axis[2] == 3) ||
521
543
  (ndim == 4 && axis[0] == 0 && axis[1] == 2 && axis[2] == 3 && axis[3] == 4)) { // (1, channels, (1, )1, 1)
@@ -533,7 +555,7 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
533
555
  {
534
556
  cudnnStatus_t status = CUDNN_STATUS_SUCCESS;
535
557
  status = cudnnCreateTensorDescriptor(desc);
536
- if (status = CUDNN_STATUS_SUCCESS) return status;
558
+ if (status == CUDNN_STATUS_SUCCESS) return status;
537
559
 
538
560
  status = cudnnDeriveBNTensorDescriptor(*desc, x_desc, mode);
539
561
  return status;
data/ext/cumo/cuda/driver.c CHANGED
@@ -33,7 +33,11 @@ rb_cuCtxCreate(VALUE self, VALUE flags, VALUE dev)
33
33
  CUcontext _pctx;
34
34
  CUresult status;
35
35
 
36
+ #if defined(CUDA_VERSION) && CUDA_VERSION >= 13000
37
+ status = cuCtxCreate(&_pctx, NULL, _flags, _dev);
38
+ #else
36
39
  status = cuCtxCreate(&_pctx, _flags, _dev);
40
+ #endif
37
41
 
38
42
  check_status(status);
39
43
  return SIZET2NUM((size_t)_pctx);
@@ -418,5 +422,9 @@ Init_cumo_cuda_driver()
418
422
 
419
423
  cuInit(0);
420
424
  cuDeviceGet(&cuDevice, 0);
425
+ #if defined(CUDA_VERSION) && CUDA_VERSION >= 13000
426
+ cuCtxCreate(&context, NULL, 0, cuDevice);
427
+ #else
421
428
  cuCtxCreate(&context, 0, cuDevice);
429
+ #endif
422
430
  }
data/ext/cumo/depend.erb CHANGED
@@ -55,6 +55,6 @@ run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
55
55
  ./$<
56
56
 
57
57
  <%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
58
- nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
58
+ nvcc -std=c++17 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
59
59
 
60
60
  CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe
data/ext/cumo/extconf.rb CHANGED
@@ -29,7 +29,7 @@ MakeMakefileCuda.install!(cxx: true)
29
29
  if ENV['DEBUG']
30
30
  $CFLAGS << " -g -O0 -Wall"
31
31
  end
32
- $CXXFLAGS << " -std=c++14"
32
+ $CXXFLAGS << " -std=c++17"
33
33
  #$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
34
34
  #$CFLAGS=" $(cflags) -O3"
35
35
  $INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp CHANGED
@@ -28,8 +28,10 @@ class cumo_thrust_strided_range
28
28
 
29
29
  typedef typename thrust::iterator_difference<Iterator>::type difference_type;
30
30
 
31
- struct stride_functor : public thrust::unary_function<difference_type,difference_type>
31
+ struct stride_functor
32
32
  {
33
+ using argument_type = difference_type;
34
+ using result_type = difference_type;
33
35
  difference_type stride;
34
36
 
35
37
  stride_functor(difference_type stride)
@@ -86,8 +88,10 @@ struct cumo_thrust_minmax_pair
86
88
  // returns a cumo_thrust_minmax_pair whose minimum and maximum values
87
89
  // are initialized to x.
88
90
  template <typename T>
89
- struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thrust_minmax_pair<T> >
91
+ struct cumo_thrust_minmax_unary_op
90
92
  {
93
+ using argument_type = T;
94
+ using result_type = cumo_thrust_minmax_pair<T>;
91
95
  __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const T& x) const
92
96
  {
93
97
  cumo_thrust_minmax_pair<T> result;
@@ -102,8 +106,11 @@ struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thru
102
106
  // maximum values are the min() and max() respectively of
103
107
  // the minimums and maximums of the input pairs
104
108
  template <typename T>
105
- struct cumo_thrust_minmax_binary_op : public thrust::binary_function< cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T> >
109
+ struct cumo_thrust_minmax_binary_op
106
110
  {
111
+ using first_argument_type = cumo_thrust_minmax_pair<T>;
112
+ using second_argument_type = cumo_thrust_minmax_pair<T>;
113
+ using result_type = cumo_thrust_minmax_pair<T>;
107
114
  __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const cumo_thrust_minmax_pair<T>& x, const cumo_thrust_minmax_pair<T>& y) const
108
115
  {
109
116
  cumo_thrust_minmax_pair<T> result;
@@ -157,10 +164,10 @@ struct cumo_thrust_variance_unary_op
157
164
  // all values that have been agregated so far
158
165
  template <typename T>
159
166
  struct cumo_thrust_variance_binary_op
160
- : public thrust::binary_function<const cumo_thrust_variance_data<T>&,
161
- const cumo_thrust_variance_data<T>&,
162
- cumo_thrust_variance_data<T> >
163
167
  {
168
+ using first_argument_type = const cumo_thrust_variance_data<T>&;
169
+ using second_argument_type = const cumo_thrust_variance_data<T>&;
170
+ using result_type = cumo_thrust_variance_data<T>;
164
171
  __host__ __device__
165
172
  cumo_thrust_variance_data<T> operator()(const cumo_thrust_variance_data<T>& x, const cumo_thrust_variance_data <T>& y) const
166
173
  {
data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp CHANGED
@@ -49,10 +49,10 @@ struct cumo_thrust_complex_variance_unary_op
49
49
  // all values that have been agregated so far
50
50
  template <typename T, typename R>
51
51
  struct cumo_thrust_complex_variance_binary_op
52
- : public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
53
- const cumo_thrust_complex_variance_data<T,R>&,
54
- cumo_thrust_complex_variance_data<T,R> >
55
52
  {
53
+ using first_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
54
+ using second_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
55
+ using result_type = cumo_thrust_complex_variance_data<T,R>;
56
56
  __host__ __device__
57
57
  cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
58
58
  {
data/ext/cumo/include/cumo/types/complex_macro_kernel.h CHANGED
@@ -157,18 +157,27 @@ __host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
157
157
  /* --------- thrust ----------------- */
158
158
  #include "cumo/cuda/cumo_thrust_complex.hpp"
159
159
 
160
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
160
+ struct cumo_thrust_plus
161
161
  {
162
+ using first_argument_type = dtype;
163
+ using second_argument_type = dtype;
164
+ using result_type = dtype;
162
165
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
163
166
  };
164
167
 
165
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
168
+ struct cumo_thrust_multiplies
166
169
  {
170
+ using first_argument_type = dtype;
171
+ using second_argument_type = dtype;
172
+ using result_type = dtype;
167
173
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
168
174
  };
169
175
 
170
- struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
176
+ struct cumo_thrust_multiplies_mulsum_nan
171
177
  {
178
+ using first_argument_type = dtype;
179
+ using second_argument_type = dtype;
180
+ using result_type = dtype;
172
181
  __host__ __device__ dtype operator()(dtype x, dtype y) {
173
182
  if (not_nan(x) && not_nan(y)) {
174
183
  return m_mul(x, y);
@@ -178,8 +187,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
178
187
  }
179
188
  };
180
189
 
181
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
190
+ struct cumo_thrust_square
182
191
  {
192
+ using argument_type = dtype;
193
+ using result_type = dtype;
183
194
  __host__ __device__ rtype operator()(const dtype& x) const { return c_abs_square(x); }
184
195
  };
185
196
 
data/ext/cumo/include/cumo/types/real_accum_kernel.h CHANGED
@@ -72,18 +72,27 @@ __host__ __device__ static inline dtype f_minimum_nan(dtype x, dtype y)
72
72
  /* --------- thrust ----------------- */
73
73
  #include "cumo/cuda/cumo_thrust.hpp"
74
74
 
75
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
75
+ struct cumo_thrust_plus
76
76
  {
77
+ using first_argument_type = dtype;
78
+ using second_argument_type = dtype;
79
+ using result_type = dtype;
77
80
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
78
81
  };
79
82
 
80
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
83
+ struct cumo_thrust_multiplies
81
84
  {
85
+ using first_argument_type = dtype;
86
+ using second_argument_type = dtype;
87
+ using result_type = dtype;
82
88
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
83
89
  };
84
90
 
85
- struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
91
+ struct cumo_thrust_multiplies_mulsum_nan
86
92
  {
93
+ using first_argument_type = dtype;
94
+ using second_argument_type = dtype;
95
+ using result_type = dtype;
87
96
  __host__ __device__ dtype operator()(dtype x, dtype y) {
88
97
  if (not_nan(x) && not_nan(y)) {
89
98
  return m_mul(x, y);
@@ -93,8 +102,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
93
102
  }
94
103
  };
95
104
 
96
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
105
+ struct cumo_thrust_square
97
106
  {
107
+ using argument_type = dtype;
108
+ using result_type = dtype;
98
109
  __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
99
110
  };
100
111
 
data/ext/cumo/include/cumo/types/xint_macro_kernel.h CHANGED
@@ -70,18 +70,26 @@ __host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
70
70
  /* --------- thrust ----------------- */
71
71
  #include "cumo/cuda/cumo_thrust.hpp"
72
72
 
73
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
73
+ struct cumo_thrust_plus
74
74
  {
75
+ using first_argument_type = dtype;
76
+ using second_argument_type = dtype;
77
+ using result_type = dtype;
75
78
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
76
79
  };
77
80
 
78
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
81
+ struct cumo_thrust_multiplies
79
82
  {
83
+ using first_argument_type = dtype;
84
+ using second_argument_type = dtype;
85
+ using result_type = dtype;
80
86
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
81
87
  };
82
88
 
83
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
89
+ struct cumo_thrust_square
84
90
  {
91
+ using argument_type = dtype;
92
+ using result_type = dtype;
85
93
  __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
86
94
  };
87
95
 
data/ext/cumo/include/cumo.h CHANGED
@@ -10,8 +10,8 @@ extern "C" {
10
10
  #endif
11
11
  #endif
12
12
 
13
- #define CUMO_VERSION "0.5.0"
14
- #define CUMO_VERSION_CODE 50
13
+ #define CUMO_VERSION "0.5.1"
14
+ #define CUMO_VERSION_CODE 51
15
15
 
16
16
  bool cumo_compatible_mode_enabled_p();
17
17
  bool cumo_show_warning_enabled_p();
data/ext/cumo/narray/array.c CHANGED
@@ -466,11 +466,13 @@ cumo_na_s_array_shape(VALUE mod, VALUE ary)
466
466
  @return [Cumo::NArray]
467
467
  @example
468
468
  Cumo::NArray.new_like([[1,2,3],[4,5,6]])
469
- => Cumo::Int32#shape=[2,3](empty)
469
+ # => Cumo::Int32#shape=[2,3](empty)
470
+
470
471
  Cumo::DFloat.new_like([[1,2],[3,4]])
471
- => Cumo::DFloat#shape=[2,2](empty)
472
+ # => Cumo::DFloat#shape=[2,2](empty)
473
+
472
474
  Cumo::NArray.new_like([1,2i,3])
473
- => Cumo::DComplex#shape=[3](empty)
475
+ # => Cumo::DComplex#shape=[3](empty)
474
476
  */
475
477
  VALUE
476
478
  cumo_na_s_new_like(VALUE type, VALUE obj)