cumo 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +18 -37
  3. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +28 -21
  4. data/CHANGELOG.md +28 -0
  5. data/Dockerfile +34 -0
  6. data/cumo.gemspec +1 -1
  7. data/docker-build.sh +4 -0
  8. data/docker-launch.sh +4 -0
  9. data/docs/src-tree.md +1 -1
  10. data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
  11. data/ext/cumo/cuda/driver.c +8 -0
  12. data/ext/cumo/depend.erb +1 -1
  13. data/ext/cumo/extconf.rb +1 -1
  14. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +14 -7
  15. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
  16. data/ext/cumo/include/cumo/narray.h +2 -0
  17. data/ext/cumo/include/cumo/types/complex.h +2 -2
  18. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
  19. data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
  20. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
  21. data/ext/cumo/include/cumo.h +2 -2
  22. data/ext/cumo/narray/array.c +5 -3
  23. data/ext/cumo/narray/data.c +25 -26
  24. data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
  25. data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
  26. data/ext/cumo/narray/gen/tmpl/alloc_func.c +4 -1
  27. data/ext/cumo/narray/gen/tmpl/allocate.c +1 -0
  28. data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
  29. data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
  30. data/ext/cumo/narray/gen/tmpl/batch_norm.c +4 -1
  31. data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +4 -1
  32. data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
  33. data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
  34. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +12 -12
  35. data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
  36. data/ext/cumo/narray/gen/tmpl/each.c +4 -2
  37. data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
  38. data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +4 -1
  39. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +12 -12
  40. data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
  41. data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
  42. data/ext/cumo/narray/gen/tmpl/median.c +2 -2
  43. data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
  44. data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
  45. data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
  46. data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
  47. data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
  48. data/ext/cumo/narray/gen/tmpl/sort.c +2 -2
  49. data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
  50. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +1 -0
  51. data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
  52. data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
  53. data/ext/cumo/narray/index.c +1 -1
  54. data/ext/cumo/narray/narray.c +116 -21
  55. data/lib/cumo/narray/extra.rb +160 -156
  56. data/test/cuda/device_test.rb +2 -1
  57. data/test/cudnn_test.rb +2 -2
  58. data/test/narray_test.rb +80 -0
  59. data/test/ractor_test.rb +5 -3
  60. metadata +5 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c2daf87323aec6ca6a8770825e09c5b6bb57d2ab1b6f10b7405d8c1ea1aa07e
4
- data.tar.gz: cf8359dbc51fdc479fddf3eb6204db7b4f531757d0da445c50b2e34b8a817a4c
3
+ metadata.gz: 17cb9dfdf9be41292bcd0204a67c5f919da60588d005d4441ad632767dce504c
4
+ data.tar.gz: 4c6e388cdb5b3b9f99d45a989a610d63e493fe53b3baad570c7bb2656f72d86c
5
5
  SHA512:
6
- metadata.gz: c6a3eca253db3d854c8f7ced3ddc2f54836189ccf2065885e14de5acbaa1aced18e4341cf55e7b348f78db11afbd1be1558c3ab94881c3162ca6241b6e91c511
7
- data.tar.gz: 5696875ce0d4e3d9d20e483892167b71696bfff6a2dc5594b87628e517e3fa29528f828bc63d8adbf6f0c4b43c47efe6beac31d0344320cf7c2f847d6c0d5d1b
6
+ metadata.gz: 917adaa087836d673a143364f88fb9ddf91ad84cbcc064b115258e32f4b22e70a63c45f3e655a2e56ad58e70d6a4330f3e63ce177b5ad9e0a0c9c11685b39503
7
+ data.tar.gz: c5ec5a4179266a1cf4c5f15b5bc0d7e8b0b01a5a69bb7a75ad3c1387e861666da8b571156bc90295f2cf702558f18e332ce7fd68823330f834ea8d9c4f8b6419
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-10-05 08:00:13 UTC using RuboCop version 1.81.1.
3
+ # on 2026-01-09 18:33:26 UTC using RuboCop version 1.82.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -25,16 +25,8 @@ Bundler/OrderedGems:
25
25
  Exclude:
26
26
  - 'Gemfile'
27
27
 
28
- # Offense count: 1
29
- # Configuration parameters: EnforcedStyle, AllowedGems.
30
- # SupportedStyles: required, forbidden
31
- Gemspec/DependencyVersion:
32
- Exclude:
33
- - 'cumo.gemspec'
34
-
35
28
  # Offense count: 2
36
29
  # This cop supports safe autocorrection (--autocorrect).
37
- # Configuration parameters: Severity.
38
30
  Gemspec/DeprecatedAttributeAssignment:
39
31
  Exclude:
40
32
  - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
@@ -49,14 +41,12 @@ Gemspec/DevelopmentDependencies:
49
41
 
50
42
  # Offense count: 2
51
43
  # This cop supports safe autocorrection (--autocorrect).
52
- # Configuration parameters: Severity.
53
44
  Gemspec/RequireMFA:
54
45
  Exclude:
55
46
  - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
56
47
  - 'cumo.gemspec'
57
48
 
58
49
  # Offense count: 1
59
- # Configuration parameters: Severity.
60
50
  Gemspec/RequiredRubyVersion:
61
51
  Exclude:
62
52
  - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
@@ -275,13 +265,15 @@ Layout/MultilineOperationIndentation:
275
265
  - 'lib/cumo/narray/extra.rb'
276
266
  - 'test/narray_test.rb'
277
267
 
278
- # Offense count: 16
268
+ # Offense count: 27
279
269
  # This cop supports safe autocorrection (--autocorrect).
280
270
  # Configuration parameters: InspectBlocks.
281
271
  Layout/RedundantLineBreak:
282
272
  Exclude:
283
273
  - '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
274
+ - '3rd_party/mkmf-cu/mkmf-cu.gemspec'
284
275
  - '3rd_party/mkmf-cu/test/test_mkmf-cu.rb'
276
+ - 'ext/cumo/extconf.rb'
285
277
  - 'ext/cumo/narray/gen/narray_def.rb'
286
278
  - 'test/bit_test.rb'
287
279
  - 'test/cudnn_test.rb'
@@ -379,7 +371,7 @@ Lint/ConstantDefinitionInBlock:
379
371
  Exclude:
380
372
  - 'test/cuda/compiler_test.rb'
381
373
 
382
- # Offense count: 655
374
+ # Offense count: 665
383
375
  # Configuration parameters: Only, Ignore.
384
376
  Lint/ConstantResolution:
385
377
  Enabled: false
@@ -396,7 +388,7 @@ Lint/ErbNewArguments:
396
388
  Exclude:
397
389
  - 'ext/cumo/narray/gen/erbpp2.rb'
398
390
 
399
- # Offense count: 7
391
+ # Offense count: 15
400
392
  Lint/FloatComparison:
401
393
  Exclude:
402
394
  - 'test/narray_test.rb'
@@ -419,18 +411,20 @@ Lint/NonAtomicFileOperation:
419
411
  Exclude:
420
412
  - 'lib/cumo/cuda/compiler.rb'
421
413
 
422
- # Offense count: 24
414
+ # Offense count: 34
423
415
  # This cop supports unsafe autocorrection (--autocorrect-all).
424
416
  # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredClasses.
425
417
  # IgnoredClasses: Time, DateTime
426
418
  Lint/NumberConversion:
427
419
  Exclude:
420
+ - '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
428
421
  - 'bench/cumo_bench.rb'
429
422
  - 'bench/numo_bench.rb'
430
423
  - 'ext/cumo/narray/gen/cogen_kernel.rb'
431
424
  - 'ext/cumo/narray/gen/erbln.rb'
432
425
  - 'lib/cumo/narray/extra.rb'
433
426
  - 'test/cudnn_test.rb'
427
+ - 'test/narray_test.rb'
434
428
 
435
429
  # Offense count: 2
436
430
  # This cop supports safe autocorrection (--autocorrect).
@@ -522,17 +516,6 @@ Naming/MethodParameterName:
522
516
  - 'lib/cumo/narray/extra.rb'
523
517
  - 'test/ractor_test.rb'
524
518
 
525
- # Offense count: 1
526
- # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
527
- # NamePrefix: is_, has_, have_, does_
528
- # ForbiddenPrefixes: is_, has_, have_, does_
529
- # AllowedMethods: is_a?
530
- # MethodDefinitionMacros: define_method, define_singleton_method
531
- Naming/PredicatePrefix:
532
- Exclude:
533
- - 'spec/**/*'
534
- - 'ext/cumo/extconf.rb'
535
-
536
519
  # Offense count: 1
537
520
  # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
538
521
  # SupportedStyles: snake_case, normalcase, non_integer
@@ -719,7 +702,7 @@ Style/Documentation:
719
702
  - 'lib/cumo/linalg.rb'
720
703
  - 'lib/cumo/narray/extra.rb'
721
704
 
722
- # Offense count: 203
705
+ # Offense count: 202
723
706
  # Configuration parameters: AllowedMethods, RequireForNonPublicMethods.
724
707
  Style/DocumentationMethod:
725
708
  Enabled: false
@@ -767,7 +750,7 @@ Style/FileWrite:
767
750
  Exclude:
768
751
  - 'lib/cumo/cuda/compiler.rb'
769
752
 
770
- # Offense count: 29
753
+ # Offense count: 27
771
754
  # Configuration parameters: AllowedVariables.
772
755
  Style/GlobalVars:
773
756
  Exclude:
@@ -775,12 +758,11 @@ Style/GlobalVars:
775
758
  - 'ext/cumo/narray/gen/cogen.rb'
776
759
  - 'ext/cumo/narray/gen/cogen_kernel.rb'
777
760
 
778
- # Offense count: 12
761
+ # Offense count: 10
779
762
  # This cop supports safe autocorrection (--autocorrect).
780
763
  # Configuration parameters: MinBodyLength, AllowConsecutiveConditionals.
781
764
  Style/GuardClause:
782
765
  Exclude:
783
- - 'ext/cumo/extconf.rb'
784
766
  - 'ext/cumo/narray/gen/erbpp2.rb'
785
767
  - 'lib/cumo/cuda/link_state.rb'
786
768
  - 'lib/cumo/cuda/module.rb'
@@ -865,9 +847,9 @@ Style/InvertibleUnlessCondition:
865
847
  - 'lib/cumo/cuda/compiler.rb'
866
848
  - 'lib/cumo/cuda/device.rb'
867
849
 
868
- # Offense count: 122
850
+ # Offense count: 119
869
851
  # This cop supports safe autocorrection (--autocorrect).
870
- # Configuration parameters: IgnoreMacros, AllowedMethods, AllowedPatterns, IncludedMacros, AllowParenthesesInMultilineCall, AllowParenthesesInChaining, AllowParenthesesInCamelCaseMethod, AllowParenthesesInStringInterpolation, EnforcedStyle.
852
+ # Configuration parameters: IgnoreMacros, AllowedMethods, AllowedPatterns, IncludedMacros, IncludedMacroPatterns, AllowParenthesesInMultilineCall, AllowParenthesesInChaining, AllowParenthesesInCamelCaseMethod, AllowParenthesesInStringInterpolation, EnforcedStyle.
871
853
  # SupportedStyles: require_parentheses, omit_parentheses
872
854
  Style/MethodCallWithArgsParentheses:
873
855
  Enabled: false
@@ -888,7 +870,7 @@ Style/MethodCalledOnDoEndBlock:
888
870
  - 'ext/cumo/narray/gen/cogen_kernel.rb'
889
871
  - 'lib/cumo/narray/extra.rb'
890
872
 
891
- # Offense count: 106
873
+ # Offense count: 105
892
874
  # This cop supports safe autocorrection (--autocorrect).
893
875
  # Configuration parameters: EnforcedStyle.
894
876
  # SupportedStyles: if, case, both
@@ -923,13 +905,12 @@ Style/MutableConstant:
923
905
  - '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
924
906
  - 'test/test_helper.rb'
925
907
 
926
- # Offense count: 14
908
+ # Offense count: 13
927
909
  # This cop supports safe autocorrection (--autocorrect).
928
910
  # Configuration parameters: EnforcedStyle.
929
911
  # SupportedStyles: both, prefix, postfix
930
912
  Style/NegatedIf:
931
913
  Exclude:
932
- - 'ext/cumo/extconf.rb'
933
914
  - 'ext/cumo/narray/gen/erbpp2.rb'
934
915
  - 'ext/cumo/narray/gen/spec.rb'
935
916
  - 'lib/cumo/narray/extra.rb'
@@ -1190,7 +1171,7 @@ Style/StringHashKeys:
1190
1171
  Exclude:
1191
1172
  - '3rd_party/mkmf-cu/test/test_mkmf-cu.rb'
1192
1173
 
1193
- # Offense count: 1376
1174
+ # Offense count: 1369
1194
1175
  # This cop supports safe autocorrection (--autocorrect).
1195
1176
  # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
1196
1177
  # SupportedStyles: single_quotes, double_quotes
@@ -1229,7 +1210,7 @@ Style/TernaryParentheses:
1229
1210
  - 'ext/cumo/narray/gen/narray_def.rb'
1230
1211
  - 'lib/cumo/narray/extra.rb'
1231
1212
 
1232
- # Offense count: 9
1213
+ # Offense count: 8
1233
1214
  Style/TopLevelMethodDefinition:
1234
1215
  Exclude:
1235
1216
  - 'bench/cumo_bench.rb'
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "mkmf"
3
4
  require "open3"
4
5
  require_relative "nvcc"
5
6
 
@@ -37,29 +38,35 @@ module MakeMakefileCuda
37
38
  cmd = "nvcc #{s}"
38
39
  if ENV['CUMO_NVCC_GENERATE_CODE']
39
40
  cmd << " --generate-code=#{ENV['CUMO_NVCC_GENERATE_CODE']}"
40
- elsif ENV['DEBUG']
41
- cmd << " -arch=sm_35"
42
41
  else
43
- # Ref. https://en.wikipedia.org/wiki/CUDA
44
- if cuda_version >= Gem::Version.new("13.0")
45
- # CUDA 13.0
46
- capability = [75, 87, 89, 90, 121]
47
- elsif cuda_version >= Gem::Version.new("12.9")
48
- # CUDA 12.9
49
- capability = [50, 60, 70, 75, 87, 89, 90, 121]
50
- elsif cuda_version >= Gem::Version.new("12.8")
51
- # CUDA 12.8
52
- capability = [50, 60, 70, 75, 87, 89, 90, 120]
53
- elsif cuda_version >= Gem::Version.new("12.0")
54
- # CUDA 12.0 – 12.6
55
- capability = [50, 60, 70, 75, 87, 89, 90]
56
- elsif cuda_version >= Gem::Version.new("11.8")
57
- # CUDA 11.8
58
- capability = [35, 50, 60, 70, 75, 87, 89, 90]
59
- else
60
- # CUDA 11.0
61
- capability = [35, 50, 60, 70, 75, 80]
42
+ capability = nil
43
+ if find_executable('nvidia-smi')
44
+ arch_version = `nvidia-smi --query-gpu=compute_cap --format=csv,noheader`.strip
45
+ capability = [(arch_version.to_f * 10).to_i] unless arch_version.empty?
62
46
  end
47
+ unless capability
48
+ # Ref. https://en.wikipedia.org/wiki/CUDA
49
+ if cuda_version >= Gem::Version.new("13.0")
50
+ # CUDA 13.0
51
+ capability = [75, 80, 86, 87, 89, 90, 100, 103, 110, 120, 121]
52
+ elsif cuda_version >= Gem::Version.new("12.9")
53
+ # CUDA 12.9
54
+ capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100, 103, 110, 120, 121]
55
+ elsif cuda_version >= Gem::Version.new("12.8")
56
+ # CUDA 12.8
57
+ capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100, 103, 110, 120]
58
+ elsif cuda_version >= Gem::Version.new("12.0")
59
+ # CUDA 12.0 – 12.6
60
+ capability = [50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90]
61
+ elsif cuda_version >= Gem::Version.new("11.8")
62
+ # CUDA 11.8
63
+ capability = [35, 50, 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90]
64
+ else
65
+ # CUDA 11.0
66
+ capability = [35, 50, 60, 61, 62, 70, 72, 75, 80]
67
+ end
68
+ end
69
+
63
70
  capability.each do |arch|
64
71
  cmd << " --generate-code=arch=compute_#{arch},code=sm_#{arch}"
65
72
  end
data/CHANGELOG.md CHANGED
@@ -1,3 +1,31 @@
1
+ # 0.5.2 (2026/01/25)
2
+
3
+ Fixes:
4
+
5
+ * Backport: Add support for copy on write with store_binary and frozen string
6
+ * Remove unnecessary debug code
7
+ * Fix capability list
8
+ * Build only with supported capabilities to reduce compilation time
9
+ * Fix SEGV when calling {mean, var, stddev, rms} on a single-element array (#154)
10
+ * Suppress warning message for deprecated declarations
11
+ * Fix variable typo in complex log2 and log10 functions (#152)
12
+
13
+ # 0.5.1 (2025/12/30)
14
+
15
+ Enhancements:
16
+
17
+ * Add CUDA 13 support (#153)
18
+ * Add cuDNN 9 support
19
+
20
+ Fixes:
21
+
22
+ * Backport: fix example code
23
+ * Backport: fix example code
24
+ * Backport: fix doc
25
+ * Backport: fix documents
26
+ * Backport: fix document of logseq
27
+ * Backport: trim comment out
28
+
1
29
  # 0.5.0 (2025/11/01)
2
30
 
3
31
  Fixes:
data/Dockerfile ADDED
@@ -0,0 +1,34 @@
1
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
2
+
3
+ ARG RUBY_VERSION=3.4.7
4
+
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV RBENV_ROOT="/root/.rbenv"
7
+ ENV PATH="${RBENV_ROOT}/bin:${RBENV_ROOT}/shims:${PATH}"
8
+
9
+ ENV CUDA_PATH=/usr/local/cuda
10
+ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
11
+ ENV CPATH=/usr/local/cuda/include:${CPATH}
12
+ ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
13
+
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ git \
16
+ build-essential \
17
+ wget \
18
+ curl \
19
+ vim \
20
+ ca-certificates \
21
+ libssl-dev \
22
+ libreadline-dev \
23
+ zlib1g-dev \
24
+ libyaml-dev \
25
+ libffi-dev \
26
+ && rm -rf /var/lib/apt/lists/*
27
+
28
+ RUN git clone --depth 1 https://github.com/rbenv/ruby-build.git && \
29
+ cd ruby-build/bin && ./ruby-build ${RUBY_VERSION} /usr && \
30
+ git config --global --add safe.directory /workspace
31
+
32
+ WORKDIR /workspace
33
+
34
+ CMD ["/bin/bash"]
data/cumo.gemspec CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.required_ruby_version = ">= 3.0.0"
20
20
 
21
21
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
22
- f.match(%r{^(test|spec|features)/})
22
+ f.match(%r{^(test|spec|features|docker)/})
23
23
  end
24
24
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
25
25
  spec.bindir = "exe"
data/docker-build.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+
3
+ script_dir=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd)
4
+ docker build -t cumo-dev ${script_dir}
data/docker-launch.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+
3
+ project_dir="$(cd $(dirname ${BASH_SOURCE:-$0}); pwd)/"
4
+ docker run --gpus all -v $project_dir:/workspace -it cumo-dev bash
data/docs/src-tree.md CHANGED
@@ -6,7 +6,7 @@
6
6
  * Technically, it is not possible to use CRuby API such as `VALUE` in .cu files.
7
7
  * CRuby API is not callable from CUDA kernel because they do not have `__device__` modifier.
8
8
  * nvcc does not support `#include RUBY_EXTCONF_H`, so can not include `ruby.h`.
9
- * (RULE) It is allowed to use C++14 codes in .cu files.
9
+ * (RULE) It is allowed to use C++17 codes in .cu files.
10
10
  * Rest of `*.{h,c}` files are for host (CPU).
11
11
  * Call C wrapper functions defined in .cu files.
12
12
  * It can use CRuby API.
@@ -74,6 +74,25 @@ cumo_cuda_cudnn_CreateTensorDescriptor(
74
74
  status = cudnnSetTensor4dDescriptor(
75
75
  *desc, CUDNN_TENSOR_NCHW, cudnn_dtype, shape[0], shape[1], shape[2], shape[3]);
76
76
  }
77
+ else if (ndim < 4) {
78
+ // cuDNN 9 fix: Force 4D (N, C, H, W)
79
+ int pad_shape[4] = {1, 1, 1, 1};
80
+
81
+ if (ndim == 1) {
82
+ // 1D: arrays are treated as "Channel" (1, C, 1, 1)
83
+ pad_shape[1] = (int)(shape[0]);
84
+ } else {
85
+ // 2D: [N, C] -> [N, C, 1, 1]
86
+ // 3D: [N, C, H] -> [N, C, H, 1]
87
+ for (int idim = 0; idim < ndim; ++idim) {
88
+ pad_shape[idim] = (int)(shape[idim]);
89
+ }
90
+ }
91
+
92
+ status = cudnnSetTensor4dDescriptor(
93
+ *desc, CUDNN_TENSOR_NCHW, cudnn_dtype,
94
+ pad_shape[0], pad_shape[1], pad_shape[2], pad_shape[3]);
95
+ }
77
96
  else {
78
97
  int int_shape[CUMO_NA_MAX_DIMENSION];
79
98
  for (int idim = 0; idim < ndim; ++idim) {
@@ -514,8 +533,11 @@ cumo_cuda_cudnn_FindConvolutionBackwardFilterAlgorithm(
514
533
  // TODO(sonots): Support other than 4, 5 dimensional arrays by reshaping into 4-dimensional arrays as Chainer does.
515
534
  cudnnBatchNormMode_t
516
535
  cumo_cuda_cudnn_GetBatchNormMode(size_t ndim, int* axis) {
517
- if (ndim == 1 && axis[0] == 0) { // (1, channels, (depth, )height, width)
518
- return CUDNN_BATCHNORM_PER_ACTIVATION;
536
+ if (ndim == 1) {
537
+ return CUDNN_BATCHNORM_SPATIAL;
538
+ }
539
+ if (ndim == 2) {
540
+ return CUDNN_BATCHNORM_SPATIAL;
519
541
  }
520
542
  if ((ndim == 3 && axis[0] == 0 && axis[1] == 2 && axis[2] == 3) ||
521
543
  (ndim == 4 && axis[0] == 0 && axis[1] == 2 && axis[2] == 3 && axis[3] == 4)) { // (1, channels, (1, )1, 1)
@@ -533,7 +555,7 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
533
555
  {
534
556
  cudnnStatus_t status = CUDNN_STATUS_SUCCESS;
535
557
  status = cudnnCreateTensorDescriptor(desc);
536
- if (status = CUDNN_STATUS_SUCCESS) return status;
558
+ if (status == CUDNN_STATUS_SUCCESS) return status;
537
559
 
538
560
  status = cudnnDeriveBNTensorDescriptor(*desc, x_desc, mode);
539
561
  return status;
@@ -33,7 +33,11 @@ rb_cuCtxCreate(VALUE self, VALUE flags, VALUE dev)
33
33
  CUcontext _pctx;
34
34
  CUresult status;
35
35
 
36
+ #if defined(CUDA_VERSION) && CUDA_VERSION >= 13000
37
+ status = cuCtxCreate(&_pctx, NULL, _flags, _dev);
38
+ #else
36
39
  status = cuCtxCreate(&_pctx, _flags, _dev);
40
+ #endif
37
41
 
38
42
  check_status(status);
39
43
  return SIZET2NUM((size_t)_pctx);
@@ -418,5 +422,9 @@ Init_cumo_cuda_driver()
418
422
 
419
423
  cuInit(0);
420
424
  cuDeviceGet(&cuDevice, 0);
425
+ #if defined(CUDA_VERSION) && CUDA_VERSION >= 13000
426
+ cuCtxCreate(&context, NULL, 0, cuDevice);
427
+ #else
421
428
  cuCtxCreate(&context, 0, cuDevice);
429
+ #endif
422
430
  }
data/ext/cumo/depend.erb CHANGED
@@ -55,6 +55,6 @@ run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
55
55
  ./$<
56
56
 
57
57
  <%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
58
- nvcc -std=c++14 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
58
+ nvcc -std=c++17 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
59
59
 
60
60
  CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe
data/ext/cumo/extconf.rb CHANGED
@@ -29,7 +29,7 @@ MakeMakefileCuda.install!(cxx: true)
29
29
  if ENV['DEBUG']
30
30
  $CFLAGS << " -g -O0 -Wall"
31
31
  end
32
- $CXXFLAGS << " -std=c++14"
32
+ $CXXFLAGS << " -std=c++17"
33
33
  #$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
34
34
  #$CFLAGS=" $(cflags) -O3"
35
35
  $INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
@@ -26,10 +26,12 @@ class cumo_thrust_strided_range
26
26
  {
27
27
  public:
28
28
 
29
- typedef typename thrust::iterator_difference<Iterator>::type difference_type;
29
+ typedef typename thrust::iterator_traits<Iterator>::difference_type difference_type;
30
30
 
31
- struct stride_functor : public thrust::unary_function<difference_type,difference_type>
31
+ struct stride_functor
32
32
  {
33
+ using argument_type = difference_type;
34
+ using result_type = difference_type;
33
35
  difference_type stride;
34
36
 
35
37
  stride_functor(difference_type stride)
@@ -86,8 +88,10 @@ struct cumo_thrust_minmax_pair
86
88
  // returns a cumo_thrust_minmax_pair whose minimum and maximum values
87
89
  // are initialized to x.
88
90
  template <typename T>
89
- struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thrust_minmax_pair<T> >
91
+ struct cumo_thrust_minmax_unary_op
90
92
  {
93
+ using argument_type = T;
94
+ using result_type = cumo_thrust_minmax_pair<T>;
91
95
  __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const T& x) const
92
96
  {
93
97
  cumo_thrust_minmax_pair<T> result;
@@ -102,8 +106,11 @@ struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thru
102
106
  // maximum values are the min() and max() respectively of
103
107
  // the minimums and maximums of the input pairs
104
108
  template <typename T>
105
- struct cumo_thrust_minmax_binary_op : public thrust::binary_function< cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T>, cumo_thrust_minmax_pair<T> >
109
+ struct cumo_thrust_minmax_binary_op
106
110
  {
111
+ using first_argument_type = cumo_thrust_minmax_pair<T>;
112
+ using second_argument_type = cumo_thrust_minmax_pair<T>;
113
+ using result_type = cumo_thrust_minmax_pair<T>;
107
114
  __host__ __device__ cumo_thrust_minmax_pair<T> operator()(const cumo_thrust_minmax_pair<T>& x, const cumo_thrust_minmax_pair<T>& y) const
108
115
  {
109
116
  cumo_thrust_minmax_pair<T> result;
@@ -157,10 +164,10 @@ struct cumo_thrust_variance_unary_op
157
164
  // all values that have been aggregated so far
158
165
  template <typename T>
159
166
  struct cumo_thrust_variance_binary_op
160
- : public thrust::binary_function<const cumo_thrust_variance_data<T>&,
161
- const cumo_thrust_variance_data<T>&,
162
- cumo_thrust_variance_data<T> >
163
167
  {
168
+ using first_argument_type = const cumo_thrust_variance_data<T>&;
169
+ using second_argument_type = const cumo_thrust_variance_data<T>&;
170
+ using result_type = cumo_thrust_variance_data<T>;
164
171
  __host__ __device__
165
172
  cumo_thrust_variance_data<T> operator()(const cumo_thrust_variance_data<T>& x, const cumo_thrust_variance_data <T>& y) const
166
173
  {
@@ -49,10 +49,10 @@ struct cumo_thrust_complex_variance_unary_op
49
49
  // all values that have been aggregated so far
50
50
  template <typename T, typename R>
51
51
  struct cumo_thrust_complex_variance_binary_op
52
- : public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
53
- const cumo_thrust_complex_variance_data<T,R>&,
54
- cumo_thrust_complex_variance_data<T,R> >
55
52
  {
53
+ using first_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
54
+ using second_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
55
+ using result_type = cumo_thrust_complex_variance_data<T,R>;
56
56
  __host__ __device__
57
57
  cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
58
58
  {
@@ -226,6 +226,7 @@ typedef struct {
226
226
  typedef struct {
227
227
  cumo_narray_t base;
228
228
  char *ptr;
229
+ bool owned;
229
230
  } cumo_narray_data_t;
230
231
 
231
232
 
@@ -360,6 +361,7 @@ _cumo_na_get_narray_t(VALUE obj, unsigned char cumo_na_type)
360
361
  #define CUMO_NA_DATA(na) ((cumo_narray_data_t*)(na))
361
362
  #define CUMO_NA_VIEW(na) ((cumo_narray_view_t*)(na))
362
363
  #define CUMO_NA_DATA_PTR(na) (CUMO_NA_DATA(na)->ptr)
364
+ #define CUMO_NA_DATA_OWNED(na) (CUMO_NA_DATA(na)->owned)
363
365
  #define CUMO_NA_VIEW_DATA(na) (CUMO_NA_VIEW(na)->data)
364
366
  #define CUMO_NA_VIEW_OFFSET(na) (CUMO_NA_VIEW(na)->offset)
365
367
  #define CUMO_NA_VIEW_STRIDX(na) (CUMO_NA_VIEW(na)->stridx)
@@ -166,14 +166,14 @@ static inline dtype c_log(dtype x) {
166
166
  static inline dtype c_log2(dtype x) {
167
167
  dtype z;
168
168
  z = c_log(x);
169
- z = c_mul_r(x,M_LOG2E);
169
+ z = c_mul_r(z,M_LOG2E);
170
170
  return z;
171
171
  }
172
172
 
173
173
  static inline dtype c_log10(dtype x) {
174
174
  dtype z;
175
175
  z = c_log(x);
176
- z = c_mul_r(x,M_LOG10E);
176
+ z = c_mul_r(z,M_LOG10E);
177
177
  return z;
178
178
  }
179
179
 
@@ -157,18 +157,27 @@ __host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
157
157
  /* --------- thrust ----------------- */
158
158
  #include "cumo/cuda/cumo_thrust_complex.hpp"
159
159
 
160
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
160
+ struct cumo_thrust_plus
161
161
  {
162
+ using first_argument_type = dtype;
163
+ using second_argument_type = dtype;
164
+ using result_type = dtype;
162
165
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
163
166
  };
164
167
 
165
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
168
+ struct cumo_thrust_multiplies
166
169
  {
170
+ using first_argument_type = dtype;
171
+ using second_argument_type = dtype;
172
+ using result_type = dtype;
167
173
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
168
174
  };
169
175
 
170
- struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
176
+ struct cumo_thrust_multiplies_mulsum_nan
171
177
  {
178
+ using first_argument_type = dtype;
179
+ using second_argument_type = dtype;
180
+ using result_type = dtype;
172
181
  __host__ __device__ dtype operator()(dtype x, dtype y) {
173
182
  if (not_nan(x) && not_nan(y)) {
174
183
  return m_mul(x, y);
@@ -178,8 +187,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
178
187
  }
179
188
  };
180
189
 
181
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
190
+ struct cumo_thrust_square
182
191
  {
192
+ using argument_type = dtype;
193
+ using result_type = dtype;
183
194
  __host__ __device__ rtype operator()(const dtype& x) const { return c_abs_square(x); }
184
195
  };
185
196
 
@@ -72,18 +72,27 @@ __host__ __device__ static inline dtype f_minimum_nan(dtype x, dtype y)
72
72
  /* --------- thrust ----------------- */
73
73
  #include "cumo/cuda/cumo_thrust.hpp"
74
74
 
75
- struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
75
+ struct cumo_thrust_plus
76
76
  {
77
+ using first_argument_type = dtype;
78
+ using second_argument_type = dtype;
79
+ using result_type = dtype;
77
80
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
78
81
  };
79
82
 
80
- struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
83
+ struct cumo_thrust_multiplies
81
84
  {
85
+ using first_argument_type = dtype;
86
+ using second_argument_type = dtype;
87
+ using result_type = dtype;
82
88
  __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
83
89
  };
84
90
 
85
- struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
91
+ struct cumo_thrust_multiplies_mulsum_nan
86
92
  {
93
+ using first_argument_type = dtype;
94
+ using second_argument_type = dtype;
95
+ using result_type = dtype;
87
96
  __host__ __device__ dtype operator()(dtype x, dtype y) {
88
97
  if (not_nan(x) && not_nan(y)) {
89
98
  return m_mul(x, y);
@@ -93,8 +102,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
93
102
  }
94
103
  };
95
104
 
96
- struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
105
+ struct cumo_thrust_square
97
106
  {
107
+ using argument_type = dtype;
108
+ using result_type = dtype;
98
109
  __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
99
110
  };
100
111