cumo 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +16 -36
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +7 -0
- data/CHANGELOG.md +16 -0
- data/Dockerfile +34 -0
- data/cumo.gemspec +1 -1
- data/docker-build.sh +4 -0
- data/docker-launch.sh +4 -0
- data/docs/src-tree.md +1 -1
- data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
- data/ext/cumo/cuda/driver.c +8 -0
- data/ext/cumo/depend.erb +1 -1
- data/ext/cumo/extconf.rb +1 -1
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +5 -3
- data/ext/cumo/narray/data.c +25 -26
- data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
- data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
- data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +4 -1
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +4 -1
- data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
- data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
- data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
- data/ext/cumo/narray/gen/tmpl/each.c +4 -2
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +4 -1
- data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
- data/ext/cumo/narray/gen/tmpl/median.c +2 -2
- data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
- data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
- data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
- data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
- data/ext/cumo/narray/gen/tmpl/sort.c +2 -2
- data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
- data/ext/cumo/narray/index.c +1 -1
- data/ext/cumo/narray/narray.c +19 -18
- data/lib/cumo/narray/extra.rb +160 -156
- data/test/cuda/device_test.rb +2 -1
- data/test/cudnn_test.rb +2 -2
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 93c1ecf4d6098da90d957600dc7254e02072999fa33374951809cb8c4f5645ee
|
|
4
|
+
data.tar.gz: f8961f11f4b8feed097fbfbe3fe0603e270f8f1b44121c112c506e42cefc2bf1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: df0b42ff21e2158657e0d8a86872f9e85a6af7ab6ae09c7dfc4368d71001846c7f087633bfa2f6071bdd43f910da041470a43694c2aa2c37c74b5ff684e85c88
|
|
7
|
+
data.tar.gz: 95572510fbc31633f423db010c9135271c5ded4bfda28c5f07734b90d76e9fd36fa8c2af0bdd1d03151df2eba93aa3f07c61d6d39aa2f8c7d011364a7ee99615
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2025-
|
|
3
|
+
# on 2025-12-29 17:31:25 UTC using RuboCop version 1.82.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -25,16 +25,8 @@ Bundler/OrderedGems:
|
|
|
25
25
|
Exclude:
|
|
26
26
|
- 'Gemfile'
|
|
27
27
|
|
|
28
|
-
# Offense count: 1
|
|
29
|
-
# Configuration parameters: EnforcedStyle, AllowedGems.
|
|
30
|
-
# SupportedStyles: required, forbidden
|
|
31
|
-
Gemspec/DependencyVersion:
|
|
32
|
-
Exclude:
|
|
33
|
-
- 'cumo.gemspec'
|
|
34
|
-
|
|
35
28
|
# Offense count: 2
|
|
36
29
|
# This cop supports safe autocorrection (--autocorrect).
|
|
37
|
-
# Configuration parameters: Severity.
|
|
38
30
|
Gemspec/DeprecatedAttributeAssignment:
|
|
39
31
|
Exclude:
|
|
40
32
|
- '3rd_party/mkmf-cu/mkmf-cu.gemspec'
|
|
@@ -49,14 +41,12 @@ Gemspec/DevelopmentDependencies:
|
|
|
49
41
|
|
|
50
42
|
# Offense count: 2
|
|
51
43
|
# This cop supports safe autocorrection (--autocorrect).
|
|
52
|
-
# Configuration parameters: Severity.
|
|
53
44
|
Gemspec/RequireMFA:
|
|
54
45
|
Exclude:
|
|
55
46
|
- '3rd_party/mkmf-cu/mkmf-cu.gemspec'
|
|
56
47
|
- 'cumo.gemspec'
|
|
57
48
|
|
|
58
49
|
# Offense count: 1
|
|
59
|
-
# Configuration parameters: Severity.
|
|
60
50
|
Gemspec/RequiredRubyVersion:
|
|
61
51
|
Exclude:
|
|
62
52
|
- '3rd_party/mkmf-cu/mkmf-cu.gemspec'
|
|
@@ -275,13 +265,15 @@ Layout/MultilineOperationIndentation:
|
|
|
275
265
|
- 'lib/cumo/narray/extra.rb'
|
|
276
266
|
- 'test/narray_test.rb'
|
|
277
267
|
|
|
278
|
-
# Offense count:
|
|
268
|
+
# Offense count: 27
|
|
279
269
|
# This cop supports safe autocorrection (--autocorrect).
|
|
280
270
|
# Configuration parameters: InspectBlocks.
|
|
281
271
|
Layout/RedundantLineBreak:
|
|
282
272
|
Exclude:
|
|
283
273
|
- '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
|
|
274
|
+
- '3rd_party/mkmf-cu/mkmf-cu.gemspec'
|
|
284
275
|
- '3rd_party/mkmf-cu/test/test_mkmf-cu.rb'
|
|
276
|
+
- 'ext/cumo/extconf.rb'
|
|
285
277
|
- 'ext/cumo/narray/gen/narray_def.rb'
|
|
286
278
|
- 'test/bit_test.rb'
|
|
287
279
|
- 'test/cudnn_test.rb'
|
|
@@ -379,7 +371,7 @@ Lint/ConstantDefinitionInBlock:
|
|
|
379
371
|
Exclude:
|
|
380
372
|
- 'test/cuda/compiler_test.rb'
|
|
381
373
|
|
|
382
|
-
# Offense count:
|
|
374
|
+
# Offense count: 650
|
|
383
375
|
# Configuration parameters: Only, Ignore.
|
|
384
376
|
Lint/ConstantResolution:
|
|
385
377
|
Enabled: false
|
|
@@ -419,12 +411,13 @@ Lint/NonAtomicFileOperation:
|
|
|
419
411
|
Exclude:
|
|
420
412
|
- 'lib/cumo/cuda/compiler.rb'
|
|
421
413
|
|
|
422
|
-
# Offense count:
|
|
414
|
+
# Offense count: 26
|
|
423
415
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
424
416
|
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredClasses.
|
|
425
417
|
# IgnoredClasses: Time, DateTime
|
|
426
418
|
Lint/NumberConversion:
|
|
427
419
|
Exclude:
|
|
420
|
+
- '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
|
|
428
421
|
- 'bench/cumo_bench.rb'
|
|
429
422
|
- 'bench/numo_bench.rb'
|
|
430
423
|
- 'ext/cumo/narray/gen/cogen_kernel.rb'
|
|
@@ -522,17 +515,6 @@ Naming/MethodParameterName:
|
|
|
522
515
|
- 'lib/cumo/narray/extra.rb'
|
|
523
516
|
- 'test/ractor_test.rb'
|
|
524
517
|
|
|
525
|
-
# Offense count: 1
|
|
526
|
-
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
|
|
527
|
-
# NamePrefix: is_, has_, have_, does_
|
|
528
|
-
# ForbiddenPrefixes: is_, has_, have_, does_
|
|
529
|
-
# AllowedMethods: is_a?
|
|
530
|
-
# MethodDefinitionMacros: define_method, define_singleton_method
|
|
531
|
-
Naming/PredicatePrefix:
|
|
532
|
-
Exclude:
|
|
533
|
-
- 'spec/**/*'
|
|
534
|
-
- 'ext/cumo/extconf.rb'
|
|
535
|
-
|
|
536
518
|
# Offense count: 1
|
|
537
519
|
# Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
|
|
538
520
|
# SupportedStyles: snake_case, normalcase, non_integer
|
|
@@ -719,7 +701,7 @@ Style/Documentation:
|
|
|
719
701
|
- 'lib/cumo/linalg.rb'
|
|
720
702
|
- 'lib/cumo/narray/extra.rb'
|
|
721
703
|
|
|
722
|
-
# Offense count:
|
|
704
|
+
# Offense count: 202
|
|
723
705
|
# Configuration parameters: AllowedMethods, RequireForNonPublicMethods.
|
|
724
706
|
Style/DocumentationMethod:
|
|
725
707
|
Enabled: false
|
|
@@ -767,7 +749,7 @@ Style/FileWrite:
|
|
|
767
749
|
Exclude:
|
|
768
750
|
- 'lib/cumo/cuda/compiler.rb'
|
|
769
751
|
|
|
770
|
-
# Offense count:
|
|
752
|
+
# Offense count: 27
|
|
771
753
|
# Configuration parameters: AllowedVariables.
|
|
772
754
|
Style/GlobalVars:
|
|
773
755
|
Exclude:
|
|
@@ -775,12 +757,11 @@ Style/GlobalVars:
|
|
|
775
757
|
- 'ext/cumo/narray/gen/cogen.rb'
|
|
776
758
|
- 'ext/cumo/narray/gen/cogen_kernel.rb'
|
|
777
759
|
|
|
778
|
-
# Offense count:
|
|
760
|
+
# Offense count: 10
|
|
779
761
|
# This cop supports safe autocorrection (--autocorrect).
|
|
780
762
|
# Configuration parameters: MinBodyLength, AllowConsecutiveConditionals.
|
|
781
763
|
Style/GuardClause:
|
|
782
764
|
Exclude:
|
|
783
|
-
- 'ext/cumo/extconf.rb'
|
|
784
765
|
- 'ext/cumo/narray/gen/erbpp2.rb'
|
|
785
766
|
- 'lib/cumo/cuda/link_state.rb'
|
|
786
767
|
- 'lib/cumo/cuda/module.rb'
|
|
@@ -865,9 +846,9 @@ Style/InvertibleUnlessCondition:
|
|
|
865
846
|
- 'lib/cumo/cuda/compiler.rb'
|
|
866
847
|
- 'lib/cumo/cuda/device.rb'
|
|
867
848
|
|
|
868
|
-
# Offense count:
|
|
849
|
+
# Offense count: 119
|
|
869
850
|
# This cop supports safe autocorrection (--autocorrect).
|
|
870
|
-
# Configuration parameters: IgnoreMacros, AllowedMethods, AllowedPatterns, IncludedMacros, AllowParenthesesInMultilineCall, AllowParenthesesInChaining, AllowParenthesesInCamelCaseMethod, AllowParenthesesInStringInterpolation, EnforcedStyle.
|
|
851
|
+
# Configuration parameters: IgnoreMacros, AllowedMethods, AllowedPatterns, IncludedMacros, IncludedMacroPatterns, AllowParenthesesInMultilineCall, AllowParenthesesInChaining, AllowParenthesesInCamelCaseMethod, AllowParenthesesInStringInterpolation, EnforcedStyle.
|
|
871
852
|
# SupportedStyles: require_parentheses, omit_parentheses
|
|
872
853
|
Style/MethodCallWithArgsParentheses:
|
|
873
854
|
Enabled: false
|
|
@@ -888,7 +869,7 @@ Style/MethodCalledOnDoEndBlock:
|
|
|
888
869
|
- 'ext/cumo/narray/gen/cogen_kernel.rb'
|
|
889
870
|
- 'lib/cumo/narray/extra.rb'
|
|
890
871
|
|
|
891
|
-
# Offense count:
|
|
872
|
+
# Offense count: 105
|
|
892
873
|
# This cop supports safe autocorrection (--autocorrect).
|
|
893
874
|
# Configuration parameters: EnforcedStyle.
|
|
894
875
|
# SupportedStyles: if, case, both
|
|
@@ -923,13 +904,12 @@ Style/MutableConstant:
|
|
|
923
904
|
- '3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb'
|
|
924
905
|
- 'test/test_helper.rb'
|
|
925
906
|
|
|
926
|
-
# Offense count:
|
|
907
|
+
# Offense count: 13
|
|
927
908
|
# This cop supports safe autocorrection (--autocorrect).
|
|
928
909
|
# Configuration parameters: EnforcedStyle.
|
|
929
910
|
# SupportedStyles: both, prefix, postfix
|
|
930
911
|
Style/NegatedIf:
|
|
931
912
|
Exclude:
|
|
932
|
-
- 'ext/cumo/extconf.rb'
|
|
933
913
|
- 'ext/cumo/narray/gen/erbpp2.rb'
|
|
934
914
|
- 'ext/cumo/narray/gen/spec.rb'
|
|
935
915
|
- 'lib/cumo/narray/extra.rb'
|
|
@@ -1190,7 +1170,7 @@ Style/StringHashKeys:
|
|
|
1190
1170
|
Exclude:
|
|
1191
1171
|
- '3rd_party/mkmf-cu/test/test_mkmf-cu.rb'
|
|
1192
1172
|
|
|
1193
|
-
# Offense count:
|
|
1173
|
+
# Offense count: 1369
|
|
1194
1174
|
# This cop supports safe autocorrection (--autocorrect).
|
|
1195
1175
|
# Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
|
|
1196
1176
|
# SupportedStyles: single_quotes, double_quotes
|
|
@@ -1229,7 +1209,7 @@ Style/TernaryParentheses:
|
|
|
1229
1209
|
- 'ext/cumo/narray/gen/narray_def.rb'
|
|
1230
1210
|
- 'lib/cumo/narray/extra.rb'
|
|
1231
1211
|
|
|
1232
|
-
# Offense count:
|
|
1212
|
+
# Offense count: 8
|
|
1233
1213
|
Style/TopLevelMethodDefinition:
|
|
1234
1214
|
Exclude:
|
|
1235
1215
|
- 'bench/cumo_bench.rb'
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "mkmf"
|
|
3
4
|
require "open3"
|
|
4
5
|
require_relative "nvcc"
|
|
5
6
|
|
|
@@ -60,6 +61,12 @@ module MakeMakefileCuda
|
|
|
60
61
|
# CUDA 11.0
|
|
61
62
|
capability = [35, 50, 60, 70, 75, 80]
|
|
62
63
|
end
|
|
64
|
+
|
|
65
|
+
if find_executable('nvidia-smi')
|
|
66
|
+
arch_version = `nvidia-smi --query-gpu=compute_cap --format=csv,noheader`.strip
|
|
67
|
+
capability << (arch_version.to_f * 10).to_i unless arch_version.empty?
|
|
68
|
+
end
|
|
69
|
+
|
|
63
70
|
capability.each do |arch|
|
|
64
71
|
cmd << " --generate-code=arch=compute_#{arch},code=sm_#{arch}"
|
|
65
72
|
end
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
# 0.5.1 (2025/12/30)
|
|
2
|
+
|
|
3
|
+
Enhancements:
|
|
4
|
+
|
|
5
|
+
* Add CUDA 13 support (#153)
|
|
6
|
+
* Add cuDNN 9 support
|
|
7
|
+
|
|
8
|
+
Fixes:
|
|
9
|
+
|
|
10
|
+
* Backport: fix example code
|
|
11
|
+
* Backport: fix example code
|
|
12
|
+
* Backport: fix doc
|
|
13
|
+
* Backport: fix documents
|
|
14
|
+
* Backport: fix document of logseq
|
|
15
|
+
* Backport: trim comment out
|
|
16
|
+
|
|
1
17
|
# 0.5.0 (2025/11/01)
|
|
2
18
|
|
|
3
19
|
Fixes:
|
data/Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
|
|
2
|
+
|
|
3
|
+
ARG RUBY_VERSION=3.4.7
|
|
4
|
+
|
|
5
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
6
|
+
ENV RBENV_ROOT="/root/.rbenv"
|
|
7
|
+
ENV PATH="${RBENV_ROOT}/bin:${RBENV_ROOT}/shims:${PATH}"
|
|
8
|
+
|
|
9
|
+
ENV CUDA_PATH=/usr/local/cuda
|
|
10
|
+
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
|
|
11
|
+
ENV CPATH=/usr/local/cuda/include:${CPATH}
|
|
12
|
+
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
|
|
13
|
+
|
|
14
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
15
|
+
git \
|
|
16
|
+
build-essential \
|
|
17
|
+
wget \
|
|
18
|
+
curl \
|
|
19
|
+
vim \
|
|
20
|
+
ca-certificates \
|
|
21
|
+
libssl-dev \
|
|
22
|
+
libreadline-dev \
|
|
23
|
+
zlib1g-dev \
|
|
24
|
+
libyaml-dev \
|
|
25
|
+
libffi-dev \
|
|
26
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
27
|
+
|
|
28
|
+
RUN git clone --depth 1 https://github.com/rbenv/ruby-build.git && \
|
|
29
|
+
cd ruby-build/bin && ./ruby-build ${RUBY_VERSION} /usr && \
|
|
30
|
+
git config --global --add safe.directory /workspace
|
|
31
|
+
|
|
32
|
+
WORKDIR /workspace
|
|
33
|
+
|
|
34
|
+
CMD ["/bin/bash"]
|
data/cumo.gemspec
CHANGED
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
|
19
19
|
spec.required_ruby_version = ">= 3.0.0"
|
|
20
20
|
|
|
21
21
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
|
22
|
-
f.match(%r{^(test|spec|features)/})
|
|
22
|
+
f.match(%r{^(test|spec|features|docker)/})
|
|
23
23
|
end
|
|
24
24
|
spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
|
25
25
|
spec.bindir = "exe"
|
data/docker-build.sh
ADDED
data/docker-launch.sh
ADDED
data/docs/src-tree.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* Technically, it is not possible to use CRuby API such as `VALUE` in .cu files.
|
|
7
7
|
* CRuby API is not callable from CUDA kernel because they do not have `__device__` modifier.
|
|
8
8
|
* nvcc does not support `#include RUBY_EXTCONF_H`, so can not include `ruby.h`.
|
|
9
|
-
* (RULE) It is allowed to use C++
|
|
9
|
+
* (RULE) It is allowed to use C++17 codes in .cu files.
|
|
10
10
|
* Rest of `*.{h,c}` files are for host (CPU).
|
|
11
11
|
* Call C wrapper functions defined in .cu files.
|
|
12
12
|
* It can use CRuby API.
|
|
@@ -74,6 +74,25 @@ cumo_cuda_cudnn_CreateTensorDescriptor(
|
|
|
74
74
|
status = cudnnSetTensor4dDescriptor(
|
|
75
75
|
*desc, CUDNN_TENSOR_NCHW, cudnn_dtype, shape[0], shape[1], shape[2], shape[3]);
|
|
76
76
|
}
|
|
77
|
+
else if (ndim < 4) {
|
|
78
|
+
// cuDNN 9 fix: Force 4D (N, C, H, W)
|
|
79
|
+
int pad_shape[4] = {1, 1, 1, 1};
|
|
80
|
+
|
|
81
|
+
if (ndim == 1) {
|
|
82
|
+
// 1D: arrays are treated as "Channel" (1, C, 1, 1)
|
|
83
|
+
pad_shape[1] = (int)(shape[0]);
|
|
84
|
+
} else {
|
|
85
|
+
// 2D: [N, C] -> [N, C, 1, 1]
|
|
86
|
+
// 3D: [N, C, H] -> [N, C, H, 1]
|
|
87
|
+
for (int idim = 0; idim < ndim; ++idim) {
|
|
88
|
+
pad_shape[idim] = (int)(shape[idim]);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
status = cudnnSetTensor4dDescriptor(
|
|
93
|
+
*desc, CUDNN_TENSOR_NCHW, cudnn_dtype,
|
|
94
|
+
pad_shape[0], pad_shape[1], pad_shape[2], pad_shape[3]);
|
|
95
|
+
}
|
|
77
96
|
else {
|
|
78
97
|
int int_shape[CUMO_NA_MAX_DIMENSION];
|
|
79
98
|
for (int idim = 0; idim < ndim; ++idim) {
|
|
@@ -514,8 +533,11 @@ cumo_cuda_cudnn_FindConvolutionBackwardFilterAlgorithm(
|
|
|
514
533
|
// TODO(sonots): Support other than 4, 5 dimensional arrays by reshaping into 4-dimensional arrays as Chainer does.
|
|
515
534
|
cudnnBatchNormMode_t
|
|
516
535
|
cumo_cuda_cudnn_GetBatchNormMode(size_t ndim, int* axis) {
|
|
517
|
-
if (ndim == 1
|
|
518
|
-
return
|
|
536
|
+
if (ndim == 1) {
|
|
537
|
+
return CUDNN_BATCHNORM_SPATIAL;
|
|
538
|
+
}
|
|
539
|
+
if (ndim == 2) {
|
|
540
|
+
return CUDNN_BATCHNORM_SPATIAL;
|
|
519
541
|
}
|
|
520
542
|
if ((ndim == 3 && axis[0] == 0 && axis[1] == 2 && axis[2] == 3) ||
|
|
521
543
|
(ndim == 4 && axis[0] == 0 && axis[1] == 2 && axis[2] == 3 && axis[3] == 4)) { // (1, channels, (1, )1, 1)
|
|
@@ -533,7 +555,7 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
|
|
|
533
555
|
{
|
|
534
556
|
cudnnStatus_t status = CUDNN_STATUS_SUCCESS;
|
|
535
557
|
status = cudnnCreateTensorDescriptor(desc);
|
|
536
|
-
if (status
|
|
558
|
+
if (status == CUDNN_STATUS_SUCCESS) return status;
|
|
537
559
|
|
|
538
560
|
status = cudnnDeriveBNTensorDescriptor(*desc, x_desc, mode);
|
|
539
561
|
return status;
|
data/ext/cumo/cuda/driver.c
CHANGED
|
@@ -33,7 +33,11 @@ rb_cuCtxCreate(VALUE self, VALUE flags, VALUE dev)
|
|
|
33
33
|
CUcontext _pctx;
|
|
34
34
|
CUresult status;
|
|
35
35
|
|
|
36
|
+
#if defined(CUDA_VERSION) && CUDA_VERSION >= 13000
|
|
37
|
+
status = cuCtxCreate(&_pctx, NULL, _flags, _dev);
|
|
38
|
+
#else
|
|
36
39
|
status = cuCtxCreate(&_pctx, _flags, _dev);
|
|
40
|
+
#endif
|
|
37
41
|
|
|
38
42
|
check_status(status);
|
|
39
43
|
return SIZET2NUM((size_t)_pctx);
|
|
@@ -418,5 +422,9 @@ Init_cumo_cuda_driver()
|
|
|
418
422
|
|
|
419
423
|
cuInit(0);
|
|
420
424
|
cuDeviceGet(&cuDevice, 0);
|
|
425
|
+
#if defined(CUDA_VERSION) && CUDA_VERSION >= 13000
|
|
426
|
+
cuCtxCreate(&context, NULL, 0, cuDevice);
|
|
427
|
+
#else
|
|
421
428
|
cuCtxCreate(&context, 0, cuDevice);
|
|
429
|
+
#endif
|
|
422
430
|
}
|
data/ext/cumo/depend.erb
CHANGED
|
@@ -55,6 +55,6 @@ run-ctest : <%= __dir__ %>/cuda/memory_pool_impl_test.exe
|
|
|
55
55
|
./$<
|
|
56
56
|
|
|
57
57
|
<%= __dir__ %>/cuda/memory_pool_impl_test.exe: <%= __dir__ %>/cuda/memory_pool_impl_test.cpp <%= __dir__ %>/cuda/memory_pool_impl.cpp <%= __dir__ %>/cuda/memory_pool_impl.hpp
|
|
58
|
-
nvcc -std=c++
|
|
58
|
+
nvcc -std=c++17 <%= ENV['DEBUG'] ? '-g -O0 --compiler-options -Wall' : '' %> -L. -L$(libdir) -I. $(INCFLAGS) -o $@ $< <%= __dir__ %>/cuda/memory_pool_impl.cpp
|
|
59
59
|
|
|
60
60
|
CLEANOBJS = <%= __dir__ %>/*.o <%= __dir__ %>/*/*.o <%= __dir__ %>/*/*/*.o <%= __dir__ %>/*.bak <%= __dir__ %>/narray/types/*.c <%= __dir__ %>/narray/types/*_kernel.cu <%= __dir__ %>/*.exe <%= __dir__ %>/*/*.exe
|
data/ext/cumo/extconf.rb
CHANGED
|
@@ -29,7 +29,7 @@ MakeMakefileCuda.install!(cxx: true)
|
|
|
29
29
|
if ENV['DEBUG']
|
|
30
30
|
$CFLAGS << " -g -O0 -Wall"
|
|
31
31
|
end
|
|
32
|
-
$CXXFLAGS << " -std=c++
|
|
32
|
+
$CXXFLAGS << " -std=c++17"
|
|
33
33
|
#$CFLAGS=" $(cflags) -O3 -m64 -msse2 -funroll-loops"
|
|
34
34
|
#$CFLAGS=" $(cflags) -O3"
|
|
35
35
|
$INCFLAGS = "-I$(srcdir)/include -I$(srcdir)/narray -I$(srcdir)/cuda #{$INCFLAGS}"
|
|
@@ -28,8 +28,10 @@ class cumo_thrust_strided_range
|
|
|
28
28
|
|
|
29
29
|
typedef typename thrust::iterator_difference<Iterator>::type difference_type;
|
|
30
30
|
|
|
31
|
-
struct stride_functor
|
|
31
|
+
struct stride_functor
|
|
32
32
|
{
|
|
33
|
+
using argument_type = difference_type;
|
|
34
|
+
using result_type = difference_type;
|
|
33
35
|
difference_type stride;
|
|
34
36
|
|
|
35
37
|
stride_functor(difference_type stride)
|
|
@@ -86,8 +88,10 @@ struct cumo_thrust_minmax_pair
|
|
|
86
88
|
// returns a cumo_thrust_minmax_pair whose minimum and maximum values
|
|
87
89
|
// are initialized to x.
|
|
88
90
|
template <typename T>
|
|
89
|
-
struct cumo_thrust_minmax_unary_op
|
|
91
|
+
struct cumo_thrust_minmax_unary_op
|
|
90
92
|
{
|
|
93
|
+
using argument_type = T;
|
|
94
|
+
using result_type = cumo_thrust_minmax_pair<T>;
|
|
91
95
|
__host__ __device__ cumo_thrust_minmax_pair<T> operator()(const T& x) const
|
|
92
96
|
{
|
|
93
97
|
cumo_thrust_minmax_pair<T> result;
|
|
@@ -102,8 +106,11 @@ struct cumo_thrust_minmax_unary_op : public thrust::unary_function< T, cumo_thru
|
|
|
102
106
|
// maximum values are the min() and max() respectively of
|
|
103
107
|
// the minimums and maximums of the input pairs
|
|
104
108
|
template <typename T>
|
|
105
|
-
struct cumo_thrust_minmax_binary_op
|
|
109
|
+
struct cumo_thrust_minmax_binary_op
|
|
106
110
|
{
|
|
111
|
+
using first_argument_type = cumo_thrust_minmax_pair<T>;
|
|
112
|
+
using second_argument_type = cumo_thrust_minmax_pair<T>;
|
|
113
|
+
using result_type = cumo_thrust_minmax_pair<T>;
|
|
107
114
|
__host__ __device__ cumo_thrust_minmax_pair<T> operator()(const cumo_thrust_minmax_pair<T>& x, const cumo_thrust_minmax_pair<T>& y) const
|
|
108
115
|
{
|
|
109
116
|
cumo_thrust_minmax_pair<T> result;
|
|
@@ -157,10 +164,10 @@ struct cumo_thrust_variance_unary_op
|
|
|
157
164
|
// all values that have been agregated so far
|
|
158
165
|
template <typename T>
|
|
159
166
|
struct cumo_thrust_variance_binary_op
|
|
160
|
-
: public thrust::binary_function<const cumo_thrust_variance_data<T>&,
|
|
161
|
-
const cumo_thrust_variance_data<T>&,
|
|
162
|
-
cumo_thrust_variance_data<T> >
|
|
163
167
|
{
|
|
168
|
+
using first_argument_type = const cumo_thrust_variance_data<T>&;
|
|
169
|
+
using second_argument_type = const cumo_thrust_variance_data<T>&;
|
|
170
|
+
using result_type = cumo_thrust_variance_data<T>;
|
|
164
171
|
__host__ __device__
|
|
165
172
|
cumo_thrust_variance_data<T> operator()(const cumo_thrust_variance_data<T>& x, const cumo_thrust_variance_data <T>& y) const
|
|
166
173
|
{
|
|
@@ -49,10 +49,10 @@ struct cumo_thrust_complex_variance_unary_op
|
|
|
49
49
|
// all values that have been agregated so far
|
|
50
50
|
template <typename T, typename R>
|
|
51
51
|
struct cumo_thrust_complex_variance_binary_op
|
|
52
|
-
: public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
|
|
53
|
-
const cumo_thrust_complex_variance_data<T,R>&,
|
|
54
|
-
cumo_thrust_complex_variance_data<T,R> >
|
|
55
52
|
{
|
|
53
|
+
using first_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
|
|
54
|
+
using second_argument_type = const cumo_thrust_complex_variance_data<T,R>&;
|
|
55
|
+
using result_type = cumo_thrust_complex_variance_data<T,R>;
|
|
56
56
|
__host__ __device__
|
|
57
57
|
cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
|
|
58
58
|
{
|
|
@@ -157,18 +157,27 @@ __host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
|
|
|
157
157
|
/* --------- thrust ----------------- */
|
|
158
158
|
#include "cumo/cuda/cumo_thrust_complex.hpp"
|
|
159
159
|
|
|
160
|
-
struct cumo_thrust_plus
|
|
160
|
+
struct cumo_thrust_plus
|
|
161
161
|
{
|
|
162
|
+
using first_argument_type = dtype;
|
|
163
|
+
using second_argument_type = dtype;
|
|
164
|
+
using result_type = dtype;
|
|
162
165
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
163
166
|
};
|
|
164
167
|
|
|
165
|
-
struct cumo_thrust_multiplies
|
|
168
|
+
struct cumo_thrust_multiplies
|
|
166
169
|
{
|
|
170
|
+
using first_argument_type = dtype;
|
|
171
|
+
using second_argument_type = dtype;
|
|
172
|
+
using result_type = dtype;
|
|
167
173
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
168
174
|
};
|
|
169
175
|
|
|
170
|
-
struct cumo_thrust_multiplies_mulsum_nan
|
|
176
|
+
struct cumo_thrust_multiplies_mulsum_nan
|
|
171
177
|
{
|
|
178
|
+
using first_argument_type = dtype;
|
|
179
|
+
using second_argument_type = dtype;
|
|
180
|
+
using result_type = dtype;
|
|
172
181
|
__host__ __device__ dtype operator()(dtype x, dtype y) {
|
|
173
182
|
if (not_nan(x) && not_nan(y)) {
|
|
174
183
|
return m_mul(x, y);
|
|
@@ -178,8 +187,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
|
|
|
178
187
|
}
|
|
179
188
|
};
|
|
180
189
|
|
|
181
|
-
struct cumo_thrust_square
|
|
190
|
+
struct cumo_thrust_square
|
|
182
191
|
{
|
|
192
|
+
using argument_type = dtype;
|
|
193
|
+
using result_type = dtype;
|
|
183
194
|
__host__ __device__ rtype operator()(const dtype& x) const { return c_abs_square(x); }
|
|
184
195
|
};
|
|
185
196
|
|
|
@@ -72,18 +72,27 @@ __host__ __device__ static inline dtype f_minimum_nan(dtype x, dtype y)
|
|
|
72
72
|
/* --------- thrust ----------------- */
|
|
73
73
|
#include "cumo/cuda/cumo_thrust.hpp"
|
|
74
74
|
|
|
75
|
-
struct cumo_thrust_plus
|
|
75
|
+
struct cumo_thrust_plus
|
|
76
76
|
{
|
|
77
|
+
using first_argument_type = dtype;
|
|
78
|
+
using second_argument_type = dtype;
|
|
79
|
+
using result_type = dtype;
|
|
77
80
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
78
81
|
};
|
|
79
82
|
|
|
80
|
-
struct cumo_thrust_multiplies
|
|
83
|
+
struct cumo_thrust_multiplies
|
|
81
84
|
{
|
|
85
|
+
using first_argument_type = dtype;
|
|
86
|
+
using second_argument_type = dtype;
|
|
87
|
+
using result_type = dtype;
|
|
82
88
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
83
89
|
};
|
|
84
90
|
|
|
85
|
-
struct cumo_thrust_multiplies_mulsum_nan
|
|
91
|
+
struct cumo_thrust_multiplies_mulsum_nan
|
|
86
92
|
{
|
|
93
|
+
using first_argument_type = dtype;
|
|
94
|
+
using second_argument_type = dtype;
|
|
95
|
+
using result_type = dtype;
|
|
87
96
|
__host__ __device__ dtype operator()(dtype x, dtype y) {
|
|
88
97
|
if (not_nan(x) && not_nan(y)) {
|
|
89
98
|
return m_mul(x, y);
|
|
@@ -93,8 +102,10 @@ struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype,
|
|
|
93
102
|
}
|
|
94
103
|
};
|
|
95
104
|
|
|
96
|
-
struct cumo_thrust_square
|
|
105
|
+
struct cumo_thrust_square
|
|
97
106
|
{
|
|
107
|
+
using argument_type = dtype;
|
|
108
|
+
using result_type = dtype;
|
|
98
109
|
__host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
|
|
99
110
|
};
|
|
100
111
|
|
|
@@ -70,18 +70,26 @@ __host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
|
|
|
70
70
|
/* --------- thrust ----------------- */
|
|
71
71
|
#include "cumo/cuda/cumo_thrust.hpp"
|
|
72
72
|
|
|
73
|
-
struct cumo_thrust_plus
|
|
73
|
+
struct cumo_thrust_plus
|
|
74
74
|
{
|
|
75
|
+
using first_argument_type = dtype;
|
|
76
|
+
using second_argument_type = dtype;
|
|
77
|
+
using result_type = dtype;
|
|
75
78
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
76
79
|
};
|
|
77
80
|
|
|
78
|
-
struct cumo_thrust_multiplies
|
|
81
|
+
struct cumo_thrust_multiplies
|
|
79
82
|
{
|
|
83
|
+
using first_argument_type = dtype;
|
|
84
|
+
using second_argument_type = dtype;
|
|
85
|
+
using result_type = dtype;
|
|
80
86
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
81
87
|
};
|
|
82
88
|
|
|
83
|
-
struct cumo_thrust_square
|
|
89
|
+
struct cumo_thrust_square
|
|
84
90
|
{
|
|
91
|
+
using argument_type = dtype;
|
|
92
|
+
using result_type = dtype;
|
|
85
93
|
__host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
|
|
86
94
|
};
|
|
87
95
|
|
data/ext/cumo/include/cumo.h
CHANGED
data/ext/cumo/narray/array.c
CHANGED
|
@@ -466,11 +466,13 @@ cumo_na_s_array_shape(VALUE mod, VALUE ary)
|
|
|
466
466
|
@return [Cumo::NArray]
|
|
467
467
|
@example
|
|
468
468
|
Cumo::NArray.new_like([[1,2,3],[4,5,6]])
|
|
469
|
-
=> Cumo::Int32#shape=[2,3](empty)
|
|
469
|
+
# => Cumo::Int32#shape=[2,3](empty)
|
|
470
|
+
|
|
470
471
|
Cumo::DFloat.new_like([[1,2],[3,4]])
|
|
471
|
-
=> Cumo::DFloat#shape=[2,2](empty)
|
|
472
|
+
# => Cumo::DFloat#shape=[2,2](empty)
|
|
473
|
+
|
|
472
474
|
Cumo::NArray.new_like([1,2i,3])
|
|
473
|
-
=> Cumo::DComplex#shape=[3](empty)
|
|
475
|
+
# => Cumo::DComplex#shape=[3](empty)
|
|
474
476
|
*/
|
|
475
477
|
VALUE
|
|
476
478
|
cumo_na_s_new_like(VALUE type, VALUE obj)
|