gumath 0.2.0dev5 → 0.2.0dev8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99)
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +7 -2
  3. data/Gemfile +0 -3
  4. data/ext/ruby_gumath/GPATH +0 -0
  5. data/ext/ruby_gumath/GRTAGS +0 -0
  6. data/ext/ruby_gumath/GTAGS +0 -0
  7. data/ext/ruby_gumath/extconf.rb +0 -5
  8. data/ext/ruby_gumath/functions.c +10 -2
  9. data/ext/ruby_gumath/gufunc_object.c +15 -4
  10. data/ext/ruby_gumath/gufunc_object.h +9 -3
  11. data/ext/ruby_gumath/gumath/Makefile +63 -0
  12. data/ext/ruby_gumath/gumath/Makefile.in +1 -0
  13. data/ext/ruby_gumath/gumath/config.h +56 -0
  14. data/ext/ruby_gumath/gumath/config.h.in +3 -0
  15. data/ext/ruby_gumath/gumath/config.log +497 -0
  16. data/ext/ruby_gumath/gumath/config.status +1034 -0
  17. data/ext/ruby_gumath/gumath/configure +375 -4
  18. data/ext/ruby_gumath/gumath/configure.ac +47 -3
  19. data/ext/ruby_gumath/gumath/libgumath/Makefile +236 -0
  20. data/ext/ruby_gumath/gumath/libgumath/Makefile.in +90 -24
  21. data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +54 -15
  22. data/ext/ruby_gumath/gumath/libgumath/apply.c +92 -28
  23. data/ext/ruby_gumath/gumath/libgumath/apply.o +0 -0
  24. data/ext/ruby_gumath/gumath/libgumath/common.o +0 -0
  25. data/ext/ruby_gumath/gumath/libgumath/cpu_device_binary.o +0 -0
  26. data/ext/ruby_gumath/gumath/libgumath/cpu_device_unary.o +0 -0
  27. data/ext/ruby_gumath/gumath/libgumath/cpu_host_binary.o +0 -0
  28. data/ext/ruby_gumath/gumath/libgumath/cpu_host_unary.o +0 -0
  29. data/ext/ruby_gumath/gumath/libgumath/examples.o +0 -0
  30. data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +27 -20
  31. data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +1 -1
  32. data/ext/ruby_gumath/gumath/libgumath/func.c +13 -9
  33. data/ext/ruby_gumath/gumath/libgumath/func.o +0 -0
  34. data/ext/ruby_gumath/gumath/libgumath/graph.o +0 -0
  35. data/ext/ruby_gumath/gumath/libgumath/gumath.h +55 -14
  36. data/ext/ruby_gumath/gumath/libgumath/kernels/common.c +513 -0
  37. data/ext/ruby_gumath/gumath/libgumath/kernels/common.h +155 -0
  38. data/ext/ruby_gumath/gumath/libgumath/kernels/contrib/bfloat16.h +520 -0
  39. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.cc +1123 -0
  40. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.h +1062 -0
  41. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_msvc.cc +555 -0
  42. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc +368 -0
  43. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h +335 -0
  44. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_binary.c +2952 -0
  45. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_unary.c +1100 -0
  46. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.cu +1143 -0
  47. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.h +1061 -0
  48. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.cu +528 -0
  49. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.h +463 -0
  50. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_binary.c +2817 -0
  51. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_unary.c +1331 -0
  52. data/ext/ruby_gumath/gumath/libgumath/kernels/device.hh +614 -0
  53. data/ext/ruby_gumath/gumath/libgumath/libgumath.a +0 -0
  54. data/ext/ruby_gumath/gumath/libgumath/libgumath.so +1 -0
  55. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0 +1 -0
  56. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0.2.0dev3 +0 -0
  57. data/ext/ruby_gumath/gumath/libgumath/nploops.o +0 -0
  58. data/ext/ruby_gumath/gumath/libgumath/pdist.o +0 -0
  59. data/ext/ruby_gumath/gumath/libgumath/quaternion.o +0 -0
  60. data/ext/ruby_gumath/gumath/libgumath/tbl.o +0 -0
  61. data/ext/ruby_gumath/gumath/libgumath/thread.c +17 -4
  62. data/ext/ruby_gumath/gumath/libgumath/thread.o +0 -0
  63. data/ext/ruby_gumath/gumath/libgumath/xndloops.c +110 -0
  64. data/ext/ruby_gumath/gumath/libgumath/xndloops.o +0 -0
  65. data/ext/ruby_gumath/gumath/python/gumath/__init__.py +150 -0
  66. data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +446 -80
  67. data/ext/ruby_gumath/gumath/python/gumath/cuda.c +78 -0
  68. data/ext/ruby_gumath/gumath/python/gumath/examples.c +0 -5
  69. data/ext/ruby_gumath/gumath/python/gumath/functions.c +2 -2
  70. data/ext/ruby_gumath/gumath/python/gumath/gumath.h +246 -0
  71. data/ext/ruby_gumath/gumath/python/gumath/libgumath.a +0 -0
  72. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so +1 -0
  73. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0 +1 -0
  74. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0.2.0dev3 +0 -0
  75. data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +31 -2
  76. data/ext/ruby_gumath/gumath/python/gumath_aux.py +767 -0
  77. data/ext/ruby_gumath/gumath/python/randdec.py +535 -0
  78. data/ext/ruby_gumath/gumath/python/randfloat.py +177 -0
  79. data/ext/ruby_gumath/gumath/python/test_gumath.py +1504 -24
  80. data/ext/ruby_gumath/gumath/python/test_xndarray.py +462 -0
  81. data/ext/ruby_gumath/gumath/setup.py +67 -6
  82. data/ext/ruby_gumath/gumath/tools/detect_cuda_arch.cc +35 -0
  83. data/ext/ruby_gumath/include/gumath.h +55 -14
  84. data/ext/ruby_gumath/include/ruby_gumath.h +4 -1
  85. data/ext/ruby_gumath/lib/libgumath.a +0 -0
  86. data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
  87. data/ext/ruby_gumath/ruby_gumath.c +231 -70
  88. data/ext/ruby_gumath/ruby_gumath.h +4 -1
  89. data/ext/ruby_gumath/ruby_gumath_internal.h +25 -0
  90. data/ext/ruby_gumath/util.c +34 -0
  91. data/ext/ruby_gumath/util.h +9 -0
  92. data/gumath.gemspec +3 -2
  93. data/lib/gumath.rb +55 -1
  94. data/lib/gumath/version.rb +2 -2
  95. data/lib/ruby_gumath.so +0 -0
  96. metadata +63 -10
  97. data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +0 -130
  98. data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +0 -547
  99. data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +0 -449
@@ -41,9 +41,12 @@ AC_SUBST(host)
41
41
 
42
42
  # Language and compiler:
43
43
  AC_LANG_C
44
- saved_cflags=$CFLAGS
44
+ saved_cflags="$CFLAGS"
45
+ saved_cxxflags="$CXXFLAGS"
45
46
  AC_PROG_CC
46
- CFLAGS=$saved_cflags
47
+ AC_PROG_CXX
48
+ CFLAGS="$saved_cflags"
49
+ CXXFLAGS="$saved_cxxflags"
47
50
 
48
51
  # ar and ranlib:
49
52
  AC_CHECK_TOOL(AR, ar, ar)
@@ -61,6 +64,40 @@ AC_CHECK_HEADER([pthread.h],
61
64
  AC_PROG_INSTALL
62
65
  AC_SUBST(INSTALL)
63
66
 
67
+ # CUDA compiler (nvcc):
68
+ AC_MSG_CHECKING(for nvcc)
69
+ saved_cc="$CC"
70
+ saved_cflags="$CFLAGS"
71
+ saved_cxxflags="$CXXFLAGS"
72
+ CC=nvcc
73
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
74
+ void
75
+ f(void)
76
+ {
77
+ return;
78
+ }
79
+ ]])],
80
+ [have_nvcc=yes],
81
+ [have_nvcc=no],
82
+ [have_nvcc=undefined])
83
+ CC="$saved_cc"
84
+ CFLAGS="$saved_cflags"
85
+ CXXFLAGS="$saved_cxxflags"
86
+ AC_MSG_RESULT($have_nvcc)
87
+
88
+ CUDA_CXX=
89
+ CONFIGURE_CUDA_CXXFLAGS=
90
+ if test "$have_nvcc" = yes; then
91
+ if nvcc -o tools/detect_cuda_arch tools/detect_cuda_arch.cc > /dev/null 2>&1; then
92
+ CUDA_CXX="nvcc"
93
+ CUDA_ARCH=`./tools/detect_cuda_arch`
94
+ CONFIGURE_CUDA_CXXFLAGS="-std=c++11 -arch=compute_$CUDA_ARCH -code=sm_$CUDA_ARCH"
95
+ AC_DEFINE(HAVE_CUDA, 1, [Define to 1 if you have the nvcc cuda compiler.])
96
+ fi
97
+ fi
98
+ AC_SUBST(CUDA_CXX)
99
+ AC_SUBST(CONFIGURE_CUDA_CXXFLAGS)
100
+
64
101
  # Add an explicit include directory.
65
102
  AC_MSG_CHECKING(for --with-includes)
66
103
  AC_ARG_WITH(includes,
@@ -140,7 +177,7 @@ esac
140
177
 
141
178
  # Substitute variables and generate output:
142
179
  if test -z "$LD"; then
143
- LD="$CC"
180
+ LD="$CXX"
144
181
  fi
145
182
  AC_SUBST(LD)
146
183
  AC_SUBST(AR)
@@ -154,6 +191,12 @@ else
154
191
  CONFIGURE_CFLAGS="$GM_INCLUDE $GM_WARN $GM_CONFIG $GM_OPT $CFLAGS"
155
192
  fi
156
193
 
194
+ if test -z "$CXXFLAGS"; then
195
+ CONFIGURE_CXXFLAGS="$XND_INCLUDE -Wall -Wextra -std=c++11 $GM_OPT -g"
196
+ else
197
+ CONFIGURE_CXXFLAGS="$XND_INCLUDE -Wall -Wextra -std=c++11 $GM_OPT -g $CXXFLAGS"
198
+ fi
199
+
157
200
  if test -z "$LDFLAGS"; then
158
201
  CONFIGURE_LDFLAGS="$GM_LINK $CONFIGURE_LDFLAGS"
159
202
  else
@@ -161,6 +204,7 @@ else
161
204
  fi
162
205
 
163
206
  AC_SUBST(CONFIGURE_CFLAGS)
207
+ AC_SUBST(CONFIGURE_CXXFLAGS)
164
208
  AC_SUBST(CONFIGURE_LDFLAGS)
165
209
 
166
210
  AC_OUTPUT
@@ -0,0 +1,236 @@
1
+
2
+ # ==============================================================================
3
+ # Unix Makefile for libgumath
4
+ # ==============================================================================
5
+
6
+
7
+ LIBSTATIC = libgumath.a
8
+ LIBNAME = libgumath.so
9
+ LIBSONAME = libgumath.so.0
10
+ LIBSHARED = libgumath.so.0.2.0dev3
11
+
12
+ CC = gcc
13
+ CXX = g++
14
+ LD = g++
15
+ AR = ar
16
+ RANLIB = ranlib
17
+ CUDA_CXX =
18
+
19
+ GM_INCLUDES = ../ndtypes/libndtypes
20
+
21
+ CONFIGURE_CFLAGS = -Wall -Wextra -std=c11 -pedantic -O3 -I /home/sameer/.rvm/gems/ruby-2.4.1/gems/xnd-0.2.0dev7/ext/ruby_xnd/include -I /home/sameer/.rvm/gems/ruby-2.4.1/gems/ndtypes-0.2.0dev6/ext/ruby_ndtypes/include
22
+ GM_CFLAGS = $(strip -I.. -I$(GM_INCLUDES) $(CONFIGURE_CFLAGS) $(CFLAGS))
23
+ GM_CFLAGS_SHARED = $(GM_CFLAGS) -fPIC
24
+
25
+ CONFIGURE_CXXFLAGS = -Wall -Wextra -std=c++11 -O3 -g
26
+ GM_CXXFLAGS = $(strip -I$(GM_INCLUDES) $(CONFIGURE_CXXFLAGS) $(CXXFLAGS))
27
+ GM_CXXFLAGS_SHARED = $(GM_CXXFLAGS) -fPIC
28
+
29
+ CONFIGURE_LDFLAGS = -shared -Wl,-soname,libgumath.so.0
30
+ GM_LDFLAGS = $(strip $(CONFIGURE_LDFLAGS) $(LDFLAGS))
31
+
32
+ CONFIGURE_CUDA_CXXFLAGS =
33
+ GM_CUDA_CXXFLAGS = $(strip $(CONFIGURE_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS))
34
+
35
+ default: $(LIBSTATIC) $(LIBSHARED)
36
+
37
+
38
+ OBJS = apply.o func.o nploops.o tbl.o thread.o xndloops.o cpu_host_unary.o \
39
+ cpu_device_unary.o cpu_host_binary.o cpu_device_binary.o common.o \
40
+ examples.o graph.o quaternion.o pdist.o
41
+
42
+ SHARED_OBJS = .objs/apply.o .objs/func.o .objs/nploops.o .objs/tbl.o .objs/thread.o .objs/xndloops.o \
43
+ .objs/cpu_host_unary.o .objs/cpu_device_unary.o .objs/cpu_host_binary.o .objs/cpu_device_binary.o \
44
+ .objs/common.o .objs/examples.o .objs/graph.o .objs/quaternion.o .objs/pdist.o
45
+
46
+ ifdef CUDA_CXX
47
+ OBJS += cuda_host_unary.o cuda_device_unary.o cuda_host_binary.o cuda_device_binary.o
48
+ SHARED_OBJS += .objs/cuda_host_unary.o .objs/cuda_device_unary.o .objs/cuda_host_binary.o .objs/cuda_device_binary.o
49
+ endif
50
+
51
+
52
+ $(LIBSTATIC): Makefile $(OBJS)
53
+ $(AR) rc $(LIBSTATIC) $(OBJS)
54
+ $(RANLIB) $(LIBSTATIC)
55
+
56
+ $(LIBSHARED): Makefile $(SHARED_OBJS)
57
+ $(LD) $(GM_LDFLAGS) -o $(LIBSHARED) $(SHARED_OBJS)
58
+ ln -sf $(LIBSHARED) $(LIBNAME)
59
+ ln -sf $(LIBSHARED) $(LIBSONAME)
60
+
61
+
62
+ apply.o:\
63
+ Makefile apply.c gumath.h
64
+ $(CC) $(GM_CFLAGS) -c apply.c
65
+
66
+ .objs/apply.o:\
67
+ Makefile apply.c gumath.h
68
+ $(CC) $(GM_CFLAGS_SHARED) -c apply.c -o .objs/apply.o
69
+
70
+ func.o:\
71
+ Makefile func.c gumath.h
72
+ $(CC) $(GM_CFLAGS) -c func.c
73
+
74
+ .objs/func.o:\
75
+ Makefile func.c gumath.h
76
+ $(CC) $(GM_CFLAGS_SHARED) -c func.c -o .objs/func.o
77
+
78
+ nploops.o:\
79
+ Makefile nploops.c gumath.h
80
+ $(CC) $(GM_CFLAGS) -c nploops.c
81
+
82
+ .objs/nploops.o:\
83
+ Makefile nploops.c gumath.h
84
+ $(CC) $(GM_CFLAGS_SHARED) -c nploops.c -o .objs/nploops.o
85
+
86
+ tbl.o:\
87
+ Makefile tbl.c gumath.h
88
+ $(CC) $(GM_CFLAGS) -c tbl.c
89
+
90
+ .objs/tbl.o:\
91
+ Makefile tbl.c gumath.h
92
+ $(CC) $(GM_CFLAGS_SHARED) -c tbl.c -o .objs/tbl.o
93
+
94
+ thread.o:\
95
+ Makefile thread.c gumath.h
96
+ $(CC) $(GM_CFLAGS) -c thread.c
97
+
98
+ .objs/thread.o:\
99
+ Makefile thread.c gumath.h
100
+ $(CC) $(GM_CFLAGS_SHARED) -c thread.c -o .objs/thread.o
101
+
102
+ xndloops.o:\
103
+ Makefile xndloops.c gumath.h
104
+ $(CC) $(GM_CFLAGS) -c xndloops.c
105
+
106
+ .objs/xndloops.o:\
107
+ Makefile xndloops.c gumath.h
108
+ $(CC) $(GM_CFLAGS_SHARED) -c xndloops.c -o .objs/xndloops.o
109
+
110
+ cpu_device_unary.o:\
111
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
112
+ $(CXX) -I. $(GM_CXXFLAGS) -Wno-absolute-value -c kernels/cpu_device_unary.cc
113
+
114
+ .objs/cpu_device_unary.o:\
115
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
116
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_device_unary.cc -o .objs/cpu_device_unary.o
117
+
118
+ cpu_host_unary.o:\
119
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
120
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cpu_host_unary.c
121
+
122
+ .objs/cpu_host_unary.o:\
123
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
124
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_host_unary.c -o .objs/cpu_host_unary.o
125
+
126
+ cpu_host_binary.o:\
127
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
128
+ $(CC) -I. $(GM_CFLAGS) -c kernels/cpu_host_binary.c
129
+
130
+ .objs/cpu_host_binary.o:\
131
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
132
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/cpu_host_binary.c -o .objs/cpu_host_binary.o
133
+
134
+ cpu_device_binary.o:\
135
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
136
+ $(CXX) -I. $(GM_CXXFLAGS) -c kernels/cpu_device_binary.cc
137
+
138
+ .objs/cpu_device_binary.o:\
139
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
140
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -c kernels/cpu_device_binary.cc -o .objs/cpu_device_binary.o
141
+
142
+ common.o:\
143
+ Makefile kernels/common.c kernels/common.h gumath.h
144
+ common.o:\
145
+ Makefile kernels/common.c kernels/common.h gumath.h
146
+ $(CC) -I. $(GM_CFLAGS) -c kernels/common.c
147
+
148
+ .objs/common.o:\
149
+ Makefile kernels/common.c kernels/common.h gumath.h
150
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/common.c -o .objs/common.o
151
+
152
+ examples.o:\
153
+ Makefile extending/examples.c gumath.h
154
+ $(CC) -I. $(GM_CFLAGS) -c extending/examples.c -o examples.o
155
+
156
+ .objs/examples.o:\
157
+ Makefile extending/examples.c gumath.h
158
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/examples.c -o .objs/examples.o
159
+
160
+ graph.o:\
161
+ Makefile extending/graph.c gumath.h
162
+ $(CC) -I. $(GM_CFLAGS) -c extending/graph.c -o graph.o
163
+
164
+ .objs/graph.o:\
165
+ Makefile extending/graph.c gumath.h
166
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/graph.c -o .objs/graph.o
167
+
168
+ quaternion.o:\
169
+ Makefile extending/quaternion.c gumath.h
170
+ $(CC) -I. $(GM_CFLAGS) -c extending/quaternion.c -o quaternion.o
171
+
172
+ .objs/quaternion.o:\
173
+ Makefile extending/quaternion.c gumath.h
174
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/quaternion.c -o .objs/quaternion.o
175
+
176
+ pdist.o:\
177
+ Makefile extending/pdist.c gumath.h
178
+ $(CC) -I. $(GM_CFLAGS) -c extending/pdist.c -o pdist.o
179
+
180
+ .objs/pdist.o:\
181
+ Makefile extending/pdist.c gumath.h
182
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/pdist.c -o .objs/pdist.o
183
+
184
+
185
+ # CUDA kernels (added to OBJS/SHARED_OBJS only when CUDA_CXX is set)
186
+ cuda_host_unary.o:\
187
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
188
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_unary.c
189
+
190
+ .objs/cuda_host_unary.o:\
191
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
192
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_unary.c -o .objs/cuda_host_unary.o
193
+
194
+ cuda_device_unary.o:\
195
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
196
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu
197
+
198
+ .objs/cuda_device_unary.o:\
199
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
200
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu -o .objs/cuda_device_unary.o
201
+
202
+ cuda_host_binary.o:\
203
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
204
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_binary.c
205
+
206
+ .objs/cuda_host_binary.o:\
207
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
208
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_binary.c -o .objs/cuda_host_binary.o
209
+
210
+ cuda_device_binary.o:\
211
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
212
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu
213
+
214
+ .objs/cuda_device_binary.o:\
215
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
216
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu -o .objs/cuda_device_binary.o
217
+
218
+
219
+ # Coverage
220
+ coverage:\
221
+ Makefile clean runtest
222
+ ./tests/runtest
223
+ for file in *.c; do gcov -l "$$file" > /dev/null 2>&1; done
224
+
225
+ FORCE:
226
+
227
+ clean: FORCE
228
+ rm -f *.o *.so *.gch *.gcda *.gcno *.gcov *.dyn *.dpi *.lock
229
+ rm -f $(LIBSTATIC) $(LIBSHARED) $(LIBSONAME) $(LIBNAME)
230
+ cd .objs && rm -f *.o *.so *.gch *.gcda *.gcno *.gcov *.dyn *.dpi *.lock
231
+
232
+ distclean: clean
233
+ rm -f Makefile
234
+
235
+
236
+
@@ -10,9 +10,11 @@ LIBSONAME = @LIBSONAME@
10
10
  LIBSHARED = @LIBSHARED@
11
11
 
12
12
  CC = @CC@
13
+ CXX = @CXX@
13
14
  LD = @LD@
14
15
  AR = @AR@
15
16
  RANLIB = @RANLIB@
17
+ CUDA_CXX = @CUDA_CXX@
16
18
 
17
19
  GM_INCLUDES = @CONFIGURE_INCLUDES@
18
20
 
@@ -20,19 +22,31 @@ CONFIGURE_CFLAGS = @CONFIGURE_CFLAGS@
20
22
  GM_CFLAGS = $(strip -I.. -I$(GM_INCLUDES) $(CONFIGURE_CFLAGS) $(CFLAGS))
21
23
  GM_CFLAGS_SHARED = $(GM_CFLAGS) -fPIC
22
24
 
25
+ CONFIGURE_CXXFLAGS = @CONFIGURE_CXXFLAGS@
26
+ GM_CXXFLAGS = $(strip -I$(GM_INCLUDES) $(CONFIGURE_CXXFLAGS) $(CXXFLAGS))
27
+ GM_CXXFLAGS_SHARED = $(GM_CXXFLAGS) -fPIC
28
+
23
29
  CONFIGURE_LDFLAGS = @CONFIGURE_LDFLAGS@
24
30
  GM_LDFLAGS = $(strip $(CONFIGURE_LDFLAGS) $(LDFLAGS))
25
31
 
32
+ CONFIGURE_CUDA_CXXFLAGS = @CONFIGURE_CUDA_CXXFLAGS@
33
+ GM_CUDA_CXXFLAGS = $(strip $(CONFIGURE_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS))
26
34
 
27
35
  default: $(LIBSTATIC) $(LIBSHARED)
28
36
 
29
37
 
30
- OBJS = apply.o func.o nploops.o tbl.o thread.o xndloops.o unary.o binary.o \
31
- examples.o bfloat16.o graph.o quaternion.o pdist.o
38
+ OBJS = apply.o func.o nploops.o tbl.o thread.o xndloops.o cpu_host_unary.o \
39
+ cpu_device_unary.o cpu_host_binary.o cpu_device_binary.o common.o \
40
+ examples.o graph.o quaternion.o pdist.o
32
41
 
33
42
  SHARED_OBJS = .objs/apply.o .objs/func.o .objs/nploops.o .objs/tbl.o .objs/thread.o .objs/xndloops.o \
34
- .objs/unary.o .objs/binary.o .objs/examples.o .objs/bfloat16.o .objs/graph.o \
35
- .objs/quaternion.o .objs/pdist.o
43
+ .objs/cpu_host_unary.o .objs/cpu_device_unary.o .objs/cpu_host_binary.o .objs/cpu_device_binary.o \
44
+ .objs/common.o .objs/examples.o .objs/graph.o .objs/quaternion.o .objs/pdist.o
45
+
46
+ ifdef CUDA_CXX
47
+ OBJS += cuda_host_unary.o cuda_device_unary.o cuda_host_binary.o cuda_device_binary.o
48
+ SHARED_OBJS += .objs/cuda_host_unary.o .objs/cuda_device_unary.o .objs/cuda_host_binary.o .objs/cuda_device_binary.o
49
+ endif
36
50
 
37
51
 
38
52
  $(LIBSTATIC): Makefile $(OBJS)
@@ -93,21 +107,47 @@ Makefile xndloops.c gumath.h
93
107
  Makefile xndloops.c gumath.h
94
108
  $(CC) $(GM_CFLAGS_SHARED) -c xndloops.c -o .objs/xndloops.o
95
109
 
96
- unary.o:\
97
- Makefile kernels/unary.c gumath.h
98
- $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/unary.c
110
+ cpu_device_unary.o:\
111
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
112
+ $(CXX) -I. $(GM_CXXFLAGS) -Wno-absolute-value -c kernels/cpu_device_unary.cc
113
+
114
+ .objs/cpu_device_unary.o:\
115
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
116
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_device_unary.cc -o .objs/cpu_device_unary.o
117
+
118
+ cpu_host_unary.o:\
119
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
120
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cpu_host_unary.c
121
+
122
+ .objs/cpu_host_unary.o:\
123
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
124
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_host_unary.c -o .objs/cpu_host_unary.o
99
125
 
100
- .objs/unary.o:\
101
- Makefile kernels/unary.c gumath.h
102
- $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/unary.c -o .objs/unary.o
126
+ cpu_host_binary.o:\
127
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
128
+ $(CC) -I. $(GM_CFLAGS) -c kernels/cpu_host_binary.c
103
129
 
104
- binary.o:\
105
- Makefile kernels/binary.c gumath.h
106
- $(CC) -I. $(GM_CFLAGS) -c kernels/binary.c
130
+ .objs/cpu_host_binary.o:\
131
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
132
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/cpu_host_binary.c -o .objs/cpu_host_binary.o
107
133
 
108
- .objs/binary.o:\
109
- Makefile kernels/binary.c gumath.h
110
- $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/binary.c -o .objs/binary.o
134
+ cpu_device_binary.o:\
135
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
136
+ $(CXX) -I. $(GM_CXXFLAGS) -c kernels/cpu_device_binary.cc
137
+
138
+ .objs/cpu_device_binary.o:\
139
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
140
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -c kernels/cpu_device_binary.cc -o .objs/cpu_device_binary.o
141
+
142
+ common.o:\
143
+ Makefile kernels/common.c kernels/common.h gumath.h
144
+ common.o:\
145
+ Makefile kernels/common.c kernels/common.h gumath.h
146
+ $(CC) -I. $(GM_CFLAGS) -c kernels/common.c
147
+
148
+ .objs/common.o:\
149
+ Makefile kernels/common.c kernels/common.h gumath.h
150
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/common.c -o .objs/common.o
111
151
 
112
152
  examples.o:\
113
153
  Makefile extending/examples.c gumath.h
@@ -117,14 +157,6 @@ Makefile extending/examples.c gumath.h
117
157
  Makefile extending/examples.c gumath.h
118
158
  $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/examples.c -o .objs/examples.o
119
159
 
120
- bfloat16.o:\
121
- Makefile extending/bfloat16.c gumath.h
122
- $(CC) -I. $(GM_CFLAGS) -c extending/bfloat16.c -o bfloat16.o
123
-
124
- .objs/bfloat16.o:\
125
- Makefile extending/bfloat16.c gumath.h
126
- $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/bfloat16.c -o .objs/bfloat16.o
127
-
128
160
  graph.o:\
129
161
  Makefile extending/graph.c gumath.h
130
162
  $(CC) -I. $(GM_CFLAGS) -c extending/graph.c -o graph.o
@@ -150,6 +182,40 @@ Makefile extending/pdist.c gumath.h
150
182
  $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/pdist.c -o .objs/pdist.o
151
183
 
152
184
 
185
+ # CUDA kernels (added to OBJS/SHARED_OBJS only when CUDA_CXX is set)
186
+ cuda_host_unary.o:\
187
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
188
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_unary.c
189
+
190
+ .objs/cuda_host_unary.o:\
191
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
192
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_unary.c -o .objs/cuda_host_unary.o
193
+
194
+ cuda_device_unary.o:\
195
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
196
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu
197
+
198
+ .objs/cuda_device_unary.o:\
199
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
200
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu -o .objs/cuda_device_unary.o
201
+
202
+ cuda_host_binary.o:\
203
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
204
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_binary.c
205
+
206
+ .objs/cuda_host_binary.o:\
207
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
208
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_binary.c -o .objs/cuda_host_binary.o
209
+
210
+ cuda_device_binary.o:\
211
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
212
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu
213
+
214
+ .objs/cuda_device_binary.o:\
215
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
216
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu -o .objs/cuda_device_binary.o
217
+
218
+
153
219
  # Coverage
154
220
  coverage:\
155
221
  Makefile clean runtest