gumath 0.2.0dev5 → 0.2.0dev8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +7 -2
  3. data/Gemfile +0 -3
  4. data/ext/ruby_gumath/GPATH +0 -0
  5. data/ext/ruby_gumath/GRTAGS +0 -0
  6. data/ext/ruby_gumath/GTAGS +0 -0
  7. data/ext/ruby_gumath/extconf.rb +0 -5
  8. data/ext/ruby_gumath/functions.c +10 -2
  9. data/ext/ruby_gumath/gufunc_object.c +15 -4
  10. data/ext/ruby_gumath/gufunc_object.h +9 -3
  11. data/ext/ruby_gumath/gumath/Makefile +63 -0
  12. data/ext/ruby_gumath/gumath/Makefile.in +1 -0
  13. data/ext/ruby_gumath/gumath/config.h +56 -0
  14. data/ext/ruby_gumath/gumath/config.h.in +3 -0
  15. data/ext/ruby_gumath/gumath/config.log +497 -0
  16. data/ext/ruby_gumath/gumath/config.status +1034 -0
  17. data/ext/ruby_gumath/gumath/configure +375 -4
  18. data/ext/ruby_gumath/gumath/configure.ac +47 -3
  19. data/ext/ruby_gumath/gumath/libgumath/Makefile +236 -0
  20. data/ext/ruby_gumath/gumath/libgumath/Makefile.in +90 -24
  21. data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +54 -15
  22. data/ext/ruby_gumath/gumath/libgumath/apply.c +92 -28
  23. data/ext/ruby_gumath/gumath/libgumath/apply.o +0 -0
  24. data/ext/ruby_gumath/gumath/libgumath/common.o +0 -0
  25. data/ext/ruby_gumath/gumath/libgumath/cpu_device_binary.o +0 -0
  26. data/ext/ruby_gumath/gumath/libgumath/cpu_device_unary.o +0 -0
  27. data/ext/ruby_gumath/gumath/libgumath/cpu_host_binary.o +0 -0
  28. data/ext/ruby_gumath/gumath/libgumath/cpu_host_unary.o +0 -0
  29. data/ext/ruby_gumath/gumath/libgumath/examples.o +0 -0
  30. data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +27 -20
  31. data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +1 -1
  32. data/ext/ruby_gumath/gumath/libgumath/func.c +13 -9
  33. data/ext/ruby_gumath/gumath/libgumath/func.o +0 -0
  34. data/ext/ruby_gumath/gumath/libgumath/graph.o +0 -0
  35. data/ext/ruby_gumath/gumath/libgumath/gumath.h +55 -14
  36. data/ext/ruby_gumath/gumath/libgumath/kernels/common.c +513 -0
  37. data/ext/ruby_gumath/gumath/libgumath/kernels/common.h +155 -0
  38. data/ext/ruby_gumath/gumath/libgumath/kernels/contrib/bfloat16.h +520 -0
  39. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.cc +1123 -0
  40. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.h +1062 -0
  41. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_msvc.cc +555 -0
  42. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc +368 -0
  43. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h +335 -0
  44. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_binary.c +2952 -0
  45. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_unary.c +1100 -0
  46. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.cu +1143 -0
  47. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.h +1061 -0
  48. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.cu +528 -0
  49. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.h +463 -0
  50. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_binary.c +2817 -0
  51. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_unary.c +1331 -0
  52. data/ext/ruby_gumath/gumath/libgumath/kernels/device.hh +614 -0
  53. data/ext/ruby_gumath/gumath/libgumath/libgumath.a +0 -0
  54. data/ext/ruby_gumath/gumath/libgumath/libgumath.so +1 -0
  55. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0 +1 -0
  56. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0.2.0dev3 +0 -0
  57. data/ext/ruby_gumath/gumath/libgumath/nploops.o +0 -0
  58. data/ext/ruby_gumath/gumath/libgumath/pdist.o +0 -0
  59. data/ext/ruby_gumath/gumath/libgumath/quaternion.o +0 -0
  60. data/ext/ruby_gumath/gumath/libgumath/tbl.o +0 -0
  61. data/ext/ruby_gumath/gumath/libgumath/thread.c +17 -4
  62. data/ext/ruby_gumath/gumath/libgumath/thread.o +0 -0
  63. data/ext/ruby_gumath/gumath/libgumath/xndloops.c +110 -0
  64. data/ext/ruby_gumath/gumath/libgumath/xndloops.o +0 -0
  65. data/ext/ruby_gumath/gumath/python/gumath/__init__.py +150 -0
  66. data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +446 -80
  67. data/ext/ruby_gumath/gumath/python/gumath/cuda.c +78 -0
  68. data/ext/ruby_gumath/gumath/python/gumath/examples.c +0 -5
  69. data/ext/ruby_gumath/gumath/python/gumath/functions.c +2 -2
  70. data/ext/ruby_gumath/gumath/python/gumath/gumath.h +246 -0
  71. data/ext/ruby_gumath/gumath/python/gumath/libgumath.a +0 -0
  72. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so +1 -0
  73. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0 +1 -0
  74. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0.2.0dev3 +0 -0
  75. data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +31 -2
  76. data/ext/ruby_gumath/gumath/python/gumath_aux.py +767 -0
  77. data/ext/ruby_gumath/gumath/python/randdec.py +535 -0
  78. data/ext/ruby_gumath/gumath/python/randfloat.py +177 -0
  79. data/ext/ruby_gumath/gumath/python/test_gumath.py +1504 -24
  80. data/ext/ruby_gumath/gumath/python/test_xndarray.py +462 -0
  81. data/ext/ruby_gumath/gumath/setup.py +67 -6
  82. data/ext/ruby_gumath/gumath/tools/detect_cuda_arch.cc +35 -0
  83. data/ext/ruby_gumath/include/gumath.h +55 -14
  84. data/ext/ruby_gumath/include/ruby_gumath.h +4 -1
  85. data/ext/ruby_gumath/lib/libgumath.a +0 -0
  86. data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
  87. data/ext/ruby_gumath/ruby_gumath.c +231 -70
  88. data/ext/ruby_gumath/ruby_gumath.h +4 -1
  89. data/ext/ruby_gumath/ruby_gumath_internal.h +25 -0
  90. data/ext/ruby_gumath/util.c +34 -0
  91. data/ext/ruby_gumath/util.h +9 -0
  92. data/gumath.gemspec +3 -2
  93. data/lib/gumath.rb +55 -1
  94. data/lib/gumath/version.rb +2 -2
  95. data/lib/ruby_gumath.so +0 -0
  96. metadata +63 -10
  97. data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +0 -130
  98. data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +0 -547
  99. data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +0 -449
@@ -41,9 +41,12 @@ AC_SUBST(host)
41
41
 
42
42
  # Language and compiler:
43
43
  AC_LANG_C
44
- saved_cflags=$CFLAGS
44
+ saved_cflags="$CFLAGS"
45
+ saved_cxxflags="$CXXFLAGS"
45
46
  AC_PROG_CC
46
- CFLAGS=$saved_cflags
47
+ AC_PROG_CXX
48
+ CFLAGS="$saved_cflags"
49
+ CXXFLAGS="$saved_cxxflags"
47
50
 
48
51
  # ar and ranlib:
49
52
  AC_CHECK_TOOL(AR, ar, ar)
@@ -61,6 +64,40 @@ AC_CHECK_HEADER([pthread.h],
61
64
  AC_PROG_INSTALL
62
65
  AC_SUBST(INSTALL)
63
66
 
67
+ # Cuda compiler:
68
+ AC_MSG_CHECKING(for nvcc)
69
+ saved_cc="$CC"
70
+ saved_cflags="$CFLAGS"
71
+ saved_cxxflags="$CXXFLAGS"
72
+ CC=nvcc
73
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
74
+ void
75
+ f(void)
76
+ {
77
+ return;
78
+ }
79
+ ]])],
80
+ [have_nvcc=yes],
81
+ [have_nvcc=no],
82
+ [have_nvcc=undefined])
83
+ CC="$saved_cc"
84
+ CFLAGS="$saved_cflags"
85
+ CXXFLAGS="$saved_cxxflags"
86
+ AC_MSG_RESULT($have_nvcc)
87
+
88
+ CUDA_CXX=
89
+ CONFIGURE_CUDA_CXXFLAGS=
90
+ if test "$have_nvcc" = yes; then
91
+ if nvcc -o tools/detect_cuda_arch tools/detect_cuda_arch.cc > /dev/null 2>&1; then
92
+ CUDA_CXX="nvcc"
93
+ CUDA_ARCH=`./tools/detect_cuda_arch`
94
+ CONFIGURE_CUDA_CXXFLAGS="-std=c++11 -arch=compute_$CUDA_ARCH -code=sm_$CUDA_ARCH"
95
+ AC_DEFINE(HAVE_CUDA, 1, [Define to 1 if you have the nvcc cuda compiler.])
96
+ fi
97
+ fi
98
+ AC_SUBST(CUDA_CXX)
99
+ AC_SUBST(CONFIGURE_CUDA_CXXFLAGS)
100
+
64
101
  # Add an explicit include directory.
65
102
  AC_MSG_CHECKING(for --with-includes)
66
103
  AC_ARG_WITH(includes,
@@ -140,7 +177,7 @@ esac
140
177
 
141
178
  # Substitute variables and generate output:
142
179
  if test -z "$LD"; then
143
- LD="$CC"
180
+ LD="$CXX"
144
181
  fi
145
182
  AC_SUBST(LD)
146
183
  AC_SUBST(AR)
@@ -154,6 +191,12 @@ else
154
191
  CONFIGURE_CFLAGS="$GM_INCLUDE $GM_WARN $GM_CONFIG $GM_OPT $CFLAGS"
155
192
  fi
156
193
 
194
+ if test -z "$CXXFLAGS"; then
195
+ CONFIGURE_CXXFLAGS="$XND_INCLUDE -Wall -Wextra -std=c++11 $GM_OPT -g"
196
+ else
197
+ CONFIGURE_CXXFLAGS="$XND_INCLUDE -Wall -Wextra -std=c++11 $GM_OPT -g $CXXFLAGS"
198
+ fi
199
+
157
200
  if test -z "$LDFLAGS"; then
158
201
  CONFIGURE_LDFLAGS="$GM_LINK $CONFIGURE_LDFLAGS"
159
202
  else
@@ -161,6 +204,7 @@ else
161
204
  fi
162
205
 
163
206
  AC_SUBST(CONFIGURE_CFLAGS)
207
+ AC_SUBST(CONFIGURE_CXXFLAGS)
164
208
  AC_SUBST(CONFIGURE_LDFLAGS)
165
209
 
166
210
  AC_OUTPUT
@@ -0,0 +1,236 @@
1
+
2
+ # ==============================================================================
3
+ # Unix Makefile for libgumath
4
+ # ==============================================================================
5
+
6
+
7
+ LIBSTATIC = libgumath.a
8
+ LIBNAME = libgumath.so
9
+ LIBSONAME = libgumath.so.0
10
+ LIBSHARED = libgumath.so.0.2.0dev3
11
+
12
+ CC = gcc
13
+ CXX = g++
14
+ LD = g++
15
+ AR = ar
16
+ RANLIB = ranlib
17
+ CUDA_CXX =
18
+
19
+ GM_INCLUDES = ../ndtypes/libndtypes
20
+
21
+ CONFIGURE_CFLAGS = -Wall -Wextra -std=c11 -pedantic -O3 -I /home/sameer/.rvm/gems/ruby-2.4.1/gems/xnd-0.2.0dev7/ext/ruby_xnd/include -I /home/sameer/.rvm/gems/ruby-2.4.1/gems/ndtypes-0.2.0dev6/ext/ruby_ndtypes/include
22
+ GM_CFLAGS = $(strip -I.. -I$(GM_INCLUDES) $(CONFIGURE_CFLAGS) $(CFLAGS))
23
+ GM_CFLAGS_SHARED = $(GM_CFLAGS) -fPIC
24
+
25
+ CONFIGURE_CXXFLAGS = -Wall -Wextra -std=c++11 -O3 -g
26
+ GM_CXXFLAGS = $(strip -I$(GM_INCLUDES) $(CONFIGURE_CXXFLAGS) $(CXXFLAGS))
27
+ GM_CXXFLAGS_SHARED = $(GM_CXXFLAGS) -fPIC
28
+
29
+ CONFIGURE_LDFLAGS = -shared -Wl,-soname,libgumath.so.0
30
+ GM_LDFLAGS = $(strip $(CONFIGURE_LDFLAGS) $(LDFLAGS))
31
+
32
+ CONFIGURE_CUDA_CXXFLAGS =
33
+ GM_CUDA_CXXFLAGS = $(strip $(CONFIGURE_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS))
34
+
35
+ default: $(LIBSTATIC) $(LIBSHARED)
36
+
37
+
38
+ OBJS = apply.o func.o nploops.o tbl.o thread.o xndloops.o cpu_host_unary.o \
39
+ cpu_device_unary.o cpu_host_binary.o cpu_device_binary.o common.o \
40
+ examples.o graph.o quaternion.o pdist.o
41
+
42
+ SHARED_OBJS = .objs/apply.o .objs/func.o .objs/nploops.o .objs/tbl.o .objs/thread.o .objs/xndloops.o \
43
+ .objs/cpu_host_unary.o .objs/cpu_device_unary.o .objs/cpu_host_binary.o .objs/cpu_device_binary.o \
44
+ .objs/common.o .objs/examples.o .objs/graph.o .objs/quaternion.o .objs/pdist.o
45
+
46
+ ifdef CUDA_CXX
47
+ OBJS += cuda_host_unary.o cuda_device_unary.o cuda_host_binary.o cuda_device_binary.o
48
+ SHARED_OBJS += .objs/cuda_host_unary.o .objs/cuda_device_unary.o .objs/cuda_host_binary.o .objs/cuda_device_binary.o
49
+ endif
50
+
51
+
52
+ $(LIBSTATIC): Makefile $(OBJS)
53
+ $(AR) rc $(LIBSTATIC) $(OBJS)
54
+ $(RANLIB) $(LIBSTATIC)
55
+
56
+ $(LIBSHARED): Makefile $(SHARED_OBJS)
57
+ $(LD) $(GM_LDFLAGS) -o $(LIBSHARED) $(SHARED_OBJS)
58
+ ln -sf $(LIBSHARED) $(LIBNAME)
59
+ ln -sf $(LIBSHARED) $(LIBSONAME)
60
+
61
+
62
+ apply.o:\
63
+ Makefile apply.c gumath.h
64
+ $(CC) $(GM_CFLAGS) -c apply.c
65
+
66
+ .objs/apply.o:\
67
+ Makefile apply.c gumath.h
68
+ $(CC) $(GM_CFLAGS_SHARED) -c apply.c -o .objs/apply.o
69
+
70
+ func.o:\
71
+ Makefile func.c gumath.h
72
+ $(CC) $(GM_CFLAGS) -c func.c
73
+
74
+ .objs/func.o:\
75
+ Makefile func.c gumath.h
76
+ $(CC) $(GM_CFLAGS_SHARED) -c func.c -o .objs/func.o
77
+
78
+ nploops.o:\
79
+ Makefile nploops.c gumath.h
80
+ $(CC) $(GM_CFLAGS) -c nploops.c
81
+
82
+ .objs/nploops.o:\
83
+ Makefile nploops.c gumath.h
84
+ $(CC) $(GM_CFLAGS_SHARED) -c nploops.c -o .objs/nploops.o
85
+
86
+ tbl.o:\
87
+ Makefile tbl.c gumath.h
88
+ $(CC) $(GM_CFLAGS) -c tbl.c
89
+
90
+ .objs/tbl.o:\
91
+ Makefile tbl.c gumath.h
92
+ $(CC) $(GM_CFLAGS_SHARED) -c tbl.c -o .objs/tbl.o
93
+
94
+ thread.o:\
95
+ Makefile thread.c gumath.h
96
+ $(CC) $(GM_CFLAGS) -c thread.c
97
+
98
+ .objs/thread.o:\
99
+ Makefile thread.c gumath.h
100
+ $(CC) $(GM_CFLAGS_SHARED) -c thread.c -o .objs/thread.o
101
+
102
+ xndloops.o:\
103
+ Makefile xndloops.c gumath.h
104
+ $(CC) $(GM_CFLAGS) -c xndloops.c
105
+
106
+ .objs/xndloops.o:\
107
+ Makefile xndloops.c gumath.h
108
+ $(CC) $(GM_CFLAGS_SHARED) -c xndloops.c -o .objs/xndloops.o
109
+
110
+ cpu_device_unary.o:\
111
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
112
+ $(CXX) -I. $(GM_CXXFLAGS) -Wno-absolute-value -c kernels/cpu_device_unary.cc
113
+
114
+ .objs/cpu_device_unary.o:\
115
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
116
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_device_unary.cc -o .objs/cpu_device_unary.o
117
+
118
+ cpu_host_unary.o:\
119
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
120
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cpu_host_unary.c
121
+
122
+ .objs/cpu_host_unary.o:\
123
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
124
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_host_unary.c -o .objs/cpu_host_unary.o
125
+
126
+ cpu_host_binary.o:\
127
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
128
+ $(CC) -I. $(GM_CFLAGS) -c kernels/cpu_host_binary.c
129
+
130
+ .objs/cpu_host_binary.o:\
131
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
132
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/cpu_host_binary.c -o .objs/cpu_host_binary.o
133
+
134
+ cpu_device_binary.o:\
135
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
136
+ $(CXX) -I. $(GM_CXXFLAGS) -c kernels/cpu_device_binary.cc
137
+
138
+ .objs/cpu_device_binary.o:\
139
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
140
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -c kernels/cpu_device_binary.cc -o .objs/cpu_device_binary.o
141
+
142
+ common.o:\
143
+ Makefile kernels/common.c kernels/common.h gumath.h
144
+ common.o:\
145
+ Makefile kernels/common.c kernels/common.h gumath.h
146
+ $(CC) -I. $(GM_CFLAGS) -c kernels/common.c
147
+
148
+ .objs/common.o:\
149
+ Makefile kernels/common.c kernels/common.h gumath.h
150
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/common.c -o .objs/common.o
151
+
152
+ examples.o:\
153
+ Makefile extending/examples.c gumath.h
154
+ $(CC) -I. $(GM_CFLAGS) -c extending/examples.c -o examples.o
155
+
156
+ .objs/examples.o:\
157
+ Makefile extending/examples.c gumath.h
158
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/examples.c -o .objs/examples.o
159
+
160
+ graph.o:\
161
+ Makefile extending/graph.c gumath.h
162
+ $(CC) -I. $(GM_CFLAGS) -c extending/graph.c -o graph.o
163
+
164
+ .objs/graph.o:\
165
+ Makefile extending/graph.c gumath.h
166
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/graph.c -o .objs/graph.o
167
+
168
+ quaternion.o:\
169
+ Makefile extending/quaternion.c gumath.h
170
+ $(CC) -I. $(GM_CFLAGS) -c extending/quaternion.c -o quaternion.o
171
+
172
+ .objs/quaternion.o:\
173
+ Makefile extending/quaternion.c gumath.h
174
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/quaternion.c -o .objs/quaternion.o
175
+
176
+ pdist.o:\
177
+ Makefile extending/pdist.c gumath.h
178
+ $(CC) -I. $(GM_CFLAGS) -c extending/pdist.c -o pdist.o
179
+
180
+ .objs/pdist.o:\
181
+ Makefile extending/pdist.c gumath.h
182
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/pdist.c -o .objs/pdist.o
183
+
184
+
185
+ # Cuda
186
+ cuda_host_unary.o:\
187
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
188
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_unary.c
189
+
190
+ .objs/cuda_host_unary.o:\
191
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
192
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_unary.c -o .objs/cuda_host_unary.o
193
+
194
+ cuda_device_unary.o:\
195
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
196
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu
197
+
198
+ .objs/cuda_device_unary.o:\
199
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
200
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu -o .objs/cuda_device_unary.o
201
+
202
+ cuda_host_binary.o:\
203
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
204
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_binary.c
205
+
206
+ .objs/cuda_host_binary.o:\
207
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
208
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_binary.c -o .objs/cuda_host_binary.o
209
+
210
+ cuda_device_binary.o:\
211
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
212
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu
213
+
214
+ .objs/cuda_device_binary.o:\
215
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
216
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu -o .objs/cuda_device_binary.o
217
+
218
+
219
+ # Coverage
220
+ coverage:\
221
+ Makefile clean runtest
222
+ ./tests/runtest
223
+ for file in *.c; do gcov -l "$$file" > /dev/null 2>&1; done
224
+
225
+ FORCE:
226
+
227
+ clean: FORCE
228
+ rm -f *.o *.so *.gch *.gcda *.gcno *.gcov *.dyn *.dpi *.lock
229
+ rm -f $(LIBSTATIC) $(LIBSHARED) $(LIBSONAME) $(LIBNAME)
230
+ cd .objs && rm -f *.o *.so *.gch *.gcda *.gcno *.gcov *.dyn *.dpi *.lock
231
+
232
+ distclean: clean
233
+ rm -f Makefile
234
+
235
+
236
+
@@ -10,9 +10,11 @@ LIBSONAME = @LIBSONAME@
10
10
  LIBSHARED = @LIBSHARED@
11
11
 
12
12
  CC = @CC@
13
+ CXX = @CXX@
13
14
  LD = @LD@
14
15
  AR = @AR@
15
16
  RANLIB = @RANLIB@
17
+ CUDA_CXX = @CUDA_CXX@
16
18
 
17
19
  GM_INCLUDES = @CONFIGURE_INCLUDES@
18
20
 
@@ -20,19 +22,31 @@ CONFIGURE_CFLAGS = @CONFIGURE_CFLAGS@
20
22
  GM_CFLAGS = $(strip -I.. -I$(GM_INCLUDES) $(CONFIGURE_CFLAGS) $(CFLAGS))
21
23
  GM_CFLAGS_SHARED = $(GM_CFLAGS) -fPIC
22
24
 
25
+ CONFIGURE_CXXFLAGS = @CONFIGURE_CXXFLAGS@
26
+ GM_CXXFLAGS = $(strip -I$(GM_INCLUDES) $(CONFIGURE_CXXFLAGS) $(CXXFLAGS))
27
+ GM_CXXFLAGS_SHARED = $(GM_CXXFLAGS) -fPIC
28
+
23
29
  CONFIGURE_LDFLAGS = @CONFIGURE_LDFLAGS@
24
30
  GM_LDFLAGS = $(strip $(CONFIGURE_LDFLAGS) $(LDFLAGS))
25
31
 
32
+ CONFIGURE_CUDA_CXXFLAGS = @CONFIGURE_CUDA_CXXFLAGS@
33
+ GM_CUDA_CXXFLAGS = $(strip $(CONFIGURE_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS))
26
34
 
27
35
  default: $(LIBSTATIC) $(LIBSHARED)
28
36
 
29
37
 
30
- OBJS = apply.o func.o nploops.o tbl.o thread.o xndloops.o unary.o binary.o \
31
- examples.o bfloat16.o graph.o quaternion.o pdist.o
38
+ OBJS = apply.o func.o nploops.o tbl.o thread.o xndloops.o cpu_host_unary.o \
39
+ cpu_device_unary.o cpu_host_binary.o cpu_device_binary.o common.o \
40
+ examples.o graph.o quaternion.o pdist.o
32
41
 
33
42
  SHARED_OBJS = .objs/apply.o .objs/func.o .objs/nploops.o .objs/tbl.o .objs/thread.o .objs/xndloops.o \
34
- .objs/unary.o .objs/binary.o .objs/examples.o .objs/bfloat16.o .objs/graph.o \
35
- .objs/quaternion.o .objs/pdist.o
43
+ .objs/cpu_host_unary.o .objs/cpu_device_unary.o .objs/cpu_host_binary.o .objs/cpu_device_binary.o \
44
+ .objs/common.o .objs/examples.o .objs/graph.o .objs/quaternion.o .objs/pdist.o
45
+
46
+ ifdef CUDA_CXX
47
+ OBJS += cuda_host_unary.o cuda_device_unary.o cuda_host_binary.o cuda_device_binary.o
48
+ SHARED_OBJS += .objs/cuda_host_unary.o .objs/cuda_device_unary.o .objs/cuda_host_binary.o .objs/cuda_device_binary.o
49
+ endif
36
50
 
37
51
 
38
52
  $(LIBSTATIC): Makefile $(OBJS)
@@ -93,21 +107,47 @@ Makefile xndloops.c gumath.h
93
107
  Makefile xndloops.c gumath.h
94
108
  $(CC) $(GM_CFLAGS_SHARED) -c xndloops.c -o .objs/xndloops.o
95
109
 
96
- unary.o:\
97
- Makefile kernels/unary.c gumath.h
98
- $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/unary.c
110
+ cpu_device_unary.o:\
111
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
112
+ $(CXX) -I. $(GM_CXXFLAGS) -Wno-absolute-value -c kernels/cpu_device_unary.cc
113
+
114
+ .objs/cpu_device_unary.o:\
115
+ Makefile kernels/cpu_device_unary.cc kernels/common.h gumath.h
116
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_device_unary.cc -o .objs/cpu_device_unary.o
117
+
118
+ cpu_host_unary.o:\
119
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
120
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cpu_host_unary.c
121
+
122
+ .objs/cpu_host_unary.o:\
123
+ Makefile kernels/cpu_host_unary.c kernels/common.h gumath.h
124
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cpu_host_unary.c -o .objs/cpu_host_unary.o
99
125
 
100
- .objs/unary.o:\
101
- Makefile kernels/unary.c gumath.h
102
- $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/unary.c -o .objs/unary.o
126
+ cpu_host_binary.o:\
127
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
128
+ $(CC) -I. $(GM_CFLAGS) -c kernels/cpu_host_binary.c
103
129
 
104
- binary.o:\
105
- Makefile kernels/binary.c gumath.h
106
- $(CC) -I. $(GM_CFLAGS) -c kernels/binary.c
130
+ .objs/cpu_host_binary.o:\
131
+ Makefile kernels/cpu_host_binary.c kernels/common.h gumath.h
132
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/cpu_host_binary.c -o .objs/cpu_host_binary.o
107
133
 
108
- .objs/binary.o:\
109
- Makefile kernels/binary.c gumath.h
110
- $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/binary.c -o .objs/binary.o
134
+ cpu_device_binary.o:\
135
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
136
+ $(CXX) -I. $(GM_CXXFLAGS) -c kernels/cpu_device_binary.cc
137
+
138
+ .objs/cpu_device_binary.o:\
139
+ Makefile kernels/cpu_device_binary.cc kernels/common.h gumath.h
140
+ $(CXX) -I. $(GM_CXXFLAGS_SHARED) -c kernels/cpu_device_binary.cc -o .objs/cpu_device_binary.o
141
+
142
+ common.o:\
143
+ Makefile kernels/common.c kernels/common.h gumath.h
144
+ common.o:\
145
+ Makefile kernels/common.c kernels/common.h gumath.h
146
+ $(CC) -I. $(GM_CFLAGS) -c kernels/common.c
147
+
148
+ .objs/common.o:\
149
+ Makefile kernels/common.c kernels/common.h gumath.h
150
+ $(CC) -I. $(GM_CFLAGS_SHARED) -c kernels/common.c -o .objs/common.o
111
151
 
112
152
  examples.o:\
113
153
  Makefile extending/examples.c gumath.h
@@ -117,14 +157,6 @@ Makefile extending/examples.c gumath.h
117
157
  Makefile extending/examples.c gumath.h
118
158
  $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/examples.c -o .objs/examples.o
119
159
 
120
- bfloat16.o:\
121
- Makefile extending/bfloat16.c gumath.h
122
- $(CC) -I. $(GM_CFLAGS) -c extending/bfloat16.c -o bfloat16.o
123
-
124
- .objs/bfloat16.o:\
125
- Makefile extending/bfloat16.c gumath.h
126
- $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/bfloat16.c -o .objs/bfloat16.o
127
-
128
160
  graph.o:\
129
161
  Makefile extending/graph.c gumath.h
130
162
  $(CC) -I. $(GM_CFLAGS) -c extending/graph.c -o graph.o
@@ -150,6 +182,40 @@ Makefile extending/pdist.c gumath.h
150
182
  $(CC) -I. $(GM_CFLAGS_SHARED) -c extending/pdist.c -o .objs/pdist.o
151
183
 
152
184
 
185
+ # Cuda
186
+ cuda_host_unary.o:\
187
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
188
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_unary.c
189
+
190
+ .objs/cuda_host_unary.o:\
191
+ Makefile kernels/cuda_host_unary.c kernels/common.h kernels/cuda_device_unary.h gumath.h
192
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_unary.c -o .objs/cuda_host_unary.o
193
+
194
+ cuda_device_unary.o:\
195
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
196
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu
197
+
198
+ .objs/cuda_device_unary.o:\
199
+ Makefile kernels/cuda_device_unary.cu kernels/common.h kernels/cuda_device_unary.h gumath.h
200
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_unary.cu -o .objs/cuda_device_unary.o
201
+
202
+ cuda_host_binary.o:\
203
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
204
+ $(CC) -I. $(GM_CFLAGS) -Wno-absolute-value -c kernels/cuda_host_binary.c
205
+
206
+ .objs/cuda_host_binary.o:\
207
+ Makefile kernels/cuda_host_binary.c kernels/common.h kernels/cuda_device_binary.h gumath.h
208
+ $(CC) -I. $(GM_CFLAGS_SHARED) -Wno-absolute-value -c kernels/cuda_host_binary.c -o .objs/cuda_host_binary.o
209
+
210
+ cuda_device_binary.o:\
211
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
212
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu
213
+
214
+ .objs/cuda_device_binary.o:\
215
+ Makefile kernels/cuda_device_binary.cu kernels/common.h kernels/cuda_device_binary.h gumath.h
216
+ $(CUDA_CXX) --compiler-options "$(GM_CXXFLAGS_SHARED)" $(GM_CUDA_CXXFLAGS) -c kernels/cuda_device_binary.cu -o .objs/cuda_device_binary.o
217
+
218
+
153
219
  # Coverage
154
220
  coverage:\
155
221
  Makefile clean runtest