cumo 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158)
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/3rd_party/LICENSE.txt +60 -0
  4. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
  5. data/LICENSE.txt +1 -62
  6. data/README.md +33 -29
  7. data/bench/cumo_bench.rb +47 -25
  8. data/bench/numo_bench.rb +27 -25
  9. data/docs/src-tree.md +16 -0
  10. data/ext/cumo/cuda/cublas.c +69 -219
  11. data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
  12. data/ext/cumo/cuda/runtime.c +2 -14
  13. data/ext/cumo/cumo.c +16 -16
  14. data/ext/cumo/include/cumo.h +2 -2
  15. data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
  16. data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
  17. data/ext/cumo/include/cumo/indexer.h +46 -63
  18. data/ext/cumo/include/cumo/intern.h +58 -112
  19. data/ext/cumo/include/cumo/narray.h +214 -185
  20. data/ext/cumo/include/cumo/narray_kernel.h +66 -37
  21. data/ext/cumo/include/cumo/ndloop.h +42 -42
  22. data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
  23. data/ext/cumo/include/cumo/template.h +56 -51
  24. data/ext/cumo/include/cumo/template_kernel.h +31 -31
  25. data/ext/cumo/include/cumo/types/bit.h +3 -3
  26. data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
  27. data/ext/cumo/include/cumo/types/complex.h +126 -126
  28. data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
  29. data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
  30. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
  31. data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
  32. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
  33. data/ext/cumo/include/cumo/types/int_macro.h +1 -1
  34. data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
  35. data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
  36. data/ext/cumo/include/cumo/types/scomplex.h +5 -5
  37. data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
  38. data/ext/cumo/narray/array.c +143 -143
  39. data/ext/cumo/narray/data.c +184 -184
  40. data/ext/cumo/narray/gen/cogen.rb +5 -2
  41. data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
  42. data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
  43. data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
  44. data/ext/cumo/narray/gen/erbln.rb +132 -0
  45. data/ext/cumo/narray/gen/erbpp2.rb +18 -13
  46. data/ext/cumo/narray/gen/narray_def.rb +3 -3
  47. data/ext/cumo/narray/gen/spec.rb +2 -2
  48. data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
  49. data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
  50. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
  51. data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
  52. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
  53. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
  54. data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
  55. data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
  56. data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
  57. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
  58. data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
  59. data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
  60. data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
  61. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
  62. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
  63. data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
  64. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
  65. data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
  66. data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
  67. data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
  68. data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
  69. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
  70. data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
  71. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
  72. data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
  73. data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
  74. data/ext/cumo/narray/gen/tmpl/each.c +9 -9
  75. data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
  76. data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
  77. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
  78. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
  79. data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
  80. data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
  81. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
  82. data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
  83. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
  84. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
  85. data/ext/cumo/narray/gen/tmpl/format.c +11 -11
  86. data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
  87. data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
  88. data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
  89. data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
  91. data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
  92. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
  93. data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
  94. data/ext/cumo/narray/gen/tmpl/median.c +10 -10
  95. data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
  96. data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
  97. data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
  98. data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
  99. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
  100. data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
  101. data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
  102. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
  103. data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
  104. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
  105. data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
  106. data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
  107. data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
  108. data/ext/cumo/narray/gen/tmpl/store.c +6 -6
  109. data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
  110. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
  111. data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
  112. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
  113. data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
  114. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
  115. data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
  116. data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
  117. data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
  118. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
  119. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
  120. data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
  121. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
  122. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
  123. data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
  124. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
  125. data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
  126. data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
  127. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
  128. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
  129. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
  130. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
  131. data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
  132. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
  133. data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
  134. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
  135. data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
  136. data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
  137. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
  138. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
  139. data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
  140. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
  141. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
  142. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
  143. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
  144. data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
  145. data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
  146. data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
  147. data/ext/cumo/narray/index.c +213 -213
  148. data/ext/cumo/narray/math.c +27 -27
  149. data/ext/cumo/narray/narray.c +484 -484
  150. data/ext/cumo/narray/ndloop.c +259 -258
  151. data/ext/cumo/narray/rand.c +3 -3
  152. data/ext/cumo/narray/step.c +70 -70
  153. data/ext/cumo/narray/struct.c +139 -139
  154. metadata +6 -7
  155. data/ext/cumo/include/cumo/intern_fwd.h +0 -38
  156. data/lib/erbpp.rb +0 -294
  157. data/lib/erbpp/line_number.rb +0 -137
  158. data/lib/erbpp/narray_def.rb +0 -381
@@ -1,381 +0,0 @@
1
- require 'erbpp'
2
-
3
# DSL helpers for declaring the methods of a generated NArray type.
#
# Each helper builds a definition object (Function, Alias, Allocate, Store,
# ...) bound to +self+. Those classes are defined outside this module; only
# the arguments handed to their constructors are described here.
module DefMethod

  # Builds an IdVar for +meth+, optionally bound to the variable name +var+.
  def def_id(meth, var = nil)
    IdVar.new(self, meth, var)
  end

  # Declares a method.
  #
  # meth  - method name.
  # n_arg - argument count handed on as :n_arg (several helpers below pass -1).
  # tmpl  - template name; falls back to +meth+ when nil/false.
  # opts  - extra options merged over the :meth/:n_arg entries.
  #
  # Returns the new Function.
  def def_method(meth, n_arg, tmpl = nil, opts = {})
    settings = { :meth => meth, :n_arg => n_arg }
    settings.merge!(opts)
    tmpl ||= meth
    Function.new(self, tmpl, settings)
  end

  # Declares a singleton method. Takes the same arguments as #def_method.
  #
  # The caller's +opts+ are forwarded together with :singleton => true
  # (previously they were accepted but dropped). The caller's hash is not
  # mutated, and :singleton is always forced to true.
  def def_singleton(meth, n_arg, tmpl = nil, opts = {})
    def_method(meth, n_arg, tmpl, opts.merge(:singleton => true))
  end

  # Declares +dst+ as an alias of +src+.
  def def_alias(dst, src)
    Alias.new(self, dst, src)
  end

  # Declares the allocator, rendered from template +tmpl+.
  def def_allocate(tmpl)
    settings = { :meth => "allocate", :singleton => true }
    Allocate.new(self, tmpl, settings)
  end

  # One-argument method using the "binary" template; :op defaults to +meth+.
  def binary(meth, ope = nil)
    ope = meth if ope.nil?
    def_method(meth, 1, "binary", :op => ope)
  end

  # One-argument method using the "binary2" template; :op defaults to +meth+.
  def binary2(meth, ope = nil)
    ope = meth if ope.nil?
    def_method(meth, 1, "binary2", :op => ope)
  end

  # Zero-argument method using the "unary" template.
  # Unlike #binary, a nil +ope+ is passed through as-is (no default to +meth+).
  def unary(meth, ope = nil)
    def_method(meth, 0, "unary", :op => ope)
  end

  # Declares "pow" with operator "**".
  def pow
    def_method("pow", 1, "pow", :op => "**")
  end

  # Zero-argument method using the "unary2" template with :dtype/:tpclass.
  def unary2(meth, dtype, tpclass)
    settings = { :dtype => dtype, :tpclass => tpclass }
    def_method(meth, 0, "unary2", settings)
  end

  # One-argument method using the "set2" template with :dtype/:tpclass.
  def set2(meth, dtype, tpclass)
    settings = { :dtype => dtype, :tpclass => tpclass }
    def_method(meth, 1, "set2", settings)
  end

  # One-argument method using the "cond_binary" template; :op defaults to +meth+.
  def cond_binary(meth, op = nil)
    op = meth unless op
    def_method(meth, 1, "cond_binary", :op => op)
  end

  # Zero-argument method using the "cond_unary" template.
  def cond_unary(meth)
    def_method(meth, 0, "cond_unary")
  end

  # Method using the "bit_count" template.
  def bit_count(meth)
    def_method(meth, -1, "bit_count")
  end

  # Method using the "bit_reduce" template with :init_bit.
  def bit_reduce(meth, init_bit)
    settings = { :init_bit => init_bit }
    def_method(meth, -1, "bit_reduce", settings)
  end

  # Method using the "accum" template with :dtype/:tpclass.
  def accum(meth, dtype, tpclass)
    settings = { :dtype => dtype, :tpclass => tpclass }
    def_method(meth, -1, "accum", settings)
  end

  # Method using the "accum_index" template.
  def accum_index(meth)
    def_method(meth, -1, "accum_index")
  end

  # Method using the "cum" template with :cmacro.
  def cum(meth, cmacro)
    def_method(meth, -1, "cum", :cmacro => cmacro)
  end

  # Method using the "accum_binary" template; :op defaults to +meth+.
  def accum_binary(meth, ope = nil)
    ope = meth if ope.nil?
    def_method(meth, -1, "accum_binary", :op => ope)
  end

  # Builds a NodefFunction for the "qsort" template.
  def qsort(tp, dtype, dcast, suffix = "")
    settings = { :tp => tp, :dtype => dtype, :dcast => dcast, :suffix => suffix }
    NodefFunction.new(self, "qsort", settings)
  end

  # Same contract as #def_method, but builds a ModuleFunction.
  def def_mod_func(meth, n_arg, tmpl = nil, opts = {})
    settings = { :meth => meth, :n_arg => n_arg }
    settings.merge!(opts)
    tmpl ||= meth
    ModuleFunction.new(self, tmpl, settings)
  end

  # Declares a math module function taking +n+ arguments.
  #
  # When +tmpl+ is nil the template is picked from +n+:
  # 1 => "unary_s", 2 => "binary_s", 3 => "ternary_s".
  #
  # Raises RuntimeError ("invalid n=...") for any other +n+ when +tmpl+ is nil.
  def math(meth, n = 1, tmpl = nil)
    settings = { :mod_var => 'mTM' }
    if tmpl.nil?
      tmpl =
        case n
        when 1 then "unary_s"
        when 2 then "binary_s"
        when 3 then "ternary_s"
        else raise "invalid n=#{n}"
        end
    end
    def_mod_func(meth, n, tmpl, settings)
  end

  # Builds the StoreNum definition ("store_numeric" template).
  def store_numeric
    StoreNum.new(self, "store_numeric")
  end

  # Builds the StoreArray definition ("store_array" template).
  def store_array
    StoreArray.new(self, "store_array")
  end

  # Builds the CastArray definition ("cast_array" template).
  def cast_array
    CastArray.new(self, "cast_array")
  end

  # Builds a Store for copying from the type named +cname+.
  # The type name is +cname+ downcased and the class variable is "cumo_c" + cname.
  def store_from(cname, dtype, macro)
    Store.new(self, "store_from", cname.downcase, dtype, "cumo_c" + cname, macro)
  end

  # Builds a Store for the bit type named +cname+ (no dtype, no macro).
  def store_bit(cname)
    Store.new(self, "store_bit", cname.downcase, nil, "cumo_c" + cname, nil)
  end

  # Builds the "store" Function.
  # NOTE(review): the third argument is the string "store" rather than an
  # options hash, kept exactly as in the original -- confirm against Function.
  def store
    Function.new(self, "store", "store")
  end

  # First Function in Function::DEFS whose +meth+ equals the argument, or nil.
  def find_method(meth)
    Function::DEFS.find { |x| x.kind_of?(Function) && meth == x.meth }
  end

  # First Function in Function::DEFS whose +tmpl+ equals the argument, or nil.
  def find_tmpl(meth)
    Function::DEFS.find { |x| x.kind_of?(Function) && meth == x.tmpl }
  end

  # Name of the C cast function for this type.
  # Relies on the including object responding to +tp+.
  def cast_func
    "cumo_#{tp}_s_cast"
  end
end
158
-
159
- # ----------------------------------------------------------------------
160
-
161
# Describes one data type for code generation: its declared attributes, the
# derived C identifier names, and the list of upcast registration statements.
class DataType < ErbPP
  include DefMethod

  # erb_path  - path of the ERB source; also the base for the template dirs.
  # type_file - optional Ruby file evaluated to configure this instance.
  def initialize(erb_path, type_file)
    super(nil, erb_path)
    @class_alias = []
    @upcast = []
    @mod_var = "cT"
    load_type(type_file) if type_file
    # Template directories are resolved relative to the ERB file's directory;
    # "tmpl" is used when template_dir yields nothing.
    subdirs = template_dir || ["tmpl"]
    @tmpl_dirs = subdirs.map { |sub| File.join(File.dirname(erb_path), sub) }
  end

  attr_reader :tmpl_dirs

  # Evaluates the content of +file+ with this instance as +self+.
  # NOTE(review): this is a plain `eval` of the file's text, so +file+ must
  # be trusted input.
  def load_type(file)
    eval File.read(file)
  end

  # Attributes declared through ErbPP's define_attrs. real_class_name and
  # real_ctype are redefined further down with explicit fallbacks.
  define_attrs %w[
    class_name
    ctype
    template_dir
    blas_char
    complex_class_name
    complex_type
    real_class_name
    real_ctype
    has_math
    is_bit
    is_int
    is_unsigned
    is_float
    is_real
    is_complex
    is_object
    is_comparable
    is_double_precision
    mod_var
  ]

  # Lower-cased class name (memoized).
  def type_name
    @type_name ||= class_name.downcase
  end
  alias tp type_name

  # C variable name of the type class: "cumo_c" + class_name (memoized).
  def type_var
    @type_var ||= "cumo_c" + class_name
  end

  # C variable name of the math module: "cumo_m" + class_name + "Math" (memoized).
  def math_var
    @math_var ||= "cumo_m" + class_name + "Math"
  end

  # With an argument: sets the real class name.
  # Without: returns it, falling back to class_name.
  def real_class_name(arg = nil)
    return @real_class_name = arg unless arg.nil?
    @real_class_name ||= class_name
  end

  # With an argument: sets the real C type.
  # Without: returns it, falling back to ctype.
  def real_ctype(arg = nil)
    return @real_ctype = arg unless arg.nil?
    @real_ctype ||= ctype
  end

  # "cumo_c" + real_class_name (memoized).
  def real_type_var
    @real_type_var ||= "cumo_c" + real_class_name
  end

  # Lower-cased real_class_name (memoized).
  def real_type_name
    @real_type_name ||= real_class_name.downcase
  end

  # Appends +args+ to the list of class aliases and returns that list.
  def class_alias(*args)
    @class_alias.concat(args)
  end

  # Without +c+: returns the accumulated upcast statements.
  # With +c+: appends an rb_hash_aset statement keyed by cumo_c<c>; the value
  # is cumo_c<t> when +t+ is given, otherwise cT.
  def upcast(c = nil, t = nil)
    return @upcast unless c
    target = t ? "cumo_c#{t}" : "cT"
    @upcast << "rb_hash_aset(hCast, cumo_c#{c}, #{target});"
  end

  # Like #upcast, but keyed by the Ruby core class rb_c<c>. "Integer" expands
  # to a preprocessor block covering both unified and Fixnum/Bignum builds.
  def upcast_rb(c, t = nil)
    target = t ? "cumo_c#{t}" : "cT"
    if c == "Integer"
      @upcast.push(
        "#ifdef RUBY_INTEGER_UNIFICATION",
        "rb_hash_aset(hCast, rb_cInteger, #{target});",
        "#else",
        "rb_hash_aset(hCast, rb_cFixnum, #{target});",
        "rb_hash_aset(hCast, rb_cBignum, #{target});",
        "#endif"
      )
    else
      @upcast << "rb_hash_aset(hCast, rb_c#{c}, #{target});"
    end
  end
end
275
-
276
-
277
- # ----------------------------------------------------------------------
278
-
279
-
280
# Function definition for a type's allocator.
class Allocate < Function
  # Returns the C statement that registers c_func as the allocator of mod_var.
  def definition
    format("rb_define_alloc_func(%s, %s);", mod_var, c_func)
  end
end
285
-
286
- # ----------------------------------------------------------------------
287
-
288
# Definition of a "store from another type" function.
#
# Every instance (including subclass instances) is recorded in Store::DEFS,
# a registry separate from Function::DEFS.
class Store < Function
  DEFS = []

  attr_reader :tmpl, :tpname, :dtype, :tpclass, :macro

  # parent  - owning definition object.
  # tmpl    - template name.
  # tpname  - name of the source type (e.g. "bit", "robject", "array").
  # dtype   - C type of the source element, or nil.
  # tpclass - C expression naming the source class, or nil.
  # macro   - conversion macro name, or nil.
  def initialize(parent, tmpl, tpname, dtype, tpclass, macro)
    super(parent, tmpl)
    @tpname = tpname
    @dtype = dtype
    @tpclass = tpclass
    @macro = macro
    DEFS.push(self)
  end

  # Name of the generated C function.
  def c_func
    "cumo_#{tp}_store_#{tpname}"
  end

  # Name of the generated C iterator function.
  def c_iter
    "iter_#{tp}_store_#{tpname}"
  end

  # Store functions contribute no definition statement.
  def definition
    nil
  end

  # C condition testing whether +klass+ is this store's source class.
  def condition(klass)
    "#{klass}==#{tpclass}"
  end

  # Returns C code that reads one source element at +ptr+ + +pos+ and assigns
  # the converted value to the C lvalue named by +x+.
  #
  # The "bit" branch now interpolates +x+ like the other branches; it used to
  # hard-code the destination as the literal `x`, which was only correct when
  # the caller passed "x".
  def extract_data(ptr, pos, x)
    case tpname
    when "bit"
      "{BIT_DIGIT b; LOAD_BIT(#{ptr},#{pos},b); #{x} = m_from_real(b);}"
    when "robject"
      "#{x} = m_num_to_data(*(#{dtype}*)(#{ptr}+#{pos}))"
    when /complex/
      "{#{dtype} *p = (#{dtype}*)(#{ptr}+#{pos}); #{x} = c_new(REAL(*p),IMAG(*p));}"
    else
      "#{x} = m_from_real(*(#{dtype}*)(#{ptr}+#{pos}))"
    end
  end

  # Returns the registered stores that have a condition (entries whose
  # #condition is nil/false are skipped). Stores whose source type equals the
  # first parent's own type are moved to the front; the rest keep
  # registration order.
  def self.definitions
    ordered = []
    DEFS.each do |store|
      next unless store.condition("")
      if store.tpname == store.parents[0].class_name.downcase
        ordered.unshift(store)
      else
        ordered.push(store)
      end
    end
    ordered
  end
end
344
-
345
# Store definition whose source is a Ruby numeric value.
class StoreNum < Store
  # Registers as source type "numeric" with no dtype, class or macro.
  def initialize(parent, tmpl)
    super(parent, tmpl, "numeric", nil, nil, nil)
  end

  # C condition that is true for integer classes, Float and Complex.
  def condition(klass)
    [
      "IS_INTEGER_CLASS(#{klass})",
      "#{klass}==rb_cFloat",
      "#{klass}==rb_cComplex"
    ].join(" || ")
  end
end
354
-
355
# Store definition whose source is a Ruby Array.
class StoreArray < Store
  # Registers as source type "array" with no dtype, class or macro.
  def initialize(parent, tmpl)
    super(parent, tmpl, "array", nil, nil, nil)
  end

  # The C function is named after the template, not the source type.
  def c_func
    format("cumo_%s_%s", tp, tmpl)
  end

  # C condition testing whether +klass+ is rb_cArray.
  def condition(klass)
    format("%s==rb_cArray", klass)
  end
end
368
-
369
# Array cast definition; same setup as StoreArray but with "cast" names.
class CastArray < StoreArray
  # Has no class condition, so Store.definitions leaves it out.
  def condition(klass)
    nil
  end

  # Name of the generated C cast function.
  def c_func
    format("cumo_%s_cast_%s", tp, tpname)
  end

  # Name of the generated C cast iterator.
  def c_iter
    format("iter_%s_cast_%s", tp, tpname)
  end
end