tensor_stream 0.6.1 → 0.7.0

Files changed (54)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +10 -0
  3. data/CHANGELOG.md +8 -0
  4. data/README.md +40 -1
  5. data/benchmark/benchmark.rb +4 -1
  6. data/lib/tensor_stream.rb +5 -0
  7. data/lib/tensor_stream/debugging/debugging.rb +4 -2
  8. data/lib/tensor_stream/device.rb +2 -1
  9. data/lib/tensor_stream/evaluator/base_evaluator.rb +43 -32
  10. data/lib/tensor_stream/evaluator/evaluator.rb +0 -1
  11. data/lib/tensor_stream/evaluator/opencl/kernels/acos.cl +8 -0
  12. data/lib/tensor_stream/evaluator/opencl/kernels/apply_gradient.cl +9 -0
  13. data/lib/tensor_stream/evaluator/opencl/kernels/asin.cl +9 -0
  14. data/lib/tensor_stream/evaluator/opencl/kernels/floor_mod.cl +3 -0
  15. data/lib/tensor_stream/evaluator/opencl/kernels/log_softmax.cl +26 -0
  16. data/lib/tensor_stream/evaluator/opencl/kernels/max.cl +5 -5
  17. data/lib/tensor_stream/evaluator/opencl/kernels/min.cl +46 -0
  18. data/lib/tensor_stream/evaluator/opencl/kernels/real_div.cl +3 -0
  19. data/lib/tensor_stream/evaluator/opencl/kernels/softmax_cross.cl +27 -0
  20. data/lib/tensor_stream/evaluator/opencl/kernels/softmax_cross_grad.cl +28 -0
  21. data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb +5 -6
  22. data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb +200 -265
  23. data/lib/tensor_stream/evaluator/operation_helpers/array_ops_helper.rb +4 -8
  24. data/lib/tensor_stream/evaluator/ruby_evaluator.rb +193 -122
  25. data/lib/tensor_stream/exceptions.rb +6 -0
  26. data/lib/tensor_stream/graph.rb +21 -6
  27. data/lib/tensor_stream/graph_builder.rb +67 -0
  28. data/lib/tensor_stream/graph_deserializers/protobuf.rb +271 -0
  29. data/lib/tensor_stream/graph_keys.rb +1 -0
  30. data/lib/tensor_stream/graph_serializers/pbtext.rb +11 -10
  31. data/lib/tensor_stream/helpers/op_helper.rb +7 -33
  32. data/lib/tensor_stream/helpers/string_helper.rb +16 -0
  33. data/lib/tensor_stream/math_gradients.rb +67 -44
  34. data/lib/tensor_stream/nn/nn_ops.rb +7 -1
  35. data/lib/tensor_stream/operation.rb +14 -27
  36. data/lib/tensor_stream/ops.rb +82 -29
  37. data/lib/tensor_stream/session.rb +4 -0
  38. data/lib/tensor_stream/tensor.rb +30 -12
  39. data/lib/tensor_stream/tensor_shape.rb +1 -1
  40. data/lib/tensor_stream/train/gradient_descent_optimizer.rb +37 -4
  41. data/lib/tensor_stream/train/saver.rb +46 -0
  42. data/lib/tensor_stream/train/utils.rb +37 -0
  43. data/lib/tensor_stream/trainer.rb +2 -0
  44. data/lib/tensor_stream/utils.rb +24 -14
  45. data/lib/tensor_stream/variable.rb +5 -11
  46. data/lib/tensor_stream/variable_scope.rb +15 -0
  47. data/lib/tensor_stream/version.rb +1 -1
  48. data/samples/iris.rb +8 -4
  49. data/samples/linear_regression.rb +1 -1
  50. data/samples/multigpu.rb +73 -0
  51. data/samples/nearest_neighbor.rb +3 -3
  52. data/tensor_stream.gemspec +1 -1
  53. data/test_samples/raw_neural_net_sample.rb +4 -1
  54. metadata +21 -6
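
Most of the hunks below touch the OpenCL evaluator and its kernels. For orientation, here is a rough sketch of driving tensor_stream from Ruby, assuming the API used in the bundled samples (data/samples/linear_regression.rb); it is not part of this changeset:

    require 'tensor_stream'

    ts = TensorStream
    a = ts.constant([[1.0, 2.0], [3.0, 4.0]])
    b = ts.constant([[0.5, 0.5], [0.5, 0.5]])
    sum = ts.add(a, b)

    # The session picks a registered evaluator; the OpenCL evaluator registers
    # itself at the bottom of opencl_evaluator.rb (see the final hunk below).
    sess = ts.session
    puts sess.run(sum).inspect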
data/lib/tensor_stream/evaluator/opencl/kernels/min.cl
@@ -0,0 +1,46 @@
+ // same-dimension element-wise min op
+ % c_dtype = dtype_to_c_type(dtype)
+ __kernel void min_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global const <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+ // Get the index of the current element to be processed
+ const int globalRow = get_global_id(0); // Row ID of C (0..M)
+ const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+ C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <= B[globalRow * N + globalCol] ? A[globalRow * N + globalCol] : B[globalRow * N + globalCol];
+ }
+
+ // 1D + scalar min op
+ __kernel void min_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global const <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+ // Get the index of the current element to be processed
+ const int globalRow = get_global_id(0); // Row ID of C (0..M)
+ const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+ if (switch_op == 0) {
+ C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <= B[0] ? A[globalRow * N + globalCol] : B[0];
+ } else {
+ C[globalRow * N + globalCol] = B[0] <= A[globalRow * N + globalCol] ? B[0] : A[globalRow * N + globalCol];
+ }
+ }
+
+ // 1D + scalar min op with broadcast
+ __kernel void min_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global const <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+ // Get the index of the current element to be processed
+ const int globalRow = get_global_id(0); // Row ID of C (0..M)
+ const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+ int b_m_index = globalRow;
+ int b_n_index = globalCol;
+
+ if (b_m_index >= M2) {
+ b_m_index = b_m_index % M2;
+ }
+
+ if (b_n_index >= N2) {
+ b_n_index = b_n_index % N2;
+ }
+
+ if (switch_op == 0) {
+ C[globalRow * N + globalCol] = A[globalRow * N + globalCol] <= B[b_m_index * N2 + b_n_index] ? A[globalRow * N + globalCol] : B[b_m_index * N2 + b_n_index];
+ } else {
+ C[globalRow * N + globalCol] = B[b_m_index * N2 + b_n_index] <= A[globalRow * N + globalCol] ? B[b_m_index * N2 + b_n_index] : A[globalRow * N + globalCol];
+ }
+ }
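
The new min.cl kernel above follows the existing binary-op kernel pattern: one variant for same-shaped operands, one for a scalar operand (switch_op records whether the arguments were swapped), and one for row/column broadcast. A hedged usage sketch, assuming a ts.min front-end for the :min op registered later in this diff:

    require 'tensor_stream'

    ts = TensorStream
    a = ts.constant([[1.0, 5.0], [3.0, 2.0]])
    b = ts.constant([[4.0, 4.0], [4.0, 4.0]])
    scalar = ts.constant(2.5)

    sess = ts.session
    sess.run(ts.min(a, b))      # same-shape variant (min_<dtype>_<dtype>)
    sess.run(ts.min(a, scalar)) # scalar variant (min_c_<dtype>_<dtype>)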
data/lib/tensor_stream/evaluator/opencl/kernels/real_div.cl
@@ -0,0 +1,3 @@
+ % c_dtype = dtype_to_c_type(dtype)
+ % op = operator_to_c('div')
+ <%= render 'operand.cl', c_dtype: c_dtype, op: op, fname: 'real_div', dtype: "#{a}_#{b}", result_t: c_dtype %>
data/lib/tensor_stream/evaluator/opencl/kernels/softmax_cross.cl
@@ -0,0 +1,27 @@
+ // First naive implementation
+ % c_dtype = dtype_to_c_type(dtype)
+ __kernel void softmax_cross_<%= dtype %>(const int N,
+ const __global <%= c_dtype %>* A,
+ const __global <%= c_dtype %>* L,
+ __global <%= c_dtype %>* C) {
+
+ // Get the index of the current element to be processed
+ const int globalRow = get_global_id(0); // Row ID of C (0..M)
+
+ // Compute a single element (loop over K)
+ <%= c_dtype %> acc = 0.0f;
+ <%= c_dtype %> max = <%= min_value_for(dtype) %>;
+
+ for (int k=0; k<N; k++) {
+ max = A[globalRow*N + k] > max ? A[globalRow*N + k] : max;
+ }
+
+ for (int k=0; k<N; k++) {
+ acc += exp(A[globalRow*N + k] - max);
+ }
+
+ // Store the result
+ for (int k=0; k < N; k++) {
+ C[globalRow*N + k] = (log(acc) - (A[globalRow*N + k] - max)) * L[globalRow*N + k];
+ }
+ }
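
The softmax_cross kernel computes, per row, the cross entropy of the logits A against the labels L, subtracting the row maximum before exponentiating for numerical stability; it stores one term per class, and the row loss is the sum of those terms. A plain-Ruby mirror of the same computation, for reference only (not part of the gem):

    # cross_entropy = sum_k (log(sum_j exp(a[j] - max)) - (a[k] - max)) * l[k]
    def softmax_cross_entropy_row(logits_row, labels_row)
      max = logits_row.max                           # row max, for numerical stability
      acc = logits_row.sum { |v| Math.exp(v - max) } # softmax denominator
      logits_row.each_index.sum do |k|
        (Math.log(acc) - (logits_row[k] - max)) * labels_row[k]
      end
    end

    softmax_cross_entropy_row([2.0, 1.0, 0.1], [1.0, 0.0, 0.0]) # => ~0.417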
data/lib/tensor_stream/evaluator/opencl/kernels/softmax_cross_grad.cl
@@ -0,0 +1,28 @@
+ // First naive implementation
+ % c_dtype = dtype_to_c_type(dtype)
+ __kernel void softmax_cross_grad_<%= dtype %>(const int N,
+ const __global <%= c_dtype %>* A,
+ const __global <%= c_dtype %>* L,
+ const __global <%= c_dtype %>* G,
+ __global <%= c_dtype %>* C) {
+
+ // Get the index of the current element to be processed
+ const int globalRow = get_global_id(0); // Row ID of C (0..M)
+
+ // Compute a single element (loop over K)
+ <%= c_dtype %> acc = 0.0f;
+ <%= c_dtype %> max = <%= min_value_for(dtype) %>;
+
+ for (int k=0; k<N; k++) {
+ max = A[globalRow*N + k] > max ? A[globalRow*N + k] : max;
+ }
+
+ for (int k=0; k<N; k++) {
+ acc += exp(A[globalRow*N + k] - max);
+ }
+
+ // Store the result
+ for (int k=0; k < N; k++) {
+ C[globalRow*N + k] = ((exp(A[globalRow*N + k] - max)/acc) * G[globalRow*N + k] - L[globalRow*N + k]);
+ }
+ }
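
softmax_cross_grad produces the gradient with respect to the logits: softmax(A) scaled by the incoming gradient G, minus the labels L. The same arithmetic in plain Ruby, for reference only:

    def softmax_cross_grad_row(logits_row, labels_row, grad_row)
      max = logits_row.max
      acc = logits_row.sum { |v| Math.exp(v - max) }
      logits_row.each_index.map do |k|
        (Math.exp(logits_row[k] - max) / acc) * grad_row[k] - labels_row[k]
      end
    end

    softmax_cross_grad_row([2.0, 1.0, 0.1], [1.0, 0.0, 0.0], [1.0, 1.0, 1.0])
    # => ~[-0.341, 0.242, 0.099], i.e. softmax(logits) - labels when the incoming gradient is all ones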
data/lib/tensor_stream/evaluator/opencl/opencl_buffer.rb
@@ -1,10 +1,11 @@
  module TensorStream
+ # Buffer used by the OpenCL evaluator
  class OpenCLBuffer < Buffer
  include ArrayOpsHelper

  attr_accessor :shape, :buffer, :cl_buffer, :op

- def initialize(data_type: , shape:, buffer:, cl_buffer:, op: nil, name: nil)
+ def initialize(data_type:, shape:, buffer:, cl_buffer:, op: nil, name: nil)
  @data_type = data_type
  @shape = shape
  @buffer = buffer
@@ -25,12 +26,10 @@ module TensorStream
  op.command_queue.finish
  self.dirty = false
  end
- result = buffer.reshape(*shape.map { |s| s.to_i}.reverse).to_a

- if data_type == :boolean
- result = process_function_op(result, ->(a, _b) { a != 0 })
- end
+ result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
+ result = process_function_op(result, ->(a, _b) { a != 0 }) if data_type == :boolean
  result
  end
  end
- end
+ end
data/lib/tensor_stream/evaluator/opencl/opencl_evaluator.rb
@@ -102,6 +102,7 @@ module TensorStream

  def complete_eval(tensor, context)
  buffer = _run(tensor, context)
+
  if buffer.is_a?(Array)
  buffer = buffer.collect do |b|
  next b if b.buffer.size.zero?
@@ -109,7 +110,8 @@ module TensorStream
  b
  end
  else
- return buffer if buffer.nil?
+ return buffer.outputs[0] if buffer.is_a?(OutputGroup)
+ return buffer if buffer.nil?
  return [] if buffer.buffer.nil?
  return buffer if buffer.buffer.size.zero?
  _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: [buffer.op].compact)
@@ -150,13 +152,6 @@ module TensorStream
  @opencl_context = OpenCL.create_context(opencl_device)
  end

- def choose_best_device
- @best_device ||= begin
- devices = OpenclEvaluator.query_devices_with_score
- devices.sort { |a| a[1] }.reverse.first
- end
- end
-
  def self.query_devices_with_score
  OpenCL.platforms.flat_map do |p|

@@ -282,18 +277,82 @@ module TensorStream
  assign_var(tensor, value, context)
  end

+ # Fast in place multiply subtract assign
+ register_op :apply_gradient_descent do |_context, tensor, inputs|
+ _target_var, learning_rate, delta = inputs
+
+ assign = tensor.inputs[0] || tensor
+
+ unless assign.buffer
+ value = read_final_result(buffer)
+ assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
+ assign.value = value
+ end
+
+ assign.buffer.dirty = true # force buffer copy when variable is read externally
+ output_buffer = assign.buffer
+
+ m, n = output_buffer.shape
+ work_group = [m || 1, n || 1]
+ cl_m = OpenCL::Int1.new(m || 1)
+ cl_n = OpenCL::Int1.new(n || 1)
+
+ event_wait_list = [assign.buffer.op, learning_rate.op, delta.op].compact # add dependency wait list
+ method_call = :"apply_gradient_#{output_buffer.data_type}"
+ event = _cl_program("apply_gradient", dtype: output_buffer.data_type).send(method_call, _opencl_queue, work_group, cl_m, cl_n, delta.cl_buffer, learning_rate.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+ output_buffer.op = event
+ output_buffer
+ end
+
  %i[less less_equal greater greater_equal equal not_equal logical_and].each do |op|
  register_op op, noop: true do |context, tensor, inputs|
  execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], context, 'cond')
  end
  end

- %i[max add div sub mod mul pow sigmoid_grad squared_difference].each do |op|
+ %i[max min add real_div div sub floor_mod mod mul pow sigmoid_grad squared_difference].each do |op|
  register_op op, noop: true do |context, tensor, inputs|
  execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], context)
  end
  end

+ register_op :add_n do |_context, tensor, inputs|
+ if inputs.size == 1
+ inputs[0]
+ else
+ m, n = inputs[0].shape
+ work_group = [m || 1, n || 1]
+ cl_m = OpenCL::Int1.new(m || 1)
+ cl_n = OpenCL::Int1.new(n || 1)
+ cl_switch = OpenCL::Int1.new(0)
+ dtype = tensor.data_type
+
+ output_buffer = _create_result_buffer(tensor.data_type, inputs[0].shape, "out_#{tensor.name}")
+ inputs_queue = inputs.dup
+ a = inputs_queue.pop
+ until inputs_queue.empty?
+ b = inputs_queue.pop
+ event_wait_list = [a.op, b.op].compact
+ method_call = :"add_#{a.data_type}_#{b.data_type}"
+ event = _cl_program('add', a: a.data_type, b: b.data_type, dtype: dtype).send(method_call, _opencl_queue, work_group, cl_m, cl_n, cl_switch, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+ a = output_buffer
+ a.op = event
+ end
+
+ output_buffer.op = a.op
+ output_buffer
+ end
+ end
+
+ register_op :expand_dims, buffer: true do |_context, tensor, inputs|
+ axis = inputs[1].buffer[0]
+ shape = inputs[0].shape.dup
+ axis = -axis if axis == shape.size
+ new_shape = shape.insert(axis, 1).compact
+ new_buf = inputs[0].buffer.reshape(*new_shape.reverse)
+ convert_to_opencl(new_buf, new_shape, data_type: inputs[0].data_type, name: tensor.name)
+ end
+
  register_op :floor_div, noop: true do |context, tensor, inputs|
  if fp_type?(tensor.data_type)
  execute_2_operand_func('floor_div', tensor, inputs[0], inputs[1], context)
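
The hunk above adds :apply_gradient_descent, which fuses the var -= learning_rate * delta update into a single kernel instead of materialising the intermediate products. A hedged training sketch, assuming the API used in data/samples/linear_regression.rb; minimizing with GradientDescentOptimizer is what should route updates through this op:

    require 'tensor_stream'

    ts = TensorStream
    x = ts.placeholder("float")
    y = ts.placeholder("float")
    w = ts.variable(0.1, name: 'weight')
    b = ts.variable(0.0, name: 'bias')

    pred = x * w + b
    cost = ts.reduce_mean(ts.square(pred - y))
    train_step = TensorStream::Train::GradientDescentOptimizer.new(0.01).minimize(cost)

    sess = ts.session
    sess.run(ts.global_variables_initializer)
    10.times { sess.run(train_step, feed_dict: { x => [1.0, 2.0, 3.0], y => [2.0, 4.0, 6.0] }) }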
@@ -307,7 +366,7 @@ module TensorStream
  execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
  end

- register_op :matmul do |_context, tensor, inputs|
+ register_op :mat_mul do |_context, tensor, inputs|
  a, b = inputs

  m = a.shape[0]
@@ -355,7 +414,7 @@ module TensorStream
  end
  end

- %i[sign exp tan sin cos abs sqrt negate square reciprocal tanh tanh_grad sigmoid log1p round floor ceil].each do |op|
+ %i[sign exp tan acos asin sin cos abs sqrt negate square reciprocal tanh tanh_grad sigmoid log1p round floor ceil].each do |op|
  register_op op, noop: true do |context, tensor, inputs|
  execute_func(op.to_s, tensor, inputs[0], context)
  end
@@ -377,6 +436,57 @@ module TensorStream
  output_buffer
  end

+ register_op :log_softmax do |_context, tensor, inputs|
+ a = inputs[0] # logits
+ event_wait_list = [a.op].compact
+ dtype = tensor.data_type
+ output_buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
+
+ m, n = a.shape
+ work_group = [m]
+ n = m if n.nil?
+ cl_n = OpenCL::Int1.new(n || 1)
+
+ event = _cl_program("log_softmax", dtype: dtype).send(:"log_softmax_#{dtype}", _opencl_queue, work_group, cl_n, a.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+ output_buffer.op = event
+ output_buffer
+ end
+
+ register_op :softmax_cross_entropy_with_logits_v2 do |_context, tensor, inputs|
+ a = inputs[0] # logits
+ b = inputs[1] # labels
+ event_wait_list = [a.op, b.op].compact
+ dtype = tensor.data_type
+ output_buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
+
+ m, n = a.shape
+ work_group = [m]
+ n = m if n.nil?
+ cl_n = OpenCL::Int1.new(n || 1)
+
+ event = _cl_program("softmax_cross", dtype: dtype).send(:"softmax_cross_#{dtype}", _opencl_queue, work_group, cl_n, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+ output_buffer.op = event
+ output_buffer
+ end
+
+ register_op :softmax_cross_entropy_with_logits_v2_grad do |_context, tensor, inputs|
+ a = inputs[0] # logits
+ b = inputs[1] # labels
+ c = inputs[2] # grads
+ event_wait_list = [a.op, b.op, c.op].compact
+ dtype = tensor.data_type
+ output_buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
+
+ m, n = a.shape
+ work_group = [m]
+ n = m if n.nil?
+ cl_n = OpenCL::Int1.new(n || 1)
+
+ event = _cl_program("softmax_cross_grad", dtype: dtype).send(:"softmax_cross_grad_#{dtype}", _opencl_queue, work_group, cl_n, a.cl_buffer, b.cl_buffer, c.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+ output_buffer.op = event
+ output_buffer
+ end
+
  register_op :softmax_grad do |_context, tensor, inputs|
  a, grad = inputs

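
These three registrations dispatch to the log_softmax, softmax_cross and softmax_cross_grad kernels shown earlier, using one work item per row. A usage sketch, assuming nn front-ends whose names and keyword arguments mirror TensorFlow's (the exact signatures in nn_ops.rb may differ):

    require 'tensor_stream'

    ts = TensorStream
    logits = ts.constant([[2.0, 1.0, 0.1]])
    labels = ts.constant([[1.0, 0.0, 0.0]])

    loss = ts.nn.softmax_cross_entropy_with_logits_v2(labels: labels, logits: logits)
    log_probs = ts.nn.log_softmax(logits)

    sess = ts.session
    sess.run(loss)
    sess.run(log_probs)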
@@ -417,30 +527,6 @@ module TensorStream
  end
  end

- register_op :truncate do |_context, tensor, inputs|
- a, b = inputs
- if a.shape.size.zero?
- a
- else
- input_b = read_final_result(b)
- if a.shape == input_b
- a
- else
- input_a = read_final_result(a)
- if input_b == []
- if a.buffer.size == 1
- a.shape = input_b
- a
- else
- wrap_opencl(a.buffer[0], data_type: a.data_type, name: tensor.name)
- end
- else
- wrap_opencl(truncate(input_a, input_b), data_type: a.data_type, name: tensor.name)
- end
- end
- end
- end
-
  register_op :print do |context, tensor, inputs|
  a, b = inputs
  input_b = complete_eval(b, context)
@@ -475,23 +561,27 @@ module TensorStream
  convert_to_opencl(transposed.flatten, transposed.shape.reverse, data_type: inputs[0].data_type, name: tensor.name)
  end

- register_op :index, buffer: true do |_context, tensor, inputs|
- a = inputs[0]
- input_a = read_final_result(a)
- index = read_final_result(inputs[1])
+ register_op :index, noop: true do |context, tensor, inputs|
+ a = _run(inputs[0], context)
+ index = read_final_result(_run(inputs[1], context))

- if a.is_a?(Array)
- a[index]
+ if a.is_a?(OutputGroup)
+ a.outputs[index]
  else
- new_shape = a.shape.dup
- new_shape.shift
- convert_to_opencl(input_a[index], new_shape, data_type: a.data_type, name: tensor.name)
+ if a.is_a?(Array)
+ a[index]
+ else
+ new_shape = a.shape.dup
+ new_shape.shift
+ input_a = read_final_result(a)
+ convert_to_opencl(input_a[index], new_shape, data_type: a.data_type, name: tensor.name)
+ end
  end
  end

  register_op :broadcast_gradient_args, buffer: true do |_context, tensor, inputs|
  rx, ry = get_broadcast_gradient_args(inputs[0].buffer.to_a, inputs[1].buffer.to_a)
- [ wrap_opencl(rx, data_type: :int32, name: "#{tensor.name}"), wrap_opencl(ry, data_type: :int32, name: "#{tensor.name}:1")]
+ OutputGroup.new([wrap_opencl(rx, data_type: :int32, name: "#{tensor.name}"), wrap_opencl(ry, data_type: :int32, name: "#{tensor.name}:1")])
  end

  register_op :shape do |_context, tensor, inputs|
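
broadcast_gradient_args now returns its two reduction-axis lists wrapped in an OutputGroup, and :index was reworked to unwrap such groups, falling back to plain arrays and buffers as before. A simplified, hypothetical illustration of that pattern, separate from the gem's own classes:

    # Hypothetical stand-ins; OutputGroup in the gem wraps evaluator buffers.
    ExampleGroup = Struct.new(:outputs)

    def index_op(value, index)
      if value.is_a?(ExampleGroup)
        value.outputs[index]   # pick one output of a multi-output op
      elsif value.is_a?(Array)
        value[index]
      else
        value                  # a real buffer would be sliced along its first dimension here
      end
    end

    group = ExampleGroup.new([[0], [1]]) # stand-ins for the rx/ry reduction axes
    index_op(group, 1) # => [1]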
@@ -537,6 +627,9 @@ module TensorStream

  register_op :argmin, buffer: true do |_context, tensor, inputs|
  axis = tensor.options[:axis] || 0
+ rank = inputs[0].shape.size
+ raise TensorStream::InvalidArgumentError, "Expected dimension in the range [#{-rank},#{rank}) but got #{axis}" if axis < -rank || axis >= rank
+
  arr = inputs[0].buffer.reshape(*inputs[0].shape.reverse).to_a
  op = get_op_with_axis(arr, axis, 0, inputs[0].data_type, ->(a, b) { a < b })
  convert_to_opencl(op, shape_eval(op), data_type: tensor.data_type, name: tensor.name)
@@ -544,6 +637,9 @@ module TensorStream

  register_op :argmax, buffer: true do |_context, tensor, inputs|
  axis = tensor.options[:axis] || 0
+ rank = inputs[0].shape.size
+ raise TensorStream::InvalidArgumentError, "Expected dimension in the range [#{-rank},#{rank}) but got #{axis}" if axis < -rank || axis >= rank
+
  arr = inputs[0].buffer.reshape(*inputs[0].shape.reverse).to_a
  op = get_op_with_axis(arr, axis, 0, inputs[0].data_type, ->(a, b) { a > b })
  convert_to_opencl(op, shape_eval(op), data_type: tensor.data_type, name: tensor.name)
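
Both argmin and argmax now validate the requested axis against the rank of the input before reducing, raising TensorStream::InvalidArgumentError (added in exceptions.rb) for out-of-range values. A hedged sketch, assuming a ts.argmax(input, axis) front-end:

    require 'tensor_stream'

    ts = TensorStream
    t = ts.constant([[1, 9], [7, 3]]) # rank 2, so valid axes are -2 .. 1

    sess = ts.session
    sess.run(ts.argmax(t, 1)) # => [1, 0]

    begin
      sess.run(ts.argmax(t, 2)) # out of range
    rescue TensorStream::InvalidArgumentError => e
      puts e.message # "Expected dimension in the range [-2,2) but got 2"
    end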
@@ -559,7 +655,7 @@ module TensorStream
  # puts "#{tensor.to_math(true,1)} = #{read_final_result(complete_eval(result, child_context))}"
  if tensor.breakpoint
  a = resolve_placeholder(tensor.inputs[0], child_context) if tensor.inputs && tensor.inputs[0]
- b = resolve_placeholder(tensor.inputs[1], child_context) if tensor.inputs && tensor.inputs[1]
+ b = resolve_placeholder(tensor.inputs[1], child_context) if tensor.inputs && tensor.inputs[1]
  a = read_final_result(complete_eval(a, child_context))
  b = read_final_result(complete_eval(b, child_context))
  result = read_final_result(complete_eval(result, child_context))
@@ -581,6 +677,8 @@ module TensorStream
  end
  rescue EvaluatorExcecutionException => e
  raise e
+ rescue TensorStreamError => e
+ raise e
  rescue StandardError => e
  _opencl_queue.finish # dump queue
  puts e.message
@@ -614,7 +712,8 @@ module TensorStream
  else
  wrap_opencl(tensor, name: tensor.name)
  end
- @context[:_cache][cache_key] = @context[cache_key] if tensor.is_const
+ @context[:_cache][cache_key] = @context[cache_key] if tensor.is_const
+ @context[cache_key]
  end

  private
@@ -625,11 +724,11 @@ module TensorStream

  if assign.buffer
  # buffer = type_cast(buffer, assign.data_type, name: "#{tensor.name}/cast_#{tensor.name}_#{tensor.data_type}")
- if assign.buffer.cl_buffer != buffer.cl_buffer
- assign.buffer.op = _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: [buffer.op, assign.buffer.op])
- else
- assign.buffer.op = buffer.op
- end
+ assign.buffer.op = if assign.buffer.cl_buffer != buffer.cl_buffer
+ _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: [buffer.op, assign.buffer.op])
+ else
+ buffer.op
+ end
  else
  value = read_final_result(buffer)
  assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
@@ -660,12 +759,12 @@ module TensorStream

  method_call = :"#{prog}_#{a.data_type}_#{b.data_type}"
  event = if prog == "#{op_name}_b"
  cl_m_b, cl_n_b = if b.shape.size == 2
- [ OpenCL::Int1.new(b.shape[0]), OpenCL::Int1.new(b.shape[1]) ]
- elsif b.shape.size == 1
- [ OpenCL::Int1.new(1), OpenCL::Int1.new(b.shape[0]) ]
- else
- raise "rank > 2 not supported!"
- end
+ [OpenCL::Int1.new(b.shape[0]), OpenCL::Int1.new(b.shape[1])]
+ elsif b.shape.size == 1
+ [OpenCL::Int1.new(1), OpenCL::Int1.new(b.shape[0])]
+ else
+ raise "rank > 2 not supported!"
+ end
  _cl_program("#{prog_name || op_name}", a: a.data_type, b: b.data_type, dtype: dtype).send(method_call, _opencl_queue, work_group, cl_m, cl_n, cl_m_b, cl_n_b, cl_switch, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
  else
  _cl_program("#{prog_name || op_name}", a: a.data_type, b: b.data_type, dtype: dtype).send(method_call, _opencl_queue, work_group, cl_m, cl_n, cl_switch, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
@@ -691,7 +790,7 @@ module TensorStream
  cl_n = OpenCL::Int1.new(n || 1)

  event_wait_list = [a.op, b.op, p.op].compact # add dependency wait list
- output_buffer.op = _cl_program("#{op_name}", dtype: dtype).send(:"#{op_name}_#{dtype}", _opencl_queue, work_group, cl_m, cl_n, p.cl_buffer, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+ output_buffer.op = _cl_program(op_name.to_s, dtype: dtype).send(:"#{op_name}_#{dtype}", _opencl_queue, work_group, cl_m, cl_n, p.cl_buffer, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
  output_buffer
  end

@@ -706,7 +805,7 @@ module TensorStream
  cl_m = OpenCL::Int1.new(m || 1)
  cl_n = OpenCL::Int1.new(n || 1)

- event = _cl_program("#{op_name}", dtype: dtype).send(:"#{op_name}_#{dtype}", _opencl_queue, work_group, cl_m, cl_n, a.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+ event = _cl_program(op_name.to_s, dtype: dtype).send(:"#{op_name}_#{dtype}", _opencl_queue, work_group, cl_m, cl_n, a.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
  output_buffer.op = event
  output_buffer
  end
@@ -741,60 +840,58 @@ module TensorStream

  def wrap_opencl(tensor, data_type: nil, name: nil)
  value, shape = if tensor.is_a?(Tensor)
- [tensor.value, tensor.shape.shape]
- else
- [tensor , shape_eval(tensor)]
- end
+ [tensor.value, tensor.shape.shape]
+ else
+ [tensor, shape_eval(tensor)]
+ end

  convert_to_opencl(value, shape, data_type: data_type || tensor.data_type, name: name)
  end

  def convert_to_opencl(value, shape, data_type: nil, name: nil)
- if !value.is_a?(Array) && !value.is_a?(NArray)
- value = [value]
- end
+ value = [value] if !value.is_a?(Array) && !value.is_a?(NArray)

  cache_key = "_cl_object_#{name}:#{shape.join('_')}:#{object_id}"
- cl_object = if name && @context[:_cache][cache_key]
+ cl_object = if name && @context[:_cache][cache_key]
  @context[:_cache][cache_key]
- else
- narray_size = shape.reduce(:*) || 1
+ else
+ narray_size = shape.reduce(:*) || 1

- buffer = if value.is_a?(NArray)
- value
- else
- allocate_narray_for_type(data_type, narray_size)
- end
+ buffer = if value.is_a?(NArray)
+ value
+ else
+ allocate_narray_for_type(data_type, narray_size)
+ end

- cl_buffer_size = shape.empty? ? 1 : shape.reduce(:*)
+ cl_buffer_size = shape.empty? ? 1 : shape.reduce(:*)

- cl_buffer = if !value.flatten.empty?
- cl_buffer_size = 1 if cl_buffer_size.zero?
- _opencl_context.create_buffer(cl_buffer_size * buffer.element_size)
- else
- nil
- end
+ cl_buffer = unless value.flatten.empty?
+ cl_buffer_size = 1 if cl_buffer_size.zero?
+ _opencl_context.create_buffer(cl_buffer_size * buffer.element_size)
+ end

- @context[:_cache][cache_key] = OpenCLBuffer.new(name: name, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
- end
+ @context[:_cache][cache_key] = OpenCLBuffer.new(name: name, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
+ end

  if value.is_a?(Array)
  value.flatten.each_with_index do |element, index|
- if element.is_a?(Tensor)
- cl_object.buffer[index] = read_final_result(complete_eval(element, {}))
- else
- cl_object.buffer[index] = ( data_type == :boolean ? ( element ? 1 : 0 ) : Tensor.cast_dtype(element, data_type))
- end
+ cl_object.buffer[index] = if element.is_a?(Tensor)
+ read_final_result(complete_eval(element, {}))
+ elsif data_type == :boolean
+ element ? 1 : 0
+ else
+ Tensor.cast_dtype(element, data_type)
+ end
  end
  elsif value.is_a?(NArray)
  cl_object.buffer = value
+ elsif data_type == :boolean
+ cl_object.buffer[0] = element ? 1 : 0
  else
- cl_object.buffer[0] = ( data_type == :boolean ? ( element ? 1 : 0 ) : Tensor.cast_dtype(value, data_type))
+ cl_object.buffer[0] = Tensor.cast_dtype(value, data_type)
  end

- write_op = if cl_object.cl_buffer && !value.nil? && (!value.is_a?(Array) || !value.empty?)
- _opencl_queue.enqueue_write_buffer(cl_object.cl_buffer, cl_object.buffer)
- end
+ write_op = _opencl_queue.enqueue_write_buffer(cl_object.cl_buffer, cl_object.buffer) if cl_object.cl_buffer && !value.nil? && (!value.is_a?(Array) || !value.empty?)
  cl_object.op = write_op
  cl_object
  end
@@ -861,7 +958,7 @@ module TensorStream

  def _reduced_shape(input_shape, axes)
  return [] if axes.nil? # reduce to scalar
- axes = [ axes ] unless axes.is_a?(Array)
+ axes = [axes] unless axes.is_a?(Array)
  return input_shape if axes.empty?

  axes.each do |dimen|
@@ -882,8 +979,7 @@ module TensorStream
  rank = input.shape.size - 1

  if axis.is_a?(Array)
- axis.map{ |x| rank - x.abs }.sort.reverse.each do |x|
-
+ axis.map { |x| rank - x.abs }.sort.reverse_each do |x|
  value = value.send(func, x.to_i)
  end
  else
@@ -891,75 +987,21 @@ module TensorStream
  end

  new_shape = if value.is_a?(NArray)
- value.shape.reverse
- else
- value = [value]
- []
- end
+ value.shape.reverse
+ else
+ value = [value]
+ []
+ end

- if tensor.options[:keepdims]
- new_shape = _reduced_shape(input.shape.dup, axis)
- end
+ new_shape = _reduced_shape(input.shape.dup, axis) if tensor.options[:keepdims]

  convert_to_opencl(value.flatten, new_shape, data_type: tensor.data_type, name: tensor.name)
  end
  end

- def arr_pad(arr, paddings, data_type = :float32, rank = 0)
- raise "padding #{paddings[rank]} needs to have to elements [before, after]" if paddings[rank].size != 2
-
- before = paddings[rank][0]
- after = paddings[rank][1]
- pad_value = fp_type?(data_type) ? 0.0 : 0
- if arr[0].is_a?(Array)
- next_dim_elem = arr.collect { |a| arr_pad(a, paddings, data_type, rank + 1) }
- padding = deep_dup_array(next_dim_elem[0], pad_value)
- Array.new(before) { padding } + next_dim_elem + Array.new(after) { padding }
- else
- Array.new(before) { pad_value } + arr + Array.new(after) { pad_value }
- end
- end
-
- def deep_dup_array(arr, value = nil)
- if arr.is_a?(Array)
- arr.dup.collect do |a|
- deep_dup_array(a, value)
- end
- else
- value.nil? ? arr : value
- end
- end
-
- def matmul_const_transform(mat, mat_b, tensor)
- if !mat.is_a?(Array)
- compat_shape = shape_eval(mat_b).reverse
- func = -> { tensor.data_type == :int32 ? mat.to_i : mat.to_f }
-
- generate_vector(compat_shape, generator: func)
- else
- mat
- end
- end
-
- # determine possible reduction axis to be used
- def _broadcast_gradient_op(vector_shape1, vector_shape2, level)
- va_rank = _rank_from_shape(vector_shape1)
- vb_rank = _rank_from_shape(vector_shape2)
- return [] if vector_shape1 == vector_shape2 # same shape so no reductions
-
- shape2_r = vector_shape2.reverse
-
- vector_shape1.reverse.each_with_index.collect do |s, index|
- next va_rank - index - 1 if index >= shape2_r.size
- next nil if shape2_r[index] == s
- next nil if shape2_r[index] > s
- va_rank - index - 1
- end.compact
- end
-
  # selects variants of cl programs depending on input
  def select_program(input_a, input_b, op)
- return [input_a, input_b, "#{op}", 0] if input_a.shape == input_b.shape
+ return [input_a, input_b, op.to_s, 0] if input_a.shape == input_b.shape

  return [input_b, input_a, "#{op}_c", 1] if input_a.shape.empty? || input_a.shape.reduce(:*) == 1 # A is scalar?
  return [input_a, input_b, "#{op}_c", 0] if input_b.shape.empty? || input_a.shape.reduce(:*) == 1 # B is scalar?
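
select_program is the dispatcher that decides which kernel variant a binary op should call: the plain kernel for same-shaped operands, the _c scalar kernel (with a switch flag recording whether the operands were swapped), or the _b broadcasting kernel. A simplified illustration of that decision table (not the gem's code):

    def pick_variant(shape_a, shape_b, op)
      return [op.to_s, 0]   if shape_a == shape_b
      return ["#{op}_c", 1] if shape_a.empty? || shape_a.reduce(:*) == 1 # A is the scalar, operands swapped
      return ["#{op}_c", 0] if shape_b.empty? || shape_b.reduce(:*) == 1 # B is the scalar
      ["#{op}_b", 0]                                                     # general broadcast case
    end

    pick_variant([2, 2], [2, 2], :min) # => ["min", 0]
    pick_variant([], [2, 2], :min)     # => ["min_c", 1]
    pick_variant([2, 2], [1, 2], :min) # => ["min_b", 0]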
@@ -979,26 +1021,6 @@ module TensorStream
  shape.is_a?(Array) ? shape.size : 0
  end

- def concat_array(values, axis)
- combined_array = values.shift
- axis = get_rank(combined_array) - 1 if axis == -1
-
- values.each do |v|
- combined_array = concat(combined_array, v, axis)
- end
- combined_array
- end
-
- def concat(a, b, axis)
- if axis.zero?
- a + b
- else
- a.each_with_index.collect do |i, index|
- concat(i, b[index], axis - 1)
- end
- end
- end
-
  def resolve_placeholder(placeholder, _execution_context = {})
  return nil if placeholder.nil?

@@ -1014,43 +1036,6 @@ module TensorStream
  Tensor.cast_dtype(var, placeholder.data_type)
  end

- def reduce_axis(current_axis, axis, val, keep_dims, f = ->(a, b) { a + b })
- return val unless val.is_a?(Array)
-
- r = val.collect do |v|
- reduce_axis(current_axis + 1, axis, v, keep_dims, f)
- end
-
- should_reduce_axis = axis.nil? || (axis.is_a?(Array) && axis.include?(current_axis)) || (current_axis == axis)
-
- if should_reduce_axis
- reduced_val = r[0]
- if r.size > 1
- reduced_val = f.call(r[0..val.size])
- elsif r.size.zero?
- reduced_val = f.call(nil)
- end
- keep_dims ? [ reduced_val ] : reduced_val
- else
- r
- end
- end
-
- # handle 3 tensor math operations
- def call_3way_vector_op(v_a, v_b, v_c, child_context, op = ->(a, b, c) { a + b + c })
- return op.call(v_a, v_b, v_c) unless v_a.is_a?(Array)
-
- v_a.each_with_index.collect do |v1, index|
- v2 = v_b[index]
- v3 = v_c[index]
- if v1.is_a?(Array)
- call_3way_vector_op(v1, v2, v3, child_context, op)
- else
- op.call(v1, v2, v3)
- end
- end
- end
-
  def all_true?(arr)
  if arr.is_a?(Array) || arr.is_a?(NArray)
  arr.each do |a|
@@ -1061,58 +1046,8 @@ module TensorStream

  arr != 0
  end
-
- def generate_vector(shape, dtype: :float32, generator:)
- if shape.is_a?(Integer)
- Array.new(shape) do
- generator.call
- end
- elsif shape.size > 1
- Array.new(shape[0]) do
- generate_vector(shape[1..shape.size], generator: generator, dtype: dtype)
- end
- elsif shape.size == 1
- Array.new(shape[0]) do
- generator.call
- end
- elsif shape.size.zero?
- generator.call
- end
- end
-
- def _get_randomizer(tensor, seed)
- if tensor.graph.random_seed && seed
- Random.new(tensor.graph.random_seed ^ seed)
- elsif tensor.graph.random_seed
- @session.randomizer[tensor.graph.object_id] ||= Random.new(tensor.graph.random_seed)
- @session.randomizer[tensor.graph.object_id]
- elsif seed
- @session.randomizer[tensor.operation] ||= Random.new(seed)
- @session.randomizer[tensor.operation]
- else
- Random.new
- end
- end
-
- def dump_intermediates
- arr = []
- arr << "============== start ==================="
- @context[:compute_history].each_with_index do |history, index|
- arr << "------------------------------------"
- arr << history[:name]
- arr << "#{history[:type]} #{history[:shape]}"
- arr << history[:source]
- arr << history[:description]
- arr << ""
- arr << history[:value].to_json
- arr << "------------------------------------"
- end
- arr << "============== end ====================="
- str = arr.join("\n")
- File.write("/tmp/intermediates.txt", str)
- end
  end
  end
  end

- TensorStream::Evaluator.register_evaluator(TensorStream::Evaluator::OpenclEvaluator, 'opencl', 1)
+ TensorStream::Evaluator.register_evaluator(TensorStream::Evaluator::OpenclEvaluator, 'opencl', 1)