RubyGems - tensor_stream-opencl - Versions diffs - 0.1.3 → 0.2.0 - Mend

tensor_stream-opencl 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

checksums.yaml +4 -4
data/Gemfile.lock +11 -4
data/benchmark/benchmark.rb +91 -0
data/benchmark_intel.txt +36 -0
data/lib/tensor_stream/opencl/array_ops.rb +395 -0
data/lib/tensor_stream/opencl/images_ops.rb +62 -0
data/lib/tensor_stream/opencl/kernels/abs.cl +6 -8
data/lib/tensor_stream/opencl/kernels/acos.cl +3 -4
data/lib/tensor_stream/opencl/kernels/apply_adadelta.cl +2 -4
data/lib/tensor_stream/opencl/kernels/apply_adagrad.cl +12 -0
data/lib/tensor_stream/opencl/kernels/apply_adam.cl +2 -5
data/lib/tensor_stream/opencl/kernels/apply_centered_rms_prop.cl +19 -0
data/lib/tensor_stream/opencl/kernels/apply_gradient.cl +3 -4
data/lib/tensor_stream/opencl/kernels/apply_momentum.cl +2 -4
data/lib/tensor_stream/opencl/kernels/apply_rms_prop.cl +16 -0
data/lib/tensor_stream/opencl/kernels/asin.cl +3 -4
data/lib/tensor_stream/opencl/kernels/ceil.cl +3 -4
data/lib/tensor_stream/opencl/kernels/concat.cl +21 -0
data/lib/tensor_stream/opencl/kernels/cos.cl +3 -5
data/lib/tensor_stream/opencl/kernels/exp.cl +3 -5
data/lib/tensor_stream/opencl/kernels/floor.cl +3 -4
data/lib/tensor_stream/opencl/kernels/log.cl +3 -4
data/lib/tensor_stream/opencl/kernels/log1p.cl +3 -4
data/lib/tensor_stream/opencl/kernels/negate.cl +3 -4
data/lib/tensor_stream/opencl/kernels/reciprocal.cl +3 -4
data/lib/tensor_stream/opencl/kernels/sigmoid.cl +3 -4
data/lib/tensor_stream/opencl/kernels/sign.cl +7 -8
data/lib/tensor_stream/opencl/kernels/sin.cl +3 -4
data/lib/tensor_stream/opencl/kernels/split.cl +17 -0
data/lib/tensor_stream/opencl/kernels/split_n.cl +18 -0
data/lib/tensor_stream/opencl/kernels/sqrt.cl +3 -4
data/lib/tensor_stream/opencl/kernels/square.cl +3 -4
data/lib/tensor_stream/opencl/kernels/tan.cl +3 -4
data/lib/tensor_stream/opencl/kernels/tanh.cl +3 -4
data/lib/tensor_stream/opencl/kernels/tanh_grad.cl +3 -4
data/lib/tensor_stream/opencl/kernels/unpack.cl +23 -0
data/lib/tensor_stream/opencl/nn_ops.rb +111 -26
data/lib/tensor_stream/opencl/opencl_buffer.rb +9 -0
data/lib/tensor_stream/opencl/opencl_evaluator.rb +129 -172
data/lib/tensor_stream/opencl/version.rb +1 -1
data/samples/iris.data +150 -0
data/samples/iris.rb +110 -0
data/samples/mnist_data.rb +65 -0
data/samples/multigpu.rb +73 -0
data/samples/nearest_neighbor.rb +56 -0
data/samples/rnn.rb +108 -0
data/tensor_stream-opencl.gemspec +4 -1
metadata +62 -3

data/lib/tensor_stream/opencl/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module TensorStream
   module Opencl
-    VERSION = "0.1.3"
+    VERSION = "0.2.0"
   end
 end

data/samples/iris.data ADDED Viewed

@@ -0,0 +1,150 @@
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.9,3.1,4.9,1.5,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+6.5,2.8,4.6,1.5,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+6.3,3.3,4.7,1.6,Iris-versicolor
+4.9,2.4,3.3,1.0,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.9,3.0,4.2,1.5,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+6.2,2.2,4.5,1.5,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+5.4,3.0,4.5,1.5,Iris-versicolor
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+6.2,2.9,4.3,1.3,Iris-versicolor
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+4.9,2.5,4.5,1.7,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+6.7,2.5,5.8,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+5.7,2.5,5.0,2.0,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+6.0,2.2,5.0,1.5,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+5.6,2.8,4.9,2.0,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+6.2,2.8,4.8,1.8,Iris-virginica
+6.1,3.0,4.9,1.8,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.1,2.6,5.6,1.4,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,3.1,5.5,1.8,Iris-virginica
+6.0,3.0,4.8,1.8,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.0,5.2,2.3,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.2,3.4,5.4,2.3,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica

data/samples/iris.rb ADDED Viewed

@@ -0,0 +1,110 @@
+require "bundler/setup"
+require 'tensor_stream'
+require 'tensor_stream/opencl'
+# This neural network will predict the species of an iris based on sepal and petal size
+# Dataset: http://en.wikipedia.org/wiki/Iris_flower_data_set
+tf = TensorStream
+rows = File.readlines(File.join("samples","iris.data")).map {|l| l.chomp.split(',') }
+rows.shuffle!
+label_encodings = {
+  'Iris-setosa'     => [1, 0, 0],
+  'Iris-versicolor' => [0, 1, 0],
+  'Iris-virginica'  => [0, 0, 1]
+}
+x_data = rows.map {|row| row[0,4].map(&:to_f) }
+y_data = rows.map {|row| label_encodings[row[4]] }
+# Normalize data values before feeding into network
+normalize = -> (val, high, low) { (val - low) / (high - low) } # maps input to float between 0 and 1
+columns = (0..3).map do |i|
+  x_data.map {|row| row[i] }
+end
+x_data.map! do |row|
+  row.map.with_index do |val, j|
+    max, min = columns[j].max, columns[j].min
+    normalize.call(val, max, min)
+  end
+end
+x_train = x_data.slice(0, 100)
+y_train = y_data.slice(0, 100)
+x_test = x_data.slice(100, 50)
+y_test = y_data.slice(100, 50)
+test_cases = []
+x_train.each_with_index do |x, index|
+  test_cases << [x, y_train[index]]
+end
+validation_cases = []
+x_test.each_with_index do |x, index|
+  validation_cases << [x, y_test[index]]
+end
+def init_weights(shape)
+  # Weight initialization
+  weights = TensorStream.random_normal(shape, stddev: 0.1)
+  TensorStream.variable(weights)
+end
+def forwardprop(x, w_1, w_2)
+  # Forward-propagation.
+  # IMPORTANT: yhat is not softmax since TensorFlow's softmax_cross_entropy_with_logits() does that internally.
+  h  = TensorStream.nn.sigmoid(TensorStream.matmul(x, w_1))  # The \sigma function
+  TensorStream.matmul(h, w_2)  # The \varphi function
+end
+x_size = x_train[0].size
+y_size = y_train[0].size
+h_size = 256
+X = tf.placeholder(:float32, shape: [nil, x_size])
+y = tf.placeholder(:float32, shape: [nil, y_size])
+# Weight initializations
+w_1 = init_weights([x_size, h_size])
+w_2 = init_weights([h_size, y_size])
+# Forward propagation
+yhat    = forwardprop(X, w_1, w_2)
+predict = tf.argmax(yhat, 1)
+# Backward propagation
+cost    = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels: y, logits: yhat))
+updates =  TensorStream::Train::GradientDescentOptimizer.new(0.01).minimize(cost)
+# updates =  TensorStream::Train::MomentumOptimizer.new(0.01, 0.5, use_nesterov: true).minimize(cost)
+# updates =  TensorStream::Train::RMSPropOptimizer.new(0.01).minimize(cost)
+# Run SGD
+sess = tf.session
+init = tf.global_variables_initializer
+sess.run(init)
+loss = sess.run(cost, feed_dict: { X => x_test, y => y_test })
+puts "loss test data set #{loss}"
+loss = sess.run(cost, feed_dict: { X => x_train, y => y_train })
+puts "Testing the untrained network..."
+puts loss
+start_time = Time.now
+(0..100).each do |epoch|
+  x_train.size.times do |i|
+    sess.run(updates, feed_dict: {X => [x_train[i]], y => [y_train[i]]})
+  end
+  loss = sess.run(cost, feed_dict: { X => x_train, y => y_train })
+  puts "epoch: #{epoch}, loss #{loss}"
+end
+loss = sess.run(cost, feed_dict: { X => x_train, y => y_train })
+puts "loss after training #{loss}"
+loss = sess.run(cost, feed_dict: { X => x_test, y => y_test })
+puts "loss test data set #{loss}"
+puts("time elapsed ", Time.now.to_i - start_time.to_i)

data/samples/mnist_data.rb ADDED Viewed

@@ -0,0 +1,65 @@
+# A ruby port of the example code discussed by Martin Gorner in
+# "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)"
+#
+# https://www.youtube.com/watch?v=u4alGiomYP4
+#
+# Requirements:
+#   mnist-learn gem
+#   opencl_ruby_ffi gem
+require "bundler/setup"
+require 'tensor_stream'
+require 'mnist-learn'
+# Enable OpenCL hardware-accelerated computation; running without OpenCL can be very slow
+# require 'tensor_stream/opencl'
+tf = TensorStream
+# Import MNIST data
+puts "downloading minst data"
+mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
+puts "downloading finished"
+x = tf.placeholder(:float32, shape: [nil, 784])
+w = tf.variable(tf.zeros([784, 10]))
+b = tf.variable(tf.zeros([10]))
+# model
+y = tf.nn.softmax(tf.matmul(tf.reshape(x, [-1, 784]), w) + b)
+y_ = tf.placeholder(:float32, shape: [nil, 10])
+# loss function
+cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
+is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+accuracy =  tf.reduce_mean(tf.cast(is_correct, :float32))
+optimizer = TensorStream::Train::AdamOptimizer.new
+train_step = optimizer.minimize(cross_entropy)
+sess = tf.session
+init = tf.global_variables_initializer
+sess.run(init)
+(0...1000).each do |i|
+  # load batch of images and correct answers
+  batch_x, batch_y = mnist.train.next_batch(100)
+  train_data = { x => batch_x, y_ => batch_y }
+  # train
+  sess.run(train_step, feed_dict: train_data)
+  if (i % 10 == 0)
+    # success? add code to print it
+    a, c = sess.run([accuracy, cross_entropy], feed_dict: train_data)
+    puts "#{i} train accuracy #{a}, error #{c}"
+    # success on test data?
+    test_data = { x => mnist.test.images, y_ => mnist.test.labels }
+    a, c = sess.run([accuracy, cross_entropy], feed_dict: test_data)
+    puts " test accuracy #{a}, error #{c}"
+  end
+end

data/samples/multigpu.rb ADDED Viewed

@@ -0,0 +1,73 @@
+require "bundler/setup"
+require 'tensor_stream'
+require 'tensor_stream/opencl'
+require 'pry-byebug'
+ts = TensorStream
+n = 10
+DIMEN = 1024
+A = ts.random_uniform([DIMEN, DIMEN]).eval
+B = ts.random_uniform([DIMEN, DIMEN]).eval
+# Create a graph to store results
+c1 = []
+c2 = []
+a = nil
+b = nil
+def matpow(m, n)
+  return m if n < 1
+  TensorStream.matmul(m, matpow(m, n-1))
+end
+ts.device('/device:GPU:0') do
+  a = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
+  b = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
+  # Compute A^n and B^n and store results in c1
+  c1 << matpow(a, n)
+  c1 << matpow(b, n)
+end
+sum = ts.device('/device:GPU:0') do
+  ts.add_n(c1)
+end
+t1_1 = Time.now.to_i
+t2_1 = nil
+ts.session(log_device_placement: true) do |sess|
+  sess.run(sum, feed_dict: { a => A, b => B})
+  t2_1 = Time.now.to_i
+end
+# Multi GPU computing
+# GPU:0 computes A^n (NOTE: the block below is actually placed on '/device:GPU:1', not GPU:0)
+ts.device('/device:GPU:1') do
+  a = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
+  c2 << matpow(a, n)
+end
+# GPU:1 computes B^n
+ts.device('/device:GPU:1') do
+  b = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
+  c2 << matpow(b, n)
+end
+ts.device('/device:GPU:1') do
+  sum = ts.add_n(c2) #Addition of all elements in c2, i.e. A^n + B^n
+end
+t1_2 = Time.now.to_i
+t2_2 = nil
+ts.session(log_device_placement:true) do |sess|
+    # Run the op.
+    sess.run(sum, feed_dict: {a => A, b => B})
+    t2_2 = Time.now.to_i
+end
+print("Single GPU computation time: " + (t2_1-t1_1).to_s)
+print("Multi GPU computation time: " + (t2_2-t1_2).to_s)

data/samples/nearest_neighbor.rb ADDED Viewed

@@ -0,0 +1,56 @@
+'''
+A nearest neighbor learning algorithm example using TensorFlow library.
+This example is using the MNIST database of handwritten digits
+(http://yann.lecun.com/exdb/mnist/)
+Author: Aymeric Damien
+Project: https://github.com/aymericdamien/TensorFlow-Examples/
+Make sure to install the mnist-learn gem !!
+'''
+require "bundler/setup"
+require 'tensor_stream'
+require 'mnist-learn'
+require 'tensor_stream/opencl'
+tf = TensorStream
+# Import MNIST data
+mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
+# In this example, we limit mnist data
+Xtr, Ytr = mnist.train.next_batch(5000) # 5000 for training (nn candidates)
+Xte, Yte = mnist.test.next_batch(200) # 200 for testing
+# tf Graph Input
+xtr = tf.placeholder(:float, shape: [nil, 784])
+xte = tf.placeholder(:float, shape: [784])
+# Nearest Neighbor calculation using L1 Distance
+# Calculate L1 Distance
+distance = tf.reduce_sum(tf.abs(tf.add(xtr, tf.negative(xte))), 1)
+# Prediction: Get min distance index (Nearest neighbor)
+pred = tf.argmin(distance, 0)
+accuracy = 0.0
+# Initialize the variables (i.e. assign their default value)
+init = tf.global_variables_initializer()
+# Start training
+tf.session do |sess|
+  # Run the initializer
+  sess.run(init)
+  Xte.size.times do |i|
+    # Get nearest neighbor
+    nn_index = sess.run(pred, feed_dict: {xtr => Xtr, xte => Xte[i]})
+    print("Test ", i, "Prediction: ",Ytr[nn_index].max, \
+            "True Class: ", Yte[i].max, "\n")
+    if Ytr[nn_index].max == Yte[i].max
+      accuracy += 1.0/ Xte.size
+    end
+  end
+  print("Done!")
+  print("Accuracy:", accuracy)
+end