tensor_stream 1.0.6 → 1.0.7

@@ -0,0 +1,203 @@
+ #
+ # A Ruby port of https://github.com/guillaume-chevalier/GloVe-as-a-TensorFlow-Embedding-Layer by Guillaume Chevalier
+ #
+ # This is a port, so some Python-like conventions may have been left behind.
+ require "bundler/setup"
+ require "tensor_stream"
+ require "chakin-rb/chakin"
+ # require 'pry-byebug'
+ require 'zip'
+ require 'json'   # JSON.parse is used to load the word-to-index dict
+ require 'matrix' # Vector#norm is used when printing the top similar words
+
+ tf = TensorStream
+
+ batch_size = nil # Any size is accepted
+ word_representations_dimensions = 25 # Embedding of size (vocab_len, nb_dimensions)
+
+ DATA_FOLDER = "embeddings"
+ SUBFOLDER_NAME = "glove.twitter.27B"
+ TF_EMBEDDING_FILE_NAME = "#{SUBFOLDER_NAME}.ckpt"
+ SUFFIX = SUBFOLDER_NAME + "." + word_representations_dimensions.to_s
+ TF_EMBEDDINGS_FILE_PATH = File.join(DATA_FOLDER, SUFFIX + "d.ckpt")
+ DICT_WORD_TO_INDEX_FILE_NAME = File.join(DATA_FOLDER, SUFFIX + "d.json")
+
+ # Load a `word_to_index` dict mapping words to their id, with a default
+ # value pointing to the last index when a word is not found (the unknown word).
+ def load_word_to_index(dict_word_to_index_file_name)
+   word_to_index = JSON.parse(File.read(dict_word_to_index_file_name))
+   _LAST_INDEX = word_to_index.size - 1
+   puts "word_to_index dict restored from '#{dict_word_to_index_file_name}'."
+   word_to_index = Hash.new(_LAST_INDEX).merge(word_to_index)
+   word_to_index
+ end
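+ # Note: Hash.new(default) returns the default for keys it does not contain,
+ # e.g. Hash.new(3)["unseen"] #=> 3, so any out-of-vocabulary word resolves
+ # to the last embedding row, the one reserved for the unknown word.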
+
+ # Define the embedding tf.Variable and load it.
+ def load_embedding_tf(sess, word_to_index, tf_embeddings_file_path, nb_dims)
+   # 1. Define the variable that will hold the embedding:
+   tf_embedding = TensorStream.variable(
+     TensorStream.constant(0.0, shape: [word_to_index.size - 1, nb_dims]),
+     trainable: false,
+     name: "Embedding"
+   )
+
+   # 2. Restore the embedding from disk into the session (GPU, or CPU if no
+   # GPU backend is available):
+   variables_to_restore = [tf_embedding]
+   embedding_saver = TensorStream::Train::Saver.new(variables_to_restore)
+   embedding_saver.restore(sess, tf_embeddings_file_path)
+   puts "TF embeddings restored from '#{tf_embeddings_file_path}'."
+
+   tf_embedding
+ end
+
+ # Returns the `cosine_similarity = cos(angle_between_a_and_b_in_space)`
+ # for word A against each of the words B.
+ # The first input must be a 1D tensor (word_representation).
+ # The second input must be a 2D tensor (batch_size, word_representation).
+ # The result is a tf tensor that must be fetched with `sess.run`.
+ def cosine_similarity_tensorflow(tf_word_representation_A, tf_words_representation_B)
+   a_normalized = TensorStream.nn.l2_normalize(tf_word_representation_A, axis: -1)
+   b_normalized = TensorStream.nn.l2_normalize(tf_words_representation_B, axis: -1)
+   TensorStream.reduce_sum(
+     TensorStream.multiply(a_normalized, b_normalized),
+     axis: -1
+   )
+ end
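+ # Equivalently: cos(a, b) = (a . b) / (||a|| * ||b||). L2-normalizing both
+ # inputs first reduces the similarity to a plain dot product, which the
+ # element-wise multiply followed by reduce_sum along the last axis computes.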
+
+ # In case you didn't do the "%reset":
+ tf.reset_default_graph
+ sess = tf.session
+
+ # Load the embedding matrix in tf
+ word_to_index = load_word_to_index(DICT_WORD_TO_INDEX_FILE_NAME)
+ tf_embedding = load_embedding_tf(sess,
+                                  word_to_index,
+                                  TF_EMBEDDINGS_FILE_PATH,
+                                  word_representations_dimensions)
+
+ # Input to the graph where word IDs can be sent in batch. Look at the "shape" args:
+ @tf_word_A_id = tf.placeholder(:int32, shape: [1])
+ @tf_words_B_ids = tf.placeholder(:int32, shape: [batch_size])
+
+ # Conversion of words to a representation
+ tf_word_representation_A = tf.nn.embedding_lookup(tf_embedding, @tf_word_A_id)
+ tf_words_representation_B = tf.nn.embedding_lookup(tf_embedding, @tf_words_B_ids)
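+ # (embedding_lookup(params, ids) selects the rows of params indexed by ids,
+ # i.e. the GloVe vectors for the given word IDs.)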
+
+ # The graph outputs are the "cosine_similarities", which we fetch with sess.run(...):
+ @cosine_similarities = cosine_similarity_tensorflow(tf_word_representation_A, tf_words_representation_B)
+
+ puts "Model created."
+
+ # Note: there might be a better way to split sentences for GloVe.
+ # Please look at the documentation or open an issue to suggest a fix.
+ def sentence_to_word_ids(sentence, word_to_index)
+   punctuation = ['.', '!', '?', ',', ':', ';', "'", '"', '(', ')']
+   # Separate punctuation from words:
+   punctuation.each do |punctuation_character|
+     sentence.gsub!(punctuation_character, " #{punctuation_character} ")
+   end
+   # Remove double spaces and lowercase:
+   sentence = sentence.downcase.squeeze(" ").strip
+
+   # Split on every space:
+   split_sentence = sentence.split(" ")
+   # Convert words to IDs:
+   ids = split_sentence.map { |w| word_to_index[w.strip] }
+   [ids, split_sentence]
+ end
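+ # For example, sentence_to_word_ids("Hello, world!", word_to_index) splits
+ # the input into ["hello", ",", "world", "!"] and maps each token to its ID,
+ # unseen tokens falling back to the unknown-word index via the Hash default.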
+
+ # Use the model in sess to predict cosine similarities.
+ def predict_cosine_similarities(sess, word_to_index, word_A, words_B)
+   word_A_id, _ = sentence_to_word_ids(word_A, word_to_index)
+   words_B_ids, split_sentence = sentence_to_word_ids(words_B, word_to_index)
+
+   evaluated_cos_similarities = sess.run(
+     @cosine_similarities,
+     feed_dict: {
+       @tf_word_A_id => word_A_id,
+       @tf_words_B_ids => words_B_ids
+     }
+   )
+   [evaluated_cos_similarities, split_sentence]
+ end
+
+ word_A = "Science"
+ words_B = "Hello internet, a vocano erupt like the bitcoin out of the blue and there is an unknownWord00!"
+
+ evaluated_cos_similarities, splitted = predict_cosine_similarities(sess, word_to_index, word_A, words_B)
+
+ puts "Cosine similarities with \"#{word_A}\":"
+ splitted.zip(evaluated_cos_similarities).each do |word, similarity|
+   puts "  #{(word + ":").ljust(15)}#{similarity}"
+ end
+
+ tf.reset_default_graph
+
+ # Invert the word_to_index dict to map IDs back to words:
+ index_to_word = word_to_index.invert
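+ # (Note that Hash#invert drops the Hash.new default, so index_to_word has
+ # no unknown-word fallback; look-ups are by exact index only.)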
+
+ # New graph
+ tf.reset_default_graph
+ sess = tf.session
+
+ # Load the embedding matrix in tf
+ tf_word_to_index = load_word_to_index(DICT_WORD_TO_INDEX_FILE_NAME)
+ tf_embedding = load_embedding_tf(sess,
+                                  tf_word_to_index,
+                                  TF_EMBEDDINGS_FILE_PATH,
+                                  word_representations_dimensions)
+
+ # An input word
+ tf_word_id = tf.placeholder(:int32, shape: [1])
+ tf_word_representation = tf.nn.embedding_lookup(tf_embedding, tf_word_id)
+
+ # An input: how many similar words to fetch
+ tf_nb_similar_words_to_get = tf.placeholder(:int32)
+
+ # Dot the word against every embedding
+ tf_all_cosine_similarities = cosine_similarity_tensorflow(
+   tf_word_representation,
+   tf_embedding)
+
+ # Get the top cosine similarities; fetch one extra because the top match
+ # will be the input word itself:
+ tf_top_cosine_similarities, tf_top_word_indices = tf.top_k(
+   tf_all_cosine_similarities,
+   tf_nb_similar_words_to_get + 1,
+   sorted: true
+ )
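+ # For example, top_k([0.1, 0.9, 0.4], 2) yields values [0.9, 0.4] and
+ # indices [1, 2]: the largest entries in descending order.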
+
+ # Discard the first word because it's the input word itself:
+ tf_top_cosine_similarities = tf_top_cosine_similarities[1..nil]
+ tf_top_word_indices = tf_top_word_indices[1..nil]
+
+ # Get the top words' representations by fetching
+ # tf_top_words_representation = "tf_embedding[tf_top_word_indices]":
+ tf_top_words_representation = tf.gather(tf_embedding, tf_top_word_indices)
+
+ # Fetch 10 similar words:
+ nb_similar_words_to_get = 10
+
+ word = "king"
+ word_id = word_to_index[word]
+
+ top_cosine_similarities, top_word_indices, top_words_representation = sess.run(
+   [tf_top_cosine_similarities, tf_top_word_indices, tf_top_words_representation],
+   feed_dict: {
+     tf_word_id => [word_id],
+     tf_nb_similar_words_to_get => nb_similar_words_to_get
+   }
+ )
+
+ puts "Top similar words to \"#{word}\":"
+ top_cosine_similarities.zip(top_word_indices).zip(top_words_representation).each do |w, word_repr|
+   cos_sim, word_id = w
+   puts "#{(index_to_word[word_id] + ":").ljust(15)}#{(cos_sim.to_s + ",").ljust(15)}#{Vector.elements(word_repr).norm}"
+ end
@@ -17,6 +17,7 @@ Gem::Specification.new do |spec|
    # to allow pushing to a single host or delete this section to allow pushing to any host.
    if spec.respond_to?(:metadata)
      spec.metadata["allowed_push_host"] = "https://rubygems.org"
+     spec.metadata["changelog_uri"] = "https://github.com/jedld/tensor_stream/blob/master/CHANGELOG.md"
    else
      raise "RubyGems 2.0 or newer is required to protect against " \
        "public gem pushes."
@@ -42,8 +43,10 @@ Gem::Specification.new do |spec|
    spec.add_development_dependency "colorize"
    spec.add_development_dependency "rspec_junit_formatter"
    spec.add_development_dependency "mnist-learn"
+   spec.add_development_dependency "chakin-rb"
    spec.add_development_dependency "simplecov"
    spec.add_development_dependency "standard"
+   spec.add_development_dependency "rubyzip"
    spec.add_dependency "deep_merge"
    spec.add_dependency "concurrent-ruby"
    spec.add_dependency "chunky_png"
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: tensor_stream
  version: !ruby/object:Gem::Version
-   version: 1.0.6
+   version: 1.0.7
  platform: ruby
  authors:
  - Joseph Emmanuel Dayo
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2019-03-23 00:00:00.000000000 Z
+ date: 2019-04-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler
@@ -164,6 +164,20 @@ dependencies:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
+ - !ruby/object:Gem::Dependency
+   name: chakin-rb
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
  - !ruby/object:Gem::Dependency
    name: simplecov
    requirement: !ruby/object:Gem::Requirement
@@ -192,6 +206,20 @@ dependencies:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rubyzip
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
  - !ruby/object:Gem::Dependency
    name: deep_merge
    requirement: !ruby/object:Gem::Requirement
@@ -304,12 +332,14 @@ files:
  - lib/tensor_stream/helpers/tensor_mixins.rb
  - lib/tensor_stream/images.rb
  - lib/tensor_stream/initializer.rb
+ - lib/tensor_stream/math/math_ops.rb
  - lib/tensor_stream/math_gradients.rb
  - lib/tensor_stream/monkey_patches/array.rb
  - lib/tensor_stream/monkey_patches/float.rb
  - lib/tensor_stream/monkey_patches/integer.rb
  - lib/tensor_stream/monkey_patches/op_patch.rb
  - lib/tensor_stream/monkey_patches/patch.rb
+ - lib/tensor_stream/nn/embedding_lookup.rb
  - lib/tensor_stream/nn/nn_ops.rb
  - lib/tensor_stream/op_maker.rb
  - lib/tensor_stream/operation.rb
@@ -349,16 +379,19 @@ files:
  - lib/tensor_stream/ops/rank.rb
  - lib/tensor_stream/ops/reshape.rb
  - lib/tensor_stream/ops/round.rb
+ - lib/tensor_stream/ops/rsqrt.rb
  - lib/tensor_stream/ops/shape.rb
  - lib/tensor_stream/ops/sigmoid.rb
  - lib/tensor_stream/ops/sign.rb
  - lib/tensor_stream/ops/sin.rb
  - lib/tensor_stream/ops/size.rb
+ - lib/tensor_stream/ops/strided_slice.rb
  - lib/tensor_stream/ops/sub.rb
  - lib/tensor_stream/ops/sum.rb
  - lib/tensor_stream/ops/tan.rb
  - lib/tensor_stream/ops/tanh.rb
  - lib/tensor_stream/ops/tile.rb
+ - lib/tensor_stream/ops/top_k.rb
  - lib/tensor_stream/ops/zeros.rb
  - lib/tensor_stream/placeholder.rb
  - lib/tensor_stream/profile/report_tool.rb
@@ -381,25 +414,28 @@ files:
  - lib/tensor_stream/utils.rb
  - lib/tensor_stream/utils/data_type_utils.rb
  - lib/tensor_stream/utils/freezer.rb
+ - lib/tensor_stream/utils/py_ports.rb
  - lib/tensor_stream/variable.rb
  - lib/tensor_stream/variable_scope.rb
  - lib/tensor_stream/version.rb
  - samples/datasets/iris.data
  - samples/jupyter_notebooks/linear_regression.ipynb
  - samples/neural_networks/iris.rb
- - samples/neural_networks/lstm.rb
  - samples/neural_networks/mnist_data.rb
  - samples/neural_networks/raw_neural_net_sample.rb
  - samples/neural_networks/rnn.rb
  - samples/others/nearest_neighbor.rb
  - samples/regression/linear_regression.rb
  - samples/regression/logistic_regression.rb
+ - samples/word_embeddings/word_embedding_1.rb
+ - samples/word_embeddings/word_embedding_2.rb
  - tensor_stream.gemspec
  homepage: http://www.github.com/jedld/tensor_stream
  licenses:
  - MIT
  metadata:
    allowed_push_host: https://rubygems.org
+   changelog_uri: https://github.com/jedld/tensor_stream/blob/master/CHANGELOG.md
  post_install_message:
  rdoc_options: []
  require_paths:
@@ -415,7 +451,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: '0'
  requirements: []
- rubygems_version: 3.0.3
+ rubygems_version: 3.0.1
  signing_key:
  specification_version: 4
  summary: A Pure ruby tensorflow implementation
@@ -1,22 +0,0 @@
- # A ruby port of the example code discussed by Martin Gorner in
- # "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)"
- #
- # https://www.youtube.com/watch?v=u4alGiomYP4
- #
- # Requirements:
- #   mnist-learn gem
- #   opencl_ruby_ffi gem
- require "bundler/setup"
- require "tensor_stream"
- require "mnist-learn"
-
- # Enable OpenCL hardware accelerated computation; not using OpenCL can be very slow
- # gem install tensor_stream-opencl
- require 'tensor_stream/opencl'
-
- tf = TensorStream
-
- # Import MNIST data
- puts "downloading mnist data"
- mnist = Mnist.read_data_sets("/tmp/data", one_hot: true)
- puts "downloading finished"