tensor_stream 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +10 -3
- data/lib/tensor_stream.rb +1 -0
- data/lib/tensor_stream/evaluator/base_evaluator.rb +6 -0
- data/lib/tensor_stream/evaluator/operation_helpers/array_ops_helper.rb +60 -0
- data/lib/tensor_stream/evaluator/ruby/array_ops.rb +53 -1
- data/lib/tensor_stream/evaluator/ruby/math_ops.rb +42 -5
- data/lib/tensor_stream/generated_stub/ops.rb +61 -5
- data/lib/tensor_stream/helpers/tensor_mixins.rb +10 -1
- data/lib/tensor_stream/math/math_ops.rb +22 -0
- data/lib/tensor_stream/math_gradients.rb +15 -1
- data/lib/tensor_stream/nn/embedding_lookup.rb +114 -0
- data/lib/tensor_stream/nn/nn_ops.rb +3 -0
- data/lib/tensor_stream/op_maker.rb +15 -3
- data/lib/tensor_stream/ops.rb +12 -0
- data/lib/tensor_stream/ops/rsqrt.rb +11 -0
- data/lib/tensor_stream/ops/strided_slice.rb +24 -0
- data/lib/tensor_stream/ops/sum.rb +4 -2
- data/lib/tensor_stream/ops/top_k.rb +23 -0
- data/lib/tensor_stream/session.rb +3 -0
- data/lib/tensor_stream/tensor_shape.rb +32 -1
- data/lib/tensor_stream/train/saver.rb +2 -2
- data/lib/tensor_stream/utils.rb +8 -0
- data/lib/tensor_stream/utils/py_ports.rb +11 -0
- data/lib/tensor_stream/version.rb +1 -1
- data/samples/word_embeddings/word_embedding_1.rb +192 -0
- data/samples/word_embeddings/word_embedding_2.rb +203 -0
- data/tensor_stream.gemspec +3 -0
- metadata +40 -4
- data/samples/neural_networks/lstm.rb +0 -22
data/lib/tensor_stream/nn/embedding_lookup.rb
ADDED
@@ -0,0 +1,114 @@
+require 'tensor_stream/utils/py_ports'
+##
+# ruby port of https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/ops/embedding_ops.py
+#
+module TensorStream
+  module EmbeddingLookup
+    include TensorStream::PyPorts
+
+    ##
+    # Looks up `ids` in a list of embedding tensors.
+    def embedding_lookup(params, ids, partition_strategy: "mod", name: nil, validate_indices: true, max_norm: nil)
+      _embedding_lookup_and_transform(params, ids, partition_strategy: partition_strategy, name: name, max_norm: max_norm, transform_fn: nil)
+    end
+
+    ##
+    # Helper function for embedding_lookup and _compute_sampled_logits.
+    def _embedding_lookup_and_transform(params, ids, partition_strategy: "mod", name: nil, max_norm: nil, transform_fn: nil)
+      raise TensorStream::ValueError, "Need at least one param" if params.nil?
+
+      params = [params] unless params.is_a?(Array)
+
+      TensorStream.name_scope(name, "embedding_lookup", values: params + [ids]) do |name|
+        np = params.size
+        ids = TensorStream.convert_to_tensor(ids, name: "ids")
+        if (np == 1) && (transform_fn.nil? || (ids.shape.size == 1))
+          result = nil
+          TensorStream.colocate_with(params[0]) do
+            result = _clip(TensorStream.gather(params[0], ids, name: name), ids, max_norm)
+            result = transform_fn.call(result) if transform_fn
+          end
+
+          return TensorStream.identity(result)
+        else
+          flat_ids = TensorStream.reshape(ids, [-1])
+          original_indices = TensorStream.range(TensorStream.size(flat_ids))
+
+          p_assignments = nil
+          new_ids = nil
+
+          if partition_strategy == "mod"
+            p_assignments = flat_ids % np
+            new_ids = floor_div(flat_ids, np)
+          elsif partition_strategy == "div"
+            raise "not yet supported!"
+          else
+            raise TensorStream::ValueError, "Unrecognized partition strategy: " + partition_strategy
+          end
+
+          p_assignments = TensorStream.cast(p_assignments, :int32)
+          gather_ids = TensorStream.dynamic_partition(new_ids, p_assignments, np)
+          pindices = TensorStream.dynamic_partition(original_indices, p_assignments, np)
+          partitioned_result = []
+          (0...np).each do |p|
+            pids = gather_ids[p]
+            result = nil
+            TensorStream.colocate_with(params[p]) do
+              result = TensorStream.gather(params[p], pids)
+              if transform_fn
+                # If transform_fn is provided, the clip_by_norm precedes
+                # the transform and hence must be co-located. See below
+                # for the counterpart if transform_fn is not proveded.
+                result = transform_fn.call(_clip(result, pids, max_norm))
+              end
+            end
+            partitioned_result << result
+          end
+          ret = TensorStream.dynamic_stitch(pindices, partitioned_result, name: name)
+
+          if transform_fn.nil?
+            element_shape_s = params[0].shape[1..-1]
+            params[1..-1].each { |p| element_shape_s = element_shape_s.merge_with(p.shape[1..-1]) }
+          else
+            element_shape_s = ret.shape[1..-1]
+          end
+
+          # Compute the dynamic element shape.
+          element_shape_d = if element_shape_s.fully_defined?
+            element_shape_s
+          elsif transform_fn.nil?
+            # It's important that we compute params[0].shape on the right device
+            # to avoid data motion.
+            TensorStream.colocate_with(params[0]) do
+              params_shape = TensorStream.shape(params[0])
+              params_shape[1..-1]
+            end
+          else
+            TensorStream.shape(ret)[1..-1]
+          end
+          ret = TensorStream.reshape(ret, TensorStream.concat([TensorStream.shape(ids), element_shape_d], 0))
+          ret = _clip(ret, ids, max_norm) unless transform_fn
+          ret
+        end
+      end
+    end
+
+    def _clip(params, ids, max_norm)
+      return params if max_norm.nil?
+
+      ids_rank, ids_static = _rank(ids)
+      params_rank, params_static = _rank(params)
+
+      TensorStream.clip_by_norm(params, max_norm, axes: ids_static && params_static ? (ids_rank...params_rank).to_a : TensorStream.range(ids_rank, params_rank))
+    end
+
+    def _rank(x)
+      rank = TensorStream.convert_to_tensor(x).shape.ndims
+      if rank
+        [rank, false]
+      else
+        [TensorStream.rank(x), false]
+      end
+    end
+  end
+end
data/lib/tensor_stream/nn/nn_ops.rb
CHANGED
@@ -1,7 +1,10 @@
+require 'tensor_stream/nn/embedding_lookup'
 module TensorStream
   # High level machine learning functions
   class NN
     extend TensorStream::OpHelper
+    extend TensorStream::EmbeddingLookup
+    extend TensorStream::Maths::MathFunctions
 
     class << self
       def softmax(logits, axis: nil, name: nil)
data/lib/tensor_stream/op_maker.rb
CHANGED
@@ -2,7 +2,7 @@ class TensorStream::OpMaker
   attr_reader :operation, :description, :parameters,
     :options, :gradient, :check_types,
     :supports_broadcast, :data_type_coercion,
-    :aliases, :custom, :infer_type_proc, :exclude,
+    :aliases, :custom, :custom_post, :infer_type_proc, :exclude,
     :data_type_block
 
   def initialize(op)
@@ -16,6 +16,7 @@ class TensorStream::OpMaker
     @description = []
     @aliases = []
     @custom = []
+    @custom_post = []
     @infer_type_proc = lambda { |tensor|
       next nil if tensor.inputs[0].nil?
       next tensor.inputs[0].shape.shape if tensor.inputs.size == 1
@@ -32,6 +33,10 @@ class TensorStream::OpMaker
     @custom << custom_code
   end
 
+  def add_custom_post(custom_code)
+    @custom_post << custom_code
+  end
+
   def self.scan
     op_files = Dir[File.join(File.dirname(__FILE__), "ops", "*.rb")]
     op_files.each { |file|
@@ -111,7 +116,14 @@ class TensorStream::OpMaker
    custom.each do |c|
      body << c
    end
-    body << "_op(:#{operation}, #{(expand_params(false) + options_call).join(', ')})"
+    if custom_post.empty?
+      body << "_op(:#{operation}, #{(expand_params(false) + options_call).join(', ')})"
+    else
+      body << "result = _op(:#{operation}, #{(expand_params(false) + options_call).join(', ')})"
+    end
+    custom_post.each do |c|
+      body << c
+    end
    body.map { |line| " #{line}"}.join("\n")
   end
 
@@ -184,7 +196,7 @@ class TensorStream::OpMaker
   end
 
   def options_call
-    @options.map { |k, v|
+    @options.reject { |k, v| v.dig(:options, :exclude) }.map { |k, v|
      if v.dig(:options, :alias)
        "#{v.dig(:options, :alias)}: #{k}"
      else
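With the `custom_post` hook, the stub generator either emits the `_op(...)` call directly (no post block) or assigns it to `result` and appends the post lines. As a rough, hand-written approximation (not copied from generated_stub/ops.rb), the stub produced for the `top_k` definition later in this diff would look something like:

    # Hypothetical shape of the generated stub when add_custom_post is used.
    def top_k(input, k = 1, sorted: true, name: nil)
      result = _op(:top_k, input, k, sorted: sorted, name: name)
      [result[0], result[1]]
    end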
data/lib/tensor_stream/ops.rb
CHANGED
@@ -195,6 +195,15 @@ module TensorStream
     end
   end
 
+  ##
+  # Partitions data into num_partitions tensors using indices from partitions
+  def dynamic_partition(data, partitions, num_partitions, name: nil)
+    result = _op(:dynamic_partition, data, partitions, num_partitions: num_partitions, name: nil)
+    num_partitions.times.map do |index|
+      result[index]
+    end
+  end
+
   def split(value, num_or_size_splits, axis: 0, num: nil, name: "split")
     value = convert_to_tensor(value)
     num_or_size_splits = convert_to_tensor(num_or_size_splits)
@@ -524,6 +533,9 @@
     _op(:squeeze, value, axis: axis, name: nil)
   end
 
+  def clip_by_norm(tensor, clip_norm, axes: nil, name: nil)
+  end
+
   ##
   # Computes the difference between two lists of numbers or strings.
   # Given a list x and a list y, this operation returns a list out that represents all values
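`dynamic_partition` follows the TensorFlow op of the same name: each element of `data` is routed to one of `num_partitions` output tensors according to the matching entry in `partitions`, and the wrapper returns them as a Ruby array. A hedged sketch with made-up values, assuming TensorFlow-compatible semantics:

    ts = TensorStream
    sess = ts.session

    data = ts.constant([10, 20, 30, 40, 50])
    partitions = ts.constant([0, 1, 0, 1, 0])

    p0, p1 = ts.dynamic_partition(data, partitions, 2)
    sess.run(p0) # expected: [10, 30, 50]
    sess.run(p1) # expected: [20, 40]

Note that the `clip_by_norm` method added in the second hunk is only an empty placeholder here.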
data/lib/tensor_stream/ops/rsqrt.rb
ADDED
@@ -0,0 +1,11 @@
+TensorStream::OpMaker.define_operation :rsqrt do |op|
+  op.what_it_does "Computes reciprocal of square root of x element-wise."
+
+  op.parameter :input_a, "tensor X", validate: 'FLOATING_POINT_TYPES'
+  op.option :name, "Optional name", :nil
+
+  op.define_gradient do |grad, node, params|
+    # Returns -0.5 * grad * conj(y)^3.
+    i_op(:rsqrt_grad, node, grad)
+  end
+end
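`rsqrt` computes 1 / sqrt(x) element-wise, and its gradient is delegated to an internal `:rsqrt_grad` op (-0.5 * grad * y^3 with y = rsqrt(x), per the comment). A quick illustrative check, assuming the usual generated `TensorStream.rsqrt` stub:

    ts = TensorStream
    sess = ts.session

    x = ts.constant([4.0, 16.0, 25.0])
    sess.run(ts.rsqrt(x)) # expected: [0.5, 0.25, 0.2]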
data/lib/tensor_stream/ops/strided_slice.rb
ADDED
@@ -0,0 +1,24 @@
+TensorStream::OpMaker.define_operation :strided_slice do |op|
+  op.what_it_does "Extracts a strided slice of a tensor "
+  op.what_it_does "this op extracts a slice of size `(end-begin)/stride`
+    from the given `input_` tensor. Starting at the location specified by `begin`
+    the slice continues by adding `stride` to the index until all dimensions are
+    not less than `end`.
+    Note that a stride can be negative, which causes a reverse slice."
+
+  op.parameter :input, "A tensor"
+  op.parameter :_begin, "start index"
+  op.parameter :_end, "end index"
+  op.parameter :strides, "end index", :nil
+  op.option :name, "Optional name", :nil
+
+  op.define_gradient do |grad, node, params|
+    input, b_index, e_index, strides = params
+    x = ts.shape(input, out_type: node.inputs[0].data_type)
+
+    _op(:strided_slice_grad, x, b_index, e_index, strides, grad)
+  end
+
+  op.define_shape do |tensor|
+  end
+end
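A hedged sketch of calling the op through its generated stub (positional begin/end/strides vectors, one entry per dimension), with the result shown for the usual TensorFlow strided-slice semantics; the values are illustrative:

    ts = TensorStream
    sess = ts.session

    t = ts.constant([[1, 2, 3],
                     [4, 5, 6],
                     [7, 8, 9]])

    # Rows 0 and 1, every second column.
    slice = ts.strided_slice(t, [0, 0], [2, 3], [1, 2])
    sess.run(slice) # expected: [[1, 3], [4, 6]]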
data/lib/tensor_stream/ops/sum.rb
CHANGED
@@ -7,14 +7,16 @@ TensorStream::OpMaker.define_operation :sum do |op|
   op.what_it_does "If axis has no entries, all dimensions are reduced, and a tensor with a single element is returned."
 
   op.parameter :input_a, "tensor X"
-  op.parameter :
+  op.parameter :axis_p, "tensor X", :nil, validate: 'INTEGER_TYPES'
 
+  op.option :axis, "axis", :nil, exclude: true
   op.option :name, "Optional name", :nil
   op.option :keepdims, "If true, retains reduced dimensions with length 1.", :false
 
   op.add_custom "input_a = TensorStream.convert_to_tensor(input_a)"
   op.add_custom "return input_a if input_a.shape.scalar?"
-  op.add_custom "
+  op.add_custom "axis_p = axis_p || axis"
+  op.add_custom "axis_p = cast_axis(input_a, axis_p)"
 
   op.define_gradient do |grad, node, params|
     x, y = params
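The practical effect is that the reduction axis can now be passed either positionally (`axis_p`) or via the `axis:` keyword; the keyword option is excluded from the generated `_op` call and folded into the positional input by the custom lines above. Illustrative only, assuming the generated `sum` stub:

    ts = TensorStream
    sess = ts.session

    m = ts.constant([[1, 1, 1], [2, 2, 2]])

    sess.run(ts.sum(m))          # all axes reduced -> 9
    sess.run(ts.sum(m, 0))       # positional axis  -> [3, 3, 3]
    sess.run(ts.sum(m, axis: 1)) # keyword axis     -> [3, 6]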
data/lib/tensor_stream/ops/top_k.rb
ADDED
@@ -0,0 +1,23 @@
+TensorStream::OpMaker.define_operation :top_k do |op|
+  op.what_it_does "Finds values and indices of the `k` largest entries for the last dimension."
+
+  op.parameter :input, "1-D or higher `Tensor` with last dimension at least `k`."
+  op.parameter :k, "0-D `int32` `Tensor`. Number of top elements to look for along the last dimension (along each row for matrices)", 1
+  op.option :sorted, "If true the resulting `k` elements will be sorted by the values in descending order.", "true"
+  op.option :name, "Optional name", :nil
+
+  op.add_custom_post "[result[0], result[1]]"
+
+  op.define_shape do |tensor|
+    next nil unless tensor.inputs[0].shape.known?
+
+    input_shape = tensor.inputs[0].shape.shape.dup
+    k = tensor.options[:k]
+    input_shape[-1] = k
+    input_shape
+  end
+
+  op.define_gradient do |grad, node, params|
+    #TODO
+  end
+end
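Because of `add_custom_post`, the `top_k` stub returns a two-element array (values, indices) instead of a single op. A rough usage sketch with made-up numbers:

    ts = TensorStream
    sess = ts.session

    scores = ts.constant([1.0, 4.0, 3.0, 2.0])
    values, indices = ts.top_k(scores, 2)

    sess.run(values)  # expected: [4.0, 3.0]
    sess.run(indices) # expected: [1, 2]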
data/lib/tensor_stream/tensor_shape.rb
CHANGED
@@ -18,7 +18,8 @@ module TensorStream
     end
 
     def [](index)
-      @shape[index]
+      new_shape = @shape[index]
+      TensorShape.new(@shape[index])
     end
 
     def ndims
@@ -42,6 +43,36 @@
       known?
     end
 
+    def merge_with(other)
+      assert_compatible_with(other)
+
+      if @shape.nil?
+        TensorShape.new(other)
+      else
+        TensorShape.new(@shape)
+      end
+    end
+
+    def compatible_with?(other)
+      other = as_dimension(other)
+
+      shape.nil? || other.nil? || shape == other
+    end
+
+    def as_dimension(value)
+      value.is_a?(TensorShape) ? value.shape : value
+    end
+
+    def value
+      shape
+    end
+
+    ##
+    # Raises an exception if `other` is not compatible with this shape.
+    def assert_compatible_with(other)
+      raise TensorStream::ValueError, "Dimensions #{self} and #{other} are not compatible" unless compatible_with?(other)
+    end
+
     def self.infer_shape(shape_a, shape_b)
       return nil if shape_a.nil? || shape_b.nil?
       return shape_a if shape_b.empty?
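These additions give `TensorShape` a minimal version of TensorFlow's shape-compatibility protocol, which `_embedding_lookup_and_transform` above relies on when merging per-partition element shapes. A small sketch of the intent, assuming the constructor takes the dimension array just as the hunk itself does (`TensorShape.new(@shape)`):

    a = TensorStream::TensorShape.new([nil, 25])
    b = TensorStream::TensorShape.new([nil, 25])

    a.compatible_with?(b) # => true; a nil (unknown) shape is compatible with anything
    a.merge_with(b)       # => a TensorShape; raises ValueError when shapes are incompatible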
data/lib/tensor_stream/train/saver.rb
CHANGED
@@ -7,9 +7,9 @@ module TensorStream
     class Saver
       include TensorStream::OpHelper
 
-      def initialize
+      def initialize(var_list = nil)
        graph = TensorStream::Graph.get_default_graph
-        vars = graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
+        vars = var_list || graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
 
        @filename = graph["ts_filename"] || TensorStream.placeholder(:string, name: "ts_filename", shape: [])
 
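`Saver#initialize` now accepts an explicit variable list, falling back to `GLOBAL_VARIABLES` when none is given. The word-embedding sample added later in this diff uses it to checkpoint only the (non-trainable) embedding variable:

    # From the sample below: save just the embedding variable.
    variables_to_save = [tf_embedding]
    embedding_saver = tf::Train::Saver.new(variables_to_save)
    embedding_saver.save(sess, TF_EMBEDDINGS_FILE_NAME)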
data/lib/tensor_stream/utils.rb
CHANGED
@@ -219,6 +219,10 @@ module TensorStream
     TensorStream::Trainer
   end
 
+  def math
+    TensorStream::Maths
+  end
+
   def image
     TensorStream::Images
   end
@@ -248,6 +252,10 @@
       return TensorStream.expand_dims(value[0], 0)
     end
 
+    if value.is_a?(TensorShape)
+      value = value.shape
+    end
+
     check_if_dense(value)
     i_cons(value, dtype: dtype || Tensor.detect_type(value), name: name)
   end
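Two small conveniences: `TensorStream.math` exposes the `TensorStream::Maths` namespace, and `convert_to_tensor` now unwraps a `TensorShape` into its underlying array (useful because `TensorShape#[]` above now returns shape objects rather than plain arrays). Illustrative:

    ts = TensorStream

    ts.math # => TensorStream::Maths

    shape = TensorStream::TensorShape.new([2, 3])
    ts.convert_to_tensor(shape) # treated as the plain value [2, 3]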
data/samples/word_embeddings/word_embedding_1.rb
ADDED
@@ -0,0 +1,192 @@
+#
+# A ruby port of https://github.com/guillaume-chevalier/GloVe-as-a-TensorFlow-Embedding-Layer by Guillaume Chevalier
+#
+# This is a port so some weird python like conventions may have been left behind
+require "bundler/setup"
+require "tensor_stream"
+require "chakin-rb/chakin"
+# require 'pry-byebug'
+require 'zip'
+
+tf = TensorStream
+
+CHAKIN_INDEX = 17
+NUMBER_OF_DIMENSIONS = 25
+SUBFOLDER_NAME = "glove.twitter.27B"
+
+DATA_FOLDER = "embeddings"
+ZIP_FILE = File.join(DATA_FOLDER, "#{SUBFOLDER_NAME}.zip")
+ZIP_FILE_ALT = "glove" + ZIP_FILE[5..nil] # sometimes it's lowercase only...
+UNZIP_FOLDER = File.join(DATA_FOLDER, SUBFOLDER_NAME)
+
+if SUBFOLDER_NAME[-1] == "d"
+  GLOVE_FILENAME = File.join(UNZIP_FOLDER, "#{SUBFOLDER_NAME}.txt")
+else
+  GLOVE_FILENAME = File.join(UNZIP_FOLDER, "#{SUBFOLDER_NAME}.#{NUMBER_OF_DIMENSIONS}d.txt")
+end
+
+if !File.exist?(ZIP_FILE) && !File.exist?(UNZIP_FOLDER)
+  # GloVe by Stanford is licensed Apache 2.0:
+  # https://github.com/stanfordnlp/GloVe/blob/master/LICENSE
+  # http://nlp.stanford.edu/data/glove.twitter.27B.zip
+  # Copyright 2014 The Board of Trustees of The Leland Stanford Junior University
+  puts "Downloading embeddings to '#{ZIP_FILE}'"
+  Chakin::Vectors.download(number: CHAKIN_INDEX, save_dir: "./#{DATA_FOLDER}")
+else
+  puts "Embeddings already downloaded."
+end
+
+if !File.exists?(UNZIP_FOLDER)
+  if !File.exists?(ZIP_FILE) && !File.exists?(ZIP_FILE_ALT)
+    ZIP_FILE = ZIP_FILE_ALT
+  end
+  FileUtils.mkdir_p(UNZIP_FOLDER)
+  Zip::File.open(ZIP_FILE) do |zipfile|
+    zipfile.each do |file|
+      puts "Extracting embeddings to '#{UNZIP_FOLDER}/#{file.name}'"
+      fpath = File.join(UNZIP_FOLDER, file.name)
+      zipfile.extract(file, fpath) unless File.exist?(fpath)
+    end
+  end
+else
+  puts "Embeddings already extracted."
+end
+
+##
+# Read a GloVe txt file. If `with_indexes=True`, we return a tuple of two dictionnaries
+# `(word_to_index_dict, index_to_embedding_array)`, otherwise we return only a direct
+# `word_to_embedding_dict` dictionnary mapping from a string to a numpy array.
+def load_embedding_from_disks(glove_filename, with_indexes: true)
+  word_to_index_dict = {}
+  index_to_embedding_array = []
+  word_to_embedding_dict = {}
+  representation = nil
+
+  last_index = nil
+  File.open(glove_filename, 'r').each_with_index do |line, i|
+    split = line.split(' ')
+
+    word = split.shift
+
+    representation = split
+    representation.map! { |val| val.to_f }
+
+    if with_indexes
+      word_to_index_dict[word] = i
+      index_to_embedding_array << representation
+    else
+      word_to_embedding_dict[word] = representation
+    end
+    last_index = i
+  end
+
+  _WORD_NOT_FOUND = [0.0] * representation.size # Empty representation for unknown words.
+  if with_indexes
+    _LAST_INDEX = last_index + 1
+    word_to_index_dict = Hash.new(_LAST_INDEX).merge(word_to_index_dict)
+    index_to_embedding_array = index_to_embedding_array + [_WORD_NOT_FOUND]
+    return word_to_index_dict, index_to_embedding_array
+  else
+    word_to_embedding_dict = Hash.new(_WORD_NOT_FOUND)
+    return word_to_embedding_dict
+  end
+end
+
+puts "Loading embedding from disks..."
+word_to_index, index_to_embedding = load_embedding_from_disks(GLOVE_FILENAME, with_indexes: true)
+puts "Embedding loaded from disks."
+
+vocab_size, embedding_dim = index_to_embedding.shape
+puts "Embedding is of shape: #{index_to_embedding.shape}"
+puts "This means (number of words, number of dimensions per word)"
+puts "The first words are words that tend occur more often."
+
+puts "Note: for unknown words, the representation is an empty vector,\n" +
+  "and the index is the last one. The dictionnary has a limit:"
+puts " \"A word\" --> \"Index in embedding\" --> \"Representation\""
+word = "worsdfkljsdf"
+idx = word_to_index[word]
+embd = index_to_embedding[idx].map { |v| v.to_i } # "int" for compact print only.
+puts " #{word} --> #{idx} --> #{embd}"
+word = "the"
+idx = word_to_index[word]
+embd = index_to_embedding[idx] # "int" for compact print only.
+puts " #{word} --> #{idx} --> #{embd}"
+
+words = [
+  "The", "Teh", "A", "It", "Its", "Bacon", "Star", "Clone", "Bonjour", "Intelligence",
+  "À", "A", "Ça", "Ca", "Été", "C'est", "Aujourd'hui", "Aujourd", "'", "hui", "?", "!", ",", ".", "-", "/", "~"
+]
+
+words.each do |word|
+  word_ = word.downcase
+  embedding = index_to_embedding[word_to_index[word_]]
+  norm = Vector::elements(embedding).norm
+  puts (word + ": ").ljust(15) + norm.to_s
+end
+
+puts "Note: here we printed words starting with capital letters, \n" +
+  "however to take their embeddings we need their lowercase version (str.downcase)"
+
+batch_size = nil # Any size is accepted
+
+tf.reset_default_graph
+sess = tf.session
+
+# Define the variable that will hold the embedding:
+tf_embedding = tf.variable(
+  tf.constant(0.0, shape: index_to_embedding.shape),
+  trainable: false,
+  name: "Embedding"
+)
+
+tf_word_ids = tf.placeholder(:int32, shape: [batch_size])
+
+tf_word_representation_layer = tf.nn.embedding_lookup(tf_embedding, tf_word_ids)
+
+tf_embedding_placeholder = tf.placeholder(:float32, shape: index_to_embedding.shape)
+tf_embedding_init = tf_embedding.assign(tf_embedding_placeholder)
+
+sess.run(
+  tf_embedding_init,
+  feed_dict: {
+    tf_embedding_placeholder => index_to_embedding
+  }
+)
+
+puts "Embedding now stored in TensorStream. Can delete ruby array to clear some CPU RAM."
+
+batch_of_words = ["Hello", "World", "!"]
+batch_indexes = batch_of_words.map { |w| word_to_index[w.downcase] }
+
+embedding_from_batch_lookup = sess.run(
+  tf_word_representation_layer,
+  feed_dict: {
+    tf_word_ids => batch_indexes
+  }
+)
+
+puts "Representations for #{batch_of_words}:"
+puts embedding_from_batch_lookup.inspect
+
+prefix = SUBFOLDER_NAME + "." + NUMBER_OF_DIMENSIONS.to_s + "d"
+TF_EMBEDDINGS_FILE_NAME = File.join(DATA_FOLDER, prefix + ".ckpt")
+DICT_WORD_TO_INDEX_FILE_NAME = File.join(DATA_FOLDER, prefix + ".json")
+
+variables_to_save = [tf_embedding]
+embedding_saver = tf::Train::Saver.new(variables_to_save)
+embedding_saver.save(sess, TF_EMBEDDINGS_FILE_NAME)
+puts "TF embeddings saved to '#{TF_EMBEDDINGS_FILE_NAME}'."
+
+sess.close
+
+File.open(DICT_WORD_TO_INDEX_FILE_NAME, 'w') do |f|
+  f.write(word_to_index.to_json)
+end
+puts "word_to_index dict saved to '#{DICT_WORD_TO_INDEX_FILE_NAME}'."
+
+words_B = "like absolutely crazy not hate bag sand rock soap"
+r = words_B.split.map { |w| word_to_index[w.strip()] }
+puts words_B
+puts r.inspect
+puts "done"