RubyGems - markov_words - Versions diffs - 0.1.1 → 0.2.1 - Mend

markov_words 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml +4 -4
data/Gemfile +2 -2
data/Gemfile.lock +1 -1
data/Rakefile +6 -6
data/bin/console +5 -5
data/lib/markov_words/file_store.rb +39 -0
data/lib/markov_words/version.rb +1 -1
data/lib/markov_words/words.rb +213 -0
data/lib/markov_words.rb +3 -223
data/markov_words.gemspec +16 -17
metadata +14 -12

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d20f8147164231db373f694de4251a4fa385dc07
-  data.tar.gz: 2e439cbdb4e5f098b293f2b911fd895f9fab934f
+  metadata.gz: 5dda90c58c74410faf33ed06402150326be2afe8
+  data.tar.gz: 77cfaf41ef3314807e06b2ef12f5aa7bc42cbe4a
 SHA512:
-  metadata.gz: d93bd441020b08fcd6025c4fc1346d82de7346c4052b79b39cc13a98b67ca9f0dabb9e142c1c0ab625f64ef9afa8ce5f0749eb72a796125cdceaa3913d75924a
-  data.tar.gz: 9ce374965d0ef312e989ffab1de60f858ef79f2b2c0ea4139a5730d1a44ae6e4a5dbb34ba5f6cf0334c462fdb27fab658b612590aa8c908948f0292c61a0fd21
+  metadata.gz: 14d952d8db7fe1f162bccf3c6ab9f1fcc389ff5cf91000f5f0bb87761860592148f4cd2bdc1fdb82e83ba9037316181f8c6987e10f62dc52c248037f5a902b7c
+  data.tar.gz: 7ab05abcd9938125c56c76f82eec3eab0d46cc3588a4b120f9736830a19a334c988eac762a09360523128889654b76dad03002caf4c8be6ea4017d8ce6b9846f

data/Gemfile CHANGED

@@ -1,6 +1,6 @@
-source "https://rubygems.org"
+source 'https://rubygems.org'
-git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
+git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
 # Specify your gem's dependencies in markov_words.gemspec
 gemspec

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    markov_words (0.1.0)
+    markov_words (0.2.1)
 GEM
   remote: https://rubygems.org/

data/Rakefile CHANGED

@@ -1,10 +1,10 @@
-require "bundler/gem_tasks"
-require "rake/testtask"
+require 'bundler/gem_tasks'
+require 'rake/testtask'
 Rake::TestTask.new(:test) do |t|
-  t.libs << "test"
-  t.libs << "lib"
-  t.test_files = FileList["test/**/*_test.rb"]
+  t.libs << 'test'
+  t.libs << 'lib'
+  t.test_files = FileList['test/**/*_test.rb']
 end
-task :default => :test
+task default: :test

data/bin/console CHANGED

@@ -1,14 +1,14 @@
 #!/usr/bin/env ruby
-require "bundler/setup"
-require "markov_words"
+require 'bundler/setup'
+require 'markov_words'
 # You can add fixtures and/or initialization code here to make experimenting
 # with your gem easier. You can also use a different console, if you like.
 # (If you use this, don't forget to add pry to your Gemfile!)
-require "pry"
+require 'pry'
 Pry.start
-#require "irb"
-#IRB.start(__FILE__)
+# require 'irb'
+# IRB.start(__FILE__)

data/lib/markov_words/file_store.rb ADDED

@@ -0,0 +1,39 @@
+require 'securerandom'
+module MarkovWords
+  # Utility for persisting arbitrary data to disk as Marshal'ed Ruby objects
+  class FileStore
+    attr_reader :file_path
+    attr_reader :data
+    # @option opts [String] :file_path Path and name for where the file should
+    #   be stored.
+    # @option opts [Boolean] :flush_data Do you want the file to be cleared on
+    #   open?
+    def initialize(opts)
+      @file_path = opts.fetch :file_path, "/tmp/#{SecureRandom.base64}"
+      delete_if_exists(@file_path) if opts[:flush_data]
+    end
+    # Store arbitary data into file storage
+    # @param data [Object] Any Marshal-able object
+    def store_data(data)
+      File.open(@file_path, 'wb') { |f| Marshal.dump(data, f) }
+    end
+    # Retrieve whatever data is stored in the file + return it
+    def retrieve_data
+      result = nil
+      if File.exist?(@file_path)
+        File.open(@file_path, 'r') { |f| result = Marshal.load(f) }
+      end
+      result
+    end
+    private
+    def delete_if_exists(path)
+      File.delete path if File.exist? path
+    end
+  end
+end

data/lib/markov_words/version.rb CHANGED

@@ -1,4 +1,4 @@
 module MarkovWords
   # Current version
-  VERSION = "0.1.1"
+  VERSION = '0.2.1'.freeze
 end

data/lib/markov_words/words.rb ADDED

@@ -0,0 +1,213 @@
+module MarkovWords
+  # This class takes care of word generation, caching, and data storage.
+  class Words
+    # Perform caching? Defaults to true.
+    attr_reader :cache
+    # File location where you want to store the cache
+    attr_reader :cache_file
+    # How many words you want to store in the cache?
+    attr_reader :cache_size
+    # Object for storing + retrieving cache data from persistent storage
+    attr_reader :cache_store
+    # Your dictionary of words. Defaults to /usr/share/dict/words.
+    attr_reader :corpus_file
+    # Where should your database be stored on disk?
+    attr_reader :data_file
+    # Object for storing + retrieving n-gram data from persistent storage
+    attr_reader :data_store
+    # The database of "grams" (word/count combinations), stored on the disk and
+    # loaded into this variable in memory when generating words.
+    attr_reader :grams
+    # Number of n-grams to compute for your database. Defaults to 2
+    attr_reader :gram_size
+    # Max generated word length. Defaults to 16
+    attr_reader :max_length
+    # Minimum generated word length. Defaults to 3. NOTE: If your corpus size
+    # is very small (<1000 words or so), it's hard to guarantee a min_length
+    # because so many n-grams will have no association, which terminates word
+    # generation.
+    attr_reader :min_length
+    # Create a new "Words" object
+    # @param opts [Hash] options sent to the object. Any of the object
+    #   attributes (eg `:cache_file` or `:gram_size`) are valid parameters to
+    #   add to the `opts` hash.
+    # @return [Words] A `MarkovWords::Words` object.
+    def initialize(opts = {})
+      @grams = nil
+      @gram_size = opts.fetch :gram_size, 2
+      @max_length = opts.fetch :max_length, 16
+      @min_length = opts.fetch :min_length, 3
+      initialize_cache(opts)
+      initialize_data(opts)
+    end
+    # "Top off" the cache of stored words, and ensure that it's at
+    # `@cache_size`. If `@cache` is set to `false`, returns an empty array.
+    # @return [Array<String>] All words in the cache.
+    def refresh_cache
+      if @cache
+        words_array = @cache_store.retrieve_data
+        words_array << generate_word while words_array.length < @cache_size
+        @cache_store.store_data words_array
+        words_array
+      else
+        []
+      end
+    end
+    # Generate a new word, or return one from the cache if available.
+    # @return [String] The word.
+    def word
+      if @cache
+        load_word_from_cache
+      else
+        generate_word
+      end
+    end
+    private
+    def initialize_cache(opts)
+      @cache = opts.fetch :cache, true
+      @cache_file = opts.fetch :cache_file,
+                               "tmp/markov_words_#{@gram_size}.cache"
+      @cache_size = opts.fetch :cache_size, 70
+      @cache_store = FileStore.new(file_path: @cache_file)
+    end
+    def initialize_data(opts)
+      @corpus_file = opts.fetch :corpus_file,
+                                '/usr/share/dict/words'
+      @data_file = opts.fetch :data_file,
+                              "tmp/markov_words_#{@gram_size}.data"
+      @data_store = FileStore.new(file_path: @data_file)
+    end
+    def contains_vowel?(ary)
+      if ary.length < 2
+        true
+      else
+        ary.take(2).join.match(/[aeiou]/)
+      end
+    end
+    # Generates an English (by default) -sounding word.
+    def generate_word
+      set_grams if @grams.nil?
+      generate_gram_array(generate_word_length).join
+    end
+    def generate_gram_array(desired_length)
+      gram = ''
+      gram_array = generate_initial_gram_array
+      until gram_array.join.length == desired_length || gram.nil?
+        # grab last @gram_size (or possibly fewer if the array is too small)
+        # elements from the current gram_array, to use as the next key.
+        gal = gram_array.length
+        current_gram_size = gal >= @gram_size ? @gram_size : gal
+        key = gram_array[-current_gram_size..-1].join
+        gram = pick_random_char(@grams[key])
+        gram_array << gram
+      end
+      gram_array
+    end
+    # Set initial array of chars, which is taken from the @grams key list.
+    # must contain a vowel in the first 2 chars (unless @gram_size == 1 in
+    # which case any letter).
+    def generate_initial_gram_array
+      initial_gram_array = []
+      all_grams_array = @grams.to_a
+      gram_min_length = @gram_size < @min_length ? @gram_size : @min_length
+      until initial_gram_array.length >= gram_min_length &&
+            contains_vowel?(initial_gram_array)
+        initial_gram_array = all_grams_array.sample[0].chars
+      end
+      initial_gram_array
+    end
+    # The word must be a random length, between @min and @max
+    def generate_word_length
+      word_length = 0
+      until word_length >= @min_length
+        word_length = SecureRandom.rand(@max_length)
+      end
+      word_length
+    end
+    def load_word_from_cache
+      words_array = @cache_store.retrieve_data
+      if words_array.nil? || words_array.empty?
+        words_array = Array.new(@cache_size) { generate_word }
+      end
+      word = words_array.pop
+      cache_store.store_data words_array
+      word
+    end
+    # Generate a MarkovWords corpus from a datafile, with a given size of
+    # n-gram.  Returns a hash of "grams", which are a map of a letter to the
+    # frequency of the letters that follow it, eg: {"c" => {"a" => 1, "b" =>
+    # 2}}
+    def markov_corpus(file, gram_size)
+      grams = {}
+      # Corpus contains a list of words, separated by newlines
+      File.foreach(file) do |word|
+        word = word.downcase.delete('-')
+        gram_size.downto(1) do |current_gram_size|
+          markov_update_count! grams, word, current_gram_size
+        end
+      end
+      grams
+    end
+    # Given a database of `grams` and a `word`, and the `gram_size` (the
+    # maximum n-gram size we want to compute), update the `grams` database with
+    # entries for each n-gram combination starting at `gram_size` and going
+    # down to 1.
+    def markov_update_count!(grams, word, gram_size)
+      word.chars.each_cons(gram_size + 1) do |gram|
+        l = gram[0..gram_size - 1].join
+        r = gram[gram_size]
+        unless l.empty? || r.empty? || line_ending?(r)
+          grams[l] = {} if grams[l].nil?
+          grams[l][r] = grams[l][r].nil? ? 1 : grams[l][r] += 1
+        end
+      end
+    end
+    # Given a hash in the format: {"c" => {"a" => 1, "b" => 2}}, grab a random
+    # element from the values hash, accurate to the distribution of counts.
+    # In the example hash above, "a" would have a 33% chance of being chosen,
+    # while "b" would have a 66% chance (1/2 ratio).
+    def pick_random_char(counts_hash = {})
+      total = counts_hash.values.sum
+      pick_num = SecureRandom.rand(total)
+      counter = 0
+      counts_hash.each do |char, count|
+        counter += count
+        return char if counter >= pick_num
+      end
+    end
+    def line_ending?(word)
+      word.include?("\n")
+    end
+    def set_grams
+      grams = @data_store.retrieve_data ||
+              markov_corpus(@corpus_file, @gram_size)
+      @data_store.store_data grams unless grams == @grams
+      @grams = grams
+    end
+  end
+end

data/lib/markov_words.rb CHANGED

@@ -1,228 +1,8 @@
-require "markov_words/version"
+require 'markov_words/version'
+require 'markov_words/words'
+require 'markov_words/file_store'
 require 'securerandom'
 # @author Donald L. Merand
-# A nice library for generating random words (not sentences) using Markov
-# chains.
 module MarkovWords
-  # This class takes care of word generation, caching, and data storage.
-  class Words
-    # Perform caching? Defaults to true.
-    attr :cache
-    # File location where you want to store the cache
-    attr :cache_file
-    # How many words you want to store in the cache?
-    attr :cache_size
-    # Your dictionary of words. Defaults to /usr/share/dict/words.
-    attr :corpus_file
-    # Where should your database be stored on disk?
-    attr :data_file
-    # The database of "grams" (word/count combinations), stored on the disk and
-    # loaded into this variable in memory when generating words.
-    attr :grams
-    # Number of n-grams to compute for your database. Defaults to 2
-    attr :gram_size
-    # Max generated word length. Defaults to 16
-    attr :max_length
-    # Minimum generated word length. Defaults to 3. NOTE: If your corpus size
-    # is very small (<1000 words or so), it's hard to guarantee a min_length
-    # because so many n-grams will have no association, which terminates word
-    # generation.
-    attr :min_length
-    # Create a new "Words" object
-    # @param opts [Hash] options sent to the object. Any of the object
-    #   attributes (eg `:cache_file` or `:gram_size`) are valid parameters to
-    #   add to the `opts` hash.
-    # @return [Words] A `MarkovWords::Words` object.
-    def initialize(opts = {})
-      @gram_size = opts.fetch :gram_size, 2
-      @max_length = opts.fetch :max_length, 16
-      @min_length = opts.fetch :min_length, 3
-      @cache = opts.fetch :cache, true
-      @cache_file = opts.fetch :cache_file,
-        "tmp/markov_words_#{@gram_size}.cache"
-      @cache_size = opts.fetch :cache_size, 70
-      @corpus_file = opts.fetch :corpus_file,
-        '/usr/share/dict/words'
-      @data_file = opts.fetch :data_file,
-        "tmp/markov_words_#{@gram_size}.data"
-      @grams = nil
-    end
-    # "Top off" the cache of stored words, and ensure that it's at
-    # `@cache_size`. If `@cache` is set to `false`, returns an empty array.
-    # @return [Array<String>] All words in the cache.
-    def refresh_cache
-      if @cache
-        words_array = load_from_file(@cache_file) || []
-        while words_array.length < @cache_size
-          words_array << generate_word
-        end
-        save_to_file(@cache_file, words_array)
-        words_array
-      else
-        []
-      end
-    end
-    # Generate a new word, or return one from the cache if available.
-    # @return [String] The word.
-    def word
-      if @cache
-        load_word_from_cache
-      else
-        generate_word
-      end
-    end
-    private
-    def contains_vowel?(ary)
-      if ary.length < 2
-        true
-      else
-        ary.take(2).join.match(/[aeiou]/)
-      end
-    end
-    # Generates an English (by default)- sounding word.
-    def generate_word
-      set_grams if @grams.nil?
-      gram = ''
-      gram_array = []
-      # The word must be a random length, between @min and @max
-      desired_length = 0
-      until desired_length >= @min_length
-        desired_length = SecureRandom.rand(@max_length)
-      end
-      # Set initial array of chars, which is taken from the @grams key list. must
-      # contain a vowel in the first 2 chars (unless @gram_size == 1 in which
-      # case any letter).
-      all_grams_array = @grams.to_a
-      gram_min_length = @gram_size < @min_length ? @gram_size : @min_length
-      until gram_array.length >= gram_min_length && contains_vowel?(gram_array)
-        gram_array = all_grams_array.sample[0].chars
-      end
-      until gram_array.join.length == desired_length || gram.nil?
-        # grab last @gram_size (or possibly fewer if the array is too small)
-        # elements from the current gram_array, to use as the next key.
-        gal = gram_array.length
-        current_gram_size = gal >= @gram_size ? @gram_size : gal
-        key = gram_array[-current_gram_size..-1].join
-        gram = pick_random_char(@grams[key])
-        gram_array << gram
-      end
-      gram_array.join
-    end
-    def generate_words_array
-      @cache_size.times.map { generate_word }
-    end
-    def load_from_file(file)
-      result = nil
-      if File.exist?(file)
-        File.open(file, 'r') {|f| result = Marshal.load(f)}
-      end
-      result
-    end
-    def load_word_from_cache
-      words_array = load_from_file(@cache_file)
-      if words_array.nil? || words_array.empty?
-        words_array = generate_words_array
-      end
-      word = words_array.pop
-      save_to_file(@cache_file, words_array)
-      word
-    end
-    # Generate a MarkovWords corpus from a datafile, with a given size of n-gram.
-    # Returns a hash of "grams", which are a map of a letter to the frequency of
-    # the letters that follow it, eg: {"c" => {"a" => 1, "b" => 2}}
-    def markov_corpus(file, gram_size)
-      grams = {}
-      # Corpus contains a list of words, separated by newlines
-      File.foreach(file) do |word|
-        word = word.downcase.gsub(/-/, '')
-        gram_size.downto(1) do |current_size|
-          word.chars.each_cons(current_size + 1) do |gram|
-            first = gram[0..current_size - 1].join
-            second = gram[current_size]
-            unless first.empty? || second.empty? || is_line_ending?(second)
-              update_count(grams, first, second)
-            end
-          end
-        end
-      end
-      grams
-    end
-    # Given a hash in the format: {"c" => {"a" => 1, "b" => 2}}, grab a random
-    # element from the values hash, accurate to the distribution of counts.
-    # In the example hash above, "a" would have a 33% chance of being chosen,
-    # while "b" would have a 66% chance (1/2 ratio).
-    def pick_random_char(counts_hash = {})
-      if counts_hash.nil?
-        return nil
-      else
-        total = counts_hash.values.sum
-        pick_num = SecureRandom.rand(total)
-        counter = 0
-        counts_hash.each do |char, count|
-          counter += count
-          return char if counter >= pick_num
-        end
-      end
-    end
-    def is_line_ending?(word)
-      word.include?("\n")
-    end
-    # Marshal a Ruby object to file storage
-    def save_to_file(file, data)
-      File.open(file, 'wb') {|f| Marshal.dump(data, f)}
-    end
-    def set_grams
-      if File.exist? @data_file
-        @grams = load_from_file(@data_file)
-      else
-        @grams = markov_corpus(@corpus_file, @gram_size)
-        save_to_file(@data_file, @grams)
-      end
-    end
-    # Given a @grams entry, update the count of "second" in "first"
-    #
-    # Example:
-    #     update_count({"a" => {"b" => 1}}, "a", "b")
-    #     => {"a" => {"b" => 2}}
-    def update_count(grams, first, second)
-      grams[first] = {} if grams[first].nil?
-      if grams[first][second].nil?
-        grams[first][second] = 1
-      else
-        grams[first][second] += 1
-      end
-    end
-  end
 end

data/markov_words.gemspec CHANGED

@@ -1,29 +1,28 @@
-lib = File.expand_path("../lib", __FILE__)
+lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
-require "markov_words/version"
+require 'markov_words/version'
 Gem::Specification.new do |spec|
-  spec.name = "markov_words"
+  spec.name = 'markov_words'
   spec.version = MarkovWords::VERSION
-  spec.authors = ["Donald Merand"]
-  spec.email = ["dmerand@explo.org"]
+  spec.authors = ['Donald Merand']
+  spec.email = ['dmerand@explo.org']
-  spec.summary = %q{Generate words (not sentences) using Markov-chain techniques.}
-  spec.description = %q{It's often nice to have random English-sounding words, eg. for password generators. This library uses Markov-chain techniques on words, as opposed to many others which focus on sentences. }
-  spec.homepage = "https://github.com/exploration/markov_words"
-  spec.license = "MIT"
+  spec.summary = %{Generate words (not sentences) using Markov-chain techniques.}
+  spec.description = %{It's often nice to have random English-sounding words, eg. for password generators. This library uses Markov-chain techniques on words, as opposed to many others which focus on sentences.}
+  spec.homepage = 'https://github.com/exploration/markov_words'
+  spec.license = 'MIT'
   spec.files = `git ls-files -z`.split("\x0").reject do |f|
     f.match(%r{^(test|spec|features)/})
   end
-  spec.bindir = "exe"
+  spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
-  spec.require_paths = ["lib"]
+  spec.require_paths = ['lib']
-  spec.add_development_dependency "bundler", "~> 1.16"
-  spec.add_development_dependency "rake", "~> 10.0"
-  spec.add_development_dependency "minitest", "~> 5.0"
-  spec.add_development_dependency "pry", "~> 0.11"
-  spec.add_development_dependency "yard", "~> 0.6"
+  spec.add_development_dependency 'bundler', '~> 1.16'
+  spec.add_development_dependency 'minitest', '~> 5.0'
+  spec.add_development_dependency 'pry', '~> 0.11'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'yard', '~> 0.6'
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: markov_words
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.1
 platform: ruby
 authors:
 - Donald Merand
@@ -25,47 +25,47 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '1.16'
 - !ruby/object:Gem::Dependency
-  name: rake
+  name: minitest
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '5.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '5.0'
 - !ruby/object:Gem::Dependency
-  name: minitest
+  name: pry
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '5.0'
+        version: '0.11'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '5.0'
+        version: '0.11'
 - !ruby/object:Gem::Dependency
-  name: pry
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.11'
+        version: '10.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.11'
+        version: '10.0'
 - !ruby/object:Gem::Dependency
   name: yard
   requirement: !ruby/object:Gem::Requirement
@@ -80,9 +80,9 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.6'
-description: 'It''s often nice to have random English-sounding words, eg. for password
+description: It's often nice to have random English-sounding words, eg. for password
   generators. This library uses Markov-chain techniques on words, as opposed to many
-  others which focus on sentences. '
+  others which focus on sentences.
 email:
 - dmerand@explo.org
 executables: []
@@ -101,7 +101,9 @@ files:
 - bin/console
 - bin/setup
 - lib/markov_words.rb
+- lib/markov_words/file_store.rb
 - lib/markov_words/version.rb
+- lib/markov_words/words.rb
 - markov_words.gemspec
 homepage: https://github.com/exploration/markov_words
 licenses: