RubyGems - loremarkov - Versions diffs - 0.0.0.1 - Mend

loremarkov 0.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 60d48131f1f7b613839be5b724082fa40f950908
+  data.tar.gz: a2d5550f2d8e27f3d64b795c735c7c91509adab7
+SHA512:
+  metadata.gz: 965b3e1b980b830abe8278da573331426a1fc6c8fc0fc63ac16f24fcb1914586cceff4f5ab2058077cc73a2cf7e8cdabc945e3ab3b865abc3308af14d64007ad
+  data.tar.gz: 36445f11de26ce86a6e18bfcfc90727281fabb5fb23fd6cc3fa3abd714df81bd93cb02b96954a00e7d6d814f3d7a617c53837eebdd7918c777f386e38cca07ed

data/README.md ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ * Based on Chapter 3 of Kernighan & Pike's "The Practice of Programming"
2	+

data/Rakefile ADDED Viewed

@@ -0,0 +1,23 @@
+# Buildar is configured with the gemspec path and the VERSION file path.
+require 'buildar'
+Buildar.new { |config|
+  config.gemspec_file = 'loremarkov.gemspec'
+  config.version_file = 'VERSION'
+}
+# task default: %w[test bench]
+require 'rake/testtask'
+# The two suites differ only in task name, description and file prefix under
+# test/, so both TestTasks are defined from one table (name => description).
+{ "test" => "Run tests", "bench" => "Run benchmarks" }.each do |task_name, summary|
+  desc summary
+  Rake::TestTask.new do |task|
+    task.name = task_name
+    task.pattern = "test/#{task_name}_*.rb"
+    # task.warning = true
+  end
+end

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.0.0.1

data/bin/destroy ADDED Viewed

@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+# Command-line front end for Loremarkov: reads a text file and prints the
+# result of Loremarkov#destroy on its contents to stdout.
+#
+# Arguments:
+#   filename      - required; path of the input text
+#   num_prefixes  - optional; number of prefix words for the model (default 5)
+#
+# Print an optional error message followed by the usage text, then exit with
+# status 1.
+def usage(msg = nil)
+  puts "ERROR: #{msg}" if msg
+  puts <<EOF
+USAGE:
+  destroy filename [num_prefixes]
+EOF
+  exit 1
+end
+require 'loremarkov'
+$stdout.sync = true
+filename = ARGV.first || usage("provide an input file")
+# Integer() rejects non-numeric input, which String#to_i would silently turn
+# into 0.
+num_prefixes =
+  if ARGV[1]
+    begin
+      Integer(ARGV[1])
+    rescue ArgumentError
+      usage "num_prefixes must be an integer"
+    end
+  else
+    5
+  end
+usage "num_prefixes must not be negative" if num_prefixes < 0
+# Report an unusable path through the usage message instead of a raw
+# exception from File.read.
+usage "cannot read #{filename}" unless File.file?(filename) && File.readable?(filename)
+text = File.read(filename)
+puts Loremarkov.new(num_prefixes).destroy(text)

data/lib/loremarkov.rb ADDED Viewed

@@ -0,0 +1,106 @@
+# Markov-chain text generator. It records, for every run of
+# *num_prefix_words* consecutive tokens in the analyzed text, which tokens
+# followed that run, and generates new text by repeatedly sampling a follower
+# for the most recent run.
+class Loremarkov
+  # Single-character delimiters. Each one is emitted as its own token by .lex
+  TOKENS = ["\n", "\t", ' ', "'", '"']
+  # Decompose text into an array of tokens, including and delimited by TOKENS
+  # e.g. "Hello", he said.
+  # # => ['"', 'Hello', '"', ',', ' ', 'he', ' ', 'said.',]
+  # This operation can be losslessly reversed by calling #join on the resulting
+  # array.
+  # i.e. lex(str).join == str
+  #
+  # str    - the text to split
+  # tokens - collection of one-character delimiter strings (default TOKENS)
+  # Returns an Array of Strings.
+  #
+  def self.lex(str, tokens = TOKENS)
+    final_ary = []
+    word = ''
+    # Walk characters rather than bytes: splitting a multi-byte character into
+    # single bytes yields binary-encoded fragments, and the joined result then
+    # no longer compares equal to the input.
+    str.each_char { |c|
+      # either a token (thereby ending the current word)
+      # or part of the current word
+      #
+      if tokens.include?(c)
+        final_ary << word unless word.empty?
+        final_ary << c
+        word = ''
+      else
+        word << c
+      end
+    }
+    final_ary << word unless word.empty?
+    final_ary
+  end
+  # Generate a markov data structure
+  # Arrays of string for keys and values
+  # Keys are prefixes -- ordered word sequence of constant length
+  # Values are an accumulation of the next word after the prefix, however many
+  # times it may occur.
+  # e.g. If a prefix occurs twice, then the value will be
+  # an array of two words -- possibly the same word twice.
+  #
+  # Returns a Hash of Array => Array. It is empty when the text has fewer than
+  # num_prefix_words - 1 tokens.
+  #
+  def self.analyze(text, num_prefix_words)
+    markov = {}
+    words = lex(text)
+    # Go through the possible valid prefixes.
+    # Adding 1 gives you the final key:
+    # *num_prefix_words* words with a nil value  -- signifying EOF
+    #
+    (words.length - num_prefix_words + 1).times { |i|
+      prefix_words = (0...num_prefix_words).map { |j| words[i + j] }
+      # start an empty array on a new prefix, then add the target word,
+      # which will be nil on the last iteration
+      (markov[prefix_words] ||= []) << words[i + num_prefix_words]
+    }
+    markov
+  end
+  # Return the first *num_prefix_words* tokens of text without lexing all of
+  # it. A heuristic picks an initial window of characters to lex; the window
+  # is doubled until it yields more tokens than requested (which proves the
+  # last requested token was not cut off by the window edge) or until it
+  # covers the whole text.
+  #
+  def self.start_prefix(text, num_prefix_words)
+    char_per_word = 20
+    token_frequency = 0.5
+    min_length = 60
+    length = [char_per_word * (num_prefix_words * (1 - token_frequency)).ceil, min_length].max
+    loop do
+      words = lex(text[0, length])
+      if words.length > num_prefix_words || length >= text.length
+        return words[0, num_prefix_words]
+      end
+      length *= 2
+    end
+  end
+  # The accumulated prefix => followers table
+  attr_reader :markov
+  # num_prefix_words - how many consecutive tokens make up one prefix
+  def initialize(num_prefix_words)
+    @num_prefix_words = num_prefix_words
+    @markov = {}
+  end
+  # Add the prefixes of text to the table and return the table.
+  # text should have a definite end, not just a convenient buffer split
+  #
+  def analyze(text)
+    fresh_table = self.class.analyze(text, @num_prefix_words)
+    # A prefix already seen in earlier text keeps its followers and gains the
+    # new ones; a plain merge! would replace the earlier list.
+    @markov.merge!(fresh_table) { |_prefix, seen, fresh| seen + fresh }
+  end
+  # given a prefix, give me the next word
+  # Returns nil for EOF and also for a prefix that was never analyzed.
+  #
+  def generate_one(prefix_words)
+    followers = @markov[prefix_words]
+    followers && followers.sample
+  end
+  # given the start prefix, generate words until EOF
+  # Returns the generated text as one String; the argument is left untouched.
+  #
+  def generate_all(start_prefix_words)
+    words = start_prefix_words.dup
+    # The slice is nil while fewer than @num_prefix_words words exist;
+    # generate_one answers nil for that, which ends the loop.
+    while tmp = generate_one(words[-1 * @num_prefix_words, @num_prefix_words])
+      words << tmp
+    end
+    words.join
+  end
+  # do it, you know you want to
+  # Analyze text, then generate from its own opening prefix.
+  #
+  def destroy(text)
+    analyze(text)
+    generate_all(self.class.start_prefix(text, @num_prefix_words))
+  end
+end

data/loremarkov.gemspec ADDED Viewed

@@ -0,0 +1,22 @@
+# Packaging metadata for the loremarkov gem.
+Gem::Specification.new do |spec|
+  spec.name        = 'loremarkov'
+  # The version string is read from the VERSION file beside this gemspec.
+  spec.version     = File.read(File.join(__dir__, 'VERSION')).chomp
+  spec.summary     = "Lorem ipsum and more: create your own filler text"
+  spec.description = "Text goes in, markov gibberish comes out"
+  spec.authors     = ["Rick Hull"]
+  spec.homepage    = 'https://github.com/rickhull/loremarkov'
+  spec.license     = 'GPL'
+  spec.required_ruby_version = "~> 2"
+  # Files shipped in the gem; bin/destroy is also installed as an executable.
+  spec.files       = %w[
+    loremarkov.gemspec
+    VERSION
+    Rakefile
+    README.md
+    lib/loremarkov.rb
+    bin/destroy
+  ]
+  spec.executables = ['destroy']
+  # Needed only for building and testing, not at runtime.
+  spec.add_development_dependency "buildar", "~> 2"
+  spec.add_development_dependency "minitest", "~> 5"
+end

metadata ADDED Viewed

@@ -0,0 +1,78 @@
+--- !ruby/object:Gem::Specification
+name: loremarkov
+version: !ruby/object:Gem::Version
+  version: 0.0.0.1
+platform: ruby
+authors:
+- Rick Hull
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-12-08 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: buildar
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2'
+- !ruby/object:Gem::Dependency
+  name: minitest
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5'
+description: Text goes in, markov gibberish comes out
+email:
+executables:
+- destroy
+extensions: []
+extra_rdoc_files: []
+files:
+- README.md
+- Rakefile
+- VERSION
+- bin/destroy
+- lib/loremarkov.rb
+- loremarkov.gemspec
+homepage: https://github.com/rickhull/loremarkov
+licenses:
+- GPL
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - "~>"
+    - !ruby/object:Gem::Version
+      version: '2'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
+specification_version: 4
+summary: 'Lorem ipsum and more: create your own filler text'
+test_files: []