RubyGems - markovian - Versions diffs - 0.1.0 → 0.2.0 - Mend

markovian 0.1.0 → 0.2.0

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/.ruby-gemset +1 -1
data/.ruby-version +1 -1
data/.travis.yml +5 -0
data/Gemfile +1 -1
data/README.md +4 -13
data/changelog.md +12 -2
data/lib/markovian/corpus/chain.rb +52 -0
data/lib/markovian/{chain/text_compiler.rb → corpus/compiler.rb} +20 -16
data/lib/markovian/{chain → corpus}/dictionary.rb +16 -1
data/lib/markovian/{chain_set.rb → corpus.rb} +11 -2
data/lib/markovian/importers/twitter/csv_importer.rb +4 -0
data/lib/markovian/text_builder.rb +16 -15
data/lib/markovian/version.rb +1 -1
data/lib/markovian.rb +2 -3
data/markovian.gemspec +0 -1
metadata +8 -21
data/lib/markovian/chain.rb +0 -41

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 86536007f535d2ba077ed8ff3aeabf61d9ee6107
-  data.tar.gz: 61bd78dab63cafa0b0643ddb39793dec563beae5
+  metadata.gz: 1f03bf82f92106eab96196be22cb2e09eb3d022b
+  data.tar.gz: fe37c3945b518f64d617be2758aef29a5dbfc52a
 SHA512:
-  metadata.gz: e2695ad84f944bbb850e62693f3a96b4678f7d01ef54a557f58804056b6dea5f0431ca9cd21c38365676c61ae505cd931696b125b666672b9c30e42e335ec095
-  data.tar.gz: 4ba6686bba2a42acb3afc5effec994e18b168fab8c286db00afe1b401dc73cf09d9eb3f039e7aa66fa5202796795297ff1fd98d21963fd83f902ce25543b6d62
+  metadata.gz: 219a602a1d41dc3aaff820ad616d0690b0a42810f40e83e08a219537f6ba765855b2b8008e4af3fe0537570040d86b0f499222a3a9e5815a44a827dfadfe418e
+  data.tar.gz: d00c3cf9bc069f775edf0b496b5ccf79800129a50dbe3d0069b4401294393561cedbb9f3606a855ffa6081e65cf0cd3272bf3861f777d4cd8883dc9353a9a9e7

data/.gitignore CHANGED Viewed

@@ -16,3 +16,4 @@
 /tmp
 .DS_Store
 Gemfile.lock
+pkg

data/.ruby-gemset CHANGED Viewed

@@ -1 +1 @@
-markov-ahkoppel
+markovian

data/.ruby-version CHANGED Viewed

@@ -1 +1 @@
-2.2
+2.2.3

data/.travis.yml ADDED Viewed

@@ -0,0 +1,5 @@
+language: ruby
+cache: bundler
+rvm:
+  - 2.2
+  - jruby-9000

data/Gemfile CHANGED Viewed

@@ -4,7 +4,7 @@ source 'https://rubygems.org'
 gemspec
 group :development, :test do
-  gem 'byebug'
+  gem 'byebug', platform: :mri
 end
 group :test do

data/README.md CHANGED Viewed

@@ -19,20 +19,11 @@ Fuller documentation will come shortly. For now, let's see how we can use Markov
  => path_to_twitter_archive
 > importer = Markovian::Importers::Twitter::CsvImporter.new(path)
  => #<Markovian::Importers::Twitter::CsvImporter:0x007fd0ca3282a8 @path=path_to_twitter_archive>
-> tweets = importer.texts_for_markov_analysis; puts tweets.count
-14394
- => nil
-# Create a Chainset (the structure holding all the word relations)...
-> chainset = Markovian::ChainSet.new
- => #<Markovian::ChainSet:0x007fd0ca03df70 ...>
-# And add all the tweets to the Markov dictionary
-> tweets.each {|t| Markovian::Chain::TextCompiler.new(t, chainset).incorporate_into_chain}; puts "done."
-done.
- => nil
+# now assemble the corpus of tweets -- this may take a few seconds to compile
+> corpus = importer.corpus
+ => #<Markovian::Corpus:0x007fd0ca03df70 ...>
 # Now, we can build some text!
-> Markovian::TextBuilder.new("markov", chainset).construct
+> Markovian::TextBuilder.new(corpus).construct("markov")
 => "markov chains a lot better than a month, i've been here half an hour of night when you can get behind belgium for the offline train journey"
 ```

data/changelog.md CHANGED Viewed

@@ -1,8 +1,18 @@
 # CHANGELOG
+## 0.2.0
+* Rename Chainset/Chain to Corpus (better name is better)
+* Add Corpus#random_word to provide a starting place for texts
+* Rename Chain::TextCompiler to Corpus::Compiler
+* Add equality operators for Corpus/Chain/Dictionary
+* Add Twitter::CsvImporter.corpus convenience method
+* TextBuilder has a better interface now
+* Dictionary#inspect produces sane output, not the entire dictionary contents
 ## 0.1.0 and below
-* Ability to build bidirectional chainsets (pair of chains) from arrays of texts
+* Ability to build bidirectional corpora (pairs of chains) from arrays of texts
 * Ability to import Twitter archives and produce an array of tweets
-* Ability to generate Markovian texts from a chainset
+* Ability to generate Markovian texts from a corpus
 * Gem framework

data/lib/markovian/corpus/chain.rb ADDED Viewed

@@ -0,0 +1,52 @@
+require 'markovian/corpus/dictionary'
+# The Chain represents Markov info as it's being assembled or expanded from a text. To compensate
+# for small sample sizes, we track multiple chains (derived from both two-word phrases and single
+# word). Phrases are preferred, but if we can't find a match, we'll try with a single word.
+module Markovian
+  class Corpus
+    class Chain
+      def initialize
+        @one_key_dictionary = Dictionary.new
+        @two_key_dictionary = Dictionary.new
+      end
+      attr_reader :one_key_dictionary, :two_key_dictionary
+      def lengthen(word, next_word:, previous_word: nil)
+        @one_key_dictionary.push(word, next_word)
+        @two_key_dictionary.push(two_word_key(previous_word, word), next_word)
+        word
+      end
+      def next_word(word, previous_word: nil)
+        result_for_two_words(previous_word, word) || result_for_one_word(word)
+      end
+      def random_word
+        one_key_dictionary.random_word
+      end
+      def ==(other)
+        self.one_key_dictionary == other.one_key_dictionary &&
+          self.two_key_dictionary == other.two_key_dictionary
+      end
+      protected
+      def result_for_two_words(previous_word, word)
+        @two_key_dictionary.next_word(two_word_key(previous_word, word)) if previous_word
+      end
+      def result_for_one_word(word)
+        @one_key_dictionary.next_word(word)
+      end
+      # We represent the two words as a space-delimited phrase for simplicity and speed of access via
+      # hash keys.
+      def two_word_key(word1, word2)
+        "#{word1} #{word2}"
+      end
+    end
+  end
+end

data/lib/markovian/{chain/text_compiler.rb → corpus/compiler.rb} RENAMED Viewed

@@ -1,6 +1,6 @@
 require 'markovian/utils/text_splitter'
-# Given a text to analyze, this class returns a hash of Markov results: two-word phrases (two by
+# Given a piece of text, this class returns a hash of Markov results: two-word phrases (two by
 # default) pointing to an array of historical next words.
 #
 # So, for instance, the phrase "Cats are cute, cats are annoying" would map to:
@@ -17,34 +17,38 @@ require 'markovian/utils/text_splitter'
 # * Handling sentences or newlines is later -- I'm not sure the right way to do it.
 # * Capitalization is deferred for later.
 module Markovian
-  class Chain
-    class TextCompiler
+  class Corpus
+    class Compiler
       # Pass in a text, and optionally an existing Markov chain to add data to. In many cases, you
       # may be building a chain using a set of smaller texts instead of one large text (dialog,
       # for instance, or Twitter archives), and so may call this class repeatedly for elements of
       # the parent corpus.
-      attr_reader :text, :chainset
-      def initialize(text, starter_chainset = ChainSet.new)
-        @text = text
-        @chainset = starter_chainset
+      attr_reader :corpus
+      def initialize(starter_corpus = Corpus.new)
+        @corpus = starter_corpus
       end
-      def incorporate_into_chain
-        add_text_to_chain(interesting_split_text, forward_chain)
-        # to assemble backward text, we just create a chainset with all the texts reversed
+      def build_corpus(texts)
+        texts.each {|t| incorporate_text_into_corpus(t)}
+        corpus
+      end
+      def incorporate_text_into_corpus(text)
+        add_text_to_chain(split_into_components(text), forward_chain)
+        # to assemble backward text, we just create a corpus with all the texts reversed
         # that allows us to see what words precede any given word
-        add_text_to_chain(interesting_split_text.reverse, backward_chain)
-        chainset
+        add_text_to_chain(split_into_components(text).reverse, backward_chain)
+        corpus
       end
       protected
       def forward_chain
-        chainset.forward
+        corpus.forward
       end
       def backward_chain
-        chainset.backward
+        corpus.backward
       end
       def add_text_to_chain(text_elements, chain)
@@ -58,8 +62,8 @@ module Markovian
         end
       end
-      def interesting_split_text
-        @interesting_split_text ||= Utils::TextSplitter.new(text).components
+      def split_into_components(text)
+        Utils::TextSplitter.new(text).components
       end
     end
   end

data/lib/markovian/{chain → corpus}/dictionary.rb RENAMED Viewed

@@ -4,7 +4,7 @@
 #
 # The key is an opaque value, which could represent either a single word or a phrase as desired.
 module Markovian
-  class Chain
+  class Corpus
     class Dictionary
       def push(key, word)
         dictionary[key] += [word]
@@ -14,6 +14,21 @@ module Markovian
         dictionary[key].sample
       end
+      def random_word
+        dictionary.keys.sample
+      end
+      def ==(other)
+        self.dictionary == other.dictionary
+      end
+      # We override this method to avoid spitting out every single element in the dictionary if
+      # this (or any object containing it) gets inspected.
+      # See http://stackoverflow.com/questions/5771339/emulate-default-objectinspect-output.
+      def inspect
+        "#<#{self.class}:0x#{__id__.to_s(16)} @dictionary: #{dictionary.length} entries>"
+      end
       protected
       def dictionary

data/lib/markovian/{chain_set.rb → corpus.rb} RENAMED Viewed

@@ -1,10 +1,10 @@
-require 'markovian/chain'
+require 'markovian/corpus/chain'
 # This class represents a pair of chains, one going forward and one going backward. With this, we
 # can construct phrases in which the original seed word appears at any point in the text (going
 # backward to create the earlier text, forward to create the rest).
 module Markovian
-  class ChainSet
+  class Corpus
     attr_reader :forward, :backward
     def initialize
       @forward, @backward = Chain.new, Chain.new
@@ -18,5 +18,14 @@ module Markovian
       # backward goes in the opposite direction to forward
       backward.next_word(word, previous_word: following_word)
     end
+    def random_word
+      forward.random_word
+    end
+    def ==(other)
+      self.forward == other.forward &&
+        self.backward == other.backward
+    end
   end
 end

data/lib/markovian/importers/twitter/csv_importer.rb CHANGED Viewed

@@ -17,6 +17,10 @@ module Markovian
           tweet_enumerator.reject {|t| t.empty?}
         end
+        def corpus
+          Corpus::Compiler.new.build_corpus(texts_for_markov_analysis)
+        end
         protected
         def csv_enumerator

data/lib/markovian/text_builder.rb CHANGED Viewed

@@ -1,31 +1,32 @@
 require 'markovian/utils/text_splitter'
-# This class, given a seed word and a Markov chain_set, will attempt to construct a new text using
+# This class, given a Markov corpus, will attempt to construct a new text based on a given seed using
 # the Markov associations.
 module Markovian
   class TextBuilder
-    attr_reader :seed_text, :chain_set
-    def initialize(seed_text, chain_set)
-      @seed_text = seed_text
-      @chain_set = chain_set
+    attr_reader :seed_text, :corpus
+    def initialize(corpus)
+      @corpus = corpus
     end
-    def construct(length: 140, seed: default_seed, start_result_with_seed_word: false)
+    def construct(seed_text, length: 140, start_result_with_seed: false)
       # TODO: if we don't hit a result for the first pair, move backward through the original text
       # until we get something
+      seed_pair = identify_starter_text(seed_text)
       result_with_next_word(
-        previous_pair: seed,
-        result: start_result_with_seed_word ? format_result_array(seed) : nil,
+        previous_pair: seed_pair,
+        result: start_result_with_seed ? seed_text : nil,
         length: length
       )
     end
-    def default_seed
-      if split_seed_text.length >= 2
-        split_seed_text[-2..-1]
+    def identify_starter_text(raw_text)
+      seed_components = split_seed_text(raw_text)
+      if seed_components.length >= 2
+        seed_components[-2..-1]
       else
         # if we only have a one-word seed text, the previous word is nil
-        [nil, split_seed_text.first]
+        [nil, seed_components.first]
       end
     end
@@ -33,7 +34,7 @@ module Markovian
     def result_with_next_word(previous_pair:, result:, length:)
       previous_word, current_word = previous_pair
-      if next_word = chain_set.next_word(current_word, previous_word: previous_word)
+      if next_word = corpus.next_word(current_word, previous_word: previous_word)
         # we use join rather than + to avoid leading spaces, and strip to ignore leading nils or
         # empty strings
         interim_result = format_result_array([result, next_word])
@@ -56,8 +57,8 @@ module Markovian
       array_of_words.compact.map(&:strip).join(" ")
     end
-    def split_seed_text
-      @split_seed_text ||= Utils::TextSplitter.new(seed_text).components
+    def split_seed_text(seed_text)
+      Utils::TextSplitter.new(seed_text).components
     end
   end
 end

data/lib/markovian/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Markovian
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
 end

data/lib/markovian.rb CHANGED Viewed

@@ -1,7 +1,6 @@
-require 'oj'
-require 'markovian/chain_set'
 require 'markovian/text_builder'
-require 'markovian/chain/text_compiler'
+require 'markovian/corpus'
+require 'markovian/corpus/compiler'
 # importers
 require 'markovian/importers/twitter/csv_importer'

data/markovian.gemspec CHANGED Viewed

@@ -19,7 +19,6 @@ Gem::Specification.new do |spec|
   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
-  spec.add_runtime_dependency "oj"
   spec.add_development_dependency "bundler", "~> 1.7"
   spec.add_development_dependency "rake", "~> 10.0"
 end

metadata CHANGED Viewed

@@ -1,29 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: markovian
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Alex Koppel
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2015-09-09 00:00:00.000000000 Z
+date: 2015-09-14 00:00:00.000000000 Z
 dependencies:
-- !ruby/object:Gem::Dependency
-  name: oj
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -63,6 +49,7 @@ files:
 - ".rspec"
 - ".ruby-gemset"
 - ".ruby-version"
+- ".travis.yml"
 - CODE_OF_CONDUCT.md
 - Gemfile
 - LICENSE.txt
@@ -74,10 +61,10 @@ files:
 - db/seeds.rb
 - lib/.DS_Store
 - lib/markovian.rb
-- lib/markovian/chain.rb
-- lib/markovian/chain/dictionary.rb
-- lib/markovian/chain/text_compiler.rb
-- lib/markovian/chain_set.rb
+- lib/markovian/corpus.rb
+- lib/markovian/corpus/chain.rb
+- lib/markovian/corpus/compiler.rb
+- lib/markovian/corpus/dictionary.rb
 - lib/markovian/importers/twitter/csv_importer.rb
 - lib/markovian/importers/twitter/tweet.rb
 - lib/markovian/text_builder.rb
@@ -104,7 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.6
+rubygems_version: 2.4.5.1
 signing_key:
 specification_version: 4
 summary: A simple, hopefully easy-to-use Markov chain generator.

data/lib/markovian/chain.rb DELETED Viewed

@@ -1,41 +0,0 @@
-require 'markovian/chain/dictionary'
-# The Chain represents Markov info as it's being assembled or expanded from a text. To compensate
-# for small sample sizes, we track multiple chains (derived from both two-word phrases and single
-# word). Phrases are prefered, but if we can't find a match, we'll try with a single word.
-module Markovian
-  class Chain
-    def initialize
-      @one_key_dictionary = Dictionary.new
-      @two_key_dictionary = Dictionary.new
-    end
-    attr_reader :one_key_dictionary, :two_key_dictionary
-    def lengthen(word, next_word:, previous_word: nil)
-      @one_key_dictionary.push(word, next_word)
-      @two_key_dictionary.push(two_word_key(previous_word, word), next_word)
-      word
-    end
-    def next_word(word, previous_word: nil)
-      result_for_two_words(previous_word, word) || result_for_one_word(word)
-    end
-    protected
-    def result_for_two_words(previous_word, word)
-      @two_key_dictionary.next_word(two_word_key(previous_word, word)) if previous_word
-    end
-    def result_for_one_word(word)
-      @one_key_dictionary.next_word(word)
-    end
-    # We represent the two words as a space-delimited phrase for simplicity and speed of access via
-    # hash keys.
-    def two_word_key(word1, word2)
-      "#{word1} #{word2}"
-    end
-  end
-end