markovfun 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in markovfun.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Maria Pacana
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Markovfun
2
+
3
+ This gem generates sentences from textfiles using trigrams.
4
+ It is based on Alex Rudnick's Python Markov chain generator,
5
+ the code for which is [here](https://github.com/alexrudnick/hackerschool-demos/tree/master/ngrams).
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ gem 'markovfun'
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install markovfun
24
+
25
+ ## Usage
26
+
27
+ Here's how you can generate a sentence from a text file.
28
+
29
+ ```
30
+ sentences = Markovfun.get_sentences("bible.txt")
31
+ counts = Markovfun.buildcounts(sentences)
32
+ probs = Markovfun.counts_to_probs(counts)
33
+ Markovfun.sentence_from_probs_hash(probs)
34
+ ```
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,3 @@
1
module Markovfun
  # Version of the gem. Frozen so the shared constant cannot be mutated in
  # place by code that reads it.
  VERSION = "0.0.1".freeze
end
data/lib/markovfun.rb ADDED
@@ -0,0 +1,123 @@
1
+ require "markovfun/version"
2
+ require 'pry'
3
+
4
# Trigram-based Markov chain sentence generator.
#
# A "sentence" is an Array of word Strings. Inside the model, nil marks the
# two positions before the first word and the position after the last word.
module Markovfun

  # Generates a sentence, given a file.
  #
  # filename - path of the text file used as the corpus.
  #
  # Returns the generated sentence as a String (sentence_from_probs_hash also
  # prints it).
  def self.sentence_from_file(filename)
    sentences = get_sentences(filename)
    counts = buildcounts(sentences)
    probs = counts_to_probs(counts)
    sentence_from_probs_hash(probs)
  end

  # Gets lines from a file.
  #
  # filename - path of the text file to read.
  #
  # Returns an Array with one entry per line, each entry being the Array of
  # whitespace-separated words on that line.
  def self.get_lines(filename)
    # File.read opens and closes the file itself, so no handle is leaked if
    # reading raises.
    File.read(filename).split("\n").map { |line| line.strip.split(" ") }
  end

  # Gets sentences from a file.
  #
  # The text is split on "."; double quotes are removed. A fragment is kept
  # only if it contains at least one word and its first word is unchanged by
  # String#capitalize. Every kept sentence gets a trailing "." token.
  #
  # filename - path of the text file to read.
  #
  # Returns an Array of sentences (always an Array, possibly empty).
  def self.get_sentences(filename)
    text = File.read(filename).delete('"')
    # split(" ") splits on any run of whitespace, newlines included, so words
    # on either side of a line break stay separate words.
    word_lists = text.split(".").map { |fragment| fragment.split(" ") }
    kept = word_lists.select do |words|
      !words.empty? && words[0].capitalize == words[0]
    end
    # Non-bang select/map: select! returns nil when nothing was removed.
    kept.map { |words| words + ["."] }
  end

  # Returns a counts hash, given a list of sentences.
  # The keys to the hash are all observed combinations of [prev2, prev1],
  # where prev2 and prev1 are the two previous words.
  # The values are hashes, in which the keys are words (cur) that have followed
  # prev2 and prev1, and the values are the number of occurrences.
  #
  # sentences - Array of sentences (Arrays of words).
  def self.buildcounts(sentences)
    counts_hash = {}
    sentences.each do |sent|
      # nil denotes the beginnings and ends of sentences
      padded = [nil, nil] + sent + [nil]
      # each_cons yields only complete trigrams. Array#zip would pad the two
      # trailing tuples with nil and record transitions that never occurred.
      padded.each_cons(3) do |prev2, prev1, cur|
        followers = (counts_hash[[prev2, prev1]] ||= {})
        followers[cur] = followers.fetch(cur, 0) + 1
      end
    end
    counts_hash
  end

  class << self
    # The README refers to buildcounts as get_counts; accept both names.
    alias_method :get_counts, :buildcounts
  end

  # Generates a probability hash, given a counts hash.
  # Similar to counts_hash, except containing the probability that a word
  # follows two preceding words (as opposed to number of occurrences).
  #
  # counts_hash - Hash as returned by buildcounts.
  def self.counts_to_probs(counts_hash)
    probs_hash = {}
    counts_hash.each do |prev, cur_freq|
      # Sum once per context instead of once per follower.
      total = cur_freq.values.reduce(0, :+).to_f
      probs_hash[prev] = {}
      cur_freq.each { |cur, freq| probs_hash[prev][cur] = freq / total }
    end
    probs_hash
  end

  # Generates a sample word, given a probability hash.
  #
  # probs_hash - Hash of word => probability for a single context, or nil
  #              when the context was never observed.
  #
  # Returns the sampled word. Returns nil when the end-of-sentence marker is
  # drawn or when there is nothing to sample from.
  def self.sample_word(probs_hash)
    return nil if probs_hash.nil? || probs_hash.empty?

    score = rand
    probs_hash.each do |word, prob|
      return word if score < prob
      score -= prob
    end
    # Reached only if the probabilities did not cover the drawn score (for
    # example through floating-point rounding). Fall back to the last word
    # rather than returning the hash that `each` evaluates to.
    probs_hash.keys.last
  end

  # Generates a sample sentence, given a probability hash.
  #
  # probs_hash - Hash as returned by counts_to_probs.
  #
  # Returns the sentence as an Array of words. Sampling stops as soon as
  # sample_word returns nil.
  def self.sample_sentence(probs_hash)
    prev2 = nil
    prev1 = nil
    out = []

    loop do
      cur = sample_word(probs_hash[[prev2, prev1]])
      return out if cur.nil?

      out << cur
      prev2 = prev1
      prev1 = cur
    end
  end

  # Scores a sentence, depending on the likelihood that it occurs
  # within a corpus.
  #
  # sent  - sentence as an Array of words.
  # probs - Hash as returned by counts_to_probs.
  #
  # Returns the summed surprisal in bits (-log2 of each trigram probability)
  # as a Float; Float::INFINITY if any trigram is absent from probs.
  def self.score_sentence(sent, probs)
    total_surprise = 0.0
    padded = [nil, nil] + sent + [nil]

    padded.each_cons(3) do |prev2, prev1, cur|
      followers = probs[[prev2, prev1]]
      prob = followers && followers[cur]
      # A trigram that was never observed has probability zero.
      return Float::INFINITY if prob.nil?

      total_surprise -= Math.log(prob, 2)
    end
    total_surprise
  end

  # Generates a sentence from a probability hash.
  #
  # Samples sentences until one has at least min_length tokens and a score of
  # at most max_score, then prints its score and text.
  # NOTE: there is no retry limit, so this does not return if the model
  # cannot produce a sentence that meets both conditions.
  #
  # probs      - Hash as returned by counts_to_probs.
  # max_score  - highest accepted score_sentence value (default 30).
  # min_length - lowest accepted number of tokens (default 4).
  #
  # Returns the sentence as a String.
  # Raises ArgumentError if probs has no sentence-start context.
  def self.sentence_from_probs_hash(probs, max_score = 30, min_length = 4)
    starts = probs[[nil, nil]]
    if starts.nil? || starts.empty?
      raise ArgumentError, "probability hash has no sentence-start entries"
    end

    sent = nil
    score = nil
    loop do
      sent = sample_sentence(probs)
      score = score_sentence(sent, probs)
      break if sent.length >= min_length && score <= max_score
    end

    puts "score: #{score}"
    # Drop the final token and close the text with a period instead.
    sent = sent[0..-2].join(" ") + "."
    puts sent
    sent
  end

end
data/markovfun.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
# Make this gem's lib/ directory loadable so the version file can be required.
lib_dir = File.expand_path("lib", File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "markovfun/version"

# Gem specification for markovfun; this must remain the file's last expression.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = "markovfun"
  gem.version     = Markovfun::VERSION
  gem.license     = "MIT"
  gem.homepage    = "https://github.com/mariapacana/markovfun"
  gem.authors     = ["Maria Pacana"]
  gem.email       = ["maria.pacana@gmail.com"]
  gem.summary     = "Generates sentences using markov chains!"
  gem.description = "Generates sentences using markov chains!"

  # Package every file tracked by git; derive executables and tests from it.
  tracked = `git ls-files`.split($/)
  gem.files         = tracked
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Tooling needed only while developing the gem.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: markovfun
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Maria Pacana
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-12-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Generates sentences using markov chains!
47
+ email:
48
+ - maria.pacana@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE.txt
56
+ - README.md
57
+ - Rakefile
58
+ - lib/markovfun.rb
59
+ - lib/markovfun/version.rb
60
+ - markovfun.gemspec
61
+ homepage: https://github.com/mariapacana/markovfun
62
+ licenses:
63
+ - MIT
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ! '>='
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 1.8.24
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: Generates sentences using markov chains!
86
+ test_files: []
87
+ has_rdoc: