my_first_markov 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Rakefile +1 -0
- data/bin/my_first_markov.rb +33 -0
- data/lib/my_first_markov/chain.rb +93 -0
- data/lib/my_first_markov/version.rb +3 -0
- data/lib/my_first_markov.rb +3 -0
- data/my_first_markov.gemspec +25 -0
- data/test/sample_text.txt +1 -0
- metadata +72 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f1324ccda2ff0616ce6bd7597baf3f3149ef11d7
|
4
|
+
data.tar.gz: e49d8a1c525324edc4d9a72668221e114d96530f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: efbf5fe647b7fab8ce2c4a730628a7a6a5aa2a7cbb28b4d75275114db54649da1356609cc70b645d6806512359f0cdfb8d9ff909d78bdfd942a517fd4dd9cfd4
|
7
|
+
data.tar.gz: ce8ae7a0726c9c07ba4f428d888b69cf3db5be46d18b880bae28b2acfffda429785d218f6b49aed0d79b62b05b61eb5fb7fc7a46e5d3b7610bdd049f0edf7352
|
data/.gitignore
ADDED
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
markov
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.3.1
|
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for MyFirstMarkov::Chain.
#
# Positional arguments (the order the code actually reads them in):
#   1. starting entry  - the entry whose successor should be printed (required)
#   2. file            - path of the text file holding the observations (required)
#   3. next_method     - "random_next" (default) or "most_likely_next"
#   4. split_on        - "word" (default) or "character"

begin
  require "rubygems"
  gem "my_first_markov"
  require "my_first_markov"
rescue LoadError => e
  # The gem is not installed: report why, then fall back to the library that
  # sits next to this script in the source tree.
  warn "LoadError: #{e.message.inspect}"
  require_relative "../lib/my_first_markov"
end

# Only act as a command-line tool when this file is the program being run.
if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
  starting_entry, file, next_method, split_on = ARGV

  if starting_entry.nil? || file.nil? || starting_entry =~ /--\?|--help/i
    # The usage text lists the arguments in the same order they are read
    # above, and interpolates the program name instead of printing "$0".
    warn <<~EOH
      #{$PROGRAM_NAME} <some starting entry> <file of entry observations> <next_method: random_next* | most_likely_next> <split_on: word* | character>
      e.g.
      #{$PROGRAM_NAME} this ./test/sample_text.txt [random_next] [word]
      > apple

      #{$PROGRAM_NAME} a ./test/sample_text.txt [random_next] character
      > p
    EOH
    exit
  end

  puts MyFirstMarkov::Chain.from_file(file, split_on, starting_entry, next_method)
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module MyFirstMarkov
  # A first-order Markov chain over an ordered list of entries (words,
  # characters, or any other objects usable as Hash keys).
  #
  # Internally the chain is a Hash of the form
  #   { entry => { subsequent_entry => number_of_observations, ... }, ... }
  # and can be queried for the successor of an entry, either the most
  # frequently observed one or a random one weighted by observation count.
  class Chain
    # @return [Array<String>] names of the supported successor-selection methods
    def self.next_methods
      %w[random_next most_likely_next]
    end

    # @return [String] the selection method used when none (or an unknown one) is given
    def self.default_next_method
      next_methods.first
    end

    # @return [Array<String>] the supported ways of splitting file contents into entries
    def self.split_on_values
      %w[word character]
    end

    # @return [String] the split mode used when none (or an unknown one) is given
    def self.default_split_on_value
      split_on_values.first
    end

    # Builds a chain from the contents of a file and returns the entry that
    # follows +starting_entry+.
    #
    # @param file [String] path of the file to read
    # @param split_on [String, nil] "word" (split on whitespace) or "character";
    #   matched case-insensitively, anything else falls back to the default
    # @param starting_entry [String] the entry whose successor is wanted
    # @param next_method [String, nil] one of {next_methods}; matched
    #   case-insensitively, anything else falls back to the default
    # @return [String] the chosen successor, or "" when +starting_entry+ has
    #   no recorded successor
    # @raise [RuntimeError] when +file+ does not exist
    def self.from_file(file, split_on, starting_entry, next_method)
      # Normalise without mutating the caller's strings (they may be frozen).
      split_on = normalize_option(split_on, split_on_values, default_split_on_value)
      next_method = normalize_option(next_method, next_methods, default_next_method)

      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      fail("Unknown file: #{file.inspect}") unless File.exist?(file)

      data = File.read(file)
      entries = split_on == "word" ? data.split : data.chars

      # next_method has been validated against next_methods, all of which are public.
      new(entries).public_send(next_method, starting_entry)
    end

    # Returns the lower-cased +value+ when it is one of +allowed+, otherwise +default+.
    def self.normalize_option(value, allowed, default)
      candidate = value.to_s.downcase
      allowed.include?(candidate) ? candidate : default
    end
    private_class_method :normalize_option

    # @param ordered_list [Array] entries in the order they were observed
    def initialize(ordered_list)
      @entries = {}
      # Record every adjacent pair, including the final one: the last entry of
      # the list is a valid successor even though nothing follows it.
      ordered_list.each_cons(2) { |entry, next_entry| add(entry, next_entry) }
    end

    # Records one observation of +next_entry+ directly following +entry+.
    #
    # @return [Integer] the updated observation count for that pair
    def add(entry, next_entry)
      @entries[entry] ||= Hash.new(0)
      @entries[entry][next_entry] += 1
    end

    # @param entry [Object] the entry to look up
    # @return [Object] the successor observed most often after +entry+ (the
    #   first one recorded wins a tie), or "" when +entry+ has no successors
    def most_likely_next(entry)
      _next(entry) do |_observation_total, next_entries_and_observations|
        # max_by yields [next_entry, num_observations] pairs; keep the entry part.
        next_entries_and_observations.max_by { |_next_entry, num_observations| num_observations }.first
      end
    end

    # @param entry [Object] the entry to look up
    # @return [Object] a successor of +entry+ chosen at random, weighted by how
    #   often each successor was observed, or "" when +entry+ has no successors
    def random_next(entry)
      _next(entry) do |observation_total, next_entries_and_observations|
        # Draw a threshold in 1..observation_total, then walk the successors
        # until the running total of observations reaches it.
        random_threshold = rand(observation_total) + 1
        partial_observation_sum = 0

        chosen = next_entries_and_observations.find do |_next_entry, num_observations|
          partial_observation_sum += num_observations
          partial_observation_sum >= random_threshold
        end
        chosen.first # we want the "entry" not the "num_observations"
      end
    end

    private

    # Yields the total number of observations recorded after +entry+ together
    # with its { subsequent_entry => num_of_observations } hash, and returns
    # the block's result. Returns "" without yielding when +entry+ is unknown.
    def _next(entry)
      return "" unless @entries.key?(entry)

      observations = @entries[entry]
      yield(observations.values.reduce(0, :+), observations)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for my_first_markov.

# Make lib/ loadable so the version constant can be read from the source tree.
$:.push File.expand_path("../lib", __FILE__)
require "my_first_markov/version"

Gem::Specification.new do |s|
  s.name        = "my_first_markov"
  s.version     = MyFirstMarkov::VERSION
  s.authors     = ["Jay Tee"]
  s.email       = ["jaytee@jayteesf.com"]
  s.homepage    = ""
  s.summary     = %q{Markov Chain implementation}
  s.description = %q{Pass it a list of entries (words, letters, ???) then, give it an entry and query for a "next" entry - the most likely next or perhaps some random threshold based next-entry }

  # NOTE: s.rubyforge_project was dropped; the attribute is deprecated in
  # RubyGems with no replacement and only produces a warning at build time.

  # The packaged file lists come from git, so the gem must be built from a
  # git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency 'rspec'

  # specify any dependencies here; for example:
  # s.add_runtime_dependency "rest-client"
end
|
@@ -0,0 +1 @@
|
|
1
|
+
apple this apple is a text\nthis apple was a text\nthis is a big apple text\nthis could be an apple big old apple text\nthis apple is not apple text apple
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: my_first_markov
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jay Tee
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-01-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: 'Pass it a list of entries (words, letters, ???) then, give it an entry
|
28
|
+
and query for a "next" entry - the most likely next or perhaps some random threshold
|
29
|
+
based next-entry '
|
30
|
+
email:
|
31
|
+
- jaytee@jayteesf.com
|
32
|
+
executables:
|
33
|
+
- my_first_markov.rb
|
34
|
+
extensions: []
|
35
|
+
extra_rdoc_files: []
|
36
|
+
files:
|
37
|
+
- ".gitignore"
|
38
|
+
- ".ruby-gemset"
|
39
|
+
- ".ruby-version"
|
40
|
+
- Gemfile
|
41
|
+
- Rakefile
|
42
|
+
- bin/my_first_markov.rb
|
43
|
+
- lib/my_first_markov.rb
|
44
|
+
- lib/my_first_markov/chain.rb
|
45
|
+
- lib/my_first_markov/version.rb
|
46
|
+
- my_first_markov.gemspec
|
47
|
+
- test/sample_text.txt
|
48
|
+
homepage: ''
|
49
|
+
licenses: []
|
50
|
+
metadata: {}
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
requirements: []
|
66
|
+
rubyforge_project: my_first_markov
|
67
|
+
rubygems_version: 2.5.1
|
68
|
+
signing_key:
|
69
|
+
specification_version: 4
|
70
|
+
summary: Markov Chain implementation
|
71
|
+
test_files:
|
72
|
+
- test/sample_text.txt
|