my_first_markov 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Rakefile +1 -0
- data/bin/my_first_markov.rb +33 -0
- data/lib/my_first_markov/chain.rb +93 -0
- data/lib/my_first_markov/version.rb +3 -0
- data/lib/my_first_markov.rb +3 -0
- data/my_first_markov.gemspec +25 -0
- data/test/sample_text.txt +1 -0
- metadata +72 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f1324ccda2ff0616ce6bd7597baf3f3149ef11d7
|
4
|
+
data.tar.gz: e49d8a1c525324edc4d9a72668221e114d96530f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: efbf5fe647b7fab8ce2c4a730628a7a6a5aa2a7cbb28b4d75275114db54649da1356609cc70b645d6806512359f0cdfb8d9ff909d78bdfd942a517fd4dd9cfd4
|
7
|
+
data.tar.gz: ce8ae7a0726c9c07ba4f428d888b69cf3db5be46d18b880bae28b2acfffda429785d218f6b49aed0d79b62b05b61eb5fb7fc7a46e5d3b7610bdd049f0edf7352
|
data/.gitignore
ADDED
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
markov
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.3.1
|
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
begin
|
4
|
+
require "rubygems"
|
5
|
+
gem "my_first_markov"
|
6
|
+
require "my_first_markov"
|
7
|
+
rescue LoadError => e
|
8
|
+
warn "LoadError: #{e.message.inspect}"
|
9
|
+
require_relative "../lib/my_first_markov"
|
10
|
+
end
|
11
|
+
|
12
|
+
# Command-line entry point. Runs only when this file is the program being
# executed (not when it is merely loaded by another script).
#
# Positional arguments, in the order they are read from ARGV:
#   0. starting entry  (required)
#   1. file            (required) path handed to MyFirstMarkov::Chain.from_file
#   2. next_method     (optional) forwarded as-is; from_file picks its default when nil
#   3. split_on        (optional) forwarded as-is; from_file picks its default when nil
if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
  starting_entry = ARGV[0]
  file = ARGV[1]
  next_method = ARGV[2]
  split_on = ARGV[3]

  # Show usage when a required argument is missing or help was requested.
  if [starting_entry, file].include?(nil) || (starting_entry =~ /(\-\-\?)|(\-\-help)/i)
    # The optional arguments are listed in the order they are actually read
    # above (next_method first, then split_on), and the program name is
    # interpolated explicitly because a bare "$0" inside a heredoc is printed
    # literally.
    msg = <<-EOH
#{$PROGRAM_NAME} <some starting entry> <file-glob of entry observations> <next_method: random_next* | most_likely_next> <split_on: word* | character>
e.g.
#{$PROGRAM_NAME} this ./test/sample_text.txt [random_next] [word]
> apple

#{$PROGRAM_NAME} a ./test/sample_text.txt [random_next] character
> p
    EOH
    warn(msg)
    exit
  end

  puts MyFirstMarkov::Chain.from_file(file, split_on, starting_entry, next_method)
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module MyFirstMarkov
  # A first-order Markov chain built from an ordered list of entries
  # (words, characters, or any objects usable as Hash keys).
  #
  # For every entry the chain remembers how many times each other entry was
  # observed directly after it, and can then answer "what comes next?" either
  # by picking the most frequently observed successor or by drawing a
  # successor at random, weighted by observation count.
  class Chain
    # @return [Array<String>] names of the supported lookup strategies
    def self.next_methods
      ["random_next", "most_likely_next"]
    end

    # @return [String] strategy used when none (or an unknown one) is given
    def self.default_next_method
      next_methods.first
    end

    # @return [Array<String>] supported ways of splitting file contents
    def self.split_on_values
      ["word", "character"]
    end

    # @return [String] split mode used when none (or an unknown one) is given
    def self.default_split_on_value
      split_on_values.first
    end

    # Builds a chain from the contents of +file+ and returns the entry that
    # follows +starting_entry+ according to +next_method+.
    #
    # @param file [String] path of the text file to read
    # @param split_on [String, nil] "word" (whitespace split) or "character";
    #   matched case-insensitively, anything else falls back to the default
    # @param starting_entry [String] entry whose successor is wanted
    # @param next_method [String, nil] "random_next" or "most_likely_next";
    #   matched case-insensitively, anything else falls back to the default
    # @return [String] the chosen successor, or "" when +starting_entry+ has
    #   no recorded successor
    # @raise [RuntimeError] when +file+ does not exist
    def self.from_file(file, split_on, starting_entry, next_method)
      split_on = normalize_option(split_on, split_on_values, default_split_on_value)
      next_method = normalize_option(next_method, next_methods, default_next_method)

      # File.exist? is used because File.exists? is gone from current Rubies.
      raise "Unknown file: #{file.inspect}" unless File.exist?(file)

      data = File.read(file)
      entries = split_on == "word" ? data.split : data.chars

      # next_method has been validated against next_methods above, so only
      # the two public lookup methods can be dispatched here.
      new(entries).public_send(next_method, starting_entry)
    end

    # Returns the lower-cased +value+ when it is one of +allowed+, otherwise
    # +default+. Works on a copy so the caller's (possibly frozen) string is
    # never mutated.
    def self.normalize_option(value, allowed, default)
      candidate = value.to_s.downcase
      allowed.include?(candidate) ? candidate : default
    end
    private_class_method :normalize_option

    # @param ordered_list [Enumerable] entries in the order they were observed
    def initialize(ordered_list)
      # entry => { following_entry => number_of_observations }
      @entries = {}
      # Every adjacent pair is one observation, including the final pair.
      ordered_list.each_cons(2) do |entry, next_entry|
        add(entry, next_entry)
      end
    end

    # Records one observation of +next_entry+ directly following +entry+.
    #
    # @return [Integer] the updated observation count for that pair
    def add(entry, next_entry)
      @entries[entry] ||= Hash.new(0)
      @entries[entry][next_entry] += 1
    end

    # @param entry [Object] entry to look up
    # @return [Object, String] a successor with the highest observation count
    #   (when several share that count, whichever one max_by yields), or ""
    #   when +entry+ has no recorded successor
    def most_likely_next(entry)
      _next(entry) do |_observation_total, next_entries_and_observations|
        # Each element is [successor, count]; keep the successor only.
        next_entries_and_observations.max_by(&:last).first
      end
    end

    # @param entry [Object] entry to look up
    # @return [Object, String] a successor drawn at random with probability
    #   proportional to its observation count, or "" when +entry+ has no
    #   recorded successor
    def random_next(entry)
      _next(entry) do |observation_total, next_entries_and_observations|
        # Threshold is uniform in 1..observation_total; walk the successors
        # until the running total of observations reaches it.
        random_threshold = rand(observation_total) + 1
        partial_observation_sum = 0

        chosen = next_entries_and_observations.find do |_next_entry, num_observations|
          partial_observation_sum += num_observations
          partial_observation_sum >= random_threshold
        end
        chosen.first # the successor, not its count
      end
    end

    private

    # Yields the total observation count and the successor => count hash for
    # +entry+ and returns the block's result; returns "" without yielding
    # when +entry+ was never seen with a successor.
    def _next(entry)
      return "" unless @entries.key?(entry)

      successors = @entries[entry]
      yield(successors.values.reduce(0, :+), successors)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "my_first_markov/version"
|
4
|
+
|
5
|
+
# Packaging manifest for the my_first_markov gem: identity, authorship,
# the file lists (taken from what git tracks) and dependencies.
Gem::Specification.new do |spec|
  # Identity
  spec.name              = "my_first_markov"
  spec.version           = MyFirstMarkov::VERSION
  spec.rubyforge_project = "my_first_markov"

  # Authorship and description
  spec.authors     = ["Jay Tee"]
  spec.email       = ["jaytee@jayteesf.com"]
  spec.homepage    = ""
  spec.summary     = %q{Markov Chain implementation}
  spec.description = %q{Pass it a list of entries (words, letters, ???) then, give it an entry and query for a "next" entry - the most likely next or perhaps some random threshold based next-entry }

  # Contents: everything git tracks, with tests and executables singled out.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  bin_paths          = `git ls-files -- bin/*`.split("\n")
  spec.executables   = bin_paths.map do |path|
    File.basename(path)
  end
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_development_dependency 'rspec'

  # specify any dependencies here; for example:
  # spec.add_runtime_dependency "rest-client"
end
|
@@ -0,0 +1 @@
|
|
1
|
+
apple this apple is a text\nthis apple was a text\nthis is a big apple text\nthis could be an apple big old apple text\nthis apple is not apple text apple
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: my_first_markov
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jay Tee
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-01-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: 'Pass it a list of entries (words, letters, ???) then, give it an entry
|
28
|
+
and query for a "next" entry - the most likely next or perhaps some random threshold
|
29
|
+
based next-entry '
|
30
|
+
email:
|
31
|
+
- jaytee@jayteesf.com
|
32
|
+
executables:
|
33
|
+
- my_first_markov.rb
|
34
|
+
extensions: []
|
35
|
+
extra_rdoc_files: []
|
36
|
+
files:
|
37
|
+
- ".gitignore"
|
38
|
+
- ".ruby-gemset"
|
39
|
+
- ".ruby-version"
|
40
|
+
- Gemfile
|
41
|
+
- Rakefile
|
42
|
+
- bin/my_first_markov.rb
|
43
|
+
- lib/my_first_markov.rb
|
44
|
+
- lib/my_first_markov/chain.rb
|
45
|
+
- lib/my_first_markov/version.rb
|
46
|
+
- my_first_markov.gemspec
|
47
|
+
- test/sample_text.txt
|
48
|
+
homepage: ''
|
49
|
+
licenses: []
|
50
|
+
metadata: {}
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
requirements: []
|
66
|
+
rubyforge_project: my_first_markov
|
67
|
+
rubygems_version: 2.5.1
|
68
|
+
signing_key:
|
69
|
+
specification_version: 4
|
70
|
+
summary: Markov Chain implementation
|
71
|
+
test_files:
|
72
|
+
- test/sample_text.txt
|