loremarkov 0.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 60d48131f1f7b613839be5b724082fa40f950908
4
+ data.tar.gz: a2d5550f2d8e27f3d64b795c735c7c91509adab7
5
+ SHA512:
6
+ metadata.gz: 965b3e1b980b830abe8278da573331426a1fc6c8fc0fc63ac16f24fcb1914586cceff4f5ab2058077cc73a2cf7e8cdabc945e3ab3b865abc3308af14d64007ad
7
+ data.tar.gz: 36445f11de26ce86a6e18bfcfc90727281fabb5fb23fd6cc3fa3abd714df81bd93cb02b96954a00e7d6d814f3d7a617c53837eebdd7918c777f386e38cca07ed
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ * Based on Kernighan & Pike's "The Practice of Programming", Chapter 3
2
+
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
require 'buildar'

# Configure Buildar (third-party) with the gemspec and version file paths.
Buildar.new { |config|
  config.gemspec_file = 'loremarkov.gemspec'
  config.version_file = 'VERSION'
}

# task default: %w[test bench]

require 'rake/testtask'

# One Rake::TestTask per row: task name, description, file glob.
# (t.warning = true is intentionally left disabled for both tasks.)
[
  ["test",  "Run tests",      "test/test_*.rb"],
  ["bench", "Run benchmarks", "test/bench_*.rb"],
].each do |task_name, description, glob|
  desc description
  Rake::TestTask.new do |t|
    t.name = task_name
    t.pattern = glob
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0.1
data/bin/destroy ADDED
@@ -0,0 +1,20 @@
1
#!/usr/bin/env ruby

# Command-line front end: read a text file, build a markov chain from it and
# print the generated gibberish.
#
#   destroy filename [num_prefixes]

# Print an optional error line followed by the usage text, then exit with
# status 1.
def usage(msg = nil)
  puts "ERROR: #{msg}" if msg
  puts <<EOF
USAGE:
  destroy filename [num_prefixes]
EOF
  exit 1
end

require 'loremarkov'

$stdout.sync = true

filename = ARGV.first || usage("provide an input file")

# Optional second argument: number of words per prefix (default 5).
# Integer(..., 10) rejects non-numeric input instead of silently turning it
# into 0 the way String#to_i does.
num_prefixes =
  begin
    ARGV[1] ? Integer(ARGV[1], 10) : 5
  rescue ArgumentError
    usage "num_prefixes must be an integer"
  end
usage "num_prefixes must not be negative" if num_prefixes < 0

# Report an unreadable or missing file as a usage error, not a backtrace.
text =
  begin
    File.read(filename)
  rescue SystemCallError => e
    usage "cannot read #{filename}: #{e.message}"
  end

puts Loremarkov.new(num_prefixes).destroy(text)
data/lib/loremarkov.rb ADDED
@@ -0,0 +1,106 @@
1
# Markov-chain text generator: analyze a text into a table mapping each
# prefix (a fixed-length sequence of lexed words) to the words that followed
# it, then walk that table at random to produce new text.
class Loremarkov
  # Single-character delimiters; each one is emitted as its own word by lex.
  TOKENS = ["\n", "\t", ' ', "'", '"'].freeze

  # Decompose text into an array of words, including and delimited by TOKENS
  # e.g. "Hello", he said.
  #      # => ['"', 'Hello', '"', ',', ' ', 'he', ' ', 'said.',]
  # This operation can be losslessly reversed by calling #join on the
  # resulting array.
  # i.e. lex(str).join == str
  #
  # Iterates by character rather than by byte, so multibyte text keeps its
  # encoding and the round-trip guarantee also holds for non-ASCII input.
  #
  # str    - the String to split
  # tokens - Array of single-character delimiter Strings (default TOKENS)
  #
  # Returns an Array of Strings.
  def self.lex(str, tokens = TOKENS)
    final_ary = []
    word = ''.dup
    str.each_char do |char|
      # either a token (thereby ending the current word)
      # or part of the current word
      if tokens.include?(char)
        final_ary << word unless word.empty?
        final_ary << char
        word = ''.dup
      else
        word << char
      end
    end
    final_ary << word unless word.empty?
    final_ary
  end

  # Generate a markov data structure
  # Arrays of strings for keys and values
  # Keys are prefixes -- ordered word sequences of constant length
  # Values are an accumulation of the next word after the prefix, however
  # many times it may occur.
  # e.g. If a prefix occurs twice, then the value will be
  #      an array of two words -- possibly the same word twice.
  #
  # text             - the String to analyze
  # num_prefix_words - number of words per prefix (non-negative Integer)
  #
  # Returns a Hash of Array (prefix) => Array (following words). The final
  # prefix of the text maps to a nil entry, signifying EOF. The Hash is empty
  # when the text has fewer words than num_prefix_words.
  def self.analyze(text, num_prefix_words)
    markov = {}
    words = lex(text)

    # Go through the possible valid prefixes.
    # Adding 1 gives the final key: the last *num_prefix_words* words, whose
    # follower is nil.
    (words.length - num_prefix_words + 1).times do |i|
      prefix_words = words[i, num_prefix_words]
      # start an empty follower list on a new prefix, then add the target
      # word, which will be nil on the last iteration
      (markov[prefix_words] ||= []) << words[i + num_prefix_words]
    end
    markov
  end

  # Given the entire text, return its first num_prefix_words words while
  # lexing only a leading chunk of it.
  #
  # The initial chunk size is a heuristic. If the chunk does not yield more
  # than num_prefix_words words, the last word may have been cut off at the
  # chunk boundary, so the chunk is doubled until it does or until it covers
  # the whole text.
  #
  # Returns an Array of at most num_prefix_words Strings.
  def self.start_prefix(text, num_prefix_words)
    char_per_word = 20
    token_frequency = 0.5
    min_length = 60
    length = [char_per_word * (num_prefix_words * (1 - token_frequency)).ceil, min_length].max
    loop do
      words = lex(text[0, length])
      # Only the last lexed word can be truncated, so having one extra word
      # guarantees the first num_prefix_words are complete.
      if words.length > num_prefix_words || length >= text.length
        return words[0, num_prefix_words]
      end
      length *= 2
    end
  end

  attr_reader :markov

  # num_prefix_words - number of words per prefix used for analysis and
  #                    generation
  def initialize(num_prefix_words)
    @num_prefix_words = num_prefix_words
    @markov = {}
  end

  # Analyze text and fold the result into this instance's markov table.
  # text should have a definite end, not just a convenient buffer split
  #
  # Followers for a prefix that is already known are appended rather than
  # replaced, so several texts accumulate.
  #
  # Returns the updated markov Hash.
  def analyze(text)
    fresh = self.class.analyze(text, @num_prefix_words)
    @markov.merge!(fresh) { |_prefix, known, added| known + added }
  end

  # given a prefix, give me the next word
  #
  # Returns a randomly sampled follower, or nil when the sampled follower is
  # the EOF marker or the prefix is unknown.
  def generate_one(prefix_words)
    followers = @markov[prefix_words]
    followers && followers.sample
  end

  # given the start prefix, generate words until EOF
  #
  # The argument is copied, so the caller's array is left untouched.
  #
  # Returns the generated text as a String.
  def generate_all(start_prefix_words)
    words = start_prefix_words.dup
    while (tmp = generate_one(words[-1 * @num_prefix_words, @num_prefix_words]))
      words << tmp
    end
    words.join
  end

  # do it, you know you want to
  #
  # Analyze text, then generate from its own opening prefix.
  #
  # Returns the generated String.
  def destroy(text)
    analyze(text)
    generate_all(self.class.start_prefix(text, @num_prefix_words))
  end
end
@@ -0,0 +1,22 @@
1
# Gem specification for loremarkov.
Gem::Specification.new do |spec|
  spec.name        = 'loremarkov'
  # The version string is read from the VERSION file next to this gemspec.
  spec.version     = File.read(File.join(__dir__, 'VERSION')).chomp
  spec.summary     = "Lorem ipsum and more: create your own filler text"
  spec.description = "Text goes in, markov gibberish comes out"
  spec.authors     = ["Rick Hull"]
  spec.homepage    = 'https://github.com/rickhull/loremarkov'
  spec.license     = 'GPL'

  spec.required_ruby_version = "~> 2"

  # Files shipped in the gem; bin/destroy is installed as an executable.
  spec.files = %w[
    loremarkov.gemspec
    VERSION
    Rakefile
    README.md
    lib/loremarkov.rb
    bin/destroy
  ]
  spec.executables = ['destroy']

  spec.add_development_dependency "buildar", "~> 2"
  spec.add_development_dependency "minitest", "~> 5"
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: loremarkov
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Rick Hull
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: buildar
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5'
41
+ description: Text goes in, markov gibberish comes out
42
+ email:
43
+ executables:
44
+ - destroy
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - README.md
49
+ - Rakefile
50
+ - VERSION
51
+ - bin/destroy
52
+ - lib/loremarkov.rb
53
+ - loremarkov.gemspec
54
+ homepage: https://github.com/rickhull/loremarkov
55
+ licenses:
56
+ - GPL
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '2'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project:
74
+ rubygems_version: 2.2.2
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: 'Lorem ipsum and more: create your own filler text'
78
+ test_files: []