loremarkov 0.0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 60d48131f1f7b613839be5b724082fa40f950908
4
+ data.tar.gz: a2d5550f2d8e27f3d64b795c735c7c91509adab7
5
+ SHA512:
6
+ metadata.gz: 965b3e1b980b830abe8278da573331426a1fc6c8fc0fc63ac16f24fcb1914586cceff4f5ab2058077cc73a2cf7e8cdabc945e3ab3b865abc3308af14d64007ad
7
+ data.tar.gz: 36445f11de26ce86a6e18bfcfc90727281fabb5fb23fd6cc3fa3abd714df81bd93cb02b96954a00e7d6d814f3d7a617c53837eebdd7918c777f386e38cca07ed
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ * Based on the Markov chain algorithm from Chapter 3 of Kernighan & Pike's "The Practice of Programming"
2
+
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
require 'buildar'
require 'rake/testtask'

# Point Buildar at the gemspec and the file holding the version string.
# NOTE(review): presumably this is what provides the build/release tasks --
# confirm against the buildar documentation.
Buildar.new do |b|
  b.gemspec_file = 'loremarkov.gemspec'
  b.version_file = 'VERSION'
end

# task default: %w[test bench]

# Both tasks are plain Rake::TestTask instances that differ only in their name,
# description and file-name prefix: "test" loads test/test_*.rb and "bench"
# loads test/bench_*.rb.
{ "test" => "Run tests", "bench" => "Run benchmarks" }.each do |task_name, summary|
  desc summary
  Rake::TestTask.new(task_name) do |t|
    t.pattern = "test/#{task_name}_*.rb"
    # t.warning = true
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0.1
data/bin/destroy ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Print an optional error message followed by the usage summary, then
# terminate the process with exit status 1.
#
# Everything is written to standard error so that diagnostics never get mixed
# into generated text that a caller may be capturing from standard output.
#
# msg - optional description of what was wrong with the invocation; when given
#       it is printed first, prefixed with "ERROR: "
#
# Never returns: always exits with status 1.
def usage(msg = nil)
  $stderr.puts "ERROR: #{msg}" if msg
  $stderr.puts <<EOF
USAGE:
  destroy filename [num_prefixes]
EOF
  exit 1
end
11
+
12
+ require 'loremarkov'
13
+
14
# Command-line entry point: read the file named by the first argument, build a
# Markov table from it and print one randomly generated rendition of the text.
#
# ARGV[0] - path of the input file (required)
# ARGV[1] - number of prefix words used as the Markov state (optional, 5 when
#           omitted)

# Flush every write immediately rather than buffering output.
$stdout.sync = true

filename = ARGV.first || usage("provide an input file")

# Parse the prefix count strictly: String#to_i would silently turn garbage
# such as "abc" into 0.  Base 10 is forced so that "010" means ten, not octal.
num_prefixes =
  if ARGV[1]
    begin
      Integer(ARGV[1], 10)
    rescue ArgumentError
      usage "num_prefixes must be an integer, got #{ARGV[1].inspect}"
    end
  else
    5
  end
usage "num_prefixes must not be negative" if num_prefixes < 0

# Report a missing or unreadable file as a usage error instead of a backtrace.
begin
  text = File.read(filename)
rescue SystemCallError => e
  usage e.message
end

puts Loremarkov.new(num_prefixes).destroy(text)
data/lib/loremarkov.rb ADDED
@@ -0,0 +1,106 @@
1
# Markov-chain gibberish generator.
#
# Text is split into words and delimiter tokens (see .lex).  A table is then
# built that maps every run of +num_prefix_words+ consecutive elements to the
# elements observed immediately after that run (see .analyze).  Output is
# produced by starting from the first elements of the text and repeatedly
# drawing a random successor until the end-of-text marker (nil) is drawn.
class Loremarkov
  # Delimiter characters.  Each one ends the current word and is emitted by
  # .lex as its own single-character element.  Frozen because it is shared as
  # the default argument of .lex.
  TOKENS = ["\n", "\t", ' ', "'", '"'].freeze

  # Decompose text into an array of elements, including and delimited by
  # +tokens+.
  # e.g. "Hello", he said.
  # # => ['"', 'Hello', '"', ',', ' ', 'he', ' ', 'said.']
  # This operation can be losslessly reversed by calling #join on the resulting
  # array.
  # i.e. lex(str).join == str
  #
  # The string is walked character by character (not byte by byte) so that
  # multibyte characters stay intact and every element keeps the encoding of
  # the input.
  #
  # str    - the text to split
  # tokens - collection of single-character delimiter strings; anything that
  #          responds to #include? works
  #
  # Returns an Array of Strings; empty when +str+ is empty.
  #
  def self.lex(str, tokens = TOKENS)
    # chunk groups consecutive characters by "is this a delimiter?".  A run of
    # delimiters is emitted one character at a time; a run of anything else is
    # glued back together into a single word.
    str.each_char.chunk { |ch| tokens.include?(ch) }.flat_map do |is_token, chars|
      is_token ? chars : [chars.join]
    end
  end

  # Generate a markov data structure
  # Arrays of string for keys and values
  # Keys are prefixes -- ordered word sequence of constant length
  # Values are an accumulation of the next word after the prefix, however many
  # times it may occur.
  # e.g. If a prefix occurs twice, then the value will be
  #      an array of two words -- possibly the same word twice.
  #
  # The prefix formed by the last +num_prefix_words+ elements of the text gets
  # a nil successor, which signifies EOF.  A text with fewer elements than
  # +num_prefix_words+ yields an empty Hash.
  #
  # text             - the text to analyze
  # num_prefix_words - number of elements per prefix; must not be negative
  #                    (each_cons raises ArgumentError for a window size < 1)
  #
  # Returns a Hash of Array<String> => Array<String or nil>.
  #
  def self.analyze(text, num_prefix_words)
    markov = {}
    # Appending nil makes the final window carry the EOF marker as its
    # successor.  lex returns a fresh array, so mutating it here is safe.
    (lex(text) << nil).each_cons(num_prefix_words + 1) do |window|
      *prefix_words, next_word = window
      (markov[prefix_words] ||= []) << next_word
    end
    markov
  end

  # Return the first +num_prefix_words+ elements of the text without lexing
  # all of it.
  #
  # A conservative heuristic picks the size of the first chunk handed to lex.
  # The chunk is doubled until it holds more than +num_prefix_words+ elements
  # (which proves that none of the returned elements was cut off at the chunk
  # boundary) or until it covers the whole text.
  #
  # Returns an Array of at most +num_prefix_words+ Strings; shorter only when
  # the whole text has fewer elements than that.
  #
  def self.start_prefix(text, num_prefix_words)
    char_per_word = 20
    token_frequency = 0.5
    min_length = 60
    length = [char_per_word * (num_prefix_words * (1 - token_frequency)).ceil, min_length].max

    loop do
      words = lex(text[0, length])
      # An element followed by another element is known to be complete; once
      # the chunk spans the entire text there is nothing left to cut off.
      return words[0, num_prefix_words] if words.length > num_prefix_words || length >= text.length
      length *= 2
    end
  end

  # The accumulated prefix => successors table.
  attr_reader :markov

  # num_prefix_words - number of consecutive elements that make up one prefix
  #
  def initialize(num_prefix_words)
    @num_prefix_words = num_prefix_words
    @markov = {}
  end

  # Add the statistics of +text+ to the table.
  # text should have a definite end, not just a convenient buffer split
  #
  # Successor lists of prefixes that are already known are extended rather
  # than replaced, so several texts can be analyzed one after another.
  #
  # Returns the updated table.
  #
  def analyze(text)
    @markov.merge!(self.class.analyze(text, @num_prefix_words)) do |_prefix, known, added|
      known + added
    end
  end

  # given a prefix, give me the next word
  #
  # Returns a randomly chosen successor, or nil when EOF was drawn or when the
  # prefix is not in the table at all (for example because the analyzed text
  # was shorter than one prefix).
  #
  def generate_one(prefix_words)
    successors = @markov[prefix_words]
    successors && successors.sample
  end

  # given the start prefix, generate words until EOF
  #
  # start_prefix_words - Array of elements to begin with; it is copied, never
  #                      modified
  #
  # Returns the generated text as a String.
  #
  def generate_all(start_prefix_words)
    words = start_prefix_words.dup
    while (next_word = generate_one(words.last(@num_prefix_words)))
      words << next_word
    end
    words.join
  end

  # do it, you know you want to
  #
  # Analyzes +text+ and returns one random rendition of it that begins with
  # the same elements as the original.
  #
  def destroy(text)
    analyze(text)
    generate_all(self.class.start_prefix(text, @num_prefix_words))
  end
end
@@ -0,0 +1,22 @@
1
# Gem specification for loremarkov.
Gem::Specification.new do |s|
  # Identity.  The version string is kept in the VERSION file that sits next
  # to this gemspec, so it is read from there.
  s.name        = 'loremarkov'
  s.version     = File.read(File.join(__dir__, 'VERSION')).chomp
  s.summary     = "Lorem ipsum and more: create your own filler text"
  s.description = "Text goes in, markov gibberish comes out"
  s.authors     = ["Rick Hull"]
  s.homepage    = 'https://github.com/rickhull/loremarkov'
  s.license     = 'GPL'

  # Packaged files and the command-line executable.
  s.files = %w[
    loremarkov.gemspec
    VERSION
    Rakefile
    README.md
    lib/loremarkov.rb
    bin/destroy
  ]
  s.executables = ['destroy']

  # Requirements.
  s.required_ruby_version = "~> 2"
  s.add_development_dependency "buildar", "~> 2"
  s.add_development_dependency "minitest", "~> 5"
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: loremarkov
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Rick Hull
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: buildar
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5'
41
+ description: Text goes in, markov gibberish comes out
42
+ email:
43
+ executables:
44
+ - destroy
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - README.md
49
+ - Rakefile
50
+ - VERSION
51
+ - bin/destroy
52
+ - lib/loremarkov.rb
53
+ - loremarkov.gemspec
54
+ homepage: https://github.com/rickhull/loremarkov
55
+ licenses:
56
+ - GPL
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '2'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project:
74
+ rubygems_version: 2.2.2
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: 'Lorem ipsum and more: create your own filler text'
78
+ test_files: []