loremarkov 0.0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +2 -0
- data/Rakefile +23 -0
- data/VERSION +1 -0
- data/bin/destroy +20 -0
- data/lib/loremarkov.rb +106 -0
- data/loremarkov.gemspec +22 -0
- metadata +78 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 60d48131f1f7b613839be5b724082fa40f950908
|
4
|
+
data.tar.gz: a2d5550f2d8e27f3d64b795c735c7c91509adab7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 965b3e1b980b830abe8278da573331426a1fc6c8fc0fc63ac16f24fcb1914586cceff4f5ab2058077cc73a2cf7e8cdabc945e3ab3b865abc3308af14d64007ad
|
7
|
+
data.tar.gz: 36445f11de26ce86a6e18bfcfc90727281fabb5fb23fd6cc3fa3abd714df81bd93cb02b96954a00e7d6d814f3d7a617c53837eebdd7918c777f386e38cca07ed
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'buildar'

# Point Buildar at this project's gemspec and version file.
Buildar.new do |b|
  b.gemspec_file = 'loremarkov.gemspec'
  b.version_file = 'VERSION'
end

# task default: %w[test bench]

require 'rake/testtask'

# One Rake::TestTask per suite; the task name doubles as the file-name
# prefix under test/ (test/test_*.rb, test/bench_*.rb).
{
  "test"  => "Run tests",
  "bench" => "Run benchmarks",
}.each do |task_name, description|
  desc description
  Rake::TestTask.new do |t|
    t.name = task_name
    t.pattern = "test/#{task_name}_*.rb"
    # t.warning = true
  end
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0.1
|
data/bin/destroy
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Print an optional error message followed by the usage text, then exit
# with status 1.
#
# Everything is written to stderr so that stdout carries only the generated
# text, which keeps the tool safe to use in a pipeline.
#
# msg - optional String describing what went wrong (default: nil)
#
# Never returns; always terminates the process via exit(1).
def usage(msg = nil)
  warn "ERROR: #{msg}" if msg
  warn "USAGE:"
  warn "  destroy filename [num_prefix_words]"
  exit 1
end
|
11
|
+
|
12
|
+
require 'loremarkov'

# Flush every write immediately instead of buffering stdout.
$stdout.sync = true

# First argument: path of the text file to read.
filename = ARGV.first
usage "provide an input file" unless filename

# Optional second argument: number of prefix words per markov key.
# Integer() rejects garbage that String#to_i would silently turn into 0.
num_prefixes = 5
if ARGV[1]
  begin
    num_prefixes = Integer(ARGV[1], 10)
  rescue ArgumentError
    usage "num_prefix_words must be an integer, got #{ARGV[1].inspect}"
  end
  usage "num_prefix_words must not be negative" if num_prefixes < 0
end

# Report a missing or unreadable file as a usage error, not a backtrace.
begin
  text = File.read(filename)
rescue SystemCallError => e
  usage e.message
end

puts Loremarkov.new(num_prefixes).destroy(text)
|
data/lib/loremarkov.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# Markov-chain text scrambler: analyze a text into prefix => next-word
# statistics, then walk those statistics to emit similar-looking text.
class Loremarkov
  # Single characters that both delimit words and are emitted as tokens.
  TOKENS = ["\n", "\t", ' ', "'", '"'].freeze

  # Decompose text into an array of tokens, including and delimited by TOKENS
  # e.g. "Hello", he said.
  #      # => ['"', 'Hello', '"', ',', ' ', 'he', ' ', 'said.']
  # This operation can be losslessly reversed by calling #join on the
  # resulting array.
  # i.e. lex(str).join == str
  #
  # Iterates by character (not byte) so multibyte characters stay intact and
  # the round trip holds for non-ASCII text as well.
  #
  # str    - the String to split
  # tokens - Array of single-character delimiter Strings (default: TOKENS)
  #
  # Returns an Array of non-empty Strings.
  def self.lex(str, tokens = TOKENS)
    final_ary = []
    word = ''.dup # dup: stays appendable even under frozen string literals
    str.each_char { |ch|
      # either a token (thereby ending the current word)
      # or part of the current word
      if tokens.include?(ch)
        final_ary << word unless word.empty?
        final_ary << ch
        word = ''.dup
      else
        word << ch
      end
    }
    final_ary << word unless word.empty?
    final_ary
  end

  # Generate a markov data structure
  # Arrays of string for keys and values
  # Keys are prefixes -- ordered word sequence of constant length
  # Values are an accumulation of the next word after the prefix, however
  # many times it may occur.
  # e.g. If a prefix occurs twice, then the value will be
  #      an array of two words -- possibly the same word twice.
  # The final prefix of the text is followed by nil -- signifying EOF.
  #
  # text             - the String to analyze
  # num_prefix_words - Integer number of tokens per prefix key
  #
  # Returns a Hash of Array-of-String => Array-of-(String or nil).
  # The Hash is empty when text has fewer than num_prefix_words tokens.
  def self.analyze(text, num_prefix_words)
    markov = {}
    # Appending nil gives the last prefix a nil follower (the EOF marker);
    # each window is one prefix plus the token that follows it.
    (lex(text) << nil).each_cons(num_prefix_words + 1) { |window|
      *prefix_words, next_word = window
      (markov[prefix_words] ||= []) << next_word
    }
    markov
  end

  # Return the first num_prefix_words tokens of text without lexing all of it.
  #
  # Starts with a small leading chunk and doubles it until the chunk either
  # holds more than num_prefix_words tokens (which proves none of the first
  # num_prefix_words was cut off mid-word) or covers the whole text.
  #
  # text             - the String the prefix is taken from
  # num_prefix_words - Integer number of tokens wanted
  #
  # Returns an Array of at most num_prefix_words Strings.
  def self.start_prefix(text, num_prefix_words)
    char_per_word = 20
    token_frequency = 0.5
    min_length = 60
    length = [char_per_word * (num_prefix_words * (1 - token_frequency)).ceil,
              min_length].max
    loop do
      tokens = lex(text[0, length])
      if tokens.length > num_prefix_words || length >= text.length
        return tokens[0, num_prefix_words]
      end
      length *= 2
    end
  end

  attr_reader :markov

  # num_prefix_words - Integer number of tokens used for each prefix key
  def initialize(num_prefix_words)
    @num_prefix_words = num_prefix_words
    @markov = {}
  end

  # Fold the statistics of text into this instance's markov table.
  # text should have a definite end, not just a convenient buffer split
  #
  # Followers of a prefix that is already known are appended rather than
  # replaced, so analyzing several texts accumulates their statistics.
  #
  # Returns the updated markov Hash.
  def analyze(text)
    fresh = self.class.analyze(text, @num_prefix_words)
    @markov.merge!(fresh) { |_prefix, known, added| known.concat(added) }
  end

  # given a prefix, give me the next word
  #
  # Returns a randomly sampled follower String, or nil when the sample is the
  # EOF marker or the prefix has never been seen.
  def generate_one(prefix_words)
    candidates = @markov[prefix_words]
    candidates.sample if candidates
  end

  # given the start prefix, generate words until EOF
  #
  # Works on a copy, so the caller's array is left untouched.
  #
  # Returns the generated text as a single String.
  def generate_all(start_prefix_words)
    words = start_prefix_words.dup
    # The slice is nil when fewer than @num_prefix_words words exist; that is
    # an unknown prefix, so generation simply stops.
    while (tmp = generate_one(words[-@num_prefix_words, @num_prefix_words]))
      words << tmp
    end
    words.join
  end

  # do it, you know you want to
  #
  # Analyzes text, then generates from the text's own opening prefix.
  #
  # Returns the generated String.
  def destroy(text)
    analyze(text)
    generate_all(self.class.start_prefix(text, @num_prefix_words))
  end
end
|
data/loremarkov.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for loremarkov.
Gem::Specification.new do |spec|
  spec.name        = 'loremarkov'
  # The version string is the chomped contents of the VERSION file that sits
  # next to this gemspec.
  spec.version     = File.read(File.join(__dir__, 'VERSION')).chomp
  spec.summary     = "Lorem ipsum and more: create your own filler text"
  spec.description = "Text goes in, markov gibberish comes out"
  spec.authors     = ["Rick Hull"]
  spec.homepage    = 'https://github.com/rickhull/loremarkov'
  spec.license     = 'GPL'

  spec.required_ruby_version = "~> 2"

  # Files shipped in the gem.
  spec.files = %w[
    loremarkov.gemspec
    VERSION
    Rakefile
    README.md
    lib/loremarkov.rb
    bin/destroy
  ]
  spec.executables = %w[destroy]

  spec.add_development_dependency "buildar", "~> 2"
  spec.add_development_dependency "minitest", "~> 5"
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: loremarkov
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rick Hull
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: buildar
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5'
|
41
|
+
description: Text goes in, markov gibberish comes out
|
42
|
+
email:
|
43
|
+
executables:
|
44
|
+
- destroy
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- bin/destroy
|
52
|
+
- lib/loremarkov.rb
|
53
|
+
- loremarkov.gemspec
|
54
|
+
homepage: https://github.com/rickhull/loremarkov
|
55
|
+
licenses:
|
56
|
+
- GPL
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '2'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements: []
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 2.2.2
|
75
|
+
signing_key:
|
76
|
+
specification_version: 4
|
77
|
+
summary: 'Lorem ipsum and more: create your own filler text'
|
78
|
+
test_files: []
|