loremarkov 0.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +2 -0
- data/Rakefile +23 -0
- data/VERSION +1 -0
- data/bin/destroy +20 -0
- data/lib/loremarkov.rb +106 -0
- data/loremarkov.gemspec +22 -0
- metadata +78 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 60d48131f1f7b613839be5b724082fa40f950908
|
4
|
+
data.tar.gz: a2d5550f2d8e27f3d64b795c735c7c91509adab7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 965b3e1b980b830abe8278da573331426a1fc6c8fc0fc63ac16f24fcb1914586cceff4f5ab2058077cc73a2cf7e8cdabc945e3ab3b865abc3308af14d64007ad
|
7
|
+
data.tar.gz: 36445f11de26ce86a6e18bfcfc90727281fabb5fb23fd6cc3fa3abd714df81bd93cb02b96954a00e7d6d814f3d7a617c53837eebdd7918c777f386e38cca07ed
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Build/release tasks come from Buildar; it is told where the gemspec and the
# version file live.
require 'buildar'

Buildar.new do |builder|
  builder.gemspec_file = 'loremarkov.gemspec'
  builder.version_file = 'VERSION'
end

# task default: %w[test bench]

require 'rake/testtask'

# One Rake::TestTask per row: task name, task description, file glob.
[
  ["test",  "Run tests",      "test/test_*.rb"],
  ["bench", "Run benchmarks", "test/bench_*.rb"],
].each do |task_name, description, glob|
  desc description
  Rake::TestTask.new do |task|
    task.name = task_name
    task.pattern = glob
    # task.warning = true
  end
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0.1
|
data/bin/destroy
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for Loremarkov: reads a text file and prints
# markov-generated text derived from it.
#
#   destroy filename [num_prefix_words]
#
# num_prefix_words defaults to 5 when omitted.

# Print an optional error message followed by the usage text, then exit with
# status 1.
def usage(msg = nil)
  puts "ERROR: #{msg}" if msg
  puts <<EOF
USAGE:
  destroy filename [num_prefix_words]
EOF
  exit 1
end

require 'loremarkov'

$stdout.sync = true

filename = ARGV[0]
usage "provide an input file" unless filename

num_prefixes = 5
if ARGV[1]
  begin
    # Integer() rejects junk such as "abc" instead of silently yielding 0;
    # base 10 keeps "010" from being read as octal.
    num_prefixes = Integer(ARGV[1], 10)
  rescue ArgumentError
    usage "num_prefix_words must be an integer, got #{ARGV[1].inspect}"
  end
  usage "num_prefix_words must not be negative" if num_prefixes < 0
end

begin
  text = File.read(filename)
rescue SystemCallError => e
  # Missing or unreadable file: report it as a usage error, not a backtrace.
  usage "cannot read #{filename}: #{e.message}"
end

puts Loremarkov.new(num_prefixes).destroy(text)
|
data/lib/loremarkov.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# Markov-chain text generator. It records which word follows each
# fixed-length run of words (a "prefix") in the analyzed text, then generates
# new text by repeatedly sampling a follower for the most recent prefix.
class Loremarkov
  # Single characters that end a word. Each one is also emitted as a token of
  # its own, so whitespace and quotes survive a lex/join round trip.
  TOKENS = ["\n", "\t", ' ', "'", '"']

  # Decompose text into an array of tokens, including and delimited by TOKENS
  # e.g. "Hello", he said.
  #   # => ['"', 'Hello', '"', ',', ' ', 'he', ' ', 'said.',]
  # This operation can be losslessly reversed by calling #join on the
  # resulting array.
  # i.e. lex(str).join == str
  #
  # Iterates by character rather than by byte, so multibyte characters stay
  # intact and the round trip also holds for non-ASCII text.
  #
  # str    - the String to split
  # tokens - collection of single-character delimiter strings
  #
  # Returns an Array of Strings.
  def self.lex(str, tokens = TOKENS)
    final_ary = []
    word = ''.dup
    str.each_char do |char|
      # either a token (thereby ending the current word)
      # or part of the current word
      if tokens.include?(char)
        final_ary << word unless word.empty?
        final_ary << char
        word = ''.dup
      else
        word << char
      end
    end
    final_ary << word unless word.empty?
    final_ary
  end

  # Generate a markov data structure
  # Arrays of string for keys and values
  # Keys are prefixes -- ordered word sequence of constant length
  # Values are an accumulation of the next word after the prefix, however many
  # times it may occur.
  # e.g. If a prefix occurs twice, then the value will be
  #      an array of two words -- possibly the same word twice.
  #
  # The final prefix of the text gets a nil follower, signifying EOF. A text
  # with fewer than num_prefix_words tokens yields an empty Hash.
  #
  # Returns a Hash of Array (prefix) => Array (followers).
  def self.analyze(text, num_prefix_words)
    markov = {}
    words = lex(text)

    # Go through the possible valid prefixes.
    # Adding 1 gives you the final key:
    #   *num_prefix_words* words with a nil value -- signifying EOF
    (words.length - num_prefix_words + 1).times do |i|
      prefix_words = words[i, num_prefix_words]
      # the target word is nil on the last iteration
      (markov[prefix_words] ||= []) << words[i + num_prefix_words]
    end
    markov
  end

  # Given the entire text, return its first num_prefix_words tokens without
  # lexing the whole text: only a leading chunk is lexed. The chunk is doubled
  # until it either yields more than num_prefix_words tokens (so none of the
  # tokens we return was cut off at the chunk boundary) or covers the whole
  # text.
  #
  # Returns an Array of Strings; shorter than num_prefix_words when the text
  # itself has fewer tokens.
  def self.start_prefix(text, num_prefix_words)
    char_per_word = 20
    token_frequency = 0.5
    min_length = 60
    length = [char_per_word * (num_prefix_words * (1 - token_frequency)).ceil,
              min_length].max
    loop do
      words = lex(text[0, length])
      if words.length > num_prefix_words || length >= text.length
        return words[0, num_prefix_words]
      end
      length *= 2
    end
  end

  # The accumulated prefix => followers table.
  attr_reader :markov

  # num_prefix_words - number of tokens that make up one prefix
  def initialize(num_prefix_words)
    @num_prefix_words = num_prefix_words
    @markov = {}
  end

  # Fold the markov data for text into this instance's table.
  # text should have a definite end, not just a convenient buffer split
  #
  # When a prefix is already known, the new followers are appended to the
  # existing ones rather than replacing them, so analyzing several texts
  # accumulates their statistics.
  #
  # Returns the updated table.
  def analyze(text)
    fresh = self.class.analyze(text, @num_prefix_words)
    @markov.merge!(fresh) { |_prefix, known, added| known + added }
  end

  # given a prefix, give me the next word
  #
  # Returns a randomly sampled follower, or nil when the sampled follower is
  # the EOF marker or the prefix has never been seen.
  def generate_one(prefix_words)
    followers = @markov[prefix_words]
    followers ? followers.sample : nil
  end

  # given the start prefix, generate words until EOF
  #
  # Works on a copy, so the caller's array is left untouched.
  #
  # Returns the generated text as a String, start prefix included.
  def generate_all(start_prefix_words)
    words = start_prefix_words.dup
    loop do
      next_word = generate_one(words.last(@num_prefix_words))
      break if next_word.nil?
      words << next_word
    end
    words.join
  end

  # do it, you know you want to
  #
  # Analyzes text, then generates new text that starts with the same prefix
  # as text.
  def destroy(text)
    analyze(text)
    generate_all(self.class.start_prefix(text, @num_prefix_words))
  end
end
|
data/loremarkov.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for loremarkov. The version string is read from the
# VERSION file that sits next to this gemspec.
Gem::Specification.new do |spec|
  spec.name        = 'loremarkov'
  spec.version     = File.read(File.join(__dir__, 'VERSION')).chomp
  spec.summary     = "Lorem ipsum and more: create your own filler text"
  spec.description = "Text goes in, markov gibberish comes out"
  spec.authors     = ["Rick Hull"]
  spec.homepage    = 'https://github.com/rickhull/loremarkov'
  spec.license     = 'GPL'

  spec.required_ruby_version = "~> 2"

  # Files shipped in the packaged gem.
  spec.files = %w[
    loremarkov.gemspec
    VERSION
    Rakefile
    README.md
    lib/loremarkov.rb
    bin/destroy
  ]
  spec.executables = %w[destroy]

  spec.add_development_dependency "buildar", "~> 2"
  spec.add_development_dependency "minitest", "~> 5"
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: loremarkov
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rick Hull
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: buildar
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5'
|
41
|
+
description: Text goes in, markov gibberish comes out
|
42
|
+
email:
|
43
|
+
executables:
|
44
|
+
- destroy
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- bin/destroy
|
52
|
+
- lib/loremarkov.rb
|
53
|
+
- loremarkov.gemspec
|
54
|
+
homepage: https://github.com/rickhull/loremarkov
|
55
|
+
licenses:
|
56
|
+
- GPL
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '2'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements: []
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 2.2.2
|
75
|
+
signing_key:
|
76
|
+
specification_version: 4
|
77
|
+
summary: 'Lorem ipsum and more: create your own filler text'
|
78
|
+
test_files: []
|