lorem-ipsum 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/lorem-ipsum +58 -0
- data/data/lorem.txt +6 -0
- data/lib/lorem-ipsum.rb +105 -0
- metadata +69 -0
data/bin/lorem-ipsum
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
4
|
+
$DATA_DIR = File.join(File.dirname(__FILE__), *%w[.. data])
|
5
|
+
|
6
|
+
require 'lorem-ipsum'
|
7
|
+
require 'optparse'
|
8
|
+
|
9
|
+
# Defaults handed to LoremIpsum::Generator.new and Generator#generate; the
# option handlers below overwrite them in place.
init_options = { :max_ngraph => 3 }
gen_options = { :words => 100 }

# Converts a command-line value to an Integer and checks it against an
# optional inclusive range.
#
# val   - the raw option value (normally a String from OptionParser).
# range - Hash with optional :min and :max bounds; a nil bound is unchecked.
#
# Returns the parsed Integer.
# Raises RuntimeError (with a message naming the allowed range) when the
# value is not a plain base-10 integer or falls outside the range.
def check_int_arg(val, range = { :min => nil, :max => nil })
  err_msg = "Must specify integer between " \
            "#{range[:min] || '-INF'} and #{range[:max] || 'INF'}"

  # Integer() rejects input such as "abc" or "5abc", which String#to_i would
  # silently turn into 0 or 5. Base 10 is forced so "010" is ten, not octal.
  int_val = begin
    Integer(val.to_s, 10)
  rescue ArgumentError
    raise err_msg
  end

  raise err_msg if range[:min] && int_val < range[:min]
  raise err_msg if range[:max] && int_val > range[:max]

  int_val
end

# Command-line definition. The block parameter is named `parser` so it does
# not shadow the `opts` local being assigned.
opts = OptionParser.new do |parser|
  parser.banner = "Usage: #{$0} [options] data-files"

  parser.on("--max-ngraph MAX",
            "Maximum length of n-graphs to track. Default is 3") do |v|
    init_options[:max_ngraph] = check_int_arg(v, :min => 1)
  end

  parser.on("-w", "--words WORDS",
            "Number of words to generate. Default is 100") do |v|
    gen_options[:words] = check_int_arg(v, :min => 1)
  end
end
|
38
|
+
|
39
|
+
# Parse the command line. Any error raised while parsing or validating an
# option is reported through OptionParser#abort, which by default prints the
# current exception ($!) and terminates the program.
begin
  opts.parse!(ARGV)
rescue StandardError
  opts.abort
end

# Whatever is left in ARGV after option parsing is the list of training
# files. With none given, fall back to the bundled data directory.
files = if ARGV.empty?
  Dir.entries($DATA_DIR).collect do |f|
    File.join($DATA_DIR, f)
  end.select do |path|
    # Keep regular files only: this drops ".", ".." and any sub-directory,
    # none of which the generator could read as training text.
    File.file?(path)
  end
else
  ARGV
end

gen = LoremIpsum::Generator.new(files, init_options)
puts gen.generate(gen_options).strip
|
58
|
+
|
data/data/lorem.txt
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
|
2
|
+
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
|
3
|
+
nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
|
4
|
+
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
|
5
|
+
fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
|
6
|
+
culpa qui officia deserunt mollit anim id est laborum.
|
data/lib/lorem-ipsum.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
module LoremIpsum

  # Trainable dummy-text generator.
  #
  # Training text is reduced to lowercase a-z words. For every word the
  # generator records letter sequences ("n-graphs") of up to :max_ngraph
  # characters in a nested Hash (@letter_count). Each level maps a character
  # to a child Hash whose :count entry says how often that path was seen.
  # Words are tracked with a leading "^" marker and a trailing " ", so the
  # statistics also capture how words begin and end.
  class Generator

    # Letters that may appear inside a generated word.
    LETTERS = ('a'..'z').to_a.join.freeze
    # Every character next_char may return; " " terminates a word.
    ALPHABET = (LETTERS + ' ').freeze
    # Marker prepended to every word while it is analysed or generated.
    WORD_START = '^'.freeze

    # data_files - Array of paths to train on (may be empty).
    # opts       - Hash; :max_ngraph is the longest n-graph to track
    #              (defaults to 3 when the key is missing).
    #
    # Raises ArgumentError when :max_ngraph is not a positive Integer.
    def initialize(data_files = [], opts = { :max_ngraph => 3 })
      @letter_count = { :count => 0 }
      @max_ngraph = opts[:max_ngraph] || 3
      unless @max_ngraph.is_a?(Integer) && @max_ngraph >= 1
        raise ArgumentError, ":max_ngraph must be a positive integer"
      end

      data_files.each { |file| analyze(file) }
    end

    # Adds the n-graph statistics of one text file to the generator.
    #
    # filename - path of the file to read.
    def analyze(filename)
      File.foreach(filename) do |line|
        # todo - not handling punctuation
        # Everything except a-z and spaces is dropped; splitting afterwards
        # means blank lines and repeated spaces contribute no empty words.
        line.downcase.gsub(/[^a-z ]/, '').split(' ').each do |word|
          record_word(word)
        end
      end
    end

    # Generates dummy text.
    #
    # options - Hash; :words is the number of words to produce.
    #
    # Returns a String of words, each followed by a space, grouped into
    # capitalised sentences of 5 to 14 words that end in ". ". The last word
    # always closes a sentence. Returns "" when :words is not given.
    def generate(options = { :words => 100 })
      total = options[:words]
      return "" unless total

      words = []
      to_next_sentence = rand(10) + 5
      start_of_sentence = true

      1.upto(total) do |i|
        word = next_word
        if start_of_sentence
          word.capitalize!
          start_of_sentence = false
        end

        to_next_sentence -= 1
        if to_next_sentence == 0 || i == total
          # Each word carries its trailing space; turn it into a full stop.
          word.gsub!(/ /, '. ')
          to_next_sentence = rand(10) + 5
          start_of_sentence = true
        end

        words << word
      end

      words.join
    end

    # Returns one generated word including its trailing space.
    def next_word
      word = WORD_START.dup
      word << next_char(word) until word.end_with?(' ')
      word[1..-1]
    end

    # Picks the character that follows +prev+ (the word so far, including
    # the leading "^" marker). Returns a single-character String from
    # ALPHABET; " " means the word is finished.
    def next_char(prev)
      # Need to make sure our words don't get too long. Not everyone is
      # Charles Dickens, even in fake-Latin land. These parameters seem to
      # look nice, but salt to taste.
      return ' ' if prev.length > 4 && rand(9 + prev.length) < prev.length

      at_word_start = prev.empty? || prev == WORD_START

      # Use the last (max_ngraph - 1) characters as context. Slicing with
      # (start, length) stays correct when the context length is 0, whereas
      # prev[-0..-1] would return the whole string.
      n = [@max_ngraph - 1, prev.length].min
      context = prev[prev.length - n, n]

      # If we don't have statistics for this n-graph, just use the stats
      # for the (n-1)-graph, down to the empty context.
      loop do
        counts = successor_counts(context, at_word_start)
        return weighted_choice(counts) unless counts.empty?
        break if context.empty?
        context = context[1..-1]
      end

      # No usable statistics at all (e.g. an untrained generator): open a
      # word with a random letter, otherwise close the word.
      at_word_start ? LETTERS[rand(LETTERS.length), 1] : ' '
    end

    private

    # Records every n-graph of one training word ("^" + word + " ").
    def record_word(word)
      seen = WORD_START.dup
      "#{word} ".each_char do |c|
        seen << c
        n = [@max_ngraph, seen.length].min
        record_ngraph(seen[-n, n])
      end
    end

    # Walks @letter_count along +ngraph+, creating missing nodes and
    # incrementing the :count of every node on the path.
    def record_ngraph(ngraph)
      ngraph.each_char.inject(@letter_count) do |node, char|
        child = (node[char] ||= { :count => 0 })
        child[:count] += 1
        child
      end
    end

    # Returns a Hash of ALPHABET character => count for the characters seen
    # directly after +context+. It is empty when the context is unknown or
    # has no usable successors. A space is never offered at the start of a
    # word, so generated words are never empty.
    def successor_counts(context, at_word_start)
      node = @letter_count
      context.each_char do |char|
        node = node[char]
        return {} unless node
      end

      counts = {}
      ALPHABET.each_char do |char|
        next if at_word_start && char == ' '
        child = node[char]
        counts[char] = child[:count] if child && child[:count] > 0
      end
      counts
    end

    # Picks a key of +counts+ at random, weighted by its count. +counts+
    # must be non-empty with positive values.
    def weighted_choice(counts)
      total = counts.values.inject(0) { |sum, count| sum + count }
      pick = rand(total)
      # pick is in 0...total, so the running subtraction always goes
      # negative on some entry.
      counts.find { |_char, count| (pick -= count) < 0 }.first
    end

  end

end
|
105
|
+
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lorem-ipsum
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Matt Austin
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-24 00:00:00 -04:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: lorem-ipsum is a simple, trainable dummy text generator.
|
23
|
+
email: maustin126@gmail.com
|
24
|
+
executables:
|
25
|
+
- lorem-ipsum
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- lib/lorem-ipsum.rb
|
32
|
+
- data/lorem.txt
|
33
|
+
- bin/lorem-ipsum
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://github.com/maafy6/lorem-ipsum
|
36
|
+
licenses:
|
37
|
+
- MIT
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
none: false
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
hash: 3
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
requirements: []
|
62
|
+
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.6.2
|
65
|
+
signing_key:
|
66
|
+
specification_version: 3
|
67
|
+
summary: A simple dummy text generator.
|
68
|
+
test_files: []
|
69
|
+
|