lorem-ipsum 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/lorem-ipsum +58 -0
- data/data/lorem.txt +6 -0
- data/lib/lorem-ipsum.rb +105 -0
- metadata +69 -0
data/bin/lorem-ipsum
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for LoremIpsum::Generator.
#
# Usage: lorem-ipsum [options] data-files
#
# When no data files are named on the command line, every regular file in
# the data directory next to this script is used as training input.

$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
$DATA_DIR = File.join(File.dirname(__FILE__), *%w[.. data])

require 'lorem-ipsum'
require 'optparse'

# Converts a command-line argument to an integer and checks it against the
# optional bounds in +range+ (+:min+ and/or +:max+, both inclusive).
#
# The argument must be a well-formed base-10 integer; input such as "abc" or
# "5abc" is rejected instead of being silently truncated the way
# String#to_i would.
#
# Returns the parsed integer. Raises RuntimeError carrying a usage message
# when the value is malformed or out of range.
def check_int_arg(val, range = { :min => nil, :max => nil })
  err_msg = "Must specify integer between " \
            "#{range[:min] || "-INF"} and #{range[:max] || "INF"}"

  int_val = begin
    Integer(val.to_s, 10)
  rescue ArgumentError, TypeError
    raise err_msg
  end

  raise err_msg if range[:min] && int_val < range[:min]
  raise err_msg if range[:max] && int_val > range[:max]

  int_val
end

init_options = { :max_ngraph => 3 }
gen_options = { :words => 100 }

parser = OptionParser.new do |o|
  o.banner = "Usage: #{$0} [options] data-files"

  o.on("--max-ngraph MAX",
       "Maximum length of n-graphs to track. Default is 3") do |v|
    init_options[:max_ngraph] = check_int_arg(v, :min => 1)
  end

  o.on("-w", "--words WORDS",
       "Number of words to generate. Default is 100") do |v|
    gen_options[:words] = check_int_arg(v, :min => 1)
  end
end

begin
  parser.parse!(ARGV)
rescue StandardError => e
  # Report the problem (bad option or bad value) and exit non-zero.
  parser.abort(e.message)
end

files =
  if ARGV.empty?
    # Default to the bundled data files. Skip "." and ".." and anything that
    # is not a regular file, since a directory cannot be read line by line.
    Dir.entries($DATA_DIR).
      reject { |f| f =~ /^\.\.?$/ }.
      map { |f| File.join($DATA_DIR, f) }.
      select { |path| File.file?(path) }
  else
    ARGV
  end

gen = LoremIpsum::Generator.new(files, init_options)
puts gen.generate(gen_options).strip
|
58
|
+
|
data/data/lorem.txt
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
|
2
|
+
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
|
3
|
+
nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
|
4
|
+
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
|
5
|
+
fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
|
6
|
+
culpa qui officia deserunt mollit anim id est laborum.
|
data/lib/lorem-ipsum.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
module LoremIpsum

  # Trainable dummy-text generator.
  #
  # The generator reads plain-text files, records how often each run of up
  # to +max_ngraph+ characters (an "n-graph") occurs, and then emits random
  # words whose letter sequences follow those statistics.
  #
  # Statistics live in a nested hash: each node maps a character to a child
  # node, and every node carries a :count entry with the number of times the
  # path leading to it was observed. The marker "^" stands for the start of
  # a word and " " for its end.
  class Generator

    # Characters the generator may emit: the lowercase letters plus the
    # space that terminates a word.
    ALPHABET = "abcdefghijklmnopqrstuvwxyz ".freeze

    # Marker prepended to every word so that word-initial letters get their
    # own statistics.
    WORD_START = "^".freeze

    # data_files:: paths of text files to train on (may be empty).
    # opts::       +:max_ngraph+ is the longest n-graph to track; it
    #              defaults to 3 when the key is missing or nil.
    def initialize(data_files = [], opts = { :max_ngraph => 3 })
      @letter_count = { :count => 0 }
      @max_ngraph = opts[:max_ngraph] || 3

      data_files.each { |file| analyze(file) }
    end

    # Adds the n-graph statistics of the file at +filename+ to the model.
    #
    # Each line is stripped, lowercased and reduced to the letters a-z and
    # spaces; punctuation is discarded rather than modelled.
    def analyze(filename)
      File.open(filename) do |file|
        file.each_line do |line|
          # The trailing space makes sure the last word of the line is
          # terminated like every other word.
          text = line.strip.downcase.gsub(/[^a-z ]/, '') << ' '

          word = WORD_START.dup
          text.each_char do |c|
            word << c
            n = [@max_ngraph, word.length].min
            record(word[-n..-1])

            word = WORD_START.dup if c == ' '
          end
        end
      end
    end

    # Generates text and returns it as a String.
    #
    # options:: +:words+ is the number of words to produce. When it is
    #           missing or nil an empty string is returned.
    #
    # Words are grouped into sentences of 5 to 14 words; each sentence
    # starts with a capital letter and ends with ". ". The final word always
    # closes a sentence, so non-empty output ends with ". ".
    def generate(options = { :words => 100 })
      str = "".dup
      total = options[:words]
      return str unless total

      to_next_sentence = rand(10) + 5
      start_of_sentence = true
      1.upto(total) do |i|
        word = next_word
        if start_of_sentence
          word.capitalize!
          start_of_sentence = false
        end

        to_next_sentence -= 1
        if to_next_sentence == 0 || i == total
          word.gsub!(/ /, '. ')
          to_next_sentence = rand(10) + 5
          start_of_sentence = true
        end

        str << word
      end

      str
    end

    # Returns one random word followed by a single space.
    def next_word
      word = WORD_START.dup
      word << next_char(word) until word.end_with?(' ')
      word[1..-1]
    end

    # Picks the character that follows +prev+, the word built so far
    # (including its leading "^" marker).
    #
    # The choice is weighted by how often each character followed the last
    # +max_ngraph+ - 1 characters of +prev+ in the training data. When there
    # are no usable statistics for that context, progressively shorter
    # contexts are tried. If the model has no usable data at all, 'a' is
    # returned so an untrained generator still produces words.
    def next_char(prev)
      # Need to make sure our words don't get too long. Not everyone is
      # Charles Dickens, even in fake-Latin land. These parameters seem to
      # look nice, but salt to taste.
      return ' ' if prev.length > 4 && rand(9 + prev.length) < prev.length

      # A space at the very start would produce an empty word.
      at_word_start = prev.empty? || prev == WORD_START

      n = [@max_ngraph - 1, prev.length].min
      # Guard n == 0 explicitly: prev[-0..-1] would be the whole string.
      context = n > 0 ? prev[-n..-1] : ""

      loop do
        counts = candidate_counts(context, at_word_start)
        total = counts.values.inject(0) { |sum, count| sum + count }

        if total > 0
          # Draw from 1..total so a character with no observations can
          # never be selected.
          index = rand(total) + 1
          ALPHABET.each_char do |c|
            index -= counts[c] || 0
            return c if index <= 0
          end
        end

        break if context.empty?
        context = context[1..-1]
      end

      'a'
    end

    private

    # Increments the count of every node along the path spelled by +ngraph+,
    # creating nodes as needed.
    def record(ngraph)
      node = @letter_count
      ngraph.each_char do |ch|
        node = (node[ch] ||= { :count => 0 })
        node[:count] += 1
      end
    end

    # Returns a hash mapping each emittable character to its positive count
    # after +context+. The hash is empty when the context was never seen.
    def candidate_counts(context, at_word_start)
      node = context.each_char.inject(@letter_count) { |h, ch| h && h[ch] }
      counts = {}
      return counts unless node

      ALPHABET.each_char do |c|
        next if at_word_start && c == ' '
        child = node[c]
        counts[c] = child[:count] if child && child[:count] > 0
      end
      counts
    end

  end

end
|
105
|
+
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lorem-ipsum
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Matt Austin
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-24 00:00:00 -04:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: lorem-ipsum is a simple, trainable dummy text generator.
|
23
|
+
email: maustin126@gmail.com
|
24
|
+
executables:
|
25
|
+
- lorem-ipsum
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- lib/lorem-ipsum.rb
|
32
|
+
- data/lorem.txt
|
33
|
+
- bin/lorem-ipsum
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://github.com/maafy6/lorem-ipsum
|
36
|
+
licenses:
|
37
|
+
- MIT
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
none: false
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
hash: 3
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
requirements: []
|
62
|
+
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.6.2
|
65
|
+
signing_key:
|
66
|
+
specification_version: 3
|
67
|
+
summary: A simple dummy text generator.
|
68
|
+
test_files: []
|
69
|
+
|