lorem-ipsum 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/bin/lorem-ipsum +58 -0
  2. data/data/lorem.txt +6 -0
  3. data/lib/lorem-ipsum.rb +105 -0
  4. metadata +69 -0
@@ -0,0 +1,58 @@
1
#!/usr/bin/env ruby
#
# Command-line front end for LoremIpsum::Generator.
#
# Trains a generator on the files named on the command line (or, when none
# are given, on every regular file in the bundled data directory) and prints
# the generated text to standard output.

$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
$DATA_DIR = File.join(File.dirname(__FILE__), *%w[.. data])

require 'lorem-ipsum'
require 'optparse'

init_options = { :max_ngraph => 3 }
gen_options = { :words => 100 }

# Converts a command-line argument to an Integer and checks it against an
# optional inclusive range.
#
# val   - the raw option value (String).
# range - Hash with optional :min and :max bounds; a nil bound is unbounded.
#
# Returns the parsed Integer.
# Raises RuntimeError with a usage hint when val is not a plain decimal
# integer or lies outside the range.
def check_int_arg(val, range = { :min => nil, :max => nil })
  err_msg = "Must specify integer between " \
            "#{range[:min] || "-INF"} and #{range[:max] || "INF"}"

  # String#to_i would quietly turn "12abc" into 12 and "abc" into 0, so the
  # whole argument is required to be a decimal integer before converting.
  raise err_msg unless val.to_s =~ /\A\s*[-+]?\d+\s*\z/

  int_val = val.to_i

  raise err_msg if range[:min] && int_val < range[:min]
  raise err_msg if range[:max] && int_val > range[:max]

  int_val
end

opts = OptionParser.new do |o|
  o.banner = "Usage: #{$0} [options] data-files"

  o.on("--max-ngraph MAX",
       "Maximum length of n-graphs to track. Default is 3") do |v|
    init_options[:max_ngraph] = check_int_arg(v, :min => 1)
  end

  o.on("-w", "--words WORDS",
       "Number of words to generate. Default is 100") do |v|
    gen_options[:words] = check_int_arg(v, :min => 1)
  end
end

begin
  opts.parse!(ARGV)
rescue => e
  # Covers both OptionParser's own parse errors and the RuntimeError raised
  # by check_int_arg; either way report the message and exit.
  opts.abort(e.message)
end

files =
  if ARGV.empty?
    # Only regular files are usable as training data; this also drops the
    # "." and ".." entries and any sub-directories.
    Dir.entries($DATA_DIR).sort.
      map { |entry| File.join($DATA_DIR, entry) }.
      select { |path| File.file?(path) }
  else
    ARGV
  end

gen = LoremIpsum::Generator.new(files, init_options)
puts gen.generate(gen_options).strip
58
+
@@ -0,0 +1,6 @@
1
+ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
2
+ incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
3
+ nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
4
+ Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
5
+ fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
6
+ culpa qui officia deserunt mollit anim id est laborum.
@@ -0,0 +1,105 @@
1
+ module LoremIpsum
2
+
3
+ class Generator
4
+
5
+ def initialize(data_files = [], opts = { :max_ngraph => 3 })
6
+ @letter_count = { :count => 0 }
7
+ @max_ngraph = opts[:max_ngraph]
8
+
9
+ data_files.each { |file| analyze(file) }
10
+ end
11
+
12
+ def analyze(filename)
13
+ File.open(filename) do |file|
14
+ while (line = file.gets)
15
+ # todo - not handling punctuation
16
+ line = line.strip.downcase.gsub(/[^a-z ]/, '') << ' '
17
+
18
+ word = "^"
19
+ line.chars do |c|
20
+ word << c
21
+ n = [@max_ngraph, word.length].min
22
+ ngraph = word[-n..-1]
23
+
24
+ ngraph.chars.inject(@letter_count) do |hash, char|
25
+ hash[char] ||= { :count => 0 }
26
+ hash[char][:count] += 1
27
+ hash = hash[char]
28
+ end
29
+
30
+ if c == ' '
31
+ word = "^"
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+
38
+ def generate(options = { :words => 100 })
39
+ str = ""
40
+ if options[:words]
41
+ to_next_sentence = rand(10) + 5
42
+ start_of_sentence = true
43
+ 1.upto(options[:words]) do |i|
44
+ word = next_word
45
+ if start_of_sentence
46
+ word.capitalize!
47
+ start_of_sentence = false
48
+ end
49
+
50
+ to_next_sentence -= 1
51
+ if to_next_sentence == 0 || i == options[:words]
52
+ word.gsub!(/ /,'. ')
53
+ to_next_sentence = rand(10) + 5
54
+ start_of_sentence = true
55
+ end
56
+
57
+ str << word
58
+ end
59
+ end
60
+
61
+ str
62
+ end
63
+
64
+ def next_word
65
+ word = "^"
66
+ word << next_char(word) while word[-1..-1] != ' '
67
+ word[1..-1]
68
+ end
69
+
70
+ def next_char(prev)
71
+ # Need to make sure our words don't get too long. Not everyone is Charles
72
+ # Dickens, even in fake-Latin land. These parameters seem to look nice,
73
+ # but salt to taste.
74
+ return ' ' if prev.length > 4 && rand(9 + prev.length) < prev.length
75
+
76
+ n = [@max_ngraph-1, prev.length].min
77
+ prev_ngraph = prev[-n..-1]
78
+
79
+ # If we don't have statistics for this n-graph, just use the stats
80
+ # for the (n-1)-graph
81
+ n_count = nil
82
+ until n_count
83
+ n_count = prev_ngraph.chars.inject(@letter_count) do |hash, char|
84
+ break if ! hash[char]
85
+ hash = hash[char]
86
+ end
87
+ prev_ngraph = prev_ngraph[1..-1]
88
+
89
+ n_count = @letter_count if ! prev_ngraph
90
+ end
91
+ n_count = n_count.reject { |k,v| k == :count || prev.empty? && k == ' ' }
92
+
93
+ num_letters ||= n_count.values.inject(0) { |s,c| s += c[:count] }
94
+ index = rand(num_letters + 1)
95
+
96
+ "abcdefghijklmnopqrstuvqxyz ".chars do |c|
97
+ index -= n_count[c] && n_count[c][:count] || 0
98
+ return c if index <= 0
99
+ end
100
+ end
101
+
102
+ end
103
+
104
+ end
105
+
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lorem-ipsum
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Matt Austin
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-24 00:00:00 -04:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: lorem-ipsum is a simple, trainable dummy text generator.
23
+ email: maustin126@gmail.com
24
+ executables:
25
+ - lorem-ipsum
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - lib/lorem-ipsum.rb
32
+ - data/lorem.txt
33
+ - bin/lorem-ipsum
34
+ has_rdoc: true
35
+ homepage: http://github.com/maafy6/lorem-ipsum
36
+ licenses:
37
+ - MIT
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ hash: 3
49
+ segments:
50
+ - 0
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.6.2
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: A simple dummy text generator.
68
+ test_files: []
69
+