lorem-ipsum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/bin/lorem-ipsum +58 -0
  2. data/data/lorem.txt +6 -0
  3. data/lib/lorem-ipsum.rb +105 -0
  4. metadata +69 -0
@@ -0,0 +1,58 @@
1
#!/usr/bin/env ruby
#
# Command-line front end for LoremIpsum::Generator.
#
# Trains a generator on the data files named on the command line (or, when
# none are given, on every regular file in the bundled data directory) and
# prints the generated text to standard output.

$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
# Directory holding the bundled training text.
$DATA_DIR = File.join(File.dirname(__FILE__), *%w[.. data])

require 'lorem-ipsum'
require 'optparse'

init_options = { :max_ngraph => 3 }
gen_options = { :words => 100 }

# Parses +val+ as a base-10 integer and checks it against optional bounds.
#
# val   - the raw option value (converted with to_s before parsing).
# range - Hash with optional :min and :max Integer bounds (inclusive);
#         a missing or nil bound is not enforced.
#
# Returns the parsed Integer.
# Raises RuntimeError when +val+ is not an integer or is out of range.
def check_int_arg(val, range = { :min => nil, :max => nil })
  err_msg = "Must specify integer between " \
            "#{range[:min] || "-INF"} and #{range[:max] || "INF"}"

  # Integer() rejects trailing garbage ("5abc") that to_i would silently
  # accept; the explicit base keeps "010" from being read as octal.
  int_val = begin
    Integer(val.to_s, 10)
  rescue ArgumentError, TypeError
    raise err_msg
  end

  raise err_msg if range[:min] && int_val < range[:min]
  raise err_msg if range[:max] && int_val > range[:max]

  int_val
end

parser = OptionParser.new do |o|
  o.banner = "Usage: #{$0} [options] data-files"

  o.on("--max-ngraph MAX",
       "Maximum length of n-graphs to track. Default is 3") do |v|
    init_options[:max_ngraph] = check_int_arg(v, :min => 1)
  end

  o.on("-w", "--words WORDS",
       "Number of words to generate. Default is 100") do |v|
    gen_options[:words] = check_int_arg(v, :min => 1)
  end
end

begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError, RuntimeError => e
  # RuntimeError covers the validation failures raised by check_int_arg.
  parser.abort(e.message)
end

files =
  if ARGV.empty?
    # Only regular files are usable as training data; this also drops the
    # "." and ".." entries and any subdirectories.
    Dir.entries($DATA_DIR).
      map { |entry| File.join($DATA_DIR, entry) }.
      select { |path| File.file?(path) }.
      sort
  else
    ARGV
  end

gen = LoremIpsum::Generator.new(files, init_options)
puts gen.generate(gen_options).strip
58
+
@@ -0,0 +1,6 @@
1
+ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
2
+ incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
3
+ nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
4
+ Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
5
+ fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
6
+ culpa qui officia deserunt mollit anim id est laborum.
@@ -0,0 +1,105 @@
1
# LoremIpsum is a small, trainable dummy-text generator. It records how often
# letters follow each other in sample text and emits random words that follow
# the same letter statistics.
module LoremIpsum

  # Collects letter n-graph statistics from text files and generates
  # pseudo-words from them.
  #
  # Statistics live in a nested Hash (a trie): each level is keyed by a
  # character and every node carries a :count entry next to its children.
  class Generator

    # Marker placed at the start of every word so that word-initial letters
    # get their own statistics.
    WORD_START = '^'.freeze

    # Characters that may be emitted; the trailing space ends a word.
    ALPHABET = 'abcdefghijklmnopqrstuvwxyz '.freeze

    # N-graph length used when the caller does not supply :max_ngraph.
    DEFAULT_MAX_NGRAPH = 3

    # data_files - Array of file names to train on.
    # opts       - Hash; :max_ngraph is the longest letter sequence to track
    #              (defaults to 3 when the key is missing or nil).
    def initialize(data_files = [], opts = { :max_ngraph => 3 })
      @letter_count = { :count => 0 }
      @max_ngraph = opts[:max_ngraph] || DEFAULT_MAX_NGRAPH

      data_files.each { |file| analyze(file) }
    end

    # Adds the letter statistics of +filename+ to the generator.
    #
    # Each line is lower-cased and stripped of everything except a-z and
    # spaces (punctuation is not modelled). Returns nil.
    def analyze(filename)
      File.open(filename) do |file|
        file.each_line do |line|
          # The appended space terminates the last word of the line.
          text = line.strip.downcase.gsub(/[^a-z ]/, '') << ' '

          word = WORD_START.dup
          text.each_char do |c|
            word << c
            n = [@max_ngraph, word.length].min

            # Walk the trie along the last n characters, bumping the count
            # of every node on the path.
            node = @letter_count
            word[word.length - n, n].each_char do |char|
              node[char] ||= { :count => 0 }
              node[char][:count] += 1
              node = node[char]
            end

            word = WORD_START.dup if c == ' '
          end
        end
      end
      nil
    end

    # Generates text from the collected statistics.
    #
    # options - Hash; :words is the number of words to produce. Without a
    #           :words entry an empty String is returned.
    #
    # Returns a String of space-terminated words grouped into sentences of
    # 5 to 14 words; each sentence is capitalized and ends with ". ".
    def generate(options = { :words => 100 })
      # dup keeps the buffer mutable even if string literals are frozen.
      str = ''.dup
      total_words = options[:words]
      return str unless total_words

      to_next_sentence = rand(10) + 5
      start_of_sentence = true
      1.upto(total_words) do |i|
        word = next_word
        if start_of_sentence
          word.capitalize!
          start_of_sentence = false
        end

        to_next_sentence -= 1
        if to_next_sentence == 0 || i == total_words
          # The word carries its trailing space; turn it into ". ".
          word.gsub!(/ /, '. ')
          to_next_sentence = rand(10) + 5
          start_of_sentence = true
        end

        str << word
      end

      str
    end

    # Returns one generated word including its terminating space.
    def next_word
      word = WORD_START.dup
      word << next_char(word) until word[-1, 1] == ' '
      word[1..-1]
    end

    # Picks the character that follows +prev+ (the word built so far,
    # including the leading start marker).
    #
    # Returns a one-character String from ALPHABET; a space means "end the
    # word". A space is also returned when no statistics are available.
    def next_char(prev)
      # Need to make sure our words don't get too long. Not everyone is
      # Charles Dickens, even in fake-Latin land. These parameters seem to
      # look nice, but salt to taste.
      return ' ' if prev.length > 4 && rand(9 + prev.length) < prev.length

      # A word must not end before it has a single letter.
      at_word_start = prev.empty? || prev == WORD_START

      n = [[@max_ngraph - 1, prev.length].min, 0].max
      # Slice by start/length: prev[-n..-1] would return the whole string
      # when n is 0.
      context = prev[prev.length - n, n]

      # If there are no usable statistics for this context, retry with the
      # context shortened by one character, down to the empty context.
      loop do
        candidates = weighted_candidates(context, at_word_start)
        total = candidates.inject(0) { |sum, pair| sum + pair[1] }
        return pick_weighted(candidates, rand(total)) if total > 0
        break if context.empty?
        context = context[1..-1]
      end

      ' '
    end

    private

    # Returns [char, count] pairs for every ALPHABET character recorded
    # after +context+, or an empty Array when the context was never seen.
    def weighted_candidates(context, at_word_start)
      node = @letter_count
      context.each_char do |char|
        node = node[char]
        return [] unless node
      end

      pairs = []
      ALPHABET.each_char do |c|
        next if at_word_start && c == ' '
        count = node[c] ? node[c][:count] : 0
        pairs << [c, count] if count > 0
      end
      pairs
    end

    # Maps +index+ (0 <= index < sum of counts) onto the candidate whose
    # cumulative count range contains it.
    def pick_weighted(candidates, index)
      candidates.each do |char, count|
        index -= count
        return char if index < 0
      end
      # Not reachable for an in-range index; kept as a safe default.
      candidates.last.first
    end

  end

end
105
+
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lorem-ipsum
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Matt Austin
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-24 00:00:00 -04:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: lorem-ipsum is a simple, trainable dummy text generator.
23
+ email: maustin126@gmail.com
24
+ executables:
25
+ - lorem-ipsum
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - lib/lorem-ipsum.rb
32
+ - data/lorem.txt
33
+ - bin/lorem-ipsum
34
+ has_rdoc: true
35
+ homepage: http://github.com/maafy6/lorem-ipsum
36
+ licenses:
37
+ - MIT
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ hash: 3
49
+ segments:
50
+ - 0
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.6.2
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: A simple dummy text generator.
68
+ test_files: []
69
+