lorem_jp 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,223 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Class for Japanese Lorem Ipsum generation.
#
# A dictionary consists of a word list and a prefix tree of word ids.
# A sentence is produced by a random walk over that tree (a Markov chain):
# the last +chain+ emitted word ids select the list of possible next word
# ids, one of which is picked at random until an end-of-sentence marker
# (a negative id) is drawn.
#
class LoremJP
  # Version number
  VERSION = '0.0.1'

  # Default dictionary directory
  DICTIONARY_DIR = File.absolute_path('../../data', __FILE__)
  # Default dictionary file
  DEFAULT_DICTIONARY = File.join(DICTIONARY_DIR, 'dict.txt')

  class << self
    # Singleton interface of {#sentence}
    # @param [Hash] options
    # @option options [Integer] :chain number of words considered as
    #                                  past state in Markov chain
    # @option options [String] :dictionary file name of dictionary
    # @return [String] Japanese meaningless sentence
    def sentence(options = {})
      singleton_for_dict(options[:dictionary]).sentence(options)
    end

    private

    # Returns the shared, lazily loaded generator for +dictionary+,
    # creating it on first use. Generators are cached per dictionary key
    # (+nil+ stands for the default dictionary).
    def singleton_for_dict(dictionary)
      @singleton ||= {}
      @singleton[dictionary] ||= new(:dictionary => dictionary, :lazy => true)
    end
  end

  # @param [Hash] options
  # @option options [String, #each] :dictionary file name of dictionary, or
  #                                 an already opened stream yielding lines
  # @option options [Integer] :chain initial chain length (overwritten by
  #                                  the value stored in the dictionary
  #                                  once it is loaded)
  # @option options [Boolean] :lazy load dictionary file
  #                                 on first generation if true
  def initialize(options = {})
    @dictionary = options[:dictionary]
    @chain      = options[:chain] || 1

    @dict   = []
    @tree   = {}
    @loaded = false

    load_dict(@dictionary) unless options[:lazy]
  end

  # @param [Hash] options
  # @option options [Integer] :chain number of words considered as past state
  #                                  in Markov chain
  # @return [String] Japanese meaningless sentence
  # @raise [ArgumentError] if +:chain+ is not positive or exceeds the chain
  #                        length the dictionary was built with
  def sentence(options = {})
    load_dict(@dictionary) unless @loaded

    chain = options[:chain] || @chain

    unless chain > 0
      raise ArgumentError,
            "invalid chain option value (#{chain})."
    end
    unless chain <= @chain
      raise ArgumentError,
            "chain option value (#{chain}) exceeds dict's chain (#{@chain})"
    end

    words = []
    # Word id 0 is the empty word; a history of zeros means "sentence start".
    history = [0] * chain

    loop do
      cands = lookup_candidates(history)
      # An empty candidate list (e.g. a "1=" entry or a prefix node without
      # children) has nothing to pick from; treat it as end of sentence
      # instead of failing on a nil candidate.
      break if cands.empty?

      cand = cands[rand(cands.length)]
      break if cand < 0 # EOS

      words << @dict[cand]

      history.shift
      history << cand
    end

    words.join('')
  end

  private

  # Loads the dictionary either from an open stream (anything responding
  # to +close+) or from a file name resolved by {#dictionary_file}.
  def load_dict(dictionary)
    if dictionary.respond_to?(:close)
      load_dict_from_stream(dictionary)
    else
      load_dict_from_file(dictionary_file(dictionary))
    end
  end

  # Resolves +filename+ to a real path, trying it as given first and then
  # relative to DICTIONARY_DIR. Falls back to DEFAULT_DICTIONARY when
  # +filename+ is nil.
  # @raise [ArgumentError] if an explicitly given file cannot be found
  def dictionary_file(filename)
    return File.realpath(DEFAULT_DICTIONARY) unless filename

    [nil, DICTIONARY_DIR].each do |basedir|
      begin
        return File.realpath(filename, basedir)
      rescue Errno::ENOENT
        # not found relative to this base directory; try the next one
      end
    end

    raise ArgumentError,
          "dictionary file (#{filename}) not found"
  end

  # Opens +filename+ as UTF-8 and loads it.
  # File.open is used on purpose: Kernel#open would interpret a name
  # starting with "|" as a command to run.
  def load_dict_from_file(filename)
    File.open(filename, 'r:utf-8:utf-8') do |handle|
      load_dict_from_stream(handle)
    end
  end

  # Parses a dictionary from +stream+ line by line. The layout is:
  #
  # 1. one line with the chain length,
  # 2. the word list (the first entry is the empty word), terminated by
  #    an empty line,
  # 3. the probability tree, one node per line, where each leading space
  #    stands for one inherited level of the previous line's path.
  def load_dict_from_stream(stream)
    section = :chain
    path    = []

    stream.each do |raw|
      line = raw.chomp # non-mutating: leave the caller's strings untouched

      case section
      when :chain
        @chain  = line.to_i
        section = :first_word
      when :first_word
        # first word dict entry is '' (empty); it must not be mistaken
        # for the separator
        @dict << line
        section = :words
      when :words
        if line == '' # separator
          section = :tree
        else
          @dict << line
        end
      else
        path = tree_path(line, path)
        insert_tree_node(path)
      end
    end

    @loaded = true
  end

  # Turns one probability tree line into a full path from the tree root.
  # Each leading space inherits one element of +previous+; the remaining
  # text (with inner spaces removed) becomes the last path element.
  def tree_path(line, previous)
    fields = line.split(/ /)
    depth  = fields.take_while(&:empty?).length

    # Indexing past the end of +previous+ yields nil, matching what an
    # exhausted Array#shift would have produced.
    path = Array.new(depth) { |level| previous[level] }

    entry = fields.drop(depth).join('')
    path << entry unless entry.empty?
    path
  end

  # Inserts +stack+ (a path of word id strings) into the tree. An element
  # of the form "id=a,b,c" is a leaf: it stores the candidate id list
  # under +id+ and ends the path.
  def insert_tree_node(stack)
    node = @tree

    stack.each do |token|
      if token.to_s.include?('=')
        child, cands = token.split('=', 2)
        node[child.to_i] = cands.split(/,\s*/).map(&:to_i)
        break
      end

      word_id = token.to_i
      node[word_id] ||= {}
      node = node[word_id]
    end
  end

  # Walks the tree along +stack+ (word ids, oldest first) and returns the
  # candidate ids for the next word: the stored list for a leaf, the child
  # ids for an inner node, or [-1] (EOS) if the path does not exist.
  def lookup_candidates(stack)
    node = @tree

    stack.each do |word|
      break if node.nil? || node.is_a?(Array)

      node = node[word]
    end

    case node
    when Hash  then node.keys
    when Array then node
    else [-1] # EOS
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'lorem_jp'
3
+ require 'optparse'
4
+
5
class LoremJP

  # Class for command line interface of Japanese Lorem Ipsum
  #
  class CLI
    # Main routine for command line interface of +lorem_jp+.
    #
    # Recognized options:
    #
    # * <tt>-f DICT</tt>  dictionary filename
    # * <tt>-c CHAIN</tt> chain length (decimal integer)
    #
    # Prints one generated sentence to standard output.
    #
    # @param [Array<String>] argv command line arguments; parsed options
    #                             are removed from this array
    # @return [void]
    # @raise [OptionParser::ParseError] on unknown options or a CHAIN value
    #                                   that is not a decimal integer
    def self.main(argv = ARGV)
      options = {}

      parser = OptionParser.new
      parser.on('-f DICT', 'dictionary filename') do |value|
        options[:dictionary] = value
      end
      # DecimalInteger rejects non-numeric input at parse time instead of
      # letting String#to_i silently turn it into 0.
      parser.on('-c CHAIN', OptionParser::DecimalInteger,
                'chain of precedences' +
                ' (default: set in dictionary)') do |value|
        options[:chain] = value
      end

      parser.parse!(argv)

      puts LoremJP.sentence(options)
    end
  end

end
37
+
38
# Run the command line interface only when this file is executed directly,
# not when it is loaded with require.
LoremJP::CLI.main if __FILE__ == $PROGRAM_NAME
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Gem specification of lorem_jp: package metadata, the explicit list of
# packaged files, and development-time dependencies.
Gem::Specification.new do |spec|
  # Identity and descriptive metadata
  spec.name        = 'lorem_jp'
  spec.version     = '0.0.1'
  spec.license     = 'MIT'
  spec.homepage    = 'https://github.com/dayflower/lorem_jp/'
  spec.authors     = ['ITO Nobuaki']
  spec.email       = ['daydream.trippers@gmail.com']
  spec.summary     = 'Japanese Lorem Ipsum generator'
  spec.description = 'Japanese Lorem Ipsum generator'

  # Packaged files are listed explicitly, one per line.
  packaged = %w[
    lorem_jp.gemspec
    Gemfile
    Rakefile
    LICENSE.txt
    README.md
    lib/lorem_jp.rb
    lib/lorem_jp/cli.rb
    data/dict.txt
    bin/lorem_jp
    build/make_dict.rb
    build/fetcher.rb
    build/recipes/789.rb
    build/recipes/2363.rb
    build/recipes/52960.rb
    test/lorem_jp_spec.rb
  ]
  spec.files = packaged

  # Executables and test files are derived from the packaged file list.
  spec.executables   = packaged.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = packaged.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
end
@@ -0,0 +1,60 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'minitest/spec'
3
+ require 'minitest/autorun'
4
+ require 'lorem_jp'
5
+ require 'stringio'
6
+
7
# Specs for LoremJP using small hand-written dictionaries.
#
# Dictionary layout: chain length, the word list (the first word is always
# the empty string), an empty separator line, then the probability tree.
# In the tree every leading space nests a line one level below the
# preceding shallower line, so the fixtures are built from explicit line
# arrays to keep that significant whitespace visible.
describe LoremJP do
  it 'generate sentence with 1 chain' do
    dict = StringIO.new([
      '1',      # chain length
      '',       # word 0: empty word
      'A',      # word 1
      'B',      # word 2
      'C',      # word 3
      'D',      # word 4
      '',       # separator
      '0=1',    # start     -> A
      '1=2',    # A         -> B
      '2=3,4',  # B         -> C or D
      '3=-1',   # C         -> end of sentence
      '4=-1'    # D         -> end of sentence
    ].join("\n") + "\n")

    lorem = LoremJP.new( :dictionary => dict, :lazy => false )

    5.times do
      lorem.sentence.must_match %r{\A A B (C | D) \z}x
    end
  end

  it 'generate sentence with 2 chain' do
    dict = StringIO.new([
      '2',       # chain length
      '',        # word 0: empty word
      'A',       # word 1
      'B',       # word 2
      'C',       # word 3
      'D',       # word 4
      '',        # separator
      '0',
      ' 0=1',    # (start, start) -> A
      ' 1=2,4',  # (start, A)     -> B or D
      '1',
      ' 2=3,4',  # (A, B)         -> C or D
      ' 4=2,3',  # (A, D)         -> B or C
      '2',
      ' 3=-1',   # (B, C)         -> end of sentence
      ' 4=-1',   # (B, D)         -> end of sentence
      '4',
      ' 2=-1',   # (D, B)         -> end of sentence
      ' 3=-1'    # (D, C)         -> end of sentence
    ].join("\n") + "\n")

    lorem = LoremJP.new( :dictionary => dict, :lazy => false )

    5.times do
      lorem.sentence.must_match %r{\A A (B (C|D) | D (B|C) ) \z}x
    end
  end
end
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lorem_jp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - ITO Nobuaki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-06-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Japanese Lorem Ipsum generator
42
+ email:
43
+ - daydream.trippers@gmail.com
44
+ executables:
45
+ - lorem_jp
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - lorem_jp.gemspec
50
+ - Gemfile
51
+ - Rakefile
52
+ - LICENSE.txt
53
+ - README.md
54
+ - lib/lorem_jp.rb
55
+ - lib/lorem_jp/cli.rb
56
+ - data/dict.txt
57
+ - bin/lorem_jp
58
+ - build/make_dict.rb
59
+ - build/fetcher.rb
60
+ - build/recipes/789.rb
61
+ - build/recipes/2363.rb
62
+ - build/recipes/52960.rb
63
+ - test/lorem_jp_spec.rb
64
+ homepage: https://github.com/dayflower/lorem_jp/
65
+ licenses:
66
+ - MIT
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 2.0.2
85
+ signing_key:
86
+ specification_version: 4
87
+ summary: Japanese Lorem Ipsum generator
88
+ test_files:
89
+ - test/lorem_jp_spec.rb
90
+ has_rdoc: