lorem_jp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,223 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Class for Japanese Lorem Ipsum generation.
#
# Sentences are produced by a random walk over a Markov-chain probability
# tree that is loaded from a dictionary. The dictionary text format, as read
# by {#load_dict_from_stream}, is:
#
# 1. first line: the chain length (number of preceding words used as state)
# 2. second line: the word for id 0 (the begin-of-sentence marker, normally
#    an empty line)
# 3. one word per line (ids 1, 2, ...) up to an empty separator line
# 4. tree lines: a bare word id opens a branch, +id=cand,cand,...+ is a leaf
#    listing candidate next word ids, and every leading space re-uses one
#    path element of the previous tree line. A negative candidate id means
#    end of sentence.
#
class LoremJP
  # Version number
  VERSION = '0.0.1'

  # Default dictionary directory
  DICTIONARY_DIR = File.absolute_path('../../data', __FILE__)
  # Default dictionary file
  DEFAULT_DICTIONARY = File.join(DICTIONARY_DIR, 'dict.txt')

  class << self
    # Singleton interface of {#sentence}.
    # One lazily loaded generator is cached per dictionary value.
    # @param [Hash] options
    # @option options [Integer] :chain number of words considered as
    #                                  past state in Markov chain
    # @option options [String] :dictionary file name of dictionary
    # @return [String] Japanese meaningless sentence
    def sentence(options = {})
      singleton_for_dict(options[:dictionary]).sentence(options)
    end

    private

    # Returns the cached generator for +dictionary+, creating it on first use.
    # @param [String, nil] dictionary file name of dictionary (nil: default)
    # @return [LoremJP]
    def singleton_for_dict(dictionary)
      @singleton ||= {}
      @singleton[dictionary] ||= new(:dictionary => dictionary,
                                     :lazy       => true)
    end
  end

  # @param [Hash] options
  # @option options [String, #close] :dictionary file name of dictionary, or
  #                                  an already opened stream
  # @option options [Integer] :chain initial chain length; replaced by the
  #                                  value stored in the dictionary once the
  #                                  dictionary has been loaded
  # @option options [Boolean] :lazy load dictionary file
  #                                 on first generation if true
  def initialize(options = {})
    @dictionary = options[:dictionary]
    @chain = options[:chain] || 1

    @dict = []
    @tree = {}
    @loaded = false

    load_dict(@dictionary) unless options[:lazy]
  end

  # Generates one random sentence.
  # @param [Hash] options
  # @option options [Integer] :chain number of words considered as past state
  #                                  in Markov chain
  # @return [String] Japanese meaningless sentence
  # @raise [ArgumentError] if the chain value is not positive or exceeds the
  #                        chain length of the dictionary
  def sentence(options = {})
    load_dict(@dictionary) unless @loaded

    chain = options[:chain] || @chain

    unless chain > 0
      raise ArgumentError,
            "invalid chain option value (#{chain})."
    end
    unless chain <= @chain
      raise ArgumentError,
            "chain option value (#{chain}) exceeds dict's chain (#{@chain})"
    end

    tokens = []
    # the walk starts from a state made only of word id 0 (begin of sentence)
    stack = [0] * chain

    loop do
      cand = lookup_candidates(stack).sample
      # A negative id marks end of sentence. A node without any candidate
      # yields nil, which is treated the same way instead of crashing.
      break if cand.nil? || cand < 0

      tokens << @dict[cand]

      # slide the state window forward by one word
      stack.shift
      stack << cand
    end

    tokens.join('')
  end

  private

  # Loads the dictionary from a stream (anything responding to +close+) or
  # from the file designated by +dictionary+.
  def load_dict(dictionary)
    if dictionary.respond_to?(:close)
      load_dict_from_stream(dictionary)
    else
      load_dict_from_file(dictionary_file(dictionary))
    end
  end

  # Resolves +filename+ to a real path, trying it as given first and then
  # relative to {DICTIONARY_DIR}. A nil +filename+ selects
  # {DEFAULT_DICTIONARY}.
  # @return [String] resolved path
  # @raise [ArgumentError] if a given +filename+ cannot be found
  def dictionary_file(filename)
    return File.realpath(DEFAULT_DICTIONARY) unless filename

    [nil, DICTIONARY_DIR].each do |basedir|
      begin
        return File.realpath(filename, basedir)
      rescue Errno::ENOENT
        next
      end
    end

    raise ArgumentError,
          "dictionary file (#{filename}) not found"
  end

  # Opens +filename+ as UTF-8 text and loads it.
  # File.open is used (not Kernel#open) so the name is always a plain path.
  def load_dict_from_file(filename)
    File.open(filename, 'r:utf-8:utf-8') do |handle|
      load_dict_from_stream(handle)
    end
  end

  # Reads the whole dictionary (chain length, word list, probability tree)
  # from +stream+ and marks the generator as loaded.
  def load_dict_from_stream(stream)
    section = :chain
    stack = []

    stream.each do |raw|
      line = raw.chomp

      case section
      when :chain
        @chain = line.to_i
        section = :first_word
      when :first_word
        # first word dict entry is '' (empty)
        @dict << line
        section = :words
      when :words
        if line.empty? # separator
          section = :tree
        else
          @dict << line
        end
      else
        # probability tree
        stack = parse_tree_line(line, stack)
        insert_tree_node(stack)
      end
    end

    @loaded = true
  end

  # Turns one tree line into a full path: each leading space takes over the
  # element at the same depth of the previous line's path (+parents+), and
  # the remaining text becomes the last path element.
  # @return [Array] path of tokens from the tree root
  def parse_tree_line(line, parents)
    fields = line.split(/ /)
    depth = fields.take_while(&:empty?).length

    # positions beyond the previous path stay nil, as before
    path = Array.new(depth) { |index| parents[index] }
    rest = fields.drop(depth)
    path << rest.join('') unless rest.empty?
    path
  end

  # Inserts the path +stack+ into the tree. Plain ids descend into (and
  # create) branch hashes; an +id=c1,c2+ token stores the candidate id list
  # as a leaf and ends the path.
  def insert_tree_node(stack)
    node = @tree

    stack.each do |entry|
      token = entry.to_s

      if token.include?('=')
        child, cands = token.split('=', 2)
        node[child.to_i] = cands.split(/,\s*/).map(&:to_i)
        break
      end

      word_id = token.to_i
      node[word_id] ||= {}
      node = node[word_id]
    end
  end

  # Walks the tree along the word ids in +stack+.
  # @return [Array<Integer>] candidate next word ids: the leaf list, the keys
  #   of a branch reached with a shorter state, or +[-1]+ (end of sentence)
  #   when the state is unknown
  def lookup_candidates(stack)
    node = @tree

    stack.each do |word|
      break unless node.is_a?(Hash) # stop at a leaf or an unknown state
      node = node[word]
    end

    case node
    when Hash  then node.keys
    when Array then node
    else [-1] # EOS
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'lorem_jp'
3
+ require 'optparse'
4
+
5
class LoremJP

  # Class for command line interface of Japanese Lorem Ipsum
  #
  class CLI
    # Main routine for command line interface of +lorem_jp+.
    #
    # Recognized options:
    # * +-f DICT+  dictionary filename
    # * +-c CHAIN+ chain length, a decimal integer
    #
    # Only options actually given are forwarded to {LoremJP.sentence}; the
    # generated sentence is printed to standard output.
    #
    # @param [Array<String>] argv command line arguments; recognized options
    #   are removed from this array in place
    # @return [void]
    # @raise [OptionParser::ParseError] on unknown options or when CHAIN is
    #   not an integer
    def self.main(argv = ARGV)
      options = {}

      opt = OptionParser.new

      opt.on('-f DICT', 'dictionary filename') { |v|
        options[:dictionary] = v
      }
      # Let OptionParser validate the number: String#to_i would silently turn
      # non-numeric input into 0.
      opt.on('-c CHAIN', OptionParser::DecimalInteger,
             'chain of precedences' +
             ' (default: set in dictionary)') { |v|
        options[:chain] = v
      }

      opt.parse!(argv)

      puts LoremJP.sentence(options)
    end
  end

end
37
+
38
# Run the command line interface only when this file is executed directly,
# not when it is loaded with require.
LoremJP::CLI.main if __FILE__ == $PROGRAM_NAME
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Gem specification of lorem_jp: package identity, the explicit list of
# shipped files, and development-time dependencies.
Gem::Specification.new do |spec|
  spec.name        = 'lorem_jp'
  spec.version     = '0.0.1'
  spec.authors     = ['ITO Nobuaki']
  spec.email       = ['daydream.trippers@gmail.com']
  spec.description = 'Japanese Lorem Ipsum generator'
  spec.summary     = 'Japanese Lorem Ipsum generator'
  spec.homepage    = 'https://github.com/dayflower/lorem_jp/'
  spec.license     = 'MIT'

  # Files are listed explicitly rather than globbed.
  spec.files = %w[
    lorem_jp.gemspec
    Gemfile
    Rakefile
    LICENSE.txt
    README.md
    lib/lorem_jp.rb
    lib/lorem_jp/cli.rb
    data/dict.txt
    bin/lorem_jp
    build/make_dict.rb
    build/fetcher.rb
    build/recipes/789.rb
    build/recipes/2363.rb
    build/recipes/52960.rb
    test/lorem_jp_spec.rb
  ]

  # Executables and test files are derived from the file list by directory.
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_development_dependency('bundler', '~> 1.3')
  spec.add_development_dependency('rake')
end
@@ -0,0 +1,60 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'minitest/spec'
3
+ require 'minitest/autorun'
4
+ require 'lorem_jp'
5
+ require 'stringio'
6
+
7
# Specs for LoremJP sentence generation, driven by small in-memory
# dictionaries. Dictionary layout: chain length, the (empty) word for id 0,
# one word per line, an empty separator line, then the probability tree.
describe LoremJP do
  it 'generate sentence with 1 chain' do
    # tree: BOS -> A -> B -> (C | D) -> EOS
    dict = StringIO.new(<<-'END_DICT')
1

A
B
C
D

0=1
1=2
2=3,4
3=-1
4=-1
    END_DICT

    lorem = LoremJP.new(:dictionary => dict, :lazy => false)

    5.times do
      # \A and \z anchor the whole sentence; ^ and $ would only anchor a line
      assert_match(%r{\A A B (C | D) \z}x, lorem.sentence)
    end
  end

  it 'generate sentence with 2 chain' do
    # In the tree section a leading space makes a line the child of the
    # preceding unindented word id, so each leaf is keyed by two past words.
    dict = StringIO.new(<<-'END_DICT')
2

A
B
C
D

0
 0=1
 1=2,4
1
 2=3,4
 4=2,3
2
 3=-1
 4=-1
4
 2=-1
 3=-1
    END_DICT

    lorem = LoremJP.new(:dictionary => dict, :lazy => false)

    5.times do
      assert_match(%r{\A A (B (C|D) | D (B|C) ) \z}x, lorem.sentence)
    end
  end
end
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lorem_jp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - ITO Nobuaki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-06-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Japanese Lorem Ipsum generator
42
+ email:
43
+ - daydream.trippers@gmail.com
44
+ executables:
45
+ - lorem_jp
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - lorem_jp.gemspec
50
+ - Gemfile
51
+ - Rakefile
52
+ - LICENSE.txt
53
+ - README.md
54
+ - lib/lorem_jp.rb
55
+ - lib/lorem_jp/cli.rb
56
+ - data/dict.txt
57
+ - bin/lorem_jp
58
+ - build/make_dict.rb
59
+ - build/fetcher.rb
60
+ - build/recipes/789.rb
61
+ - build/recipes/2363.rb
62
+ - build/recipes/52960.rb
63
+ - test/lorem_jp_spec.rb
64
+ homepage: https://github.com/dayflower/lorem_jp/
65
+ licenses:
66
+ - MIT
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 2.0.2
85
+ signing_key:
86
+ specification_version: 4
87
+ summary: Japanese Lorem Ipsum generator
88
+ test_files:
89
+ - test/lorem_jp_spec.rb
90
+ has_rdoc: