lorem_jp 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +57 -0
- data/Rakefile +7 -0
- data/bin/lorem_jp +6 -0
- data/build/fetcher.rb +248 -0
- data/build/make_dict.rb +183 -0
- data/build/recipes/2363.rb +30 -0
- data/build/recipes/52960.rb +28 -0
- data/build/recipes/789.rb +63 -0
- data/data/dict.txt +4449 -0
- data/lib/lorem_jp.rb +223 -0
- data/lib/lorem_jp/cli.rb +40 -0
- data/lorem_jp.gemspec +37 -0
- data/test/lorem_jp_spec.rb +60 -0
- metadata +90 -0
data/lib/lorem_jp.rb
ADDED
@@ -0,0 +1,223 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# Class for Japanese Lorem Ipsum generation
|
4
|
+
#
|
5
|
+
class LoremJP
  # Version number
  VERSION = '0.0.1'

  # Default dictionary directory
  DICTIONARY_DIR = File.absolute_path('../../data', __FILE__)
  # Default dictionary file
  DEFAULT_DICTIONARY = File.join(DICTIONARY_DIR, 'dict.txt')

  class << self
    # Singleton interface of {#sentence}.
    #
    # One lazily-loaded generator is cached per +:dictionary+ value, so
    # repeated calls with the same dictionary parse it only once.
    #
    # @param [Hash] options
    # @option options [Integer] :chain number of words considered as
    #                                  past state in Markov chain
    # @option options [String] :dictionary file name of dictionary
    # @return [String] Japanese meaningless sentence
    # @raise [ArgumentError] if +:chain+ is invalid or the dictionary file
    #   cannot be found
    def sentence(options = {})
      singleton_for_dict(options[:dictionary]).sentence(options)
    end

    private

    # Returns the cached generator for +dictionary+, creating it on first use.
    def singleton_for_dict(dictionary)
      @singleton ||= {}
      @singleton[dictionary] ||= new(:dictionary => dictionary, :lazy => true)
    end
  end

  # @param [Hash] options
  # @option options [String, #close] :dictionary file name of dictionary, or
  #   an already opened stream (anything responding to +close+ is read as a
  #   stream); nil selects {DEFAULT_DICTIONARY}
  # @option options [Integer] :chain initial chain length; it is overwritten
  #   by the value stored in the dictionary once the dictionary is loaded
  # @option options [Boolean] :lazy load dictionary file
  #                                 on first generation if true
  # @raise [ArgumentError] if the dictionary file cannot be found
  #   (only when loading eagerly)
  def initialize(options = {})
    @dictionary = options[:dictionary]
    @chain = options[:chain] || 1

    @dict = []
    @tree = {}
    @loaded = false

    load_dict(@dictionary) unless options[:lazy]
  end

  # Generates one sentence by walking the probability tree until an
  # end-of-sentence marker (a negative word id) is drawn.
  #
  # @param [Hash] options
  # @option options [Integer] :chain number of words considered as past state
  #                                  in Markov chain
  # @return [String] Japanese meaningless sentence
  # @raise [ArgumentError] if +:chain+ is not positive or exceeds the chain
  #   length of the dictionary
  def sentence(options = {})
    load_dict(@dictionary) unless @loaded

    chain = options[:chain] || @chain

    unless chain > 0
      raise ArgumentError,
            "invalid chain option value (#{chain})."
    end
    unless chain <= @chain
      raise ArgumentError,
            "chain option value (#{chain}) exceeds dict's chain (#{@chain})"
    end

    tokens = []
    stack = [0] * chain # word id 0 is the (empty) beginning-of-sentence word

    loop do
      cands = lookup_candidates(stack)
      # A node without candidates cannot be continued.  Without this guard
      # rand(0) yields a Float, the lookup returns nil and `nil < 0` raises
      # NoMethodError; treat it as end of sentence instead.
      break if cands.empty?

      cand = cands[rand(cands.length)]
      break if cand < 0 # EOS

      tokens << @dict[cand]

      # slide the window of preceding words
      stack.shift
      stack << cand
    end

    tokens.join
  end

  private

  # Loads +dictionary+, which is either a stream or a file name (nil means
  # the default dictionary).
  def load_dict(dictionary)
    if dictionary.respond_to?(:close)
      load_dict_from_stream(dictionary)
    else
      load_dict_from_file(dictionary_file(dictionary))
    end
  end

  # Resolves +filename+ to a real path.  A given name is tried relative to
  # the current directory first and then relative to {DICTIONARY_DIR}.
  #
  # @raise [ArgumentError] if a given +filename+ exists in neither place
  def dictionary_file(filename)
    return File.realpath(DEFAULT_DICTIONARY) unless filename

    [nil, DICTIONARY_DIR].each do |basedir|
      begin
        return File.realpath(filename, basedir)
      rescue Errno::ENOENT
        next # try the next base directory
      end
    end

    raise ArgumentError,
          "dictionary file (#{filename}) not found"
  end

  # Opens +filename+ as UTF-8 and parses it.  File.open is used rather than
  # Kernel#open so that a name can never be interpreted as a command.
  def load_dict_from_file(filename)
    File.open(filename, 'r:utf-8:utf-8') do |handle|
      load_dict_from_stream(handle)
    end
  end

  # Parses a dictionary stream.  Layout, line by line:
  #   1. chain length
  #   2. the first word entry (empty string, word id 0)
  #   3. further word entries, terminated by an empty line
  #   4. probability tree lines, nested by leading spaces
  def load_dict_from_stream(stream)
    # Start from a clean state so that a retry after a failed load does not
    # append to half-filled tables.
    @dict = []
    @tree = {}

    step = 0
    stack = []

    stream.each do |raw|
      line = raw.chomp

      case step
      when 0
        # chain
        @chain = line.to_i
        step = 1
      when 1
        # first word dict entry is '' (empty)
        @dict << line
        step = 2
      when 2
        # word dictionary
        if line.empty? # separator
          step = 3
        else
          @dict << line
        end
      else
        # probability tree
        stack = expand_tree_line(line, stack)
        insert_tree_node(stack)
      end
    end

    @loaded = true
  end

  # Turns one tree line into its full path: every leading space inherits the
  # element at the same depth of the +previous+ path, and the rest of the
  # line (with inner spaces removed) becomes the last element.
  def expand_tree_line(line, previous)
    inherited = previous.dup
    path = []

    tokens = line.split(/ /)
    until tokens.empty?
      if tokens.first.empty?
        tokens.shift # one leading space == one inherited level
        path << inherited.shift
      else
        path << tokens.join
        tokens = []
      end
    end

    path
  end

  # Inserts one path into the tree.  Plain elements descend into (and create)
  # nested hashes keyed by word id; an element of the form "id=a,b,c" stores
  # the candidate list for that id and ends the path.
  def insert_tree_node(stack)
    node = @tree

    stack.each do |token|
      # to_s keeps a nil element (over-indented line) on the plain branch
      if token.to_s.include?('=')
        child, cands = token.split('=', 2)
        node[child.to_i] = cands.split(/,\s*/).map(&:to_i)
        break
      else
        word_id = token.to_i
        node[word_id] ||= {}
        node = node[word_id]
      end
    end
  end

  # Follows +stack+ (preceding word ids) down the tree and returns the
  # candidate word ids: a leaf's list, the keys of an inner node, or
  # [ -1 ] (EOS) when the path does not exist.
  def lookup_candidates(stack)
    node = @tree

    stack.each do |word|
      break if node.nil? || node.is_a?(Array)

      node = node[word]
    end

    case node
    when Hash  then node.keys
    when Array then node
    else [-1] # EOS
    end
  end
end
|
data/lib/lorem_jp/cli.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'lorem_jp'
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
class LoremJP

  # Class for command line interface of Japanese Lorem Ipsum
  #
  class CLI
    # Main routine for command line interface of +lorem_jp+.
    #
    # Recognized options:
    #   -f DICT   dictionary filename
    #   -c CHAIN  chain length (a decimal integer)
    #
    # Prints one generated sentence to standard output.
    #
    # @param [Array<String>] argv command line arguments; recognized options
    #   are removed from it (defaults to the process arguments)
    # @return [void]
    # @raise [OptionParser::ParseError] on unknown options or a non-numeric
    #   CHAIN value
    def self.main(argv = ARGV)
      options = {}

      parser = OptionParser.new

      parser.on('-f DICT', 'dictionary filename') do |value|
        options[:dictionary] = value
      end
      # DecimalInteger rejects garbage such as "abc", which String#to_i
      # would silently turn into 0.
      parser.on('-c CHAIN', OptionParser::DecimalInteger,
                'chain of precedences (default: set in dictionary)') do |value|
        options[:chain] = value
      end

      parser.parse!(argv)

      puts LoremJP.sentence(options)
    end
  end

end
|
37
|
+
|
38
|
+
# Run the command line interface only when this file is executed directly,
# not when it is loaded with require.
LoremJP::CLI.main if __FILE__ == $PROGRAM_NAME
|
data/lorem_jp.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for lorem_jp: identity, packaged files and
# development-time dependencies.
Gem::Specification.new do |gem|
  # -- identity ------------------------------------------------------------
  gem.name        = 'lorem_jp'
  gem.version     = '0.0.1'
  gem.authors     = ['ITO Nobuaki']
  gem.email       = ['daydream.trippers@gmail.com']
  gem.description = %q{Japanese Lorem Ipsum generator}
  gem.summary     = %q{Japanese Lorem Ipsum generator}
  gem.homepage    = 'https://github.com/dayflower/lorem_jp/'
  gem.license     = 'MIT'

  # -- packaged files (explicit list, in packaging order) --------------------
  gem.files = %w[
    lorem_jp.gemspec
    Gemfile
    Rakefile
    LICENSE.txt
    README.md
    lib/lorem_jp.rb
    lib/lorem_jp/cli.rb
    data/dict.txt
    bin/lorem_jp
    build/make_dict.rb
    build/fetcher.rb
    build/recipes/789.rb
    build/recipes/2363.rb
    build/recipes/52960.rb
    test/lorem_jp_spec.rb
  ]

  # Executables and test files are derived from the list above.
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # -- development dependencies ----------------------------------------------
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'minitest/spec'
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'lorem_jp'
|
5
|
+
require 'stringio'
|
6
|
+
|
7
|
+
describe LoremJP do
|
8
|
+
it 'generate sentence with 1 chain' do
|
9
|
+
dict = StringIO.new(<<-'END_DICT')
|
10
|
+
1
|
11
|
+
|
12
|
+
A
|
13
|
+
B
|
14
|
+
C
|
15
|
+
D
|
16
|
+
|
17
|
+
0=1
|
18
|
+
1=2
|
19
|
+
2=3,4
|
20
|
+
3=-1
|
21
|
+
4=-1
|
22
|
+
END_DICT
|
23
|
+
|
24
|
+
lorem = LoremJP.new( :dictionary => dict, :lazy => false )
|
25
|
+
|
26
|
+
5.times do
|
27
|
+
lorem.sentence.must_match %r{^ A B (C | D) $}xmo
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'generate sentence with 2 chain' do
|
32
|
+
dict = StringIO.new(<<-'END_DICT')
|
33
|
+
2
|
34
|
+
|
35
|
+
A
|
36
|
+
B
|
37
|
+
C
|
38
|
+
D
|
39
|
+
|
40
|
+
0
|
41
|
+
0=1
|
42
|
+
1=2,4
|
43
|
+
1
|
44
|
+
2=3,4
|
45
|
+
4=2,3
|
46
|
+
2
|
47
|
+
3=-1
|
48
|
+
4=-1
|
49
|
+
4
|
50
|
+
2=-1
|
51
|
+
3=-1
|
52
|
+
END_DICT
|
53
|
+
|
54
|
+
lorem = LoremJP.new( :dictionary => dict, :lazy => false )
|
55
|
+
|
56
|
+
5.times do
|
57
|
+
lorem.sentence.must_match %r{^ A (B (C|D) | D (B|C) ) $}xmo
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lorem_jp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ITO Nobuaki
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-06-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Japanese Lorem Ipsum generator
|
42
|
+
email:
|
43
|
+
- daydream.trippers@gmail.com
|
44
|
+
executables:
|
45
|
+
- lorem_jp
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lorem_jp.gemspec
|
50
|
+
- Gemfile
|
51
|
+
- Rakefile
|
52
|
+
- LICENSE.txt
|
53
|
+
- README.md
|
54
|
+
- lib/lorem_jp.rb
|
55
|
+
- lib/lorem_jp/cli.rb
|
56
|
+
- data/dict.txt
|
57
|
+
- bin/lorem_jp
|
58
|
+
- build/make_dict.rb
|
59
|
+
- build/fetcher.rb
|
60
|
+
- build/recipes/789.rb
|
61
|
+
- build/recipes/2363.rb
|
62
|
+
- build/recipes/52960.rb
|
63
|
+
- test/lorem_jp_spec.rb
|
64
|
+
homepage: https://github.com/dayflower/lorem_jp/
|
65
|
+
licenses:
|
66
|
+
- MIT
|
67
|
+
metadata: {}
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - '>='
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 2.0.2
|
85
|
+
signing_key:
|
86
|
+
specification_version: 4
|
87
|
+
summary: Japanese Lorem Ipsum generator
|
88
|
+
test_files:
|
89
|
+
- test/lorem_jp_spec.rb
|
90
|
+
has_rdoc:
|