lorem_jp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +57 -0
- data/Rakefile +7 -0
- data/bin/lorem_jp +6 -0
- data/build/fetcher.rb +248 -0
- data/build/make_dict.rb +183 -0
- data/build/recipes/2363.rb +30 -0
- data/build/recipes/52960.rb +28 -0
- data/build/recipes/789.rb +63 -0
- data/data/dict.txt +4449 -0
- data/lib/lorem_jp.rb +223 -0
- data/lib/lorem_jp/cli.rb +40 -0
- data/lorem_jp.gemspec +37 -0
- data/test/lorem_jp_spec.rb +60 -0
- metadata +90 -0
data/lib/lorem_jp.rb
ADDED
@@ -0,0 +1,223 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# Class for Japanese Lorem Ipsum generation
|
4
|
+
#
|
5
|
+
class LoremJP
|
6
|
+
# Version number
VERSION = '0.0.1'.freeze

# Default dictionary directory: the +data+ directory two levels above
# this source file
DICTIONARY_DIR = File.absolute_path('../../data', __FILE__).freeze

# Default dictionary file, used when no dictionary name is supplied
DEFAULT_DICTIONARY = File.join(DICTIONARY_DIR, 'dict.txt').freeze
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# Class-level shortcut for {#sentence}.
#
# The call is delegated to a generator instance that is cached per
# +:dictionary+ value; that instance is created lazily, so the dictionary
# is read on the first generation only.
#
# @param [Hash] options
# @option options [Integer] :chain      number of words considered as
#                                       past state in Markov chain
# @option options [String]  :dictionary file name of dictionary
# @return [String] Japanese meaningless sentence
def sentence(options = {})
  generator = singleton_for_dict(options[:dictionary])
  generator.sentence(options)
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
# Returns the shared generator for +dictionary+, building it on first
# request. Instances are created with <tt>:lazy => true</tt>, so no
# dictionary is read here.
#
# @param [String, nil] dictionary dictionary name used as the cache key
# @return [LoremJP] cached generator instance
def singleton_for_dict(dictionary)
  cache = (@singleton ||= {})

  unless cache[dictionary]
    cache[dictionary] = new(:dictionary => dictionary, :lazy => true)
  end

  cache[dictionary]
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Builds a generator and, unless +:lazy+ is set, loads its dictionary
# immediately.
#
# @param [Hash] options
# @option options [String]  :dictionary file name of dictionary
# @option options [Integer] :chain      initial chain length (default 1);
#                                       replaced by the value stored in the
#                                       dictionary once it has been loaded
# @option options [Boolean] :lazy       load dictionary file
#                                       on first generation if true
def initialize(options = {})
  @dictionary = options[:dictionary]
  @chain      = options[:chain] || 1

  # word list, probability tree and "dictionary has been read" flag
  @dict   = []
  @tree   = {}
  @loaded = false

  load_dict(@dictionary) unless options[:lazy]
end
|
55
|
+
|
56
|
+
# Generates one sentence by repeatedly picking a random successor word
# until an end-of-sentence marker (a negative word id) is drawn.
#
# The dictionary is loaded first if that has not happened yet.
#
# @param [Hash] options
# @option options [Integer] :chain number of words considered as past state
#                                  in Markov chain
# @return [String] Japanese meaningless sentence
# @raise [ArgumentError] if the chain value is not positive or exceeds the
#                        chain length of the loaded dictionary
def sentence(options = {})
  load_dict(@dictionary) unless @loaded

  chain = options[:chain] || @chain

  unless chain > 0
    raise ArgumentError,
          "invalid chain option value (#{chain})."
  end
  unless chain <= @chain
    raise ArgumentError,
          "chain option value (#{chain}) exceeds dict's chain (#{@chain})"
  end

  tokens = []
  # sliding window of the last +chain+ word ids; starts filled with id 0
  stack = [0] * chain

  loop do
    cand = lookup_candidates(stack).sample
    # A node without successors yields no candidate at all; treat that
    # dead end like EOS instead of failing on nil.
    break if cand.nil? || cand < 0 # EOS

    tokens << @dict[cand]

    stack.shift
    stack << cand
  end

  tokens.join('')
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
# Loads the dictionary from +dictionary+.
#
# Anything that responds to +close+ is treated as an already opened stream
# and read directly; every other value is resolved to a file path first.
#
# @param [#close, String, nil] dictionary stream or dictionary file name
def load_dict(dictionary)
  return load_dict_from_stream(dictionary) if dictionary.respond_to?(:close)

  load_dict_from_file(dictionary_file(dictionary))
end
|
102
|
+
|
103
|
+
# Resolves a dictionary name to the real path of an existing file.
#
# Without a name the bundled DEFAULT_DICTIONARY is used. Otherwise the name
# is tried as given first (base directory +nil+) and then relative to
# DICTIONARY_DIR; the first location that exists wins.
#
# @param [String, nil] filename dictionary file name
# @return [String] real path of the dictionary file
# @raise [ArgumentError] if a given name exists in neither location
def dictionary_file(filename)
  return File.realpath(DEFAULT_DICTIONARY) unless filename

  [nil, DICTIONARY_DIR].each do |basedir|
    begin
      return File.realpath(filename, basedir)
    rescue Errno::ENOENT
      next
    end
  end

  raise ArgumentError,
        "dictionary file (#{filename}) not found"
end
|
125
|
+
|
126
|
+
# Opens +filename+ as UTF-8 text and feeds it to the stream loader; the
# file is closed again when the block returns.
#
# File.open is used rather than Kernel#open so that the argument is always
# treated as a path (Kernel#open interprets a leading "|" as a command).
#
# @param [String] filename path of the dictionary file
def load_dict_from_file(filename)
  File.open(filename, 'r:utf-8:utf-8') do |handle|
    load_dict_from_stream(handle)
  end
end
|
131
|
+
|
132
|
+
# Reads a dictionary from a line-oriented stream.
#
# The stream is consumed in four consecutive sections:
#   1. one line holding the chain length
#   2. one line that becomes the first word entry
#   3. further word entries, terminated by an empty line
#   4. the probability tree, one node per line, where each leading space
#      re-uses one element of the previous line's path
#
# @param [#each] stream object yielding the dictionary lines
# @return [true]
def load_dict_from_stream(stream)
  section  = :chain
  previous = [] # path of the most recently read tree line

  stream.each do |line|
    line.chomp!

    case section
    when :chain
      @chain  = line.to_i
      section = :first_word
    when :first_word
      # first word dict entry is '' (empty)
      @dict << line
      section = :words
    when :words
      if line.empty? # separator
        section = :tree
      else
        @dict << line
      end
    else
      # probability tree
      fields = line.split(%r{[ ]}xmo)
      # number of leading empty fields == number of leading spaces
      depth = fields.index { |field| !field.empty? }

      path = []
      if depth
        # inherit one ancestor per leading space, then append the node
        # itself (remaining fields glued back together without spaces)
        depth.times { path << previous.shift }
        path << fields[depth..-1].join('')
      end
      previous = path

      insert_tree_node(previous)
    end
  end

  @loaded = true
end
|
179
|
+
|
180
|
+
# Inserts one tree line into @tree.
#
# Path elements without "=" are word ids of intermediate nodes, which are
# created as hashes on demand. The first element containing "=" is the
# leaf: the part before "=" is its word id, the comma separated part after
# it the list of candidate ids stored there. Anything after the leaf is
# ignored. The passed array is not modified.
#
# @param [Array<String, nil>] stack path from the root to the node
# @return [nil]
def insert_tree_node(stack)
  cursor = @tree

  stack.each do |entry|
    if entry =~ %r{=}xmo
      key, list = entry.split(%r{=}, 2)
      cursor[key.to_i] = list.split(%r{, \s*}xmo).map { |id| id.to_i }
      break
    end

    cursor = (cursor[entry.to_i] ||= {})
  end

  nil
end
|
202
|
+
|
203
|
+
# Looks up the candidate word ids that may follow the given state.
#
# Walks @tree along +stack+ and stops early when the path is missing or a
# candidate list has been reached. The passed array is not modified.
#
# @param [Array<Integer>] stack most recent word ids, oldest first
# @return [Array<Integer>] the stored candidate list, the child ids of an
#   intermediate node, or <tt>[-1]</tt> (EOS) when the path does not exist
def lookup_candidates(stack)
  node = @tree

  stack.each do |word|
    break if node.nil? || node.is_a?(Array)

    node = node[word]
  end

  case node
  when Hash  then node.keys
  when Array then node
  else [-1] # EOS
  end
end
|
223
|
+
end
|
data/lib/lorem_jp/cli.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'lorem_jp'
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
class LoremJP
|
6
|
+
|
7
|
+
# Class for command line interface of Japanese Lorem Ipsum
|
8
|
+
#
|
9
|
+
class CLI
|
10
|
+
# Main routine for command line interface of +lorem_jp+
#
# Parses (and removes) the options from ARGV, then prints one generated
# sentence to standard output.
#
#   -f DICT   dictionary filename
#   -c CHAIN  chain length, converted with +to_i+
#
# @return [void]
def self.main
  options = {}

  parser = OptionParser.new
  parser.on('-f DICT', 'dictionary filename') do |value|
    options[:dictionary] = value
  end
  parser.on('-c CHAIN', 'chain of precedences' +
                        ' (default: set in dictionary)') do |value|
    options[:chain] = value.to_i
  end
  parser.parse!(ARGV)

  puts LoremJP.sentence(options)
end
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
# Run the command line interface only when this file is executed directly
# (not when it is loaded with require).
LoremJP::CLI.main if __FILE__ == $PROGRAM_NAME
|
data/lorem_jp.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for lorem_jp. The file list is spelled out explicitly;
# executables and test files are derived from it by path prefix.
Gem::Specification.new do |spec|
  spec.name        = 'lorem_jp'
  spec.version     = '0.0.1'
  spec.authors     = ['ITO Nobuaki']
  spec.email       = ['daydream.trippers@gmail.com']
  spec.description = %q{Japanese Lorem Ipsum generator}
  spec.summary     = %q{Japanese Lorem Ipsum generator}
  spec.homepage    = 'https://github.com/dayflower/lorem_jp/'
  spec.license     = 'MIT'

  spec.files = %w[
    lorem_jp.gemspec
    Gemfile
    Rakefile
    LICENSE.txt
    README.md
    lib/lorem_jp.rb
    lib/lorem_jp/cli.rb
    data/dict.txt
    bin/lorem_jp
    build/make_dict.rb
    build/fetcher.rb
    build/recipes/789.rb
    build/recipes/2363.rb
    build/recipes/52960.rb
    test/lorem_jp_spec.rb
  ]

  # everything under bin/ is installed as an executable (base name only)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'minitest/spec'
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'lorem_jp'
|
5
|
+
require 'stringio'
|
6
|
+
|
7
|
+
describe LoremJP do
  # Layout of the dictionary fixtures below, as read by the loader:
  #   line 1     chain length
  #   then       word list (first entry is the empty string), closed by a
  #              blank line
  #   remainder  probability tree; ONE leading space per nesting level,
  #              leaves written as "<word id>=<candidate ids>", a negative
  #              candidate id meaning end of sentence
  # Leading spaces are significant, so the heredoc bodies start at column 0
  # and only the child lines of the 2-chain tree carry a single space.

  it 'generate sentence with 1 chain' do
    dict = StringIO.new(<<-'END_DICT')
1

A
B
C
D

0=1
1=2
2=3,4
3=-1
4=-1
    END_DICT

    lorem = LoremJP.new( :dictionary => dict, :lazy => false )

    5.times do
      lorem.sentence.must_match %r{^ A B (C | D) $}xmo
    end
  end

  it 'generate sentence with 2 chain' do
    # Each unindented line is the older word of the state, each indented
    # line below it the newer word with its candidates. Without that
    # indentation the tree collapses to one level and never reaches EOS.
    dict = StringIO.new(<<-'END_DICT')
2

A
B
C
D

0
 0=1
 1=2,4
1
 2=3,4
 4=2,3
2
 3=-1
 4=-1
4
 2=-1
 3=-1
    END_DICT

    lorem = LoremJP.new( :dictionary => dict, :lazy => false )

    5.times do
      lorem.sentence.must_match %r{^ A (B (C|D) | D (B|C) ) $}xmo
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lorem_jp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ITO Nobuaki
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-06-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Japanese Lorem Ipsum generator
|
42
|
+
email:
|
43
|
+
- daydream.trippers@gmail.com
|
44
|
+
executables:
|
45
|
+
- lorem_jp
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lorem_jp.gemspec
|
50
|
+
- Gemfile
|
51
|
+
- Rakefile
|
52
|
+
- LICENSE.txt
|
53
|
+
- README.md
|
54
|
+
- lib/lorem_jp.rb
|
55
|
+
- lib/lorem_jp/cli.rb
|
56
|
+
- data/dict.txt
|
57
|
+
- bin/lorem_jp
|
58
|
+
- build/make_dict.rb
|
59
|
+
- build/fetcher.rb
|
60
|
+
- build/recipes/789.rb
|
61
|
+
- build/recipes/2363.rb
|
62
|
+
- build/recipes/52960.rb
|
63
|
+
- test/lorem_jp_spec.rb
|
64
|
+
homepage: https://github.com/dayflower/lorem_jp/
|
65
|
+
licenses:
|
66
|
+
- MIT
|
67
|
+
metadata: {}
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - '>='
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 2.0.2
|
85
|
+
signing_key:
|
86
|
+
specification_version: 4
|
87
|
+
summary: Japanese Lorem Ipsum generator
|
88
|
+
test_files:
|
89
|
+
- test/lorem_jp_spec.rb
|
90
|
+
has_rdoc:
|