faker-okinawa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +66 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/faker-okinawa.gemspec +23 -0
- data/lib/faker/okinawa.rb +14 -0
- data/lib/faker/okinawa/address.rb +35 -0
- data/lib/faker/okinawa/awamori.rb +14 -0
- data/lib/faker/okinawa/base.rb +14 -0
- data/lib/faker/okinawa/fish.rb +14 -0
- data/lib/faker/okinawa/food.rb +14 -0
- data/lib/faker/okinawa/name.rb +14 -0
- data/lib/faker/okinawa/odic.rb +40 -0
- data/lib/faker/okinawa/school.rb +14 -0
- data/lib/faker/okinawa/version.rb +5 -0
- data/o-dic/address.dic +1068 -0
- data/o-dic/amami.dic +69 -0
- data/o-dic/awamori.dic +809 -0
- data/o-dic/base.dic +63 -0
- data/o-dic/bin-dic/ATOK.zip +0 -0
- data/o-dic/bin-dic/Kotoeri.dmg +0 -0
- data/o-dic/bin-dic/MSIME.zip +0 -0
- data/o-dic/bin-dic/MSIME2000.zip +0 -0
- data/o-dic/bin-dic/MSIME2002.zip +0 -0
- data/o-dic/bin-dic/MSIME2003.zip +0 -0
- data/o-dic/bin-dic/MSIME2007.zip +0 -0
- data/o-dic/bin-dic/VJEDelta40.zip +0 -0
- data/o-dic/bin-dic/anthydic20090901.tar.bz2 +0 -0
- data/o-dic/bin-dic/cannadic.tar.bz2 +0 -0
- data/o-dic/bin-dic/cannadic20030407.tar.bz2 +0 -0
- data/o-dic/bin-dic/cannadic20051104.tar.bz2 +0 -0
- data/o-dic/bin-dic/google-20111218.zip +0 -0
- data/o-dic/bus.dic +216 -0
- data/o-dic/city.dic +209 -0
- data/o-dic/doc/History.doc +138 -0
- data/o-dic/doc/README.1ST +79 -0
- data/o-dic/doc/format.txt +169 -0
- data/o-dic/food.dic +524 -0
- data/o-dic/geo.dic +815 -0
- data/o-dic/history.dic +1078 -0
- data/o-dic/island.dic +354 -0
- data/o-dic/misc.dic +1166 -0
- data/o-dic/name.dic +1039 -0
- data/o-dic/park.dic +535 -0
- data/o-dic/sakana.dic +160 -0
- data/o-dic/school.dic +1068 -0
- data/o-dic/script/ODIC.pm +98 -0
- data/o-dic/script/ccount.pl +111 -0
- data/o-dic/script/geta_checker.sh +29 -0
- data/o-dic/script/oki2atk.pl +192 -0
- data/o-dic/script/oki2canna.pl +131 -0
- data/o-dic/script/oki2cha.pl +154 -0
- data/o-dic/script/oki2jis.pl +167 -0
- data/o-dic/script/oki2kotoeri.pl +127 -0
- data/o-dic/script/oki2mozc.pl +156 -0
- data/o-dic/script/oki2msime.pl +136 -0
- data/o-dic/script/oki2osxjapaneseim.pl +168 -0
- data/o-dic/script/oki2vje.pl +138 -0
- data/o-dic/script/wcount.pl +114 -0
- metadata +135 -0
@@ -0,0 +1,138 @@
|
|
1
|
+
#!/usr/bin/perl
#
# oki2vje.pl - convert Okinawa dictionary files into text that can be
#              imported into VJE-Delta
#
# $Id: oki2vje.pl,v 1.16 2006/09/01 18:20:06 void Exp $
#
# Use this script to generate "okinawa.txt" like so:
#   $ cat ../*.dic | ./oki2vje.pl | sort -u > okinawa.txt
# Then start the VJE-Delta dictionary utility, run "File" - "New" first,
# and feed okinawa.txt to "File" - "Register/Delete from text".
require 5.6.0;
use FindBin;
use lib $FindBin::Bin;          # so that ODIC.pm next to this script is found
require 'ODIC.pm';
use strict;

# Current entry; shared with convert_class() through package variables.
our $phonate;                   # reading
our $word;                      # word
our $class;                     # part of speech

while (<>) {
    # Skip lines holding characters outside JIS X 0208.  A line is only
    # skipped when it carries the marker below; the test runs before
    # comments are stripped, so a marker placed in a comment is seen.
    next if (/〓あり/);

    s/#.*$//;                   # drop everything from `#' onwards
    next if (/^\s*$/);          # nothing left on the line: skip it

    # An entry needs three whitespace-separated fields.
    my @fields = /(\S+)\s+(\S+)\s+(\S+)/;
    unless (@fields) {
        # Report the malformed line on STDERR and echo it to STDOUT.
        print STDERR "Error: $.: too few field number `$_'\n";
        print "$_";
        next;
    }

    ($phonate, $word, $class) = @fields;
    ODIC::check_phonate($phonate);
    ODIC::check_word($word);
    convert_class();
}

# Append the dictionary date entry after all input has been processed.
version();
exit 0;
|
43
|
+
|
44
|
+
|
45
|
+
# Translate the Okinawa-dictionary part of speech held in the package
# variable $class into the tag written to the output, then print the
# current entry ($phonate, $word, [$class]) through ODIC::to_eucjp.
# An unrecognised class is reported on STDERR and emitted as '〓'.
# Takes no arguments; reads $phonate/$word and overwrites $class.
sub convert_class {
    # Built inside the sub on purpose: the main program calls `exit 0'
    # before execution would reach a file-level initialisation down here.
    my %vje_class_of = (
        "普通名詞"       => '名詞',
        "サ変名詞"       => '名サ',
        "形動名詞"       => '名形',
        "姓"             => '人姓',
        "名"             => '人名',
        "その他の人名"   => '人名',
        "単純地名"       => '地名',
        "接尾語付き地名" => '地名行政区分',
        "組織名"         => '組織',
        "その他固有名詞" => '固名',
        "副詞"           => '副詞',
        "接続詞"         => '接続',
        "感動詞"         => '感動',
        "形容詞"         => '形容',
        "形容動詞"       => '形動',
        "接頭語"         => '接頭',
        "数字列接頭語"   => '冠数',
        "接尾語"         => '接尾',
        "人名接尾語"     => '接尾人名',
        "地名接尾語"     => '接尾地名',
        "組織名接尾語"   => '接尾',
        "数字列接尾語"   => '助数',
        "成句"           => '名詞',
        "無品詞"         => '単漢',
    );

    if (exists $vje_class_of{$class}) {
        $class = $vje_class_of{$class};
    }
    else {
        print STDERR "Error: $.: unknown class `$class': $phonate\t$word\n";
        $class = '〓';
    }

    print ODIC::to_eucjp("$phonate\t$word\t[$class]\n");
}
|
125
|
+
|
126
|
+
# Print one extra dictionary entry whose word is today's local date
# (year/month/day, not zero-padded), passed through ODIC::to_eucjp.
# Takes no arguments.
sub version {
    # localtime() yields a 0-based month and a year counted from 1900.
    my ($day, $month0, $year1900) = (localtime(time()))[3, 4, 5];
    my $stamp = join('/', $year1900 + 1900, $month0 + 1, $day);

    print ODIC::to_eucjp("おきなわじしょのひづけ\t${stamp}(沖縄辞書の日付け)\t[名詞]\n");
}
|
@@ -0,0 +1,114 @@
|
|
1
|
+
#!/usr/bin/perl
#
# wcount.pl - tally the number of words in the Okinawa dictionary, per file
#
# $Id: wcount.pl,v 1.5 2002/06/16 04:31:52 void Exp $
#
# Example (csh-style loop):
#   foreach i (../*.dic)
#     cat $i | ./wcount.pl | wc -l
#     echo $i
#   end
require 5.6.0;
use FindBin;
use lib $FindBin::Bin;          # find ODIC.pm beside this script; the current
                                # directory is not in @INC on Perl 5.26+
require 'ODIC.pm';
use strict;

# Current entry; shared with check_class() through package variables.
our $phonate;                   # reading
our $word;                      # word
our $class;                     # part of speech

while (<>) {
    s/#.*$//;                   # drop everything from `#' onwards
    next if (/^\s*$/);          # nothing left on the line: skip it

    # An entry needs three whitespace-separated fields.
    if (/(\S+)\s+(\S+)\s+(\S+)/) {
        $phonate = $1;
        $word    = $2;
        $class   = $3;
        ODIC::check_phonate($phonate);
        ODIC::check_word($word);
        check_class();
    }
    else {
        # Report the malformed line on STDERR and echo it to STDOUT.
        print STDERR "Error: $.: too few field number `$_'\n";
        print "$_";
    }
}
exit 0;
|
35
|
+
|
36
|
+
|
37
|
+
# Check the part of speech held in the package variable $class against
# the list of known classes and print one output line for the current
# entry.  An unknown class is reported on STDERR; the entry is printed
# either way.  Takes no arguments; reads $phonate, $word and $class.
sub check_class {
    # Built inside the sub on purpose: the main program calls `exit 0'
    # before execution would reach a file-level initialisation down here.
    my %is_known_class = map { $_ => 1 } (
        "普通名詞",     "サ変名詞",       "形動名詞",
        "姓",           "名",             "その他の人名",
        "単純地名",     "接尾語付き地名", "組織名",
        "その他固有名詞",
        "副詞",         "接続詞",         "感動詞",
        "形容詞",       "形容動詞",
        "接頭語",       "数字列接頭語",
        "接尾語",       "人名接尾語",     "地名接尾語",
        "組織名接尾語", "数字列接尾語",
        "成句",         "無品詞",
    );

    unless (exists $is_known_class{$class}) {
        print STDERR "Error: $.: unknown class `$class': $phonate\t$word\n";
    }

    # The third column is the literal text "class", not the value of
    # $class.  NOTE(review): possibly a missing sigil; kept as-is.
    print "$phonate\t$word\tclass\n";
}
|
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: faker-okinawa
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Seiei Miyagi
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-02-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description:
|
42
|
+
email:
|
43
|
+
- hanachin@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- ".gitignore"
|
49
|
+
- CODE_OF_CONDUCT.md
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- bin/console
|
55
|
+
- bin/setup
|
56
|
+
- faker-okinawa.gemspec
|
57
|
+
- lib/faker/okinawa.rb
|
58
|
+
- lib/faker/okinawa/address.rb
|
59
|
+
- lib/faker/okinawa/awamori.rb
|
60
|
+
- lib/faker/okinawa/base.rb
|
61
|
+
- lib/faker/okinawa/fish.rb
|
62
|
+
- lib/faker/okinawa/food.rb
|
63
|
+
- lib/faker/okinawa/name.rb
|
64
|
+
- lib/faker/okinawa/odic.rb
|
65
|
+
- lib/faker/okinawa/school.rb
|
66
|
+
- lib/faker/okinawa/version.rb
|
67
|
+
- o-dic/address.dic
|
68
|
+
- o-dic/amami.dic
|
69
|
+
- o-dic/awamori.dic
|
70
|
+
- o-dic/base.dic
|
71
|
+
- o-dic/bin-dic/ATOK.zip
|
72
|
+
- o-dic/bin-dic/Kotoeri.dmg
|
73
|
+
- o-dic/bin-dic/MSIME.zip
|
74
|
+
- o-dic/bin-dic/MSIME2000.zip
|
75
|
+
- o-dic/bin-dic/MSIME2002.zip
|
76
|
+
- o-dic/bin-dic/MSIME2003.zip
|
77
|
+
- o-dic/bin-dic/MSIME2007.zip
|
78
|
+
- o-dic/bin-dic/VJEDelta40.zip
|
79
|
+
- o-dic/bin-dic/anthydic20090901.tar.bz2
|
80
|
+
- o-dic/bin-dic/cannadic.tar.bz2
|
81
|
+
- o-dic/bin-dic/cannadic20030407.tar.bz2
|
82
|
+
- o-dic/bin-dic/cannadic20051104.tar.bz2
|
83
|
+
- o-dic/bin-dic/google-20111218.zip
|
84
|
+
- o-dic/bus.dic
|
85
|
+
- o-dic/city.dic
|
86
|
+
- o-dic/doc/History.doc
|
87
|
+
- o-dic/doc/README.1ST
|
88
|
+
- o-dic/doc/format.txt
|
89
|
+
- o-dic/food.dic
|
90
|
+
- o-dic/geo.dic
|
91
|
+
- o-dic/history.dic
|
92
|
+
- o-dic/island.dic
|
93
|
+
- o-dic/misc.dic
|
94
|
+
- o-dic/name.dic
|
95
|
+
- o-dic/park.dic
|
96
|
+
- o-dic/sakana.dic
|
97
|
+
- o-dic/school.dic
|
98
|
+
- o-dic/script/ODIC.pm
|
99
|
+
- o-dic/script/ccount.pl
|
100
|
+
- o-dic/script/geta_checker.sh
|
101
|
+
- o-dic/script/oki2atk.pl
|
102
|
+
- o-dic/script/oki2canna.pl
|
103
|
+
- o-dic/script/oki2cha.pl
|
104
|
+
- o-dic/script/oki2jis.pl
|
105
|
+
- o-dic/script/oki2kotoeri.pl
|
106
|
+
- o-dic/script/oki2mozc.pl
|
107
|
+
- o-dic/script/oki2msime.pl
|
108
|
+
- o-dic/script/oki2osxjapaneseim.pl
|
109
|
+
- o-dic/script/oki2vje.pl
|
110
|
+
- o-dic/script/wcount.pl
|
111
|
+
homepage: https://github.com/okinawarb/faker-okinawa
|
112
|
+
licenses:
|
113
|
+
- MIT
|
114
|
+
metadata: {}
|
115
|
+
post_install_message:
|
116
|
+
rdoc_options: []
|
117
|
+
require_paths:
|
118
|
+
- lib
|
119
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
requirements: []
|
130
|
+
rubyforge_project:
|
131
|
+
rubygems_version: 2.5.1
|
132
|
+
signing_key:
|
133
|
+
specification_version: 4
|
134
|
+
summary: Faker::Okinawa generates Okinawa fake data.
|
135
|
+
test_files: []
|