charguess 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +134 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +26 -0
- data/Rakefile +25 -0
- data/ext/charguess/charguess.c +29 -0
- data/ext/charguess/extconf.rb +11 -0
- data/ext/libcharguess/cpp/.deps/EUCJPProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/EUCJPProber.Po +87 -0
- data/ext/libcharguess/cpp/.deps/EUCKRProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/EUCKRProber.Po +85 -0
- data/ext/libcharguess/cpp/.deps/EUCTWProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/EUCTWProber.Po +85 -0
- data/ext/libcharguess/cpp/.deps/EscCharsetProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/EscCharsetProber.Po +83 -0
- data/ext/libcharguess/cpp/.deps/EscSM.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/EscSM.Po +77 -0
- data/ext/libcharguess/cpp/.deps/GB2312Prober.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/GB2312Prober.Po +85 -0
- data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Po +78 -0
- data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Po +78 -0
- data/ext/libcharguess/cpp/.deps/LangGreekModel.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/LangGreekModel.Po +78 -0
- data/ext/libcharguess/cpp/.deps/LangHungarianModel.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/LangHungarianModel.Po +78 -0
- data/ext/libcharguess/cpp/.deps/LangThaiModel.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/LangThaiModel.Po +78 -0
- data/ext/libcharguess/cpp/.deps/Latin1Prober.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/Latin1Prober.Po +78 -0
- data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Po +102 -0
- data/ext/libcharguess/cpp/.deps/MBCSSM.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/MBCSSM.Po +77 -0
- data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Po +80 -0
- data/ext/libcharguess/cpp/.deps/SBCharsetProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/SBCharsetProber.Po +78 -0
- data/ext/libcharguess/cpp/.deps/SJISProber.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/SJISProber.Po +86 -0
- data/ext/libcharguess/cpp/.deps/UTF8Prober.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/UTF8Prober.Po +82 -0
- data/ext/libcharguess/cpp/.deps/big5Prober.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/big5Prober.Po +84 -0
- data/ext/libcharguess/cpp/.deps/charDistribution.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/charDistribution.Po +87 -0
- data/ext/libcharguess/cpp/.deps/chardet.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/chardet.Po +84 -0
- data/ext/libcharguess/cpp/.deps/charguess.Po +77 -0
- data/ext/libcharguess/cpp/.deps/jpCntx.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/jpCntx.Po +75 -0
- data/ext/libcharguess/cpp/.deps/universal.Plo +1 -0
- data/ext/libcharguess/cpp/.deps/universal.Po +111 -0
- data/ext/libcharguess/cpp/AUTHORS +3 -0
- data/ext/libcharguess/cpp/Big5Freq.tab +928 -0
- data/ext/libcharguess/cpp/COPYING +340 -0
- data/ext/libcharguess/cpp/COPYRIGHT +20 -0
- data/ext/libcharguess/cpp/ChangeLog +0 -0
- data/ext/libcharguess/cpp/EUCJPProber.cpp +80 -0
- data/ext/libcharguess/cpp/EUCJPProber.h +58 -0
- data/ext/libcharguess/cpp/EUCKRFreq.tab +615 -0
- data/ext/libcharguess/cpp/EUCKRProber.cpp +80 -0
- data/ext/libcharguess/cpp/EUCKRProber.h +54 -0
- data/ext/libcharguess/cpp/EUCTWFreq.tab +448 -0
- data/ext/libcharguess/cpp/EUCTWProber.cpp +79 -0
- data/ext/libcharguess/cpp/EUCTWProber.h +53 -0
- data/ext/libcharguess/cpp/EscCharsetProber.cpp +89 -0
- data/ext/libcharguess/cpp/EscCharsetProber.h +49 -0
- data/ext/libcharguess/cpp/EscSM.cpp +244 -0
- data/ext/libcharguess/cpp/GB2312Freq.tab +476 -0
- data/ext/libcharguess/cpp/GB2312Prober.cpp +84 -0
- data/ext/libcharguess/cpp/GB2312Prober.h +56 -0
- data/ext/libcharguess/cpp/INSTALL +229 -0
- data/ext/libcharguess/cpp/JISFreq.tab +574 -0
- data/ext/libcharguess/cpp/LICENSE +504 -0
- data/ext/libcharguess/cpp/LangBulgarianModel.cpp +230 -0
- data/ext/libcharguess/cpp/LangCyrillicModel.cpp +340 -0
- data/ext/libcharguess/cpp/LangGreekModel.cpp +229 -0
- data/ext/libcharguess/cpp/LangHungarianModel.cpp +228 -0
- data/ext/libcharguess/cpp/LangThaiModel.cpp +206 -0
- data/ext/libcharguess/cpp/Latin1Prober.cpp +190 -0
- data/ext/libcharguess/cpp/Latin1Prober.h +49 -0
- data/ext/libcharguess/cpp/MBCSGroupProber.cpp +186 -0
- data/ext/libcharguess/cpp/MBCSGroupProber.h +58 -0
- data/ext/libcharguess/cpp/MBCSSM.cpp +610 -0
- data/ext/libcharguess/cpp/Makefile.am +45 -0
- data/ext/libcharguess/cpp/Makefile.in +608 -0
- data/ext/libcharguess/cpp/NEWS +0 -0
- data/ext/libcharguess/cpp/README +0 -0
- data/ext/libcharguess/cpp/SBCSGroupProber.cpp +244 -0
- data/ext/libcharguess/cpp/SBCSGroupProber.h +54 -0
- data/ext/libcharguess/cpp/SBCharsetProber.cpp +100 -0
- data/ext/libcharguess/cpp/SBCharsetProber.h +89 -0
- data/ext/libcharguess/cpp/SJISProber.cpp +86 -0
- data/ext/libcharguess/cpp/SJISProber.h +60 -0
- data/ext/libcharguess/cpp/UTF8Prober.cpp +75 -0
- data/ext/libcharguess/cpp/UTF8Prober.h +46 -0
- data/ext/libcharguess/cpp/aclocal.m4 +1008 -0
- data/ext/libcharguess/cpp/autogen.sh +153 -0
- data/ext/libcharguess/cpp/big5Prober.cpp +76 -0
- data/ext/libcharguess/cpp/big5Prober.h +53 -0
- data/ext/libcharguess/cpp/charDistribution.cpp +90 -0
- data/ext/libcharguess/cpp/charDistribution.h +219 -0
- data/ext/libcharguess/cpp/charguess.cpp +56 -0
- data/ext/libcharguess/cpp/charguess.h +23 -0
- data/ext/libcharguess/cpp/charsetProber.h +50 -0
- data/ext/libcharguess/cpp/codingStateMachine.h +92 -0
- data/ext/libcharguess/cpp/config.h +36 -0
- data/ext/libcharguess/cpp/config.h.in +35 -0
- data/ext/libcharguess/cpp/config.status +1075 -0
- data/ext/libcharguess/cpp/configure +5226 -0
- data/ext/libcharguess/cpp/configure.in +49 -0
- data/ext/libcharguess/cpp/depcomp +472 -0
- data/ext/libcharguess/cpp/fix_copyright +32 -0
- data/ext/libcharguess/cpp/install-sh +294 -0
- data/ext/libcharguess/cpp/jpCntx.cpp +194 -0
- data/ext/libcharguess/cpp/jpCntx.h +100 -0
- data/ext/libcharguess/cpp/missing +336 -0
- data/ext/libcharguess/cpp/mkinstalldirs +111 -0
- data/ext/libcharguess/cpp/pkgInt.h +72 -0
- data/ext/libcharguess/cpp/stamp-h1 +1 -0
- data/ext/libcharguess/cpp/test/test.cpp +78 -0
- data/ext/libcharguess/cpp/types.h +41 -0
- data/ext/libcharguess/cpp/universal.cpp +273 -0
- data/ext/libcharguess/cpp/universal.h +65 -0
- data/script/console +9 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/tasks/extconf/charguess.rake +47 -0
- data/tasks/extconf.rake +13 -0
- data/test/test_charguess.rb +7 -0
- data/test/test_charguess_extn.rb +10 -0
- data/test/test_helper.rb +3 -0
- metadata +219 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
#!/bin/sh
|
|
2
|
+
#
|
|
3
|
+
# install - install a program, script, or datafile
|
|
4
|
+
#
|
|
5
|
+
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
|
6
|
+
# later released in X11R6 (xc/config/util/install.sh) with the
|
|
7
|
+
# following copyright and license.
|
|
8
|
+
#
|
|
9
|
+
# Copyright (C) 1994 X Consortium
|
|
10
|
+
#
|
|
11
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
# of this software and associated documentation files (the "Software"), to
|
|
13
|
+
# deal in the Software without restriction, including without limitation the
|
|
14
|
+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
15
|
+
# sell copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
# furnished to do so, subject to the following conditions:
|
|
17
|
+
#
|
|
18
|
+
# The above copyright notice and this permission notice shall be included in
|
|
19
|
+
# all copies or substantial portions of the Software.
|
|
20
|
+
#
|
|
21
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
|
25
|
+
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
|
26
|
+
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
27
|
+
#
|
|
28
|
+
# Except as contained in this notice, the name of the X Consortium shall not
|
|
29
|
+
# be used in advertising or otherwise to promote the sale, use or other deal-
|
|
30
|
+
# ings in this Software without prior written authorization from the X Consor-
|
|
31
|
+
# tium.
|
|
32
|
+
#
|
|
33
|
+
#
|
|
34
|
+
# FSF changes to this file are in the public domain.
|
|
35
|
+
#
|
|
36
|
+
# Calling this script install-sh is preferred over install.sh, to prevent
|
|
37
|
+
# `make' implicit rules from creating a file called install from it
|
|
38
|
+
# when there is no Makefile.
|
|
39
|
+
#
|
|
40
|
+
# This script is compatible with the BSD install script, but was written
|
|
41
|
+
# from scratch. It can only install one file at a time, a restriction
|
|
42
|
+
# shared with many OS's install programs.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# set DOITPROG to echo to test this script
|
|
46
|
+
|
|
47
|
+
# Don't use :- since 4.3BSD and earlier shells don't like it.
|
|
48
|
+
# Prefix put in front of the commands that are run through $doit further
# down.  The ${VAR-default} form only substitutes when VAR is unset.
doit="${DOITPROG-}"

# Helper programs.  Put in absolute paths if you don't have them in your
# path, or override them through the corresponding environment variables.
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"

# State filled in by the option parser.
transformbasename=""
# The rest of the script reads and writes "transformarg" (no underscore).
# It has to be cleared under that exact name, otherwise a value inherited
# from the caller's environment would silently rename the installed file.
transformarg=""
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""
dir_arg=""
|
|
74
|
+
|
|
75
|
+
# Parse the command line.  Recognised options:
#   -c          copy the source instead of moving it
#   -d          the operand is a directory to create
#   -m MODE     argument handed to $chmodprog
#   -o OWNER    argument handed to $chownprog
#   -g GROUP    argument handed to $chgrpprog
#   -s          run $stripprog on the installed file
#   -t=EXPR     sed expression applied to the destination base name
#   -b=SUFFIX   suffix removed before, and re-appended after, the transform
# The first non-option word becomes $src; any later one overwrites $dst.
# The loop stops at the first empty argument.
while [ x"$1" != x ]; do
    case $1 in
        -c) instcmd=$cpprog
            shift
            continue;;

        -d) dir_arg=true
            shift
            continue;;

        -m) chmodcmd="$chmodprog $2"
            shift
            shift
            continue;;

        -o) chowncmd="$chownprog $2"
            shift
            shift
            continue;;

        -g) chgrpcmd="$chgrpprog $2"
            shift
            shift
            continue;;

        -s) stripcmd=$stripprog
            shift
            continue;;

        # "$1" is quoted so that blanks and glob characters inside the
        # expression reach sed unchanged instead of being field-split and
        # pathname-expanded by the shell.
        -t=*) transformarg=`echo "$1" | sed 's/-t=//'`
            shift
            continue;;

        -b=*) transformbasename=`echo "$1" | sed 's/-b=//'`
            shift
            continue;;

        *)  if [ x"$src" = x ]
            then
                src=$1
            else
                # this colon is to work around a 386BSD /bin/sh bug
                :
                dst=$1
            fi
            shift
            continue;;
    esac
done
|
|
124
|
+
|
|
125
|
+
# At least one operand is required; bail out when none was collected.
test x"$src" != x || {
    echo "$0: no input file specified" >&2
    exit 1
}
|
|
132
|
+
|
|
133
|
+
if [ x"$dir_arg" = x ]; then
    # File mode.
    #
    # Check the source here rather than waiting for the
    # "$instcmd $src $dsttmp" command to notice: by then directories might
    # already have been created, which would be especially bad if $src
    # (and thus $dsttmp) contains '*'.
    if [ ! -f "$src" ] && [ ! -d "$src" ]; then
        echo "$0: $src does not exist" >&2
        exit 1
    fi

    if [ x"$dst" = x ]; then
        echo "$0: no destination specified" >&2
        exit 1
    fi

    # When the destination is a directory, the file keeps the base name of
    # the source.  If your system does not like double slashes in
    # filenames, you may need to add some logic here.
    if [ -d "$dst" ]; then
        dst=$dst/`basename "$src"`
    fi
else
    # Directory mode (-d): the single operand is the directory to create.
    dst=$src
    src=""

    if [ -d "$dst" ]; then
        # Already present: nothing to create and no mode to apply.
        instcmd=:
        chmodcmd=""
    else
        instcmd=$mkdirprog
    fi
fi
|
|
175
|
+
|
|
176
|
+
# Compute the parent directory of $dst and create any missing components.

## this sed command emulates the dirname command:
##   1. drop the last path component, 2. drop one trailing slash,
##   3. turn an empty result into ".".
## NOTE(review): for a destination directly under the root (e.g. "/foo")
## steps 1-2 leave an empty string, so the result is "." rather than "/".
dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`

# Make sure that the destination directory exists.
# this part is taken from Noah Friedman's mkinstalldirs script

# Skip lots of stat calls in the usual case.
if [ ! -d "$dstdir" ]; then
    # Fallback used only when IFS is unset in the calling environment.
    defaultIFS='
'
    IFS="${IFS-$defaultIFS}"

    oIFS=$IFS
    # Some sh's can't handle IFS=/ for some reason.
    # So every "/" is rewritten to "%" and the path is split on "%" instead;
    # a leading "%" is turned back into "/" to keep absolute paths absolute.
    IFS='%'
    set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
    IFS=$oIFS

    # Path prefix built up one component per iteration.
    pathcomp=''

    while [ $# -ne 0 ] ; do
        pathcomp=$pathcomp$1
        shift

        # Note: this mkdir is not routed through $doit and its exit status
        # is not checked.
        if [ ! -d "$pathcomp" ] ;
        then
            $mkdirprog "$pathcomp"
        else
            :
        fi

        pathcomp=$pathcomp/
    done
fi
|
|
210
|
+
|
|
211
|
+
# Perform the installation proper.  Everything below is one "&&" chain, so
# the first failing step stops the remaining ones.
if [ x"$dir_arg" != x ]
then
    # Directory mode: create (or no-op on) the directory, then apply the
    # optional owner, group, strip and mode commands to it.
    $doit $instcmd "$dst" &&

    if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dst"; else : ; fi &&
    if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dst"; else : ; fi &&
    if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dst"; else : ; fi &&
    if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dst"; else : ; fi
else

    # If we're going to rename the final executable, determine the name now.

    if [ x"$transformarg" = x ]
    then
        dstfile=`basename "$dst"`
    else
        # Strip $transformbasename, run the sed transform, re-append it.
        # $transformarg is deliberately left unquoted here, so it is
        # word-split into separate sed arguments.
        dstfile=`basename "$dst" $transformbasename |
            sed $transformarg`$transformbasename
    fi

    # don't allow the sed command to completely eliminate the filename

    if [ x"$dstfile" = x ]
    then
        dstfile=`basename "$dst"`
    else
        :
    fi

    # Make a couple of temp file names in the proper directory.
    # ($$ is the shell's process id.)

    dsttmp=$dstdir/_inst.$$_
    rmtmp=$dstdir/_rm.$$_

    # Trap to clean up temp files at exit.
    # The exit trap (signal 0) saves the pending status, removes both temp
    # names and re-exits with that status; signals 1, 2, 13 and 15 are
    # funnelled into a plain exit so the exit trap runs for them as well.
    # Note: the trap calls "rm" directly rather than $rmprog.

    trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
    trap '(exit $?); exit' 1 2 13 15

    # Move or copy the file name to the temp name

    $doit $instcmd "$src" "$dsttmp" &&

    # and set any options; do chmod last to preserve setuid bits

    # If any of these fail, we abort the whole thing.  If we want to
    # ignore errors from any of these, just make sure not to ignore
    # errors from the above "$doit $instcmd $src $dsttmp" command.

    if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dsttmp"; else :;fi &&
    if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dsttmp"; else :;fi &&
    if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dsttmp"; else :;fi &&
    if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dsttmp"; else :;fi &&

    # Now remove or move aside any old file at destination location.  We try this
    # two ways since rm can't unlink itself on some systems and the destination
    # file might be busy for other reasons.  In this case, the final cleanup
    # might fail but the new file should still install successfully.

    {
        if [ -f "$dstdir/$dstfile" ]
        then
            $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null ||
            $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null ||
            {
                echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
                (exit 1); exit
            }
        else
            :
        fi
    } &&

    # Now rename the file to the real destination.

    $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"

fi &&

# The final little trick to "correctly" pass the exit status to the exit trap.
# "(exit 0)" sets $? to 0 in a subshell; the bare "exit" then leaves with it.

{
    (exit 0); exit
}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
#include "types.h"
|
|
2
|
+
#include "jpCntx.h"
|
|
3
|
+
|
|
4
|
+
//This is the hiragana 2-char sequence table; the number in each cell represents the frequency category of that character pair.
|
|
5
|
+
char jp2CharContext[83][83] =
|
|
6
|
+
{
|
|
7
|
+
{ 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,},
|
|
8
|
+
{ 2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4,},
|
|
9
|
+
{ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,},
|
|
10
|
+
{ 0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4,},
|
|
11
|
+
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
|
|
12
|
+
{ 0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4,},
|
|
13
|
+
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
|
|
14
|
+
{ 0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3,},
|
|
15
|
+
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
|
|
16
|
+
{ 0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4,},
|
|
17
|
+
{ 1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4,},
|
|
18
|
+
{ 0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3,},
|
|
19
|
+
{ 0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3,},
|
|
20
|
+
{ 0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3,},
|
|
21
|
+
{ 0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4,},
|
|
22
|
+
{ 0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3,},
|
|
23
|
+
{ 2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4,},
|
|
24
|
+
{ 0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3,},
|
|
25
|
+
{ 0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5,},
|
|
26
|
+
{ 0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3,},
|
|
27
|
+
{ 2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5,},
|
|
28
|
+
{ 0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4,},
|
|
29
|
+
{ 1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4,},
|
|
30
|
+
{ 0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3,},
|
|
31
|
+
{ 0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3,},
|
|
32
|
+
{ 0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3,},
|
|
33
|
+
{ 0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5,},
|
|
34
|
+
{ 0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4,},
|
|
35
|
+
{ 0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5,},
|
|
36
|
+
{ 0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3,},
|
|
37
|
+
{ 0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4,},
|
|
38
|
+
{ 0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4,},
|
|
39
|
+
{ 0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4,},
|
|
40
|
+
{ 0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,},
|
|
41
|
+
{ 0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,},
|
|
42
|
+
{ 1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3,},
|
|
43
|
+
{ 0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0,},
|
|
44
|
+
{ 0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3,},
|
|
45
|
+
{ 0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3,},
|
|
46
|
+
{ 0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5,},
|
|
47
|
+
{ 0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4,},
|
|
48
|
+
{ 2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5,},
|
|
49
|
+
{ 0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3,},
|
|
50
|
+
{ 0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3,},
|
|
51
|
+
{ 0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3,},
|
|
52
|
+
{ 0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3,},
|
|
53
|
+
{ 0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4,},
|
|
54
|
+
{ 0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4,},
|
|
55
|
+
{ 0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2,},
|
|
56
|
+
{ 0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3,},
|
|
57
|
+
{ 0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3,},
|
|
58
|
+
{ 0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3,},
|
|
59
|
+
{ 0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3,},
|
|
60
|
+
{ 0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4,},
|
|
61
|
+
{ 0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3,},
|
|
62
|
+
{ 0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4,},
|
|
63
|
+
{ 0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3,},
|
|
64
|
+
{ 0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3,},
|
|
65
|
+
{ 0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4,},
|
|
66
|
+
{ 0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4,},
|
|
67
|
+
{ 0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3,},
|
|
68
|
+
{ 2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4,},
|
|
69
|
+
{ 0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4,},
|
|
70
|
+
{ 0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3,},
|
|
71
|
+
{ 0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4,},
|
|
72
|
+
{ 0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4,},
|
|
73
|
+
{ 1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4,},
|
|
74
|
+
{ 0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3,},
|
|
75
|
+
{ 0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,},
|
|
76
|
+
{ 0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2,},
|
|
77
|
+
{ 0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3,},
|
|
78
|
+
{ 0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3,},
|
|
79
|
+
{ 0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5,},
|
|
80
|
+
{ 0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3,},
|
|
81
|
+
{ 0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4,},
|
|
82
|
+
{ 1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4,},
|
|
83
|
+
{ 0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4,},
|
|
84
|
+
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
|
|
85
|
+
{ 0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3,},
|
|
86
|
+
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1,},
|
|
87
|
+
{ 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2,},
|
|
88
|
+
{ 0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3,},
|
|
89
|
+
{ 0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1,},
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
//GetConfidence() only computes a ratio once mTotalRel is strictly greater than this value; otherwise it returns DONT_KNOW
#define MINIMUM_DATA_THRESHOLD 4
|
|
93
|
+
|
|
94
|
+
void JapaneseContextAnalysis::HandleData(const char* aBuf, PRUint32 aLen)
|
|
95
|
+
{
|
|
96
|
+
PRUint32 charLen;
|
|
97
|
+
PRInt32 order;
|
|
98
|
+
PRUint32 i;
|
|
99
|
+
|
|
100
|
+
if (mDone)
|
|
101
|
+
return;
|
|
102
|
+
|
|
103
|
+
//The buffer we got is byte oriented, and a character may span in more than one
|
|
104
|
+
//buffers. In case the last one or two byte in last buffer is not complete, we
|
|
105
|
+
//record how many byte needed to complete that character and skip these bytes here.
|
|
106
|
+
//We can choose to record those bytes as well and analyse the character once it
|
|
107
|
+
//is complete, but since a character will not make much difference, by simply skipping
|
|
108
|
+
//this character will simply our logic and improve performance.
|
|
109
|
+
for (i = mNeedToSkipCharNum; i < aLen; )
|
|
110
|
+
{
|
|
111
|
+
order = GetOrder(aBuf+i, &charLen);
|
|
112
|
+
i+= charLen;
|
|
113
|
+
if (i > aLen){
|
|
114
|
+
mNeedToSkipCharNum = i - aLen;
|
|
115
|
+
mLastCharOrder = -1;
|
|
116
|
+
}
|
|
117
|
+
else
|
|
118
|
+
{
|
|
119
|
+
if (order != -1 && mLastCharOrder != -1)
|
|
120
|
+
{
|
|
121
|
+
mTotalRel ++;
|
|
122
|
+
if (mTotalRel > MAX_REL_THRESHOLD)
|
|
123
|
+
{
|
|
124
|
+
mDone = PR_TRUE;
|
|
125
|
+
break;
|
|
126
|
+
}
|
|
127
|
+
mRelSample[jp2CharContext[mLastCharOrder][order]]++;
|
|
128
|
+
}
|
|
129
|
+
mLastCharOrder = order;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
return;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
void JapaneseContextAnalysis::Reset(void)
|
|
137
|
+
{
|
|
138
|
+
mTotalRel = 0;
|
|
139
|
+
for (PRUint32 i = 0; i < NUM_OF_CATEGORY; i++)
|
|
140
|
+
mRelSample[i] = 0;
|
|
141
|
+
mNeedToSkipCharNum = 0;
|
|
142
|
+
mLastCharOrder = -1;
|
|
143
|
+
mDone = PR_FALSE;
|
|
144
|
+
}
|
|
145
|
+
//Value returned by GetConfidence() when too little data has been seen.
//The expansion is fully parenthesised so it stays a single operand wherever
//the macro is used (e.g. "sizeof DONT_KNOW" or after another cast/operator).
#define DONT_KNOW ((float)-1)
|
|
146
|
+
|
|
147
|
+
float JapaneseContextAnalysis::GetConfidence()
|
|
148
|
+
{
|
|
149
|
+
//This is just one way to calculate confidence. It works well for me.
|
|
150
|
+
if (mTotalRel > MINIMUM_DATA_THRESHOLD)
|
|
151
|
+
return ((float)(mTotalRel - mRelSample[0]))/mTotalRel;
|
|
152
|
+
else
|
|
153
|
+
return (float)DONT_KNOW;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
//Examines the character starting at str.
//  *charLen receives 2 when the first byte lies in 0x81-0x9f or 0xe0-0xfc,
//           otherwise 1.
//  returns  (second byte - 0x9f) when the first byte is 0x82 and the second
//           byte lies in 0x9f-0xf1 (the hiragana range), otherwise -1.
//The second byte is only read when the first byte is 0x82.
PRInt32 SJISContextAnalysis::GetOrder(const char* str, PRUint32 *charLen)
{
  const unsigned char lead = (unsigned char)str[0];

  //byte length of the current character
  const bool twoByteLead = (lead >= 0x81 && lead <= 0x9f) ||
                           (lead >= 0xe0 && lead <= 0xfc);
  *charLen = twoByteLead ? 2 : 1;

  //only hiragana has an order; its first byte is 0x82 ('\202')
  if (lead != 0x82)
    return -1;

  const unsigned char trail = (unsigned char)str[1];
  if (trail < 0x9f || trail > 0xf1)
    return -1;

  return trail - 0x9f;
}
|
|
173
|
+
|
|
174
|
+
//Examines the character starting at str.
//  *charLen receives 3 when the first byte is 0x8f, 2 when it is 0x8e or lies
//           in 0xa1-0xfe, otherwise 1.
//  returns  (second byte - 0xa1) when the first byte is 0xa4 and the second
//           byte lies in 0xa1-0xf3 (the hiragana range), otherwise -1.
//The second byte is only read when the first byte is 0xa4.
PRInt32 EUCJPContextAnalysis::GetOrder(const char* str, PRUint32 *charLen)
{
  const unsigned char lead = (unsigned char)str[0];

  //byte length of the current character
  if (lead == 0x8f)
    *charLen = 3;
  else if (lead == 0x8e || (lead >= 0xa1 && lead <= 0xfe))
    *charLen = 2;
  else
    *charLen = 1;

  //only hiragana has an order; its first byte is 0xa4
  if (lead == 0xa4)
  {
    const unsigned char trail = (unsigned char)str[1];
    if (trail >= 0xa1 && trail <= 0xf3)
      return trail - 0xa1;
  }
  return -1;
}
|
|
193
|
+
|
|
194
|
+
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#ifndef __JPCNTX_H__
|
|
2
|
+
#define __JPCNTX_H__
|
|
3
|
+
|
|
4
|
+
//number of sequence categories; it is the size of the mRelSample counter array
#define NUM_OF_CATEGORY 6
|
|
5
|
+
|
|
6
|
+
#include "types.h"
|
|
7
|
+
|
|
8
|
+
//GotEnoughData() reports true once mTotalRel is strictly greater than this value
#define ENOUGH_REL_THRESHOLD 100
|
|
9
|
+
//once mTotalRel is strictly greater than this value the analyser sets mDone and ignores further input
#define MAX_REL_THRESHOLD 1000
|
|
10
|
+
|
|
11
|
+
//hiragana frequency category table
|
|
12
|
+
//indexed as [order of previous char][order of current char]; the stored value is used as an index into mRelSample
extern char jp2CharContext[83][83];
|
|
13
|
+
|
|
14
|
+
class JapaneseContextAnalysis
|
|
15
|
+
{
|
|
16
|
+
public:
|
|
17
|
+
JapaneseContextAnalysis() {Reset();};
|
|
18
|
+
|
|
19
|
+
void HandleData(const char* aBuf, PRUint32 aLen);
|
|
20
|
+
|
|
21
|
+
void HandleOneChar(const char* aStr, PRUint32 aCharLen)
|
|
22
|
+
{
|
|
23
|
+
PRInt32 order;
|
|
24
|
+
|
|
25
|
+
//if we received enough data, stop here
|
|
26
|
+
if (mTotalRel > MAX_REL_THRESHOLD) mDone = PR_TRUE;
|
|
27
|
+
if (mDone) return;
|
|
28
|
+
|
|
29
|
+
//Only 2-bytes characters are of our interest
|
|
30
|
+
order = (aCharLen == 2) ? GetOrder(aStr) : -1;
|
|
31
|
+
if (order != -1 && mLastCharOrder != -1)
|
|
32
|
+
{
|
|
33
|
+
mTotalRel++;
|
|
34
|
+
//count this sequence to its category counter
|
|
35
|
+
mRelSample[jp2CharContext[mLastCharOrder][order]]++;
|
|
36
|
+
}
|
|
37
|
+
mLastCharOrder = order;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
float GetConfidence();
|
|
41
|
+
void Reset(void);
|
|
42
|
+
void SetOpion(){};
|
|
43
|
+
PRBool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;};
|
|
44
|
+
|
|
45
|
+
protected:
|
|
46
|
+
virtual PRInt32 GetOrder(const char* str, PRUint32 *charLen) = 0;
|
|
47
|
+
virtual PRInt32 GetOrder(const char* str) = 0;
|
|
48
|
+
|
|
49
|
+
//category counters, each interger counts sequence in its category
|
|
50
|
+
PRUint32 mRelSample[NUM_OF_CATEGORY];
|
|
51
|
+
|
|
52
|
+
//total sequence received
|
|
53
|
+
PRUint32 mTotalRel;
|
|
54
|
+
|
|
55
|
+
//The order of previous char
|
|
56
|
+
PRInt32 mLastCharOrder;
|
|
57
|
+
|
|
58
|
+
//if last byte in current buffer is not the last byte of a character, we
|
|
59
|
+
//need to know how many byte to skip in next buffer.
|
|
60
|
+
PRUint32 mNeedToSkipCharNum;
|
|
61
|
+
|
|
62
|
+
//If this flag is set to PR_TRUE, detection is done and conclusion has been made
|
|
63
|
+
PRBool mDone;
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class SJISContextAnalysis : public JapaneseContextAnalysis
|
|
68
|
+
{
|
|
69
|
+
//SJISContextAnalysis(){};
|
|
70
|
+
protected:
|
|
71
|
+
PRInt32 GetOrder(const char* str, PRUint32 *charLen);
|
|
72
|
+
|
|
73
|
+
PRInt32 GetOrder(const char* str)
|
|
74
|
+
{
|
|
75
|
+
//We only interested in Hiragana, so first byte is '\202'
|
|
76
|
+
if (*str == '\202' &&
|
|
77
|
+
(unsigned char)*(str+1) >= (unsigned char)0x9f &&
|
|
78
|
+
(unsigned char)*(str+1) <= (unsigned char)0xf1)
|
|
79
|
+
return (unsigned char)*(str+1) - (unsigned char)0x9f;
|
|
80
|
+
return -1;
|
|
81
|
+
};
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
class EUCJPContextAnalysis : public JapaneseContextAnalysis
|
|
85
|
+
{
|
|
86
|
+
protected:
|
|
87
|
+
PRInt32 GetOrder(const char* str, PRUint32 *charLen);
|
|
88
|
+
PRInt32 GetOrder(const char* str)
|
|
89
|
+
//We only interested in Hiragana, so first byte is '\244'
|
|
90
|
+
{
|
|
91
|
+
if (*str == '\244' &&
|
|
92
|
+
(unsigned char)*(str+1) >= (unsigned char)0xa1 &&
|
|
93
|
+
(unsigned char)*(str+1) <= (unsigned char)0xf3)
|
|
94
|
+
return (unsigned char)*(str+1) - (unsigned char)0xa1;
|
|
95
|
+
return -1;
|
|
96
|
+
};
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
#endif /* __JPCNTX_H__ */
|
|
100
|
+
|