nkf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 58b94b86db23f7c0d37177682e6518b12e268f419c0cbbff8f8ba80840227a1c
4
+ data.tar.gz: 92de3688e975436deeed4bfabe31b3a12316a95666b15555143ed756c415f90e
5
+ SHA512:
6
+ metadata.gz: df67191af5aefe0f1064376a3a77019c385004173f2b89499aa3d4ada0a702be84a5cf7f095f5a982eed2915cae87fac70018c6cf8c22f427a27584aed13dadd
7
+ data.tar.gz: f999e10fba1b9e82e69512f2bbfdc1bb26e01de4c393652fa5f55591375641d9073f06788cd7a7ee889d701a81f4cbb3c5562eae03e62161c77d12a5d713f8a1
@@ -0,0 +1,24 @@
1
+ name: build
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ name: build (${{ matrix.ruby }} / ${{ matrix.os }})
8
+ strategy:
9
+ matrix:
10
+ ruby: [ 2.7, 2.6, 2.5, head ]
11
+ os: [ ubuntu-latest, macos-latest ]
12
+ runs-on: ${{ matrix.os }}
13
+ steps:
14
+ - uses: actions/checkout@master
15
+ - name: Set up Ruby
16
+ uses: ruby/setup-ruby@v1
17
+ with:
18
+ ruby-version: ${{ matrix.ruby }}
19
+ - name: Install dependencies
20
+ run: |
21
+ gem install bundler --no-document
22
+ bundle install
23
+ - name: Run test
24
+ run: rake compile test
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ *.bundle
10
+ *.dll
11
+ *.so
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem "rake"
4
+ gem "rake-compiler"
5
+ gem "test-unit"
@@ -0,0 +1,22 @@
1
+ Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved.
2
+
3
+ Redistribution and use in source and binary forms, with or without
4
+ modification, are permitted provided that the following conditions
5
+ are met:
6
+ 1. Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ 2. Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+
12
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
13
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
16
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
17
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
18
+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
19
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
20
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
21
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
22
+ SUCH DAMAGE.
@@ -0,0 +1,38 @@
1
+ # NKF
2
+
3
+ This is a Ruby Extension version of nkf (Network Kanji Filter).
4
+ It converts the string given as the second argument and returns the converted
5
+ result. Conversion details are specified by flags given as the first argument.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'nkf'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle install
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install nkf
22
+
23
+ ## Usage
24
+
25
+ ```ruby
26
+ require 'nkf'
27
+ output = NKF.nkf("-s", input)
28
+ ```
29
+
30
+ ## Development
31
+
32
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
33
+
34
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
35
+
36
+ ## Contributing
37
+
38
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ruby/nkf.
@@ -0,0 +1,12 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << "test"
6
+ t.libs << "lib"
7
+ t.test_files = FileList["test/**/test_*.rb"]
8
+ end
9
+
10
+ require 'rake/extensiontask'
11
+ Rake::ExtensionTask.new("nkf")
12
+ task :default => :test
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "nkf"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: false
2
+ require 'mkmf'
3
+ create_makefile('nkf')
@@ -0,0 +1,51 @@
1
+ #ifndef _CONFIG_H_
2
+ #define _CONFIG_H_
3
+
4
+ /* UTF8 input and output */
5
+ #define UTF8_INPUT_ENABLE
6
+ #define UTF8_OUTPUT_ENABLE
7
+
8
+ /* invert characters invalid in Shift_JIS to CP932 */
9
+ #define SHIFTJIS_CP932
10
+
11
+ /* fix input encoding when given by option */
12
+ #define INPUT_CODE_FIX
13
+
14
+ /* --overwrite option */
15
+ /* by Satoru Takabayashi <ccsatoru@vega.aichi-u.ac.jp> */
16
+ #define OVERWRITE
17
+
18
+ /* --cap-input, --url-input option */
19
+ #define INPUT_OPTION
20
+
21
+ /* --numchar-input option */
22
+ #define NUMCHAR_OPTION
23
+
24
+ /* --debug, --no-output option */
25
+ #define CHECK_OPTION
26
+
27
+ /* JIS X0212 */
28
+ #define X0212_ENABLE
29
+
30
+ /* --exec-in, --exec-out option
31
+ * require pipe, fork, execvp and so on.
32
+ * please undef this on MS-DOS, MinGW
33
+ * this is still buggy around child process
34
+ */
35
+ /* #define EXEC_IO */
36
+
37
+ /* Unicode Normalization */
38
+ #define UNICODE_NORMALIZATION
39
+
40
+ /*
41
+ * Select Default Output Encoding
42
+ *
43
+ */
44
+
45
+ /* #define DEFAULT_CODE_JIS */
46
+ /* #define DEFAULT_CODE_SJIS */
47
+ /* #define DEFAULT_CODE_WINDOWS_31J */
48
+ /* #define DEFAULT_CODE_EUC */
49
+ /* #define DEFAULT_CODE_UTF8 */
50
+
51
+ #endif /* _CONFIG_H_ */
@@ -0,0 +1,7205 @@
1
+ /*
2
+ * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3
+ * Copyright (c) 1996-2018, The nkf Project.
4
+ *
5
+ * This software is provided 'as-is', without any express or implied
6
+ * warranty. In no event will the authors be held liable for any damages
7
+ * arising from the use of this software.
8
+ *
9
+ * Permission is granted to anyone to use this software for any purpose,
10
+ * including commercial applications, and to alter it and redistribute it
11
+ * freely, subject to the following restrictions:
12
+ *
13
+ * 1. The origin of this software must not be misrepresented; you must not
14
+ * claim that you wrote the original software. If you use this software
15
+ * in a product, an acknowledgment in the product documentation would be
16
+ * appreciated but is not required.
17
+ *
18
+ * 2. Altered source versions must be plainly marked as such, and must not be
19
+ * misrepresented as being the original software.
20
+ *
21
+ * 3. This notice may not be removed or altered from any source distribution.
22
+ */
23
+ #define NKF_VERSION "2.1.5"
24
+ #define NKF_RELEASE_DATE "2018-12-15"
25
+ #define COPY_RIGHT \
26
+ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27
+ "Copyright (C) 1996-2018, The nkf Project."
28
+
29
+ #include "config.h"
30
+ #include "nkf.h"
31
+ #include "utf8tbl.h"
32
+ #ifdef __WIN32__
33
+ #include <windows.h>
34
+ #include <locale.h>
35
+ #endif
36
+ #if defined(__OS2__)
37
+ # define INCL_DOS
38
+ # define INCL_DOSERRORS
39
+ # include <os2.h>
40
+ #endif
41
+ #include <assert.h>
42
+
43
+
44
+ /* state of output_mode and input_mode
45
+
46
+ c2 0 means ASCII
47
+ JIS_X_0201_1976_K
48
+ ISO_8859_1
49
+ JIS_X_0208
50
+ EOF all termination
51
+ c1 32bit data
52
+
53
+ */
54
+
55
+ /* MIME ENCODE */
56
+
57
+ #define FIXED_MIME 7
58
+ #define STRICT_MIME 8
59
+
60
+ /* byte order */
61
+ enum byte_order {
62
+ ENDIAN_BIG = 1,
63
+ ENDIAN_LITTLE = 2,
64
+ ENDIAN_2143 = 3,
65
+ ENDIAN_3412 = 4
66
+ };
67
+
68
+ /* ASCII CODE */
69
+
70
+ #define BS 0x08
71
+ #define TAB 0x09
72
+ #define LF 0x0a
73
+ #define CR 0x0d
74
+ #define ESC 0x1b
75
+ #define SP 0x20
76
+ #define DEL 0x7f
77
+ #define SI 0x0f
78
+ #define SO 0x0e
79
+ #define SS2 0x8e
80
+ #define SS3 0x8f
81
+ #define CRLF 0x0D0A
82
+
83
+
84
+ /* encodings */
85
+
86
+ enum nkf_encodings {
87
+ ASCII,
88
+ ISO_8859_1,
89
+ ISO_2022_JP,
90
+ CP50220,
91
+ CP50221,
92
+ CP50222,
93
+ ISO_2022_JP_1,
94
+ ISO_2022_JP_3,
95
+ ISO_2022_JP_2004,
96
+ SHIFT_JIS,
97
+ WINDOWS_31J,
98
+ CP10001,
99
+ EUC_JP,
100
+ EUCJP_NKF,
101
+ CP51932,
102
+ EUCJP_MS,
103
+ EUCJP_ASCII,
104
+ SHIFT_JISX0213,
105
+ SHIFT_JIS_2004,
106
+ EUC_JISX0213,
107
+ EUC_JIS_2004,
108
+ UTF_8,
109
+ UTF_8N,
110
+ UTF_8_BOM,
111
+ UTF8_MAC,
112
+ UTF_16,
113
+ UTF_16BE,
114
+ UTF_16BE_BOM,
115
+ UTF_16LE,
116
+ UTF_16LE_BOM,
117
+ UTF_32,
118
+ UTF_32BE,
119
+ UTF_32BE_BOM,
120
+ UTF_32LE,
121
+ UTF_32LE_BOM,
122
+ BINARY,
123
+ NKF_ENCODING_TABLE_SIZE,
124
+ JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125
+ /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126
+ /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127
+ /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128
+ JIS_X_0208 = 0x1168, /* @B */
129
+ JIS_X_0212 = 0x1159, /* D */
130
+ /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131
+ JIS_X_0213_2 = 0x1229, /* P */
132
+ JIS_X_0213_1 = 0x1233 /* Q */
133
+ };
134
+
135
+ static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136
+ static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137
+ static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138
+ static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139
+ static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140
+ static void j_oconv(nkf_char c2, nkf_char c1);
141
+ static void s_oconv(nkf_char c2, nkf_char c1);
142
+ static void e_oconv(nkf_char c2, nkf_char c1);
143
+ static void w_oconv(nkf_char c2, nkf_char c1);
144
+ static void w_oconv16(nkf_char c2, nkf_char c1);
145
+ static void w_oconv32(nkf_char c2, nkf_char c1);
146
+
147
+ typedef struct {
148
+ const char *name;
149
+ nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150
+ void (*oconv)(nkf_char c2, nkf_char c1);
151
+ } nkf_native_encoding;
152
+
153
+ nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154
+ nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155
+ nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156
+ nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157
+ nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158
+ nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159
+ nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
160
+
161
+ typedef struct {
162
+ const int id;
163
+ const char *name;
164
+ const nkf_native_encoding *base_encoding;
165
+ } nkf_encoding;
166
+
167
+ nkf_encoding nkf_encoding_table[] = {
168
+ {ASCII, "US-ASCII", &NkfEncodingASCII},
169
+ {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170
+ {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171
+ {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172
+ {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173
+ {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174
+ {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175
+ {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176
+ {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177
+ {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178
+ {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179
+ {CP10001, "CP10001", &NkfEncodingShift_JIS},
180
+ {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181
+ {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182
+ {CP51932, "CP51932", &NkfEncodingEUC_JP},
183
+ {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184
+ {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185
+ {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186
+ {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187
+ {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188
+ {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189
+ {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190
+ {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191
+ {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192
+ {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193
+ {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194
+ {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195
+ {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196
+ {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197
+ {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198
+ {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199
+ {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200
+ {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201
+ {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202
+ {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203
+ {BINARY, "BINARY", &NkfEncodingASCII},
204
+ {-1, NULL, NULL}
205
+ };
206
+
207
+ struct {
208
+ const char *name;
209
+ const int id;
210
+ } encoding_name_to_id_table[] = {
211
+ {"US-ASCII", ASCII},
212
+ {"ASCII", ASCII},
213
+ {"646", ASCII},
214
+ {"ROMAN8", ASCII},
215
+ {"ISO-2022-JP", ISO_2022_JP},
216
+ {"ISO2022JP-CP932", CP50220},
217
+ {"CP50220", CP50220},
218
+ {"CP50221", CP50221},
219
+ {"CSISO2022JP", CP50221},
220
+ {"CP50222", CP50222},
221
+ {"ISO-2022-JP-1", ISO_2022_JP_1},
222
+ {"ISO-2022-JP-3", ISO_2022_JP_3},
223
+ {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224
+ {"SHIFT_JIS", SHIFT_JIS},
225
+ {"SJIS", SHIFT_JIS},
226
+ {"MS_Kanji", SHIFT_JIS},
227
+ {"PCK", SHIFT_JIS},
228
+ {"WINDOWS-31J", WINDOWS_31J},
229
+ {"CSWINDOWS31J", WINDOWS_31J},
230
+ {"CP932", WINDOWS_31J},
231
+ {"MS932", WINDOWS_31J},
232
+ {"CP10001", CP10001},
233
+ {"EUCJP", EUC_JP},
234
+ {"EUC-JP", EUC_JP},
235
+ {"EUCJP-NKF", EUCJP_NKF},
236
+ {"CP51932", CP51932},
237
+ {"EUC-JP-MS", EUCJP_MS},
238
+ {"EUCJP-MS", EUCJP_MS},
239
+ {"EUCJPMS", EUCJP_MS},
240
+ {"EUC-JP-ASCII", EUCJP_ASCII},
241
+ {"EUCJP-ASCII", EUCJP_ASCII},
242
+ {"SHIFT_JISX0213", SHIFT_JISX0213},
243
+ {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244
+ {"EUC-JISX0213", EUC_JISX0213},
245
+ {"EUC-JIS-2004", EUC_JIS_2004},
246
+ {"UTF-8", UTF_8},
247
+ {"UTF-8N", UTF_8N},
248
+ {"UTF-8-BOM", UTF_8_BOM},
249
+ {"UTF8-MAC", UTF8_MAC},
250
+ {"UTF-8-MAC", UTF8_MAC},
251
+ {"UTF-16", UTF_16},
252
+ {"UTF-16BE", UTF_16BE},
253
+ {"UTF-16BE-BOM", UTF_16BE_BOM},
254
+ {"UTF-16LE", UTF_16LE},
255
+ {"UTF-16LE-BOM", UTF_16LE_BOM},
256
+ {"UTF-32", UTF_32},
257
+ {"UTF-32BE", UTF_32BE},
258
+ {"UTF-32BE-BOM", UTF_32BE_BOM},
259
+ {"UTF-32LE", UTF_32LE},
260
+ {"UTF-32LE-BOM", UTF_32LE_BOM},
261
+ {"BINARY", BINARY},
262
+ {NULL, -1}
263
+ };
264
+
265
+ #if defined(DEFAULT_CODE_JIS)
266
+ #define DEFAULT_ENCIDX ISO_2022_JP
267
+ #elif defined(DEFAULT_CODE_SJIS)
268
+ #define DEFAULT_ENCIDX SHIFT_JIS
269
+ #elif defined(DEFAULT_CODE_WINDOWS_31J)
270
+ #define DEFAULT_ENCIDX WINDOWS_31J
271
+ #elif defined(DEFAULT_CODE_EUC)
272
+ #define DEFAULT_ENCIDX EUC_JP
273
+ #elif defined(DEFAULT_CODE_UTF8)
274
+ #define DEFAULT_ENCIDX UTF_8
275
+ #endif
276
+
277
+
278
/*
 * ASCII-only character classification and hex helpers.
 *
 * Every parameter is parenthesized in the expansion so that an expression
 * argument (e.g. bin2hex(a | b)) is evaluated as a unit instead of being
 * re-associated by operator precedence.
 *
 * NOTE: these are still macros and evaluate their argument more than once;
 * do not pass expressions with side effects (c++, getc(), ...).
 */
#define is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* hex digit -> value 0..15; any non-hex character yields 0 */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* low nibble of c -> uppercase hex digit */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* true when the second-lowest byte of c2 (taken as unsigned short) is SS3 */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* CR, LF, or a character strictly between SP and DEL other than ? = _ ( ) . " */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
			      (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
			       && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303
+
304
+ #define HOLD_SIZE 1024
305
+ #if defined(INT_IS_SHORT)
306
+ #define IOBUF_SIZE 2048
307
+ #else
308
+ #define IOBUF_SIZE 16384
309
+ #endif
310
+
311
+ #define DEFAULT_J 'B'
312
+ #define DEFAULT_R 'B'
313
+
314
+
315
+ #define GETA1 0x22
316
+ #define GETA2 0x2e
317
+
318
+
319
+ /* MIME preprocessor */
320
+
321
+ #ifdef EASYWIN /*Easy Win */
322
+ extern POINT _BufferSize;
323
+ #endif
324
+
325
+ struct input_code{
326
+ const char *name;
327
+ nkf_char stat;
328
+ nkf_char score;
329
+ nkf_char index;
330
+ nkf_char buf[3];
331
+ void (*status_func)(struct input_code *, nkf_char);
332
+ nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
333
+ int _file_stat;
334
+ };
335
+
336
+ static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337
+ static nkf_encoding *input_encoding = NULL;
338
+ static nkf_encoding *output_encoding = NULL;
339
+
340
+ #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341
+ /* UCS Mapping
342
+ * 0: Shift_JIS, eucJP-ascii
343
+ * 1: eucJP-ms
344
+ * 2: CP932, CP51932
345
+ * 3: CP10001
346
+ */
347
+ #define UCS_MAP_ASCII 0
348
+ #define UCS_MAP_MS 1
349
+ #define UCS_MAP_CP932 2
350
+ #define UCS_MAP_CP10001 3
351
+ static int ms_ucs_map_f = UCS_MAP_ASCII;
352
+ #endif
353
+ #ifdef UTF8_INPUT_ENABLE
354
+ /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355
+ static int no_cp932ext_f = FALSE;
356
+ /* ignore ZERO WIDTH NO-BREAK SPACE */
357
+ static int no_best_fit_chars_f = FALSE;
358
+ static int input_endian = ENDIAN_BIG;
359
+ static int input_bom_f = FALSE;
360
+ static nkf_char unicode_subchar = '?'; /* the regular substitution character */
361
+ static void (*encode_fallback)(nkf_char c) = NULL;
362
+ static void w_status(struct input_code *, nkf_char);
363
+ #endif
364
+ #ifdef UTF8_OUTPUT_ENABLE
365
+ static int output_bom_f = FALSE;
366
+ static int output_endian = ENDIAN_BIG;
367
+ #endif
368
+
369
+ static void std_putc(nkf_char c);
370
+ static nkf_char std_getc(FILE *f);
371
+ static nkf_char std_ungetc(nkf_char c,FILE *f);
372
+
373
+ static nkf_char broken_getc(FILE *f);
374
+ static nkf_char broken_ungetc(nkf_char c,FILE *f);
375
+
376
+ static nkf_char mime_getc(FILE *f);
377
+
378
+ static void mime_putc(nkf_char c);
379
+
380
+ /* buffers */
381
+
382
+ #if !defined(PERL_XS) && !defined(WIN32DLL)
383
+ static unsigned char stdibuf[IOBUF_SIZE];
384
+ static unsigned char stdobuf[IOBUF_SIZE];
385
+ #endif
386
+
387
+ #define NKF_UNSPECIFIED (-TRUE)
388
+
389
+ /* flags */
390
+ static int unbuf_f = FALSE;
391
+ static int estab_f = FALSE;
392
+ static int nop_f = FALSE;
393
+ static int binmode_f = TRUE; /* binary mode */
394
+ static int rot_f = FALSE; /* rot14/43 mode */
395
+ static int hira_f = FALSE; /* hira/kata henkan */
396
+ static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
397
+ static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
398
+ static int mime_decode_f = FALSE; /* mime decode is explicitly on */
399
+ static int mimebuf_f = FALSE; /* MIME buffered input */
400
+ static int broken_f = FALSE; /* convert ESC-less broken JIS */
401
+ static int iso8859_f = FALSE; /* ISO8859 through */
402
+ static int mimeout_f = FALSE; /* base64 mode */
403
+ static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
404
+ static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
405
+
406
+ #ifdef UNICODE_NORMALIZATION
407
+ static int nfc_f = FALSE;
408
+ static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
409
+ static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
410
+ #endif
411
+
412
+ #ifdef INPUT_OPTION
413
+ static int cap_f = FALSE;
414
+ static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
415
+ static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
416
+
417
+ static int url_f = FALSE;
418
+ static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
419
+ static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
420
+ #endif
421
+
422
/*
 * Bit layout helpers for nkf_char values.
 *
 * The top byte (CLASS_MASK) tags the class of a character; CLASS_UNICODE
 * marks a value whose low 24 bits (VALUE_MASK) are taken as a code point.
 * Parameters are parenthesized so expression arguments are masked as a
 * whole rather than being split by the precedence of '&'.
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)

/* 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000 */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
435
+
436
+ #ifdef NUMCHAR_OPTION
437
+ static int numchar_f = FALSE;
438
+ static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
439
+ static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
440
+ #endif
441
+
442
+ #ifdef CHECK_OPTION
443
+ static int noout_f = FALSE;
444
+ static void no_putc(nkf_char c);
445
+ static int debug_f = FALSE;
446
+ static void debug(const char *str);
447
+ static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
448
+ #endif
449
+
450
+ static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
451
+ static void set_input_codename(const char *codename);
452
+
453
+ #ifdef EXEC_IO
454
+ static int exec_f = 0;
455
+ #endif
456
+
457
+ #ifdef SHIFTJIS_CP932
458
+ /* invert IBM extended characters to others */
459
+ static int cp51932_f = FALSE;
460
+
461
+ /* invert NEC-selected IBM extended characters to IBM extended characters */
462
+ static int cp932inv_f = TRUE;
463
+
464
+ /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
465
+ #endif /* SHIFTJIS_CP932 */
466
+
467
+ static int x0212_f = FALSE;
468
+ static int x0213_f = FALSE;
469
+
470
+ static unsigned char prefix_table[256];
471
+
472
+ static void e_status(struct input_code *, nkf_char);
473
+ static void s_status(struct input_code *, nkf_char);
474
+
475
+ struct input_code input_code_list[] = {
476
+ {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477
+ {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478
+ #ifdef UTF8_INPUT_ENABLE
479
+ {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480
+ {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
481
+ {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
482
+ #endif
483
+ {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
484
+ };
485
+
486
+ static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487
+ static int base64_count = 0;
488
+
489
+ /* X0208 -> ASCII converter */
490
+
491
+ /* fold parameter */
492
+ static int f_line = 0; /* chars in line */
493
+ static int f_prev = 0;
494
+ static int fold_preserve_f = FALSE; /* preserve new lines */
495
+ static int fold_f = FALSE;
496
+ static int fold_len = 0;
497
+
498
+ /* options */
499
+ static unsigned char kanji_intro = DEFAULT_J;
500
+ static unsigned char ascii_intro = DEFAULT_R;
501
+
502
+ /* Folding */
503
+
504
+ #define FOLD_MARGIN 10
505
+ #define DEFAULT_FOLD 60
506
+
507
+ static int fold_margin = FOLD_MARGIN;
508
+
509
+ /* process default */
510
+
511
+ static nkf_char
512
+ no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
513
+ {
514
+ fprintf(stderr,"nkf internal module connection failure.\n");
515
+ exit(EXIT_FAILURE);
516
+ return 0; /* LINT */
517
+ }
518
+
519
+ static void
520
+ no_connection(nkf_char c2, nkf_char c1)
521
+ {
522
+ no_connection2(c2,c1,0);
523
+ }
524
+
525
+ static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526
+ static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
527
+
528
+ static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529
+ static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530
+ static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531
+ static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532
+ static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533
+ static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534
+ static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
535
+
536
+ /* static redirections */
537
+
538
+ static void (*o_putc)(nkf_char c) = std_putc;
539
+
540
+ static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
541
+ static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
542
+
543
+ static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
544
+ static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
545
+
546
+ static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
547
+
548
+ static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549
+ static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
550
+
551
+ /* for strict mime */
552
+ static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553
+ static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
554
+
555
+ /* Global states */
556
+ static int output_mode = ASCII; /* output kanji mode */
557
+ static int input_mode = ASCII; /* input kanji mode */
558
+ static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
559
+
560
+ /* X0201 / X0208 conversion tables */
561
+
562
+ /* X0201 kana conversion table */
563
+ /* 90-9F A0-DF */
564
+ static const unsigned char cv[]= {
565
+ 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566
+ 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567
+ 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568
+ 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569
+ 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570
+ 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571
+ 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572
+ 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573
+ 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574
+ 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575
+ 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576
+ 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577
+ 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578
+ 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579
+ 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580
+ 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581
+ 0x00,0x00};
582
+
583
+
584
+ /* X0201 kana conversion table for dakuten */
585
+ /* 90-9F A0-DF */
586
+ static const unsigned char dv[]= {
587
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592
+ 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593
+ 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594
+ 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595
+ 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596
+ 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597
+ 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598
+ 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603
+ 0x00,0x00};
604
+
605
+ /* X0201 kana conversion table for han-dakuten */
606
+ /* 90-9F A0-DF */
607
+ static const unsigned char ev[]= {
608
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618
+ 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619
+ 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624
+ 0x00,0x00};
625
+
626
+ /* X0201 kana to X0213 conversion table for han-dakuten */
627
+ /* 90-9F A0-DF */
628
+ static const unsigned char ev_x0213[]= {
629
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634
+ 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635
+ 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636
+ 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637
+ 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638
+ 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645
+ 0x00,0x00};
646
+
647
+
648
+ /* X0208 kigou conversion table */
649
+ /* 0x8140 - 0x819e */
650
+ static const unsigned char fv[] = {
651
+
652
+ 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653
+ 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654
+ 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655
+ 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656
+ 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657
+ 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658
+ 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659
+ 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660
+ 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662
+ 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
664
+ } ;
665
+
666
+
667
+
668
+ static int option_mode = 0;
669
+ static int file_out_f = FALSE;
670
+ #ifdef OVERWRITE
671
+ static int overwrite_f = FALSE;
672
+ static int preserve_time_f = FALSE;
673
+ static int backup_f = FALSE;
674
+ static char *backup_suffix = "";
675
+ #endif
676
+
677
+ static int eolmode_f = 0; /* CR, LF, CRLF */
678
+ static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
679
+ static nkf_char prev_cr = 0; /* CR or 0 */
680
+ #ifdef EASYWIN /*Easy Win */
681
+ static int end_check;
682
+ #endif /*Easy Win */
683
+
684
/*
 * malloc() wrapper that never returns NULL.
 *
 * A request for 0 bytes is rounded up to 1 byte. If the allocation fails,
 * the error is reported with perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *block = malloc(size ? size : 1);

    if (!block) {
	perror("can't malloc");
	exit(EXIT_FAILURE);
    }
    return block;
}
699
+
700
/*
 * realloc() wrapper that never returns NULL.
 *
 * A requested size of 0 is rounded up to 1 byte. If the reallocation
 * fails, the error is reported with perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size != 0 ? size : 1);

    if (resized == NULL) {
	perror("can't realloc");
	exit(EXIT_FAILURE);
    }
    return resized;
}
713
+
714
+ #define nkf_xfree(ptr) free(ptr)
715
+
716
+ static int
717
+ nkf_str_caseeql(const char *src, const char *target)
718
+ {
719
+ int i;
720
+ for (i = 0; src[i] && target[i]; i++) {
721
+ if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
722
+ }
723
+ if (src[i] || target[i]) return FALSE;
724
+ else return TRUE;
725
+ }
726
+
727
+ static nkf_encoding*
728
+ nkf_enc_from_index(int idx)
729
+ {
730
+ if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
731
+ return 0;
732
+ }
733
+ return &nkf_encoding_table[idx];
734
+ }
735
+
736
+ static int
737
+ nkf_enc_find_index(const char *name)
738
+ {
739
+ int i;
740
+ if (name[0] == 'X' && *(name+1) == '-') name += 2;
741
+ for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
742
+ if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
743
+ return encoding_name_to_id_table[i].id;
744
+ }
745
+ }
746
+ return -1;
747
+ }
748
+
749
+ static nkf_encoding*
750
+ nkf_enc_find(const char *name)
751
+ {
752
+ int idx = -1;
753
+ idx = nkf_enc_find_index(name);
754
+ if (idx < 0) return 0;
755
+ return nkf_enc_from_index(idx);
756
+ }
757
+
758
/* Field accessors; `enc` is a pointer to an nkf_encoding entry. */
#define nkf_enc_name(enc)             ((enc)->name)
#define nkf_enc_to_index(enc)         ((enc)->id)
#define nkf_enc_to_base_encoding(enc) ((enc)->base_encoding)
#define nkf_enc_to_iconv(enc)         (nkf_enc_to_base_encoding(enc)->iconv)
#define nkf_enc_to_oconv(enc)         (nkf_enc_to_base_encoding(enc)->oconv)

/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
#define nkf_enc_asciicompat(enc) (\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)

/* True when the base encoding is one of the UTF-8/16/32 base encodings. */
#define nkf_enc_unicode_p(enc) (\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)

/* True when the encoding id is CP50220, CP50221 or CP50222. */
#define nkf_enc_cp5022x_p(enc) (\
    nkf_enc_to_index(enc) == CP50220 ||\
    nkf_enc_to_index(enc) == CP50221 ||\
    nkf_enc_to_index(enc) == CP50222)
774
+
775
+ #ifdef DEFAULT_CODE_LOCALE
776
/*
 * Return the name of the character set of the current locale, or NULL when
 * the platform offers no way to ask.
 *
 * Exactly one branch is compiled in:
 *   - HAVE_LANGINFO_H: the string returned by nl_langinfo(CODESET);
 *   - __WIN32__:       "CP<n>" built from GetACP();
 *   - __OS2__ (16bit): NULL;
 *   - __OS2__ (32bit): "Shift_JIS" for code pages 932/943, else "CP<n>";
 *   - anything else:   NULL.
 * The Windows and OS/2 results live in a static buffer that is overwritten
 * by the next call.
 */
static const char*
nkf_locale_charmap(void)
{
#if defined(HAVE_LANGINFO_H)
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    sprintf(buf, "CP%d", GetACP());
    return buf;
#elif defined(__OS2__) && defined(INT_IS_SHORT)
    /* OS/2 1.x */
    return NULL;
#elif defined(__OS2__)
    /* OS/2 32bit */
    static char buf[16];
    ULONG ulCP[1], ulncp;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
#else
    return NULL;
#endif
}
803
+
804
+ static nkf_encoding*
805
+ nkf_locale_encoding(void)
806
+ {
807
+ nkf_encoding *enc = 0;
808
+ const char *encname = nkf_locale_charmap();
809
+ if (encname)
810
+ enc = nkf_enc_find(encname);
811
+ return enc;
812
+ }
813
+ #endif /* DEFAULT_CODE_LOCALE */
814
+
815
+ static nkf_encoding*
816
+ nkf_utf8_encoding(void)
817
+ {
818
+ return &nkf_encoding_table[UTF_8];
819
+ }
820
+
821
+ static nkf_encoding*
822
+ nkf_default_encoding(void)
823
+ {
824
+ nkf_encoding *enc = 0;
825
+ #ifdef DEFAULT_CODE_LOCALE
826
+ enc = nkf_locale_encoding();
827
+ #elif defined(DEFAULT_ENCIDX)
828
+ enc = nkf_enc_from_index(DEFAULT_ENCIDX);
829
+ #endif
830
+ if (!enc) enc = nkf_utf8_encoding();
831
+ return enc;
832
+ }
833
+
834
+ typedef struct {
835
+ long capa;
836
+ long len;
837
+ nkf_char *ptr;
838
+ } nkf_buf_t;
839
+
840
+ static nkf_buf_t *
841
+ nkf_buf_new(int length)
842
+ {
843
+ nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
844
+ buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
845
+ buf->capa = length;
846
+ buf->len = 0;
847
+ return buf;
848
+ }
849
+
850
+ #if 0
851
+ static void
852
+ nkf_buf_dispose(nkf_buf_t *buf)
853
+ {
854
+ nkf_xfree(buf->ptr);
855
+ nkf_xfree(buf);
856
+ }
857
+ #endif
858
+
859
+ #define nkf_buf_length(buf) ((buf)->len) /* number of elements currently stored */
860
+ #define nkf_buf_empty_p(buf) ((buf)->len == 0) /* non-zero when nothing is stored */
861
+
862
+ static nkf_char
863
+ nkf_buf_at(nkf_buf_t *buf, int index)
864
+ {
865
+ assert(index <= buf->len);
866
+ return buf->ptr[index];
867
+ }
868
+
869
+ static void
870
+ nkf_buf_clear(nkf_buf_t *buf)
871
+ {
872
+ buf->len = 0;
873
+ }
874
+
875
+ static void
876
+ nkf_buf_push(nkf_buf_t *buf, nkf_char c)
877
+ {
878
+ if (buf->capa <= buf->len) {
879
+ exit(EXIT_FAILURE);
880
+ }
881
+ buf->ptr[buf->len++] = c;
882
+ }
883
+
884
+ static nkf_char
885
+ nkf_buf_pop(nkf_buf_t *buf)
886
+ {
887
+ assert(!nkf_buf_empty_p(buf));
888
+ return buf->ptr[--buf->len];
889
+ }
890
+
891
+ /* Normalization Form C */
892
+ #ifndef PERL_XS
893
+ #ifdef WIN32DLL
894
+ #define fprintf dllprintf
895
+ #endif
896
+
897
+ static void
898
+ version(void)
899
+ {
900
+ fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
901
+ }
902
+
903
+ static void
904
+ usage(void)
905
+ {
906
+ fprintf(HELP_OUTPUT,
907
+ "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
908
+ #ifdef UTF8_OUTPUT_ENABLE
909
+ " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910
+ " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
911
+ #else
912
+ #endif
913
+ #ifdef UTF8_INPUT_ENABLE
914
+ " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915
+ " UTF option is -W[8,[16,32][B,L]]\n"
916
+ #else
917
+ " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
918
+ #endif
919
+ );
920
+ fprintf(HELP_OUTPUT,
921
+ " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922
+ " M[BQ] MIME encode [B:base64 Q:quoted]\n"
923
+ " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
924
+ );
925
+ fprintf(HELP_OUTPUT,
926
+ " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
927
+ " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
928
+ " 4: JISX0208 Katakana to JISX0201 Katakana\n"
929
+ " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
930
+ );
931
+ fprintf(HELP_OUTPUT,
932
+ " O Output to File (DEFAULT 'nkf.out')\n"
933
+ " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
934
+ );
935
+ fprintf(HELP_OUTPUT,
936
+ " --ic=<encoding> Specify the input encoding\n"
937
+ " --oc=<encoding> Specify the output encoding\n"
938
+ " --hiragana --katakana Hiragana/Katakana Conversion\n"
939
+ " --katakana-hiragana Converts each other\n"
940
+ );
941
+ fprintf(HELP_OUTPUT,
942
+ #ifdef INPUT_OPTION
943
+ " --{cap, url}-input Convert hex after ':' or '%%'\n"
944
+ #endif
945
+ #ifdef NUMCHAR_OPTION
946
+ " --numchar-input Convert Unicode Character Reference\n"
947
+ #endif
948
+ #ifdef UTF8_INPUT_ENABLE
949
+ " --fb-{skip, html, xml, perl, java, subchar}\n"
950
+ " Specify unassigned character's replacement\n"
951
+ #endif
952
+ );
953
+ fprintf(HELP_OUTPUT,
954
+ #ifdef OVERWRITE
955
+ " --in-place[=SUF] Overwrite original files\n"
956
+ " --overwrite[=SUF] Preserve timestamp of original files\n"
957
+ #endif
958
+ " -g --guess Guess the input code\n"
959
+ " -v --version Print the version\n"
960
+ " --help/-V Print this help / configuration\n"
961
+ );
962
+ version();
963
+ }
964
+
965
+ static void
966
+ show_configuration(void)
967
+ {
968
+ fprintf(HELP_OUTPUT,
969
+ "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
970
+ " Compile-time options:\n"
971
+ " Compiled at: " __DATE__ " " __TIME__ "\n"
972
+ );
973
+ fprintf(HELP_OUTPUT,
974
+ " Default output encoding: "
975
+ #ifdef DEFAULT_CODE_LOCALE
976
+ "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
977
+ #elif defined(DEFAULT_ENCIDX)
978
+ "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
979
+ #else
980
+ "NONE\n"
981
+ #endif
982
+ );
983
+ fprintf(HELP_OUTPUT,
984
+ " Default output end of line: "
985
+ #if DEFAULT_NEWLINE == CR
986
+ "CR"
987
+ #elif DEFAULT_NEWLINE == CRLF
988
+ "CRLF"
989
+ #else
990
+ "LF"
991
+ #endif
992
+ "\n"
993
+ " Decode MIME encoded string: "
994
+ #if MIME_DECODE_DEFAULT
995
+ "ON"
996
+ #else
997
+ "OFF"
998
+ #endif
999
+ "\n"
1000
+ " Convert JIS X 0201 Katakana: "
1001
+ #if X0201_DEFAULT
1002
+ "ON"
1003
+ #else
1004
+ "OFF"
1005
+ #endif
1006
+ "\n"
1007
+ " --help, --version output: "
1008
+ #if HELP_OUTPUT_HELP_OUTPUT
1009
+ "HELP_OUTPUT"
1010
+ #else
1011
+ "STDOUT"
1012
+ #endif
1013
+ "\n");
1014
+ }
1015
+ #endif /*PERL_XS*/
1016
+
1017
+ #ifdef OVERWRITE
1018
/*
 * Build the name of a backup file.
 *
 * When `suffix` contains no '*', the result is `filename` followed by
 * `suffix`.  Otherwise `suffix` acts as a template: every '*' is replaced by
 * `filename` and all other characters are copied through.
 *
 * Returns a NUL-terminated string obtained from nkf_xmalloc().
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    const char *scan;
    int asterisk_count = 0;
    int i, j;
    int filename_length = strlen(filename);

    for (scan = suffix; *scan; scan++) {
        if (*scan == '*') asterisk_count++;
    }

    if (!asterisk_count) {
        /* Plain suffix: simple concatenation. */
        j = filename_length + strlen(suffix);
        backup_filename = nkf_xmalloc(j + 1);
        strcpy(backup_filename, filename);
        strcat(backup_filename, suffix);
        backup_filename[j] = '\0';
        return backup_filename;
    }

    /* Each '*' (one char) expands to filename_length chars. */
    backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
    j = 0;
    for (i = 0; suffix[i]; i++) {
        if (suffix[i] == '*') {
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1052
+ #endif
1053
+
1054
+ #ifdef UTF8_INPUT_ENABLE
1055
+ static void
1056
+ nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1057
+ {
1058
+ int shift = 20;
1059
+ c &= VALUE_MASK;
1060
+ while(shift >= 0){
1061
+ if(c >= NKF_INT32_C(1)<<shift){
1062
+ while(shift >= 0){
1063
+ (*f)(0, bin2hex(c>>shift));
1064
+ shift -= 4;
1065
+ }
1066
+ }else{
1067
+ shift -= 4;
1068
+ }
1069
+ }
1070
+ return;
1071
+ }
1072
+
1073
+ static void
1074
+ encode_fallback_html(nkf_char c)
1075
+ {
1076
+ (*oconv)(0, '&');
1077
+ (*oconv)(0, '#');
1078
+ c &= VALUE_MASK;
1079
+ if(c >= NKF_INT32_C(1000000))
1080
+ (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1081
+ if(c >= NKF_INT32_C(100000))
1082
+ (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1083
+ if(c >= 10000)
1084
+ (*oconv)(0, 0x30+(c/10000 )%10);
1085
+ if(c >= 1000)
1086
+ (*oconv)(0, 0x30+(c/1000 )%10);
1087
+ if(c >= 100)
1088
+ (*oconv)(0, 0x30+(c/100 )%10);
1089
+ if(c >= 10)
1090
+ (*oconv)(0, 0x30+(c/10 )%10);
1091
+ if(c >= 0)
1092
+ (*oconv)(0, 0x30+ c %10);
1093
+ (*oconv)(0, ';');
1094
+ return;
1095
+ }
1096
+
1097
+ static void
1098
+ encode_fallback_xml(nkf_char c)
1099
+ {
1100
+ (*oconv)(0, '&');
1101
+ (*oconv)(0, '#');
1102
+ (*oconv)(0, 'x');
1103
+ nkf_each_char_to_hex(oconv, c);
1104
+ (*oconv)(0, ';');
1105
+ return;
1106
+ }
1107
+
1108
+ static void
1109
+ encode_fallback_java(nkf_char c)
1110
+ {
1111
+ (*oconv)(0, '\\');
1112
+ c &= VALUE_MASK;
1113
+ if(!nkf_char_unicode_bmp_p(c)){
1114
+ int high = (c >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
1115
+ int low = (c & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
1116
+ (*oconv)(0, 'u');
1117
+ (*oconv)(0, bin2hex(high>>12));
1118
+ (*oconv)(0, bin2hex(high>> 8));
1119
+ (*oconv)(0, bin2hex(high>> 4));
1120
+ (*oconv)(0, bin2hex(high ));
1121
+ (*oconv)(0, '\\');
1122
+ (*oconv)(0, 'u');
1123
+ (*oconv)(0, bin2hex(low>>12));
1124
+ (*oconv)(0, bin2hex(low>> 8));
1125
+ (*oconv)(0, bin2hex(low>> 4));
1126
+ (*oconv)(0, bin2hex(low ));
1127
+ }else{
1128
+ (*oconv)(0, 'u');
1129
+ (*oconv)(0, bin2hex(c>>12));
1130
+ (*oconv)(0, bin2hex(c>> 8));
1131
+ (*oconv)(0, bin2hex(c>> 4));
1132
+ (*oconv)(0, bin2hex(c ));
1133
+ }
1134
+ return;
1135
+ }
1136
+
1137
+ static void
1138
+ encode_fallback_perl(nkf_char c)
1139
+ {
1140
+ (*oconv)(0, '\\');
1141
+ (*oconv)(0, 'x');
1142
+ (*oconv)(0, '{');
1143
+ nkf_each_char_to_hex(oconv, c);
1144
+ (*oconv)(0, '}');
1145
+ return;
1146
+ }
1147
+
1148
+ static void
1149
+ encode_fallback_subchar(nkf_char c)
1150
+ {
1151
+ c = unicode_subchar;
1152
+ (*oconv)((c>>8)&0xFF, c&0xFF);
1153
+ return;
1154
+ }
1155
+ #endif
1156
+
1157
/*
 * Long option table.  `name` is the long option text; `alias` is the string
 * stored alongside it (empty for many entries).  Names ending in '=' are
 * the ones listed with a trailing '=' in the source.
 * NOTE(review): alias presumably holds the equivalent short-flag string and
 * an empty alias presumably means the option is handled by dedicated code;
 * confirm in the option parser.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=",               ""},
    {"oc=",               ""},
    {"base64",            "jMB"},
    {"euc",               "e"},
    {"euc-input",         "E"},
    {"fj",                "jm"},
    {"help",              ""},
    {"jis",               "j"},
    {"jis-input",         "J"},
    {"mac",               "sLm"},
    {"mime",              "jM"},
    {"mime-input",        "m"},
    {"msdos",             "sLw"},
    {"sjis",              "s"},
    {"sjis-input",        "S"},
    {"unix",              "eLu"},
    {"version",           "v"},
    {"windows",           "sLw"},
    {"hiragana",          "h1"},
    {"katakana",          "h2"},
    {"katakana-hiragana", "h3"},
    {"guess=",            ""},
    {"guess",             "g2"},
    {"cp932",             ""},
    {"no-cp932",          ""},
#ifdef X0212_ENABLE
    {"x0212",             ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8",              "w"},
    {"utf16",             "w16"},
    {"ms-ucs-map",        ""},
    {"fb-skip",           ""},
    {"fb-html",           ""},
    {"fb-xml",            ""},
    {"fb-perl",           ""},
    {"fb-java",           ""},
    {"fb-subchar",        ""},
    {"fb-subchar=",       ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input",        "W"},
    {"utf16-input",       "W16"},
    {"no-cp932ext",       ""},
    {"no-best-fit-chars", ""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input",     ""},
#endif
#ifdef OVERWRITE
    {"overwrite",         ""},
    {"overwrite=",        ""},
    {"in-place",          ""},
    {"in-place=",         ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input",         ""},
    {"url-input",         ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input",     ""},
#endif
#ifdef CHECK_OPTION
    {"no-output",         ""},
    {"debug",             ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv",          ""},
#endif
#ifdef EXEC_IO
    {"exec-in",           ""},
    {"exec-out",          ""},
#endif
    {"prefix=",           ""},
};
1236
+
1237
+ static void
1238
+ set_input_encoding(nkf_encoding *enc)
1239
+ {
1240
+ switch (nkf_enc_to_index(enc)) {
1241
+ case ISO_8859_1:
1242
+ iso8859_f = TRUE;
1243
+ break;
1244
+ case CP50221:
1245
+ case CP50222:
1246
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1247
+ case CP50220:
1248
+ #ifdef SHIFTJIS_CP932
1249
+ cp51932_f = TRUE;
1250
+ #endif
1251
+ #ifdef UTF8_OUTPUT_ENABLE
1252
+ ms_ucs_map_f = UCS_MAP_CP932;
1253
+ #endif
1254
+ break;
1255
+ case ISO_2022_JP_1:
1256
+ x0212_f = TRUE;
1257
+ break;
1258
+ case ISO_2022_JP_3:
1259
+ x0212_f = TRUE;
1260
+ x0213_f = TRUE;
1261
+ break;
1262
+ case ISO_2022_JP_2004:
1263
+ x0212_f = TRUE;
1264
+ x0213_f = TRUE;
1265
+ break;
1266
+ case SHIFT_JIS:
1267
+ break;
1268
+ case WINDOWS_31J:
1269
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1270
+ #ifdef SHIFTJIS_CP932
1271
+ cp51932_f = TRUE;
1272
+ #endif
1273
+ #ifdef UTF8_OUTPUT_ENABLE
1274
+ ms_ucs_map_f = UCS_MAP_CP932;
1275
+ #endif
1276
+ break;
1277
+ break;
1278
+ case CP10001:
1279
+ #ifdef SHIFTJIS_CP932
1280
+ cp51932_f = TRUE;
1281
+ #endif
1282
+ #ifdef UTF8_OUTPUT_ENABLE
1283
+ ms_ucs_map_f = UCS_MAP_CP10001;
1284
+ #endif
1285
+ break;
1286
+ case EUC_JP:
1287
+ break;
1288
+ case EUCJP_NKF:
1289
+ break;
1290
+ case CP51932:
1291
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1292
+ #ifdef SHIFTJIS_CP932
1293
+ cp51932_f = TRUE;
1294
+ #endif
1295
+ #ifdef UTF8_OUTPUT_ENABLE
1296
+ ms_ucs_map_f = UCS_MAP_CP932;
1297
+ #endif
1298
+ break;
1299
+ case EUCJP_MS:
1300
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1301
+ #ifdef SHIFTJIS_CP932
1302
+ cp51932_f = FALSE;
1303
+ #endif
1304
+ #ifdef UTF8_OUTPUT_ENABLE
1305
+ ms_ucs_map_f = UCS_MAP_MS;
1306
+ #endif
1307
+ break;
1308
+ case EUCJP_ASCII:
1309
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1310
+ #ifdef SHIFTJIS_CP932
1311
+ cp51932_f = FALSE;
1312
+ #endif
1313
+ #ifdef UTF8_OUTPUT_ENABLE
1314
+ ms_ucs_map_f = UCS_MAP_ASCII;
1315
+ #endif
1316
+ break;
1317
+ case SHIFT_JISX0213:
1318
+ case SHIFT_JIS_2004:
1319
+ x0213_f = TRUE;
1320
+ #ifdef SHIFTJIS_CP932
1321
+ cp51932_f = FALSE;
1322
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1323
+ #endif
1324
+ break;
1325
+ case EUC_JISX0213:
1326
+ case EUC_JIS_2004:
1327
+ x0213_f = TRUE;
1328
+ #ifdef SHIFTJIS_CP932
1329
+ cp51932_f = FALSE;
1330
+ #endif
1331
+ break;
1332
+ #ifdef UTF8_INPUT_ENABLE
1333
+ #ifdef UNICODE_NORMALIZATION
1334
+ case UTF8_MAC:
1335
+ nfc_f = TRUE;
1336
+ break;
1337
+ #endif
1338
+ case UTF_16:
1339
+ case UTF_16BE:
1340
+ case UTF_16BE_BOM:
1341
+ input_endian = ENDIAN_BIG;
1342
+ break;
1343
+ case UTF_16LE:
1344
+ case UTF_16LE_BOM:
1345
+ input_endian = ENDIAN_LITTLE;
1346
+ break;
1347
+ case UTF_32:
1348
+ case UTF_32BE:
1349
+ case UTF_32BE_BOM:
1350
+ input_endian = ENDIAN_BIG;
1351
+ break;
1352
+ case UTF_32LE:
1353
+ case UTF_32LE_BOM:
1354
+ input_endian = ENDIAN_LITTLE;
1355
+ break;
1356
+ #endif
1357
+ }
1358
+ }
1359
+
1360
+ static void
1361
+ set_output_encoding(nkf_encoding *enc)
1362
+ {
1363
+ switch (nkf_enc_to_index(enc)) {
1364
+ case CP50220:
1365
+ #ifdef SHIFTJIS_CP932
1366
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1367
+ #endif
1368
+ #ifdef UTF8_OUTPUT_ENABLE
1369
+ ms_ucs_map_f = UCS_MAP_CP932;
1370
+ #endif
1371
+ break;
1372
+ case CP50221:
1373
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1374
+ #ifdef SHIFTJIS_CP932
1375
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1376
+ #endif
1377
+ #ifdef UTF8_OUTPUT_ENABLE
1378
+ ms_ucs_map_f = UCS_MAP_CP932;
1379
+ #endif
1380
+ break;
1381
+ case ISO_2022_JP:
1382
+ #ifdef SHIFTJIS_CP932
1383
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1384
+ #endif
1385
+ break;
1386
+ case ISO_2022_JP_1:
1387
+ x0212_f = TRUE;
1388
+ #ifdef SHIFTJIS_CP932
1389
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1390
+ #endif
1391
+ break;
1392
+ case ISO_2022_JP_3:
1393
+ case ISO_2022_JP_2004:
1394
+ x0212_f = TRUE;
1395
+ x0213_f = TRUE;
1396
+ #ifdef SHIFTJIS_CP932
1397
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1398
+ #endif
1399
+ break;
1400
+ case SHIFT_JIS:
1401
+ break;
1402
+ case WINDOWS_31J:
1403
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1404
+ #ifdef UTF8_OUTPUT_ENABLE
1405
+ ms_ucs_map_f = UCS_MAP_CP932;
1406
+ #endif
1407
+ break;
1408
+ case CP10001:
1409
+ #ifdef UTF8_OUTPUT_ENABLE
1410
+ ms_ucs_map_f = UCS_MAP_CP10001;
1411
+ #endif
1412
+ break;
1413
+ case EUC_JP:
1414
+ x0212_f = TRUE;
1415
+ #ifdef SHIFTJIS_CP932
1416
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1417
+ #endif
1418
+ #ifdef UTF8_OUTPUT_ENABLE
1419
+ ms_ucs_map_f = UCS_MAP_ASCII;
1420
+ #endif
1421
+ break;
1422
+ case EUCJP_NKF:
1423
+ x0212_f = FALSE;
1424
+ #ifdef SHIFTJIS_CP932
1425
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1426
+ #endif
1427
+ #ifdef UTF8_OUTPUT_ENABLE
1428
+ ms_ucs_map_f = UCS_MAP_ASCII;
1429
+ #endif
1430
+ break;
1431
+ case CP51932:
1432
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1433
+ #ifdef SHIFTJIS_CP932
1434
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1435
+ #endif
1436
+ #ifdef UTF8_OUTPUT_ENABLE
1437
+ ms_ucs_map_f = UCS_MAP_CP932;
1438
+ #endif
1439
+ break;
1440
+ case EUCJP_MS:
1441
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1442
+ x0212_f = TRUE;
1443
+ #ifdef UTF8_OUTPUT_ENABLE
1444
+ ms_ucs_map_f = UCS_MAP_MS;
1445
+ #endif
1446
+ break;
1447
+ case EUCJP_ASCII:
1448
+ if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1449
+ x0212_f = TRUE;
1450
+ #ifdef UTF8_OUTPUT_ENABLE
1451
+ ms_ucs_map_f = UCS_MAP_ASCII;
1452
+ #endif
1453
+ break;
1454
+ case SHIFT_JISX0213:
1455
+ case SHIFT_JIS_2004:
1456
+ x0213_f = TRUE;
1457
+ #ifdef SHIFTJIS_CP932
1458
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1459
+ #endif
1460
+ break;
1461
+ case EUC_JISX0213:
1462
+ case EUC_JIS_2004:
1463
+ x0212_f = TRUE;
1464
+ x0213_f = TRUE;
1465
+ #ifdef SHIFTJIS_CP932
1466
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1467
+ #endif
1468
+ break;
1469
+ #ifdef UTF8_OUTPUT_ENABLE
1470
+ case UTF_8_BOM:
1471
+ output_bom_f = TRUE;
1472
+ break;
1473
+ case UTF_16:
1474
+ case UTF_16BE_BOM:
1475
+ output_bom_f = TRUE;
1476
+ break;
1477
+ case UTF_16LE:
1478
+ output_endian = ENDIAN_LITTLE;
1479
+ output_bom_f = FALSE;
1480
+ break;
1481
+ case UTF_16LE_BOM:
1482
+ output_endian = ENDIAN_LITTLE;
1483
+ output_bom_f = TRUE;
1484
+ break;
1485
+ case UTF_32:
1486
+ case UTF_32BE_BOM:
1487
+ output_bom_f = TRUE;
1488
+ break;
1489
+ case UTF_32LE:
1490
+ output_endian = ENDIAN_LITTLE;
1491
+ output_bom_f = FALSE;
1492
+ break;
1493
+ case UTF_32LE_BOM:
1494
+ output_endian = ENDIAN_LITTLE;
1495
+ output_bom_f = TRUE;
1496
+ break;
1497
+ #endif
1498
+ }
1499
+ }
1500
+
1501
+ static struct input_code*
1502
+ find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1503
+ {
1504
+ if (iconv_func){
1505
+ struct input_code *p = input_code_list;
1506
+ while (p->name){
1507
+ if (iconv_func == p->iconv_func){
1508
+ return p;
1509
+ }
1510
+ p++;
1511
+ }
1512
+ }
1513
+ return 0;
1514
+ }
1515
+
1516
+ static void
1517
+ set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1518
+ {
1519
+ #ifdef INPUT_CODE_FIX
1520
+ if (f || !input_encoding)
1521
+ #endif
1522
+ if (estab_f != f){
1523
+ estab_f = f;
1524
+ }
1525
+
1526
+ if (iconv_func
1527
+ #ifdef INPUT_CODE_FIX
1528
+ && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1529
+ #endif
1530
+ ){
1531
+ iconv = iconv_func;
1532
+ }
1533
+ #ifdef CHECK_OPTION
1534
+ if (estab_f && iconv_for_check != iconv){
1535
+ struct input_code *p = find_inputcode_byfunc(iconv);
1536
+ if (p){
1537
+ set_input_codename(p->name);
1538
+ debug(p->name);
1539
+ }
1540
+ iconv_for_check = iconv;
1541
+ }
1542
+ #endif
1543
+ }
1544
+
1545
+ #ifdef X0212_ENABLE
1546
+ static nkf_char
1547
+ x0212_shift(nkf_char c)
1548
+ {
1549
+ nkf_char ret = c;
1550
+ c &= 0x7f;
1551
+ if (is_eucg3(ret)){
1552
+ if (0x75 <= c && c <= 0x7f){
1553
+ ret = c + (0x109 - 0x75);
1554
+ }
1555
+ }else{
1556
+ if (0x75 <= c && c <= 0x7f){
1557
+ ret = c + (0x113 - 0x75);
1558
+ }
1559
+ }
1560
+ return ret;
1561
+ }
1562
+
1563
+
1564
+ static nkf_char
1565
+ x0212_unshift(nkf_char c)
1566
+ {
1567
+ nkf_char ret = c;
1568
+ if (0x7f <= c && c <= 0x88){
1569
+ ret = c + (0x75 - 0x7f);
1570
+ }else if (0x89 <= c && c <= 0x92){
1571
+ ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1572
+ }
1573
+ return ret;
1574
+ }
1575
+ #endif /* X0212_ENABLE */
1576
+
1577
+ static int
1578
+ is_x0213_2_in_x0212(nkf_char c1)
1579
+ {
1580
+ static const char x0213_2_table[] =
1581
+ {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1582
+ int ku = c1 - 0x20;
1583
+ if (ku <= 15)
1584
+ return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
1585
+ if (78 <= ku && ku <= 94)
1586
+ return 1;
1587
+ return 0;
1588
+ }
1589
+
1590
+ static nkf_char
1591
+ e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1592
+ {
1593
+ nkf_char ndx;
1594
+ if (is_eucg3(c2)){
1595
+ ndx = c2 & 0x7f;
1596
+ if (x0213_f && is_x0213_2_in_x0212(ndx)){
1597
+ if((0x21 <= ndx && ndx <= 0x2F)){
1598
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1599
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1600
+ return 0;
1601
+ }else if(0x6E <= ndx && ndx <= 0x7E){
1602
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1603
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1604
+ return 0;
1605
+ }
1606
+ return 1;
1607
+ }
1608
+ #ifdef X0212_ENABLE
1609
+ else if(nkf_isgraph(ndx)){
1610
+ nkf_char val = 0;
1611
+ const unsigned short *ptr;
1612
+ ptr = x0212_shiftjis[ndx - 0x21];
1613
+ if (ptr){
1614
+ val = ptr[(c1 & 0x7f) - 0x21];
1615
+ }
1616
+ if (val){
1617
+ c2 = val >> 8;
1618
+ c1 = val & 0xff;
1619
+ if (p2) *p2 = c2;
1620
+ if (p1) *p1 = c1;
1621
+ return 0;
1622
+ }
1623
+ c2 = x0212_shift(c2);
1624
+ }
1625
+ #endif /* X0212_ENABLE */
1626
+ }
1627
+ if(0x7F < c2) return 1;
1628
+ if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1629
+ if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1630
+ return 0;
1631
+ }
1632
+
1633
+ static nkf_char
1634
+ s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1635
+ {
1636
+ #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1637
+ nkf_char val;
1638
+ #endif
1639
+ static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1640
+ if (0xFC < c1) return 1;
1641
+ #ifdef SHIFTJIS_CP932
1642
+ if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
1643
+ val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1644
+ if (val){
1645
+ c2 = val >> 8;
1646
+ c1 = val & 0xff;
1647
+ }
1648
+ }
1649
+ if (cp932inv_f
1650
+ && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1651
+ val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1652
+ if (val){
1653
+ c2 = val >> 8;
1654
+ c1 = val & 0xff;
1655
+ }
1656
+ }
1657
+ #endif /* SHIFTJIS_CP932 */
1658
+ #ifdef X0212_ENABLE
1659
+ if (!x0213_f && is_ibmext_in_sjis(c2)){
1660
+ val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1661
+ if (val){
1662
+ if (val > 0x7FFF){
1663
+ c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1664
+ c1 = val & 0xff;
1665
+ }else{
1666
+ c2 = val >> 8;
1667
+ c1 = val & 0xff;
1668
+ }
1669
+ if (p2) *p2 = c2;
1670
+ if (p1) *p1 = c1;
1671
+ return 0;
1672
+ }
1673
+ }
1674
+ #endif
1675
+ if(c2 >= 0x80){
1676
+ if(x0213_f && c2 >= 0xF0){
1677
+ if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1678
+ c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1679
+ }else{ /* 78<=k<=94 */
1680
+ c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1681
+ if (0x9E < c1) c2++;
1682
+ }
1683
+ }else{
1684
+ #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1685
+ #define SJ6394 0x0161 /* 63 - 94 ku offset */
1686
+ c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1687
+ if (0x9E < c1) c2++;
1688
+ }
1689
+ if (c1 < 0x9F)
1690
+ c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1691
+ else {
1692
+ c1 = c1 - 0x7E;
1693
+ }
1694
+ }
1695
+
1696
+ #ifdef X0212_ENABLE
1697
+ c2 = x0212_unshift(c2);
1698
+ #endif
1699
+ if (p2) *p2 = c2;
1700
+ if (p1) *p1 = c1;
1701
+ return 0;
1702
+ }
1703
+
1704
+ #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1705
+ static void
1706
+ nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1707
+ {
1708
+ val &= VALUE_MASK;
1709
+ if (val < 0x80){
1710
+ *p1 = val;
1711
+ *p2 = 0;
1712
+ *p3 = 0;
1713
+ *p4 = 0;
1714
+ }else if (val < 0x800){
1715
+ *p1 = 0xc0 | (val >> 6);
1716
+ *p2 = 0x80 | (val & 0x3f);
1717
+ *p3 = 0;
1718
+ *p4 = 0;
1719
+ } else if (nkf_char_unicode_bmp_p(val)) {
1720
+ *p1 = 0xe0 | (val >> 12);
1721
+ *p2 = 0x80 | ((val >> 6) & 0x3f);
1722
+ *p3 = 0x80 | ( val & 0x3f);
1723
+ *p4 = 0;
1724
+ } else if (nkf_char_unicode_value_p(val)) {
1725
+ *p1 = 0xf0 | (val >> 18);
1726
+ *p2 = 0x80 | ((val >> 12) & 0x3f);
1727
+ *p3 = 0x80 | ((val >> 6) & 0x3f);
1728
+ *p4 = 0x80 | ( val & 0x3f);
1729
+ } else {
1730
+ *p1 = 0;
1731
+ *p2 = 0;
1732
+ *p3 = 0;
1733
+ *p4 = 0;
1734
+ }
1735
+ }
1736
+
1737
+ static nkf_char
1738
+ nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1739
+ {
1740
+ nkf_char wc;
1741
+ if (c1 <= 0x7F) {
1742
+ /* single byte */
1743
+ wc = c1;
1744
+ }
1745
+ else if (c1 <= 0xC1) {
1746
+ /* trail byte or invalid */
1747
+ return -1;
1748
+ }
1749
+ else if (c1 <= 0xDF) {
1750
+ /* 2 bytes */
1751
+ wc = (c1 & 0x1F) << 6;
1752
+ wc |= (c2 & 0x3F);
1753
+ }
1754
+ else if (c1 <= 0xEF) {
1755
+ /* 3 bytes */
1756
+ wc = (c1 & 0x0F) << 12;
1757
+ wc |= (c2 & 0x3F) << 6;
1758
+ wc |= (c3 & 0x3F);
1759
+ }
1760
+ else if (c2 <= 0xF4) {
1761
+ /* 4 bytes */
1762
+ wc = (c1 & 0x0F) << 18;
1763
+ wc |= (c2 & 0x3F) << 12;
1764
+ wc |= (c3 & 0x3F) << 6;
1765
+ wc |= (c4 & 0x3F);
1766
+ }
1767
+ else {
1768
+ return -1;
1769
+ }
1770
+ return wc;
1771
+ }
1772
+ #endif
1773
+
1774
+ #ifdef UTF8_INPUT_ENABLE
1775
+ static int
1776
+ unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1777
+ const unsigned short *const *pp, nkf_char psize,
1778
+ nkf_char *p2, nkf_char *p1)
1779
+ {
1780
+ nkf_char c2;
1781
+ const unsigned short *p;
1782
+ unsigned short val;
1783
+
1784
+ if (pp == 0) return 1;
1785
+
1786
+ c1 -= 0x80;
1787
+ if (c1 < 0 || psize <= c1) return 1;
1788
+ p = pp[c1];
1789
+ if (p == 0) return 1;
1790
+
1791
+ c0 -= 0x80;
1792
+ if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1793
+ val = p[c0];
1794
+ if (val == 0) return 1;
1795
+ if (no_cp932ext_f && (
1796
+ (val>>8) == 0x2D || /* NEC special characters */
1797
+ val > NKF_INT32_C(0xF300) /* IBM extended characters */
1798
+ )) return 1;
1799
+
1800
+ c2 = val >> 8;
1801
+ if (val > 0x7FFF){
1802
+ c2 &= 0x7f;
1803
+ c2 |= PREFIX_EUCG3;
1804
+ }
1805
+ if (c2 == SO) c2 = JIS_X_0201_1976_K;
1806
+ c1 = val & 0xFF;
1807
+ if (p2) *p2 = c2;
1808
+ if (p1) *p1 = c1;
1809
+ return 0;
1810
+ }
1811
+
1812
+ static int
1813
+ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1814
+ {
1815
+ const unsigned short *const *pp;
1816
+ const unsigned short *const *const *ppp;
1817
+ static const char no_best_fit_chars_table_C2[] =
1818
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1819
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1820
+ 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1821
+ 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1822
+ static const char no_best_fit_chars_table_C2_ms[] =
1823
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1824
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1825
+ 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1826
+ 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1827
+ static const char no_best_fit_chars_table_932_C2[] =
1828
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1829
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1830
+ 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1831
+ 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1832
+ static const char no_best_fit_chars_table_932_C3[] =
1833
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1834
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1835
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1836
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1837
+ nkf_char ret = 0;
1838
+
1839
+ if(c2 < 0x80){
1840
+ *p2 = 0;
1841
+ *p1 = c2;
1842
+ }else if(c2 < 0xe0){
1843
+ if(no_best_fit_chars_f){
1844
+ if(ms_ucs_map_f == UCS_MAP_CP932){
1845
+ switch(c2){
1846
+ case 0xC2:
1847
+ if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1848
+ break;
1849
+ case 0xC3:
1850
+ if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1851
+ break;
1852
+ }
1853
+ }else if(!cp932inv_f){
1854
+ switch(c2){
1855
+ case 0xC2:
1856
+ if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1857
+ break;
1858
+ case 0xC3:
1859
+ if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1860
+ break;
1861
+ }
1862
+ }else if(ms_ucs_map_f == UCS_MAP_MS){
1863
+ if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1864
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1865
+ switch(c2){
1866
+ case 0xC2:
1867
+ switch(c1){
1868
+ case 0xA2:
1869
+ case 0xA3:
1870
+ case 0xA5:
1871
+ case 0xA6:
1872
+ case 0xAC:
1873
+ case 0xAF:
1874
+ case 0xB8:
1875
+ return 1;
1876
+ }
1877
+ break;
1878
+ }
1879
+ }
1880
+ }
1881
+ pp =
1882
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1883
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1884
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1885
+ x0213_f ? utf8_to_euc_2bytes_x0213 :
1886
+ utf8_to_euc_2bytes;
1887
+ ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1888
+ }else if(c0 < 0xF0){
1889
+ if(no_best_fit_chars_f){
1890
+ if(ms_ucs_map_f == UCS_MAP_CP932){
1891
+ if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1892
+ }else if(ms_ucs_map_f == UCS_MAP_MS){
1893
+ switch(c2){
1894
+ case 0xE2:
1895
+ switch(c1){
1896
+ case 0x80:
1897
+ if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1898
+ break;
1899
+ case 0x88:
1900
+ if(c0 == 0x92) return 1;
1901
+ break;
1902
+ }
1903
+ break;
1904
+ case 0xE3:
1905
+ if(c1 == 0x80 || c0 == 0x9C) return 1;
1906
+ break;
1907
+ }
1908
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1909
+ switch(c2){
1910
+ case 0xE3:
1911
+ switch(c1){
1912
+ case 0x82:
1913
+ if(c0 == 0x94) return 1;
1914
+ break;
1915
+ case 0x83:
1916
+ if(c0 == 0xBB) return 1;
1917
+ break;
1918
+ }
1919
+ break;
1920
+ }
1921
+ }else{
1922
+ switch(c2){
1923
+ case 0xE2:
1924
+ switch(c1){
1925
+ case 0x80:
1926
+ if(c0 == 0x95) return 1;
1927
+ break;
1928
+ case 0x88:
1929
+ if(c0 == 0xA5) return 1;
1930
+ break;
1931
+ }
1932
+ break;
1933
+ case 0xEF:
1934
+ switch(c1){
1935
+ case 0xBC:
1936
+ if(c0 == 0x8D) return 1;
1937
+ break;
1938
+ case 0xBD:
1939
+ if(c0 == 0x9E && !cp932inv_f) return 1;
1940
+ break;
1941
+ case 0xBF:
1942
+ if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1943
+ break;
1944
+ }
1945
+ break;
1946
+ }
1947
+ }
1948
+ }
1949
+ ppp =
1950
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1951
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1952
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1953
+ x0213_f ? utf8_to_euc_3bytes_x0213 :
1954
+ utf8_to_euc_3bytes;
1955
+ ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1956
+ }else return -1;
1957
+ #ifdef SHIFTJIS_CP932
1958
+ if (!ret&& is_eucg3(*p2)) {
1959
+ if (cp932inv_f) {
1960
+ if (encode_fallback) ret = 1;
1961
+ }
1962
+ else {
1963
+ nkf_char s2, s1;
1964
+ if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1965
+ s2e_conv(s2, s1, p2, p1);
1966
+ }else{
1967
+ ret = 1;
1968
+ }
1969
+ }
1970
+ }
1971
+ #endif
1972
+ return ret;
1973
+ }
1974
+
1975
+ #ifdef UTF8_OUTPUT_ENABLE
1976
/*
 * X0213_SURROGATE_FIND(tbl, size, euc)
 *
 * Scans rows 0 .. size-1 of `tbl` for the first row whose column 0 equals
 * `euc`.  On a hit, that row's column 2 is stored into a variable named
 * `low`, which must already be declared in the scope where the macro is
 * expanded.  When no row matches, `low` is left untouched, so callers
 * preset it (e.g. to 0) to detect a miss.
 *
 * All three arguments are parenthesized so that expressions such as
 * `hi << 8 | lo` or `n & 3` keep their meaning inside the `==` and `<`
 * comparisons.  `size` and `euc` are re-evaluated on every iteration, so
 * pass expressions without side effects.  The loop index uses a
 * macro-specific name so it cannot shadow a caller variable used in an
 * argument.
 */
#define X0213_SURROGATE_FIND(tbl, size, euc) do { \
	int x0213_find_idx_; \
	for (x0213_find_idx_ = 0; x0213_find_idx_ < (size); x0213_find_idx_++) \
	    if ((tbl)[x0213_find_idx_][0] == (euc)) { \
		low = (tbl)[x0213_find_idx_][2]; \
		break; \
	    } \
    } while (0)
1984
+
1985
+ static nkf_char
1986
+ e2w_conv(nkf_char c2, nkf_char c1)
1987
+ {
1988
+ const unsigned short *p;
1989
+
1990
+ if (c2 == JIS_X_0201_1976_K) {
1991
+ if (ms_ucs_map_f == UCS_MAP_CP10001) {
1992
+ switch (c1) {
1993
+ case 0x20:
1994
+ return 0xA0;
1995
+ case 0x7D:
1996
+ return 0xA9;
1997
+ }
1998
+ }
1999
+ p = euc_to_utf8_1byte;
2000
+ #ifdef X0212_ENABLE
2001
+ } else if (is_eucg3(c2)){
2002
+ if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
2003
+ return 0xA6;
2004
+ }
2005
+ c2 = (c2&0x7f) - 0x21;
2006
+ if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2007
+ p =
2008
+ x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
2009
+ x0212_to_utf8_2bytes[c2];
2010
+ else
2011
+ return 0;
2012
+ #endif
2013
+ } else {
2014
+ c2 &= 0x7f;
2015
+ c2 = (c2&0x7f) - 0x21;
2016
+ if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2017
+ p =
2018
+ x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
2019
+ ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
2020
+ ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
2021
+ euc_to_utf8_2bytes_ms[c2];
2022
+ else
2023
+ return 0;
2024
+ }
2025
+ if (!p) return 0;
2026
+ c1 = (c1 & 0x7f) - 0x21;
2027
+ if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2028
+ nkf_char val = p[c1];
2029
+ if (x0213_f && 0xD800<=val && val<=0xDBFF) {
2030
+ nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2031
+ nkf_char low = 0;
2032
+ if (p==x0212_to_utf8_2bytes_x0213[c2]) {
2033
+ X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
2034
+ } else {
2035
+ X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
2036
+ }
2037
+ if (!low) return 0;
2038
+ return UTF16_TO_UTF32(val, low);
2039
+ } else {
2040
+ return val;
2041
+ }
2042
+ }
2043
+ return 0;
2044
+ }
2045
+
2046
+ static nkf_char
2047
+ e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
2048
+ {
2049
+ nkf_char euc;
2050
+ int i;
2051
+ for (i = 0; i < sizeof_x0213_combining_chars; i++)
2052
+ if (x0213_combining_chars[i] == comb)
2053
+ break;
2054
+ if (i >= sizeof_x0213_combining_chars)
2055
+ return 0;
2056
+ euc = (c2&0x7f)<<8 | (c1&0x7f);
2057
+ for (i = 0; i < sizeof_x0213_combining_table; i++)
2058
+ if (x0213_combining_table[i][0] == euc)
2059
+ return x0213_combining_table[i][1];
2060
+ return 0;
2061
+ }
2062
+ #endif
2063
+
2064
+ static nkf_char
2065
+ w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2066
+ {
2067
+ nkf_char ret = 0;
2068
+
2069
+ if (!c1){
2070
+ *p2 = 0;
2071
+ *p1 = c2;
2072
+ }else if (0xc0 <= c2 && c2 <= 0xef) {
2073
+ ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2074
+ #ifdef NUMCHAR_OPTION
2075
+ if (ret > 0){
2076
+ if (p2) *p2 = 0;
2077
+ if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
2078
+ ret = 0;
2079
+ }
2080
+ #endif
2081
+ }
2082
+ return ret;
2083
+ }
2084
+
2085
+ #ifdef UTF8_INPUT_ENABLE
2086
+ static nkf_char
2087
+ w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
2088
+ {
2089
+ nkf_char c1, c2, c3, c4;
2090
+ nkf_char ret = 0;
2091
+ val &= VALUE_MASK;
2092
+ if (val < 0x80) {
2093
+ *p2 = 0;
2094
+ *p1 = val;
2095
+ }
2096
+ else if (nkf_char_unicode_bmp_p(val)){
2097
+ nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2098
+ ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2099
+ if (ret > 0){
2100
+ *p2 = 0;
2101
+ *p1 = nkf_char_unicode_new(val);
2102
+ ret = 0;
2103
+ }
2104
+ }
2105
+ else {
2106
+ int i;
2107
+ if (x0213_f) {
2108
+ c1 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2109
+ c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2110
+ for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2111
+ if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
2112
+ val = x0213_1_surrogate_table[i][0];
2113
+ *p2 = val >> 8;
2114
+ *p1 = val & 0xFF;
2115
+ return 0;
2116
+ }
2117
+ for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2118
+ if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
2119
+ val = x0213_2_surrogate_table[i][0];
2120
+ *p2 = PREFIX_EUCG3 | (val >> 8);
2121
+ *p1 = val & 0xFF;
2122
+ return 0;
2123
+ }
2124
+ }
2125
+ *p2 = 0;
2126
+ *p1 = nkf_char_unicode_new(val);
2127
+ }
2128
+ return ret;
2129
+ }
2130
+ #endif
2131
+
2132
+ static nkf_char
2133
+ e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2134
+ {
2135
+ if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2136
+ if (iso2022jp_f && !x0201_f) {
2137
+ c2 = GETA1; c1 = GETA2;
2138
+ } else {
2139
+ c2 = JIS_X_0201_1976_K;
2140
+ c1 &= 0x7f;
2141
+ }
2142
+ #ifdef X0212_ENABLE
2143
+ }else if (c2 == 0x8f){
2144
+ if (c0 == 0){
2145
+ return -1;
2146
+ }
2147
+ if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2148
+ /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2149
+ c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2150
+ c2 = 0;
2151
+ } else {
2152
+ c2 = (c2 << 8) | (c1 & 0x7f);
2153
+ c1 = c0 & 0x7f;
2154
+ #ifdef SHIFTJIS_CP932
2155
+ if (cp51932_f){
2156
+ nkf_char s2, s1;
2157
+ if (e2s_conv(c2, c1, &s2, &s1) == 0){
2158
+ s2e_conv(s2, s1, &c2, &c1);
2159
+ if (c2 < 0x100){
2160
+ c1 &= 0x7f;
2161
+ c2 &= 0x7f;
2162
+ }
2163
+ }
2164
+ }
2165
+ #endif /* SHIFTJIS_CP932 */
2166
+ }
2167
+ #endif /* X0212_ENABLE */
2168
+ } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2169
+ /* NOP */
2170
+ } else {
2171
+ if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2172
+ /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2173
+ c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2174
+ c2 = 0;
2175
+ } else {
2176
+ c1 &= 0x7f;
2177
+ c2 &= 0x7f;
2178
+ #ifdef SHIFTJIS_CP932
2179
+ if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2180
+ nkf_char s2, s1;
2181
+ if (e2s_conv(c2, c1, &s2, &s1) == 0){
2182
+ s2e_conv(s2, s1, &c2, &c1);
2183
+ if (c2 < 0x100){
2184
+ c1 &= 0x7f;
2185
+ c2 &= 0x7f;
2186
+ }
2187
+ }
2188
+ }
2189
+ #endif /* SHIFTJIS_CP932 */
2190
+ }
2191
+ }
2192
+ (*oconv)(c2, c1);
2193
+ return 0;
2194
+ }
2195
+
2196
+ static nkf_char
2197
+ s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2198
+ {
2199
+ if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2200
+ if (iso2022jp_f && !x0201_f) {
2201
+ c2 = GETA1; c1 = GETA2;
2202
+ } else {
2203
+ c1 &= 0x7f;
2204
+ }
2205
+ } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2206
+ /* NOP */
2207
+ } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2208
+ /* CP932 UDC */
2209
+ if(c1 == 0x7F) return 0;
2210
+ c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2211
+ c2 = 0;
2212
+ } else {
2213
+ nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2214
+ if (ret) return ret;
2215
+ }
2216
+ (*oconv)(c2, c1);
2217
+ return 0;
2218
+ }
2219
+
2220
+ static int
2221
+ x0213_wait_combining_p(nkf_char wc)
2222
+ {
2223
+ int i;
2224
+ for (i = 0; i < sizeof_x0213_combining_table; i++) {
2225
+ if (x0213_combining_table[i][1] == wc) {
2226
+ return TRUE;
2227
+ }
2228
+ }
2229
+ return FALSE;
2230
+ }
2231
+
2232
+ static int
2233
+ x0213_combining_p(nkf_char wc)
2234
+ {
2235
+ int i;
2236
+ for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2237
+ if (x0213_combining_chars[i] == wc) {
2238
+ return TRUE;
2239
+ }
2240
+ }
2241
+ return FALSE;
2242
+ }