nkf 0.1.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/nkf/nkf.c CHANGED
@@ -9,50 +9,44 @@
9
9
 
10
10
  #define RUBY_NKF_REVISION "$Revision$"
11
11
  #define RUBY_NKF_VERSION NKF_VERSION " (" NKF_RELEASE_DATE ")"
12
- #define NKF_GEM_VERSION "0.1.3"
12
+ #define NKF_GEM_VERSION "0.3.0"
13
13
 
14
14
  #include "ruby/ruby.h"
15
15
  #include "ruby/encoding.h"
16
16
 
17
17
  /* Replace nkf's getchar/putchar for variable modification */
18
- /* we never use getc, ungetc */
18
+ /* we never use ungetc */
19
19
 
20
20
  #undef getc
21
21
  #undef ungetc
22
- #define getc(f) (input_ctr>=i_len?-1:input[input_ctr++])
23
- #define ungetc(c,f) input_ctr--
22
+ #define getc(f) rb_nkf_getc(nkf_state)
23
+ #define ungetc(c,f) rb_nkf_ungetc(nkf_state, c)
24
24
 
25
25
  #define INCSIZE 32
26
26
  #undef putchar
27
27
  #undef TRUE
28
28
  #undef FALSE
29
- #define putchar(c) rb_nkf_putchar(c)
29
+ #define putchar(c) rb_nkf_putchar(nkf_state, c)
30
30
 
31
31
  /* Input/Output pointers */
32
32
 
33
- static unsigned char *output;
34
- static unsigned char *input;
35
- static int input_ctr;
36
- static int i_len;
37
- static int output_ctr;
38
- static int o_len;
39
- static int incsize;
40
-
41
- static VALUE result;
42
-
43
- static int
44
- rb_nkf_putchar(unsigned int c)
45
- {
46
- if (output_ctr >= o_len) {
47
- o_len += incsize;
48
- rb_str_resize(result, o_len);
49
- incsize *= 2;
50
- output = (unsigned char *)RSTRING_PTR(result);
51
- }
52
- output[output_ctr++] = c;
53
-
54
- return c;
55
- }
33
+ typedef struct {
34
+ unsigned char *input;
35
+ int input_ctr;
36
+ int i_len;
37
+ unsigned char *output;
38
+ int output_ctr;
39
+ int o_len;
40
+ int incsize;
41
+ VALUE result;
42
+ } rb_nkf_callback_state_t;
43
+
44
+ struct nkf_state_t;
45
+ static int rb_nkf_getc(struct nkf_state_t *nkf_state);
46
+ #if 0
47
+ static int rb_nkf_ungetc(struct nkf_state_t *nkf_state, int c);
48
+ #endif
49
+ static int rb_nkf_putchar(struct nkf_state_t *nkf_state, unsigned int c);
56
50
 
57
51
  /* Include kanji filter main part */
58
52
  /* getchar and putchar will be replaced during inclusion */
@@ -62,6 +56,42 @@ rb_nkf_putchar(unsigned int c)
62
56
  #include "nkf-utf8/utf8tbl.c"
63
57
  #include "nkf-utf8/nkf.c"
64
58
 
59
+ static int
60
+ rb_nkf_getc(nkf_state_t *nkf_state)
61
+ {
62
+ rb_nkf_callback_state_t *callback_state = nkf_state->callback_arg;
63
+
64
+ return callback_state->input_ctr >= callback_state->i_len ?
65
+ -1 : callback_state->input[callback_state->input_ctr++];
66
+ }
67
+
68
+ #if 0
69
+ static int
70
+ rb_nkf_ungetc(nkf_state_t *nkf_state, int c)
71
+ {
72
+ rb_nkf_callback_state_t *callback_state = nkf_state->callback_arg;
73
+
74
+ callback_state->input_ctr--;
75
+ return c;
76
+ }
77
+ #endif
78
+
79
+ static int
80
+ rb_nkf_putchar(nkf_state_t *nkf_state, unsigned int c)
81
+ {
82
+ rb_nkf_callback_state_t *callback_state = nkf_state->callback_arg;
83
+
84
+ if (callback_state->output_ctr >= callback_state->o_len) {
85
+ callback_state->o_len += callback_state->incsize;
86
+ rb_str_resize(callback_state->result, callback_state->o_len);
87
+ callback_state->incsize *= 2;
88
+ callback_state->output = (unsigned char *)RSTRING_PTR(callback_state->result);
89
+ }
90
+ callback_state->output[callback_state->output_ctr++] = c;
91
+
92
+ return c;
93
+ }
94
+
65
95
  rb_encoding* rb_nkf_enc_get(const char *name)
66
96
  {
67
97
  int idx = rb_enc_find_index(name);
@@ -75,7 +105,7 @@ rb_encoding* rb_nkf_enc_get(const char *name)
75
105
  return rb_enc_from_index(idx);
76
106
  }
77
107
 
78
- int nkf_split_options(const char *arg)
108
+ static int nkf_split_options(nkf_state_t *nkf_state, const char *arg)
79
109
  {
80
110
  int count = 0;
81
111
  unsigned char option[256];
@@ -109,7 +139,7 @@ int nkf_split_options(const char *arg)
109
139
  is_double_quoted = TRUE;
110
140
  }else if(arg[i] == ' '){
111
141
  option[j] = '\0';
112
- options(option);
142
+ options(nkf_state, option);
113
143
  j = 0;
114
144
  }else{
115
145
  option[j++] = arg[i];
@@ -117,7 +147,7 @@ int nkf_split_options(const char *arg)
117
147
  }
118
148
  if(j){
119
149
  option[j] = '\0';
120
- options(option);
150
+ options(nkf_state, option);
121
151
  }
122
152
  return count;
123
153
  }
@@ -136,10 +166,16 @@ int nkf_split_options(const char *arg)
136
166
  static VALUE
137
167
  rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
138
168
  {
169
+ nkf_state_t nkf_state_object = {0};
170
+ nkf_state_t *nkf_state = &nkf_state_object;
171
+ rb_nkf_callback_state_t callback_state;
139
172
  VALUE tmp;
140
- reinit();
141
- nkf_split_options(StringValueCStr(opt));
142
- if (!output_encoding) rb_raise(rb_eArgError, "no output encoding given");
173
+ nkf_state_init(nkf_state);
174
+ nkf_split_options(nkf_state, StringValueCStr(opt));
175
+ if (!output_encoding) {
176
+ nkf_state_dispose(nkf_state);
177
+ rb_raise(rb_eArgError, "no output encoding given");
178
+ }
143
179
 
144
180
  switch (nkf_enc_to_index(output_encoding)) {
145
181
  case UTF_8_BOM: output_encoding = nkf_enc_from_index(UTF_8); break;
@@ -150,31 +186,33 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
150
186
  }
151
187
  output_bom_f = FALSE;
152
188
 
153
- incsize = INCSIZE;
189
+ callback_state.incsize = INCSIZE;
154
190
 
155
- input_ctr = 0;
156
- input = (unsigned char *)StringValuePtr(src);
157
- i_len = RSTRING_LENINT(src);
158
- tmp = rb_str_new(0, i_len*3 + 10);
191
+ callback_state.input_ctr = 0;
192
+ callback_state.input = (unsigned char *)StringValuePtr(src);
193
+ callback_state.i_len = RSTRING_LENINT(src);
194
+ tmp = rb_str_new(0, callback_state.i_len*3 + 10);
159
195
 
160
- output_ctr = 0;
161
- output = (unsigned char *)RSTRING_PTR(tmp);
162
- o_len = RSTRING_LENINT(tmp);
163
- *output = '\0';
196
+ callback_state.output_ctr = 0;
197
+ callback_state.output = (unsigned char *)RSTRING_PTR(tmp);
198
+ callback_state.o_len = RSTRING_LENINT(tmp);
199
+ callback_state.result = tmp;
200
+ *callback_state.output = '\0';
164
201
 
165
202
  /* use _result_ begin*/
166
- result = tmp;
167
- kanji_convert(NULL);
168
- result = Qnil;
203
+ nkf_state->callback_arg = &callback_state;
204
+ kanji_convert(nkf_state, NULL);
205
+ nkf_state->callback_arg = NULL;
169
206
  /* use _result_ end */
170
207
 
171
- rb_str_set_len(tmp, output_ctr);
208
+ rb_str_set_len(tmp, callback_state.output_ctr);
172
209
 
173
210
  if (mimeout_f)
174
211
  rb_enc_associate(tmp, rb_usascii_encoding());
175
212
  else
176
213
  rb_enc_associate(tmp, rb_nkf_enc_get(nkf_enc_name(output_encoding)));
177
214
 
215
+ nkf_state_dispose(nkf_state);
178
216
  return tmp;
179
217
  }
180
218
 
@@ -190,17 +228,23 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
190
228
  static VALUE
191
229
  rb_nkf_guess(VALUE obj, VALUE src)
192
230
  {
193
- reinit();
231
+ nkf_state_t nkf_state_object = {0};
232
+ nkf_state_t *nkf_state = &nkf_state_object;
233
+ rb_nkf_callback_state_t callback_state = {0};
234
+ VALUE guessed;
235
+ nkf_state_init(nkf_state);
194
236
 
195
- input_ctr = 0;
196
- input = (unsigned char *)StringValuePtr(src);
197
- i_len = RSTRING_LENINT(src);
237
+ callback_state.input_ctr = 0;
238
+ callback_state.input = (unsigned char *)StringValuePtr(src);
239
+ callback_state.i_len = RSTRING_LENINT(src);
198
240
 
199
241
  guess_f = TRUE;
200
- kanji_convert( NULL );
201
- guess_f = FALSE;
242
+ nkf_state->callback_arg = &callback_state;
243
+ kanji_convert(nkf_state, NULL);
202
244
 
203
- return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code()));
245
+ guessed = rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code(nkf_state)));
246
+ nkf_state_dispose(nkf_state);
247
+ return guessed;
204
248
  }
205
249
 
206
250
 
@@ -477,6 +521,10 @@ rb_nkf_guess(VALUE obj, VALUE src)
477
521
  void
478
522
  Init_nkf(void)
479
523
  {
524
+ #ifdef HAVE_RB_EXT_RACTOR_SAFE
525
+ rb_ext_ractor_safe(true);
526
+ #endif
527
+
480
528
  VALUE mNKF = rb_define_module("NKF");
481
529
 
482
530
  rb_define_module_function(mNKF, "nkf", rb_nkf_convert, 2);
data/lib/nkf.rb ADDED
@@ -0,0 +1,6 @@
1
+ if RUBY_ENGINE == "jruby"
2
+ require 'nkf.jar'
3
+ JRuby::Util.load_ext('org.jruby.ext.nkf.NKFLibrary')
4
+ else
5
+ require 'nkf.so'
6
+ end
data/nkf.gemspec CHANGED
@@ -11,8 +11,8 @@ end
11
11
  Gem::Specification.new do |spec|
12
12
  spec.name = "nkf"
13
13
  spec.version = source_version
14
- spec.authors = ["NARUSE Yui"]
15
- spec.email = ["naruse@airemix.jp"]
14
+ spec.authors = ["NARUSE Yui", "Charles Oliver Nutter"]
15
+ spec.email = ["naruse@airemix.jp", "headius@headius.com"]
16
16
 
17
17
  spec.summary = %q{Ruby extension for Network Kanji Filter}
18
18
  spec.description = %q{Ruby extension for Network Kanji Filter}
@@ -22,14 +22,23 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.metadata["homepage_uri"] = spec.homepage
24
24
  spec.metadata["source_code_uri"] = spec.homepage
25
+ spec.metadata["changelog_uri"] = spec.homepage + "/releases"
25
26
 
26
27
  # Specify which files should be added to the gem when it is released.
27
28
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
28
29
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
29
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
30
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|bin|.github|test_sig)/}) }
30
31
  end
32
+
33
+ if Gem::Platform === spec.platform and spec.platform =~ 'java' or RUBY_ENGINE == 'jruby'
34
+ spec.platform = 'java'
35
+ spec.licenses += ["EPL-2.0", "LGPL-2.1"]
36
+ spec.files += Dir["lib/nkf.jar"]
37
+ else
38
+ spec.extensions = ["ext/nkf/extconf.rb"]
39
+ end
40
+
31
41
  spec.bindir = "exe"
32
42
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
43
  spec.require_paths = ["lib"]
34
- spec.extensions = ["ext/nkf/extconf.rb"]
35
44
  end
data/sig/kconv.rbs ADDED
@@ -0,0 +1,166 @@
1
+ # <!-- rdoc-file=lib/kconv.rb -->
2
+ # Kanji Converter for Ruby.
3
+ #
4
+ module Kconv
5
+ # <!-- rdoc-file=lib/kconv.rb -->
6
+ # ASCII
7
+ #
8
+ ASCII: Encoding
9
+
10
+ # <!-- rdoc-file=lib/kconv.rb -->
11
+ # Auto-Detect
12
+ #
13
+ AUTO: nil
14
+
15
+ # <!-- rdoc-file=lib/kconv.rb -->
16
+ # BINARY
17
+ #
18
+ BINARY: Encoding
19
+
20
+ # <!-- rdoc-file=lib/kconv.rb -->
21
+ # EUC-JP
22
+ #
23
+ EUC: Encoding
24
+
25
+ # <!-- rdoc-file=lib/kconv.rb -->
26
+ # ISO-2022-JP
27
+ #
28
+ JIS: Encoding
29
+
30
+ # <!-- rdoc-file=lib/kconv.rb -->
31
+ # NOCONV
32
+ #
33
+ NOCONV: nil
34
+
35
+ # <!-- rdoc-file=lib/kconv.rb -->
36
+ # Shift_JIS
37
+ #
38
+ SJIS: Encoding
39
+
40
+ # <!-- rdoc-file=lib/kconv.rb -->
41
+ # UNKNOWN
42
+ #
43
+ UNKNOWN: nil
44
+
45
+ # <!-- rdoc-file=lib/kconv.rb -->
46
+ # UTF-16
47
+ #
48
+ UTF16: Encoding
49
+
50
+ # <!-- rdoc-file=lib/kconv.rb -->
51
+ # UTF-32
52
+ #
53
+ UTF32: Encoding
54
+
55
+ # <!-- rdoc-file=lib/kconv.rb -->
56
+ # UTF-8
57
+ #
58
+ UTF8: Encoding
59
+
60
+ # <!--
61
+ # rdoc-file=lib/kconv.rb
62
+ # - Kconv.guess(str) => encoding
63
+ # -->
64
+ # Guess input encoding by NKF.guess
65
+ #
66
+ def self.guess: (String str) -> Encoding
67
+
68
+ # <!--
69
+ # rdoc-file=lib/kconv.rb
70
+ # - Kconv.iseuc(str) => true or false
71
+ # -->
72
+ # Returns whether input encoding is EUC-JP or not.
73
+ #
74
+ # **Note** do not expect the return value to be a MatchData.
75
+ #
76
+ def self.iseuc: (String str) -> bool
77
+
78
+ # <!--
79
+ # rdoc-file=lib/kconv.rb
80
+ # - Kconv.isjis(str) => true or false
81
+ # -->
82
+ # Returns whether input encoding is ISO-2022-JP or not.
83
+ #
84
+ def self.isjis: (String str) -> bool
85
+
86
+ # <!--
87
+ # rdoc-file=lib/kconv.rb
88
+ # - Kconv.issjis(str) => true or false
89
+ # -->
90
+ # Returns whether input encoding is Shift_JIS or not.
91
+ #
92
+ def self.issjis: (String str) -> bool
93
+
94
+ # <!--
95
+ # rdoc-file=lib/kconv.rb
96
+ # - Kconv.isutf8(str) => true or false
97
+ # -->
98
+ # Returns whether input encoding is UTF-8 or not.
99
+ #
100
+ def self.isutf8: (String str) -> bool
101
+
102
+ # <!--
103
+ # rdoc-file=lib/kconv.rb
104
+ # - Kconv.kconv(str, to_enc, from_enc=nil)
105
+ # -->
106
+ # Convert `str` to `to_enc`. `to_enc` and `from_enc` are given as constants of
107
+ # Kconv or Encoding objects.
108
+ #
109
+ def self.kconv: (String str, Encoding? out_code, ?Encoding? in_code) -> String
110
+
111
+ # <!--
112
+ # rdoc-file=lib/kconv.rb
113
+ # - Kconv.toeuc(str) => string
114
+ # -->
115
+ # Convert `str` to EUC-JP
116
+ #
117
+ def self.toeuc: (String str) -> String
118
+
119
+ # <!--
120
+ # rdoc-file=lib/kconv.rb
121
+ # - Kconv.tojis(str) => string
122
+ # -->
123
+ # Convert `str` to ISO-2022-JP
124
+ #
125
+ def self.tojis: (String str) -> String
126
+
127
+ # <!--
128
+ # rdoc-file=lib/kconv.rb
129
+ # - Kconv.tolocale(str) => string
130
+ # -->
131
+ # Convert `str` to locale encoding
132
+ #
133
+ def self.tolocale: (String str) -> String
134
+
135
+ # <!--
136
+ # rdoc-file=lib/kconv.rb
137
+ # - Kconv.tosjis(str) => string
138
+ # -->
139
+ # Convert `str` to Shift_JIS
140
+ #
141
+ def self.tosjis: (String str) -> String
142
+
143
+ # <!--
144
+ # rdoc-file=lib/kconv.rb
145
+ # - Kconv.toutf16(str) => string
146
+ # -->
147
+ # Convert `str` to UTF-16
148
+ #
149
+ def self.toutf16: (String str) -> String
150
+
151
+ # <!--
152
+ # rdoc-file=lib/kconv.rb
153
+ # - Kconv.toutf32(str) => string
154
+ # -->
155
+ # Convert `str` to UTF-32
156
+ #
157
+ def self.toutf32: (String str) -> String
158
+
159
+ # <!--
160
+ # rdoc-file=lib/kconv.rb
161
+ # - Kconv.toutf8(str) => string
162
+ # -->
163
+ # Convert `str` to UTF-8
164
+ #
165
+ def self.toutf8: (String str) -> String
166
+ end