console 0.2.3 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -1,13 +1,21 @@
1
1
  Console is a helper class for displaying super-ASCII strings on the console.
2
2
 
3
- Console is needed when you want to mix multi-column (e.g. Chinese, Japanese and
4
- Korean characters) and single-column (e.g. ASCII) characters and know how much
5
- horizontal realestate the result takes on the screen. This is generally
6
- necessary when you want to have internationalization support in a console
7
- program.
3
+ Console is necessary when you want to mix multi-column (e.g. Chinese, Japanese
4
+ and Korean characters) and single-column (e.g. ASCII, Latin, Vietnamese)
5
+ characters on screen in such a way that the display width matters. This is
6
+ typically the case in curses programs with i18n support, but can be necessary
7
+ in certain internationalized $stdout applications as well.
8
+
9
+ Note that display width is different from a) the number of bytes in the string,
10
+ and b) the number of characters in the string. When you move beyond ASCII
11
+ strings, these three metrics can all have distinct values for a given string.
8
12
 
9
13
  The Console gem currently provides these methods:
10
14
 
15
+ - Console.init_locale!: set the program's locale from the appropriate
16
+ environment variables. (Ruby 1.8 programs must call this before calling any
17
+ of the other methods. Ruby 1.9 programs can call it or skip it without
18
+ effect.)
11
19
  - Console.display_width: calculates the display width of a string
12
20
  - Console.display_slice: returns a substring according to display offset
13
21
  and display width parameters.
@@ -17,6 +25,27 @@ If you require 'console', you will get just those methods.
17
25
  If you require 'console/string', you will get String#display_width and
18
26
  String#display_slice methods directly on all strings.
19
27
 
20
- The console homepage is <code>http://masanjin.net/console/</code>.
28
+ = EXAMPLE USAGE
29
+
30
+ ## encoding: UTF-8 (this comment required for ruby 1.9)
31
+ require 'rubygems' # this line required for ruby 1.8
32
+ require 'console'
33
+ require 'console/string'
34
+
35
+ Console.init_locale!
36
+
37
+ STRING = "我能吞下玻璃而不傷身體。Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong.私はガラスを食べられます。それは私を傷つけません。I can eat glass and it doesn't hurt me."
38
+ COLS = 30
21
39
 
40
+ rows = STRING.display_width / COLS
41
+ (0 .. rows).each { |i| puts STRING.display_slice(i * COLS, COLS) }
42
+
43
+ The result will be displayed in an even 30-column block on the console, even
44
+ though some characters in the string require two columns to display and some
45
+ characters require one column.
46
+
47
+ = MORE
48
+
49
+ The console homepage is <code>http://masanjin.net/console/</code>.
22
50
  The console git repo is <code>git://masanjin.net/console</code>.
51
+
@@ -1,61 +1,121 @@
1
1
  /*
2
- * console.c -- unit tests for ruby Console library
2
+ * console.c -- ruby console library
3
3
  * Author: William Morgan (mailto: wmorgan-ruby-console@masanjin.net)
4
4
  * Copyright: Copyright 2010 William Morgan
5
5
  * License: same terms as Ruby itself
6
6
  */
7
7
 
8
- #define _XOPEN_SOURCE
9
8
  #include <wchar.h>
10
9
  #include <stdlib.h>
11
10
  #include <ruby.h>
11
+ #include <locale.h>
12
+
13
+ #ifdef HAVE_RUBY_ENCODING_H
12
14
  #include <ruby/encoding.h>
15
+ #endif
16
+
17
+ static inline int calc_width(char* string, long strlen, long byte_offset, size_t* num_bytes, size_t* num_cols) {
18
+ wchar_t wc;
19
+ size_t width = -1;
20
+
21
+ *num_bytes = mbrtowc(&wc, string + byte_offset, strlen - byte_offset, NULL);
22
+
23
+ if(*num_bytes == (size_t)-2) {
24
+ rb_raise(rb_eArgError, "malformed string: incomplete multibyte character at position %ld", byte_offset);
25
+ return -1;
26
+ }
27
+ else if(*num_bytes == (size_t)-1) {
28
+ rb_raise(rb_eArgError, "malformed string: invalid multibyte character at position %ld", byte_offset);
29
+ return -1;
30
+ }
31
+ else if(*num_bytes == 0) {
32
+ rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", byte_offset);
33
+ return -1;
34
+ }
35
+
36
+ *num_cols = wcwidth(wc);
37
+ /* sometimes this seems to happen! maybe it's not bad...
38
+ if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
39
+ */
40
+
41
+ return 0;
42
+ }
43
+
44
+ /*
45
+ * call-seq: init_locale!
46
+ *
47
+ * Sets the program's current locale from the appropriate environment variables.
48
+ * (see `man 3 setlocale` for details).
49
+ *
50
+ * Equivalent to:
51
+ * char* old_locale = setlocale(LC_ALL, NULL); setlocale(LC_ALL, "");
52
+ * return old_locale;
53
+ *
54
+ * in C.
55
+ *
56
+ * If you are using Ruby 1.8, you *must* call this at least once before calling
57
+ * the other methods in this package. Otherwise, non-ASCII strings will
58
+ * be considered invalid, and #display_width and #display_slice will raise
59
+ * ArgumentErrors.
60
+ *
61
+ * Ruby 1.9 users do not need to call this, since Ruby 1.9 appears to set the
62
+ * locale in this manner already. Calling it won't matter, however.
63
+ *
64
+ * Returns a string representing the old locale. If you wish to change locales
65
+ * several times, you can use this value to return to the previous locale.
66
+ * Otherwise, just ignore it.
67
+ */
68
+
69
+ static VALUE init_locale(VALUE v_self) {
70
+ char* old_locale = setlocale(LC_ALL, NULL);
71
+ setlocale(LC_ALL, ""); // set ctype locale according to appropriate env vars
72
+
73
+ return rb_str_new2(old_locale);
74
+ }
13
75
 
14
76
  /*
15
77
  * call-seq: display_width(string)
16
- *
78
+ *
17
79
  * Returns the display width of <code>string</code>, that is, the number of
18
- * columns that the string will take up when printed to screen. Note that this
19
- * is different from the number of characters (some characters take up one
20
- * column, some (e.g. Chinese characters) take up two columns), and the number
21
- * of bytes (e.g. UTF-8 is a multibyte encoding) in a string.
80
+ * columns that the string will take up when printed to screen. This is
81
+ * different from both the number of characters and the number of bytes in a
82
+ * string.
83
+ *
84
+ * In Ruby 1.8, the input string is assumed to be in the current locale's
85
+ * encoding. If it isn't, an ArgumentError will be raised. Be sure to call
86
+ * init_locale! before calling this method! Otherwise every non-ASCII string
87
+ * will trigger an ArgumentError.
22
88
  *
23
- * For Ruby 1.8, the string is assumed to be in the current locale's encoding.
24
- * If not, terrible things will happen.
89
+ * In Ruby 1.9, the string will be automatically converted from its encoding
90
+ * into the current locale's encoding for processing.
25
91
  *
26
- * For Ruby 1.9, the string will be magically converted from whatever encoding
27
- * it is in, into the current locale's encoding, for processing. This may fail.
92
+ * Raises an ArgumentError when it encounters an invalid character. On Ruby
93
+ * 1.8, this includes any string not in the current locale's encoding. On Ruby
94
+ * 1.9, this should only occur if Ruby is unable to convert the string from its
95
+ * encoding into the current locale's encoding.
28
96
  */
29
97
  static VALUE display_width(VALUE v_self, VALUE v_string) {
30
98
  Check_Type(v_string, T_STRING);
31
99
 
32
- /* for ruby 1.8, we assume the string is in your locale's CTYPE encoding
33
- * already. if not, terrible things will happen.
34
- *
35
- * for ruby 1.9, we explicitly convert it to the locale's CTYPE encoding,
36
- * like this:
37
- */
38
100
  #ifdef HAVE_RUBY_ENCODING_H
101
+ // convert from whatever encoding it's in.
102
+ // TODO: do i have to use rb_protect to relay any exceptions?
39
103
  v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
40
104
  #endif
105
+
41
106
  char* string = RSTRING_PTR(v_string);
42
107
 
43
- mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
44
108
  long display_width = 0;
45
- long remaining_bytes = RSTRING_LEN(v_string);
46
- while(remaining_bytes > 0) {
47
- wchar_t wc;
48
- size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
49
- if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
50
-
51
- int width = wcwidth(wc);
52
- /* sometimes this seems to happen! maybe it's not bad...
53
- if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
54
- */
55
-
56
- display_width += width;
57
- remaining_bytes -= num_bytes;
58
- string += num_bytes; // advance string pointer
109
+ long strlen = RSTRING_LEN(v_string);
110
+ long offset = 0;
111
+
112
+ while(offset < strlen) {
113
+ size_t num_bytes, num_cols;
114
+ int err = calc_width(string, strlen, offset, &num_bytes, &num_cols);
115
+ if(err) break;
116
+
117
+ display_width += num_cols;
118
+ offset += num_bytes;
59
119
  }
60
120
 
61
121
  return LONG2NUM(display_width);
@@ -67,29 +127,34 @@ static const char* default_pad_string = " ";
67
127
  * call-seq:
68
128
  * display_slice(string, start_offset, display_width=1, pad_string=" ")
69
129
  *
70
- * Returns a slice of a string based on display width, rather than character or
71
- * bytes. I.e, the <code>start_offset</code> and <code>display_width</code>
72
- * offsets index the columns required to display the string, and not individual
130
+ * Returns a slice of a string, based on display width, rather than characters
131
+ * or bytes. I.e., the <code>start_offset</code> and <code>display_width</code>
132
+ * offsets index the columns required to display the string, not individual
73
133
  * characters or bytes.
74
134
  *
75
135
  * This is useful if you want to display a part of a string on screen, as you
76
- * can pull out a specific portion by its display size.
136
+ * can pull out a specific portion based on display size.
77
137
  *
78
138
  * Padding: slicing can truncate multi-column characters. If the slice
79
139
  * truncates a character, the string will be padded with
80
140
  * <code>pad_string</code>, on the left side, right side, or both, as
81
141
  * necessary. If <code>pad_string</code> is <code>nil</code> then no padding
82
- * will be done. <code>pad_string</code> should be a single-column
83
- * string for this to make sense.
142
+ * will be done. <code>pad_string</code> should be a single-column string for
143
+ * this to make sense.
84
144
  *
85
- * For Ruby 1.8, the string is assumed to be in the current locale's encoding.
86
- * If not, terrible things will happen.
145
+ * In Ruby 1.8, the input string is assumed to be in the current locale's
146
+ * encoding. If it isn't, an ArgumentError will be raised. Be sure to call
147
+ * init_locale! before calling this method! Otherwise every non-ASCII string
148
+ * will trigger an ArgumentError.
87
149
  *
88
- * For Ruby 1.9, the string will be magically converted from whatever encoding
89
- * it is in, into the current locale's encoding, for processing. This may fail.
150
+ * In Ruby 1.9, the string will be automatically converted from its encoding
151
+ * into the current locale's encoding. Regardless of the original encoding, the
152
+ * returned string will be in the current locale's encoding.
90
153
  *
91
- * The returned string WILL be in the current locale encoding, regardless of the
92
- * encoding of the original string.
154
+ * Raises an ArgumentError when it encounters an invalid character. On Ruby
155
+ * 1.8, this includes any string not in the current locale's encoding. On Ruby
156
+ * 1.9, this should only occur if Ruby is unable to convert the string from its
157
+ * encoding into the current locale's encoding.
93
158
  */
94
159
  static VALUE display_slice(int argc, VALUE *argv, VALUE v_self) {
95
160
  VALUE v_string, v_display_start, v_display_width, v_pad_string;
@@ -106,35 +171,27 @@ static VALUE display_slice(int argc, VALUE *argv, VALUE v_self) {
106
171
  else display_width = NUM2INT(v_display_width);
107
172
  if(display_width < 0) return Qnil; // you fail
108
173
 
109
- char* pad_string;
174
+ const char* pad_string;
110
175
  if(argc < 4) pad_string = default_pad_string; // only fill in default if unspecified; nil is a valid value
111
176
  else if(v_pad_string == Qnil) pad_string = "";
112
177
  else pad_string = RSTRING_PTR(v_pad_string);
113
178
 
114
- /* see comments in display_width() */
115
179
  #ifdef HAVE_RUBY_ENCODING_H
180
+ // TODO: do i have to use rb_protect to relay any exceptions?
116
181
  v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
117
182
  #endif
118
183
  char* string = RSTRING_PTR(v_string);
119
-
120
- mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
121
- long remaining_bytes = RSTRING_LEN(v_string);
184
+ long slen = RSTRING_LEN(v_string);
122
185
 
123
186
  // first, advance the string pointer so that we've seen display_start width characters
124
187
  long current_width = 0;
125
- while((remaining_bytes > 0) && (current_width < display_start)) {
126
- wchar_t wc;
127
- size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
128
- if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
129
-
130
- int width = wcwidth(wc);
131
- /* sometimes this seems to happen! maybe it's not bad...
132
- if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
133
- */
134
-
135
- current_width += width;
136
- remaining_bytes -= num_bytes;
137
- string += num_bytes; // advance string pointer
188
+ long offset = 0;
189
+ while((offset < slen) && (current_width < display_start)) {
190
+ size_t num_bytes, num_cols;
191
+ int err = calc_width(string, slen, offset, &num_bytes, &num_cols);
192
+
193
+ current_width += num_cols;
194
+ offset += num_bytes;
138
195
  }
139
196
 
140
197
  /* here's a weird behavior (to me!) of String#slice that we emulate:
@@ -144,58 +201,61 @@ static VALUE display_slice(int argc, VALUE *argv, VALUE v_self) {
144
201
  if((current_width < display_start)) return Qnil;
145
202
 
146
203
  /* determine left padding */
147
- char* pad_left = "";
204
+ const char* pad_left = "";
148
205
  if((current_width > display_start) && (display_width > 0)) pad_left = pad_string;
149
206
 
150
207
  // now, advance the string_end pointer so that we've seen an additional display_width width characters
151
- char* string_end = string;
208
+ long end_offset = offset;
152
209
  current_width -= display_start;
153
- while((remaining_bytes > 0) && (current_width < display_width)) {
154
- wchar_t wc;
155
- size_t num_bytes = mbrtowc(&wc, string_end, remaining_bytes, &mbs);
156
- if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
157
-
158
- int width = wcwidth(wc);
159
- /* sometimes this seems to happen! maybe it's not bad...
160
- if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
161
- */
210
+ while((end_offset < slen) && (current_width < display_width)) {
211
+ size_t num_bytes, num_cols;
212
+ int err = calc_width(string, slen, end_offset, &num_bytes, &num_cols);
162
213
 
163
- if((current_width + width) > display_width) break; // have to stop here
214
+ if((current_width + num_cols) > (size_t)display_width) break; // have to stop here
164
215
 
165
- current_width += width;
166
- remaining_bytes -= num_bytes;
167
- string_end += num_bytes; // advance string pointer
216
+ current_width += num_cols;
217
+ end_offset += num_bytes;
168
218
  }
169
219
 
170
220
  /* determine right padding */
171
- char* pad_right = "";
172
- if((current_width < display_width) && (remaining_bytes > 0)) pad_right = pad_string;
221
+ const char* pad_right = "";
222
+ if((current_width < display_width) && (end_offset < slen)) pad_right = pad_string;
173
223
 
174
224
  // finally, construct a new string
175
- int bytesize = string_end - string;
225
+ int bytesize = end_offset - offset;
176
226
  int leftsize = strlen(pad_left);
177
227
  int rightsize = strlen(pad_right);
178
228
 
179
229
  char* new_string = calloc(bytesize + leftsize + rightsize + 1, sizeof(char));
180
230
  if(leftsize > 0) strcpy(new_string, pad_left);
181
- if(bytesize > 0) memcpy(new_string + leftsize, string, bytesize * sizeof(char));
231
+ if(bytesize > 0) memcpy(new_string + leftsize, string + offset, bytesize * sizeof(char));
182
232
  if(rightsize > 0) strcpy(new_string + leftsize + bytesize, pad_right);
183
233
 
184
- (new_string + bytesize + leftsize + rightsize)[0] = 0;
234
+ (new_string + bytesize + leftsize + rightsize)[0] = 0;
185
235
 
236
+ #ifdef HAVE_RUBY_ENCODING_H
186
237
  return rb_enc_str_new(new_string, bytesize + leftsize + rightsize, rb_enc_get(v_string));
238
+ #else
239
+ return rb_str_new(new_string, bytesize + leftsize + rightsize);
240
+ #endif
187
241
  }
188
242
 
189
243
  /*
190
244
  * A helper class for console-based programs that need to deal with non-ASCII
191
245
  * code. If you are writing a curses/ncurses program, or otherwise care about
192
- * the number of characters on the screen, this is crucial stuff.
246
+ * the display width of characters on the screen, this is crucial stuff.
193
247
  *
194
248
  * Provides:
195
249
  *
196
- * Console.display_width: get the number of display columns used by a string.
250
+ * Console.init_locale!: set the program's locale from the appropriate
251
+ * environment variables. (Ruby 1.8 programs must call this before calling any
252
+ * of the other methods. Ruby 1.9 programs can call it or skip it without
253
+ * effect.)
197
254
  *
198
- * Console.display_slice: get a substrig by display column offset and size.
255
+ * Console.display_width: calculates the display width of a string
256
+ *
257
+ * Console.display_slice: returns a substring according to display offset
258
+ * and display width parameters.
199
259
  *
200
260
  */
201
261
 
@@ -205,5 +265,5 @@ void Init_console() {
205
265
  cConsole = rb_define_class("Console", rb_cObject);
206
266
  rb_define_module_function(cConsole, "display_width", display_width, 1);
207
267
  rb_define_module_function(cConsole, "display_slice", display_slice, -1);
268
+ rb_define_module_function(cConsole, "init_locale!", init_locale, 0);
208
269
  }
209
-
@@ -29,11 +29,9 @@ class String
29
29
  ## Returns a substring according to display-based start and offset values. See
30
30
  ## Console.display_slice for what this means.
31
31
  ##
32
- ## Stupid rdoc is not interpreting this next directive, and I'm tired of wasting
33
- ## my life figuring out why.
34
- ##
35
32
  ## :call-seq:
36
33
  ## display_slice(start, offset=1, pad_string=" ")
37
34
  ##
35
+ ## (rdoc fail)
38
36
  def display_slice(*a); Console.display_slice self, *a end
39
37
  end
data/test/console.rb CHANGED
@@ -8,6 +8,8 @@
8
8
  require 'test/unit'
9
9
  require 'console'
10
10
 
11
+ Console.init_locale!
12
+
11
13
  class ConsoleTest < ::Test::Unit::TestCase
12
14
  def setup
13
15
  @s = "能吞aê"
@@ -57,8 +59,6 @@ class ConsoleTest < ::Test::Unit::TestCase
57
59
  end
58
60
 
59
61
  def test_slice_misaligned_start_offsets_get_padded
60
- s = "能吞aê"
61
-
62
62
  assert_equal "", Console.display_slice(@s, 0, 0)
63
63
  assert_equal " ", Console.display_slice(@s, 0, 1)
64
64
  assert_equal "能", Console.display_slice(@s, 0, 2)
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: console
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 13
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 2
8
8
  - 3
9
- version: 0.2.3
9
+ version: "0.3"
10
10
  platform: ruby
11
11
  authors:
12
12
  - William Morgan
@@ -14,11 +14,11 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-05-19 08:36:26 -04:00
17
+ date: 2011-01-17 20:18:21 -08:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
21
- description: Console is a helper class for displaying super-ASCII strings on the console. Console is needed when you want to mix two-column (e.g. Chinese) and single-column (e.g. ASCII) characters and know how much horizontal realestate the result takes on the screen. This is generally necessary when you want to have internationalization support in a console program. Console currently provides utility methods for determining the display width of a string, and for taking a substring based on display position and display width.
21
+ description: Console is a helper class for displaying super-ASCII strings on the console. Console is needed when you want to mix two-column (e.g. Chinese) and single-column (e.g. ASCII) characters and know how much horizontal real estate the result takes on the screen. This is generally necessary when you want to have internationalization support in a console-based program. Console currently provides utility methods for determining the display width of a string, and for taking a substring based on display position and display width.
22
22
  email: wmorgan-console@masanjin.net
23
23
  executables: []
24
24
 
@@ -43,25 +43,29 @@ rdoc_options: []
43
43
  require_paths:
44
44
  - lib
45
45
  required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
46
47
  requirements:
47
48
  - - ">="
48
49
  - !ruby/object:Gem::Version
50
+ hash: 3
49
51
  segments:
50
52
  - 0
51
53
  version: "0"
52
54
  required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
53
56
  requirements:
54
57
  - - ">="
55
58
  - !ruby/object:Gem::Version
59
+ hash: 3
56
60
  segments:
57
61
  - 0
58
62
  version: "0"
59
63
  requirements: []
60
64
 
61
65
  rubyforge_project:
62
- rubygems_version: 1.3.6
66
+ rubygems_version: 1.3.7
63
67
  signing_key:
64
68
  specification_version: 3
65
- summary: Console is a helper for displaying super-ASCII strings on the console.
69
+ summary: Console is a helper for properly handling super-ASCII strings on the console.
66
70
  test_files: []
67
71