console 0.2.3 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -1,13 +1,21 @@
1
1
  Console is a helper class for displaying super-ASCII strings on the console.
2
2
 
3
- Console is needed when you want to mix multi-column (e.g. Chinese, Japanese and
4
- Korean characters) and single-column (e.g. ASCII) characters and know how much
5
- horizontal realestate the result takes on the screen. This is generally
6
- necessary when you want to have internationalization support in a console
7
- program.
3
+ Console is necessary when you want to mix multi-column (e.g. Chinese, Japanese
4
+ and Korean characters) and single-column (e.g. ASCII, Latin, Vietnamese)
5
+ characters on screen in such a way that the display width matters. This is
6
+ typically the case in curses programs with i18n support, but can be necessary
7
+ in certain internationalized $stdout applications as well.
8
+
9
+ Note that display width is different from a) the number of bytes in the string,
10
+ and b) the number of characters in the string. When you move beyond ASCII
11
+ strings, these three metrics can all have distinct values for a given string.
8
12
 
9
13
  The Console gem currently provides these methods:
10
14
 
15
+ - Console.init_locale!: set the program's locale from the appropriate
16
+ environment variables. (Ruby 1.8 programs must call this before calling any
17
+ of the other methods. Ruby 1.9 programs can call it or skip it without
18
+ effect.)
11
19
  - Console.display_width: calculates the display width of a string
12
20
  - Console.display_slice: returns a substring according to display offset
13
21
  and display width parameters.
@@ -17,6 +25,27 @@ If you require 'console', you will get just those methods.
17
25
  If you require 'console/string', you will get String#display_width and
18
26
  String#display_slice methods directly on all strings.
19
27
 
20
- The console homepage is <code>http://masanjin.net/console/</code>.
28
+ = EXAMPLE USAGE
29
+
30
+ ## encoding: UTF-8 (this comment required for ruby 1.9)
31
+ require 'rubygems' # this line required for ruby 1.8
32
+ require 'console'
33
+ require 'console/string'
34
+
35
+ Console.init_locale!
36
+
37
+ STRING = "我能吞下玻璃而不傷身體。Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong.私はガラスを食べられます。それは私を傷つけません。I can eat glass and it doesn't hurt me."
38
+ COLS = 30
21
39
 
40
+ rows = STRING.display_width / COLS
41
+ (0 .. rows).each { |i| puts STRING.display_slice(i * COLS, COLS) }
42
+
43
+ The result will be displayed in an even 30-column block on the console, even
44
+ though some characters in the string require two columns to display and some
45
+ characters require one column.
46
+
47
+ = MORE
48
+
49
+ The console homepage is <code>http://masanjin.net/console/</code>.
22
50
  The console git repo is <code>git://masanjin.net/console</code>.
51
+
@@ -1,61 +1,121 @@
1
1
  /*
2
- * console.c -- unit tests for ruby Console library
2
+ * console.c -- ruby console library
3
3
  * Author: William Morgan (mailto: wmorgan-ruby-console@masanjin.net)
4
4
  * Copyright: Copyright 2010 William Morgan
5
5
  * License: same terms as Ruby itself
6
6
  */
7
7
 
8
- #define _XOPEN_SOURCE
9
8
  #include <wchar.h>
10
9
  #include <stdlib.h>
11
10
  #include <ruby.h>
11
+ #include <locale.h>
12
+
13
+ #ifdef HAVE_RUBY_ENCODING_H
12
14
  #include <ruby/encoding.h>
15
+ #endif
16
+
17
+ static inline int calc_width(char* string, long strlen, long byte_offset, size_t* num_bytes, size_t* num_cols) {
18
+ wchar_t wc;
19
+ size_t width = -1;
20
+
21
+ *num_bytes = mbrtowc(&wc, string + byte_offset, strlen - byte_offset, NULL);
22
+
23
+ if(*num_bytes == (size_t)-2) {
24
+ rb_raise(rb_eArgError, "malformed string: incomplete multibyte character at position %ld", byte_offset);
25
+ return -1;
26
+ }
27
+ else if(*num_bytes == (size_t)-1) {
28
+ rb_raise(rb_eArgError, "malformed string: invalid multibyte character at position %ld", byte_offset);
29
+ return -1;
30
+ }
31
+ else if(*num_bytes == 0) {
32
+ rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", byte_offset);
33
+ return -1;
34
+ }
35
+
36
+ *num_cols = wcwidth(wc);
37
+ /* sometimes this seems to happen! maybe it's not bad...
38
+ if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
39
+ */
40
+
41
+ return 0;
42
+ }
43
+
44
+ /*
45
+ * call-seq: init_locale!
46
+ *
47
+ * Sets the program's current locale from the appropriate environment variables.
48
+ * (see `man 3 setlocale` for details).
49
+ *
50
+ * Equivalent to:
51
+ * char* old_locale = setlocale(LC_ALL, NULL); setlocale(LC_ALL, "");
52
+ * return old_locale;
53
+ *
54
+ * in C.
55
+ *
56
+ * If you are using Ruby 1.8, you *must* call this at least once before calling
57
+ * the other methods in this package. Otherwise, all non-ASCII strings will
58
+ * be considered invalid, and #display_width and #display_slice will raise
59
+ * ArgumentErrors.
60
+ *
61
+ * Ruby 1.9 users do not need to call this, since Ruby 1.9 appears to set the
62
+ * locale in this manner already. Calling it won't matter, however.
63
+ *
64
+ * Returns a string representing the old locale. If you wish to change locales
65
+ * several times, you can use this value to return to the previous locale.
66
+ * Otherwise, just ignore it.
67
+ */
68
+
69
+ static VALUE init_locale(VALUE v_self) {
70
+ char* old_locale = setlocale(LC_ALL, NULL);
71
+ setlocale(LC_ALL, ""); // set ctype locale according to appropriate env vars
72
+
73
+ return rb_str_new2(old_locale);
74
+ }
13
75
 
14
76
  /*
15
77
  * call-seq: display_width(string)
16
- *
78
+ *
17
79
  * Returns the display width of <code>string</code>, that is, the number of
18
- * columns that the string will take up when printed to screen. Note that this
19
- * is different from the number of characters (some characters take up one
20
- * column, some (e.g. Chinese characters) take up two columns), and the number
21
- * of bytes (e.g. UTF-8 is a multibyte encoding) in a string.
80
+ * columns that the string will take up when printed to screen. This is
81
+ * different from both the number of characters and the number of bytes in a
82
+ * string.
83
+ *
84
+ * In Ruby 1.8, the input string is assumed to be in the current locale's
85
+ * encoding. If it isn't, an ArgumentError will be raised. Be sure to call
86
+ * init_locale! before calling this method! Otherwise every non-ASCII string
87
+ * will trigger an ArgumentError.
22
88
  *
23
- * For Ruby 1.8, the string is assumed to be in the current locale's encoding.
24
- * If not, terrible things will happen.
89
+ * In Ruby 1.9, the string will be automatically converted from its encoding
90
+ * into the current locale's encoding for processing.
25
91
  *
26
- * For Ruby 1.9, the string will be magically converted from whatever encoding
27
- * it is in, into the current locale's encoding, for processing. This may fail.
92
+ * Raises an ArgumentError when it encounters an invalid character. On Ruby
93
+ * 1.8, this includes any string not in the current locale's encoding. On Ruby
94
+ * 1.9, this should only occur if Ruby is unable to convert the string from its
95
+ * encoding into the current locale's encoding.
28
96
  */
29
97
  static VALUE display_width(VALUE v_self, VALUE v_string) {
30
98
  Check_Type(v_string, T_STRING);
31
99
 
32
- /* for ruby 1.8, we assume the string is in your locale's CTYPE encoding
33
- * already. if not, terrible things will happen.
34
- *
35
- * for ruby 1.9, we explicitly convert it to the locale's CTYPE encoding,
36
- * like this:
37
- */
38
100
  #ifdef HAVE_RUBY_ENCODING_H
101
+ // convert from whatever encoding it's in.
102
+ // TODO: do i have to use rb_protect to relay any exceptions?
39
103
  v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
40
104
  #endif
105
+
41
106
  char* string = RSTRING_PTR(v_string);
42
107
 
43
- mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
44
108
  long display_width = 0;
45
- long remaining_bytes = RSTRING_LEN(v_string);
46
- while(remaining_bytes > 0) {
47
- wchar_t wc;
48
- size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
49
- if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
50
-
51
- int width = wcwidth(wc);
52
- /* sometimes this seems to happen! maybe it's not bad...
53
- if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
54
- */
55
-
56
- display_width += width;
57
- remaining_bytes -= num_bytes;
58
- string += num_bytes; // advance string pointer
109
+ long strlen = RSTRING_LEN(v_string);
110
+ long offset = 0;
111
+
112
+ while(offset < strlen) {
113
+ size_t num_bytes, num_cols;
114
+ int err = calc_width(string, strlen, offset, &num_bytes, &num_cols);
115
+ if(err) break;
116
+
117
+ display_width += num_cols;
118
+ offset += num_bytes;
59
119
  }
60
120
 
61
121
  return LONG2NUM(display_width);
@@ -67,29 +127,34 @@ static const char* default_pad_string = " ";
67
127
  * call-seq:
68
128
  * display_slice(string, start_offset, display_width=1, pad_string=" ")
69
129
  *
70
- * Returns a slice of a string based on display width, rather than character or
71
- * bytes. I.e, the <code>start_offset</code> and <code>display_width</code>
72
- * offsets index the columns required to display the string, and not individual
130
+ * Returns a slice of a string, based on display width, rather than characters
131
+ * or bytes. I.e, the <code>start_offset</code> and <code>display_width</code>
132
+ * offsets index the columns required to display the string, not individual
73
133
  * characters or bytes.
74
134
  *
75
135
  * This is useful if you want to display a part of a string on screen, as you
76
- * can pull out a specific portion by its display size.
136
+ * can pull out a specific portion based on display size.
77
137
  *
78
138
  * Padding: slicing can truncate multi-column characters. If the slice
79
139
  * truncates a character, the string will be padded with
80
140
  * <code>pad_string</code>, on the left side, right side, or both, as
81
141
  * necessary. If <code>pad_string</code> is <code>nil</code> then no padding
82
- * will be done. <code>pad_string</code> should be a single-column
83
- * string for this to make sense.
142
+ * will be done. <code>pad_string</code> should be a single-column string for
143
+ * this to make sense.
84
144
  *
85
- * For Ruby 1.8, the string is assumed to be in the current locale's encoding.
86
- * If not, terrible things will happen.
145
+ * In Ruby 1.8, the input string is assumed to be in the current locale's
146
+ * encoding. If it isn't, an ArgumentError will be raised. Be sure to call
147
+ * init_locale! before calling this method! Otherwise every non-ASCII string
148
+ * will trigger an ArgumentError.
87
149
  *
88
- * For Ruby 1.9, the string will be magically converted from whatever encoding
89
- * it is in, into the current locale's encoding, for processing. This may fail.
150
+ * In Ruby 1.9, the string will be automatically converted from its encoding
151
+ * into the current locale's encoding. Regardless of the original encoding, the
152
+ * returned string will be in the current locale's encoding.
90
153
  *
91
- * The returned string WILL be in the current locale encoding, regardless of the
92
- * encoding of the original string.
154
+ * Raises an ArgumentError when it encounters an invalid character. On Ruby
155
+ * 1.8, this includes any string not in the current locale's encoding. On Ruby
156
+ * 1.9, this should only occur if Ruby is unable to convert the string from its
157
+ * encoding into the current locale's encoding.
93
158
  */
94
159
  static VALUE display_slice(int argc, VALUE *argv, VALUE v_self) {
95
160
  VALUE v_string, v_display_start, v_display_width, v_pad_string;
@@ -106,35 +171,27 @@ static VALUE display_slice(int argc, VALUE *argv, VALUE v_self) {
106
171
  else display_width = NUM2INT(v_display_width);
107
172
  if(display_width < 0) return Qnil; // you fail
108
173
 
109
- char* pad_string;
174
+ const char* pad_string;
110
175
  if(argc < 4) pad_string = default_pad_string; // only fill in default if unspecified; nil is a valid value
111
176
  else if(v_pad_string == Qnil) pad_string = "";
112
177
  else pad_string = RSTRING_PTR(v_pad_string);
113
178
 
114
- /* see comments in display_width() */
115
179
  #ifdef HAVE_RUBY_ENCODING_H
180
+ // TODO: do i have to use rb_protect to relay any exceptions?
116
181
  v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
117
182
  #endif
118
183
  char* string = RSTRING_PTR(v_string);
119
-
120
- mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
121
- long remaining_bytes = RSTRING_LEN(v_string);
184
+ long slen = RSTRING_LEN(v_string);
122
185
 
123
186
  // first, advance the string pointer so that we've seen display_start width characters
124
187
  long current_width = 0;
125
- while((remaining_bytes > 0) && (current_width < display_start)) {
126
- wchar_t wc;
127
- size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
128
- if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
129
-
130
- int width = wcwidth(wc);
131
- /* sometimes this seems to happen! maybe it's not bad...
132
- if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
133
- */
134
-
135
- current_width += width;
136
- remaining_bytes -= num_bytes;
137
- string += num_bytes; // advance string pointer
188
+ long offset = 0;
189
+ while((offset < slen) && (current_width < display_start)) {
190
+ size_t num_bytes, num_cols;
191
+ int err = calc_width(string, slen, offset, &num_bytes, &num_cols);
192
+
193
+ current_width += num_cols;
194
+ offset += num_bytes;
138
195
  }
139
196
 
140
197
  /* here's a weird behavior (to me!) of String#slice that we emulate:
@@ -144,58 +201,61 @@ static VALUE display_slice(int argc, VALUE *argv, VALUE v_self) {
144
201
  if((current_width < display_start)) return Qnil;
145
202
 
146
203
  /* determine left padding */
147
- char* pad_left = "";
204
+ const char* pad_left = "";
148
205
  if((current_width > display_start) && (display_width > 0)) pad_left = pad_string;
149
206
 
150
207
  // now, advance the string_end pointer so that we've seen an additional display_width width characters
151
- char* string_end = string;
208
+ long end_offset = offset;
152
209
  current_width -= display_start;
153
- while((remaining_bytes > 0) && (current_width < display_width)) {
154
- wchar_t wc;
155
- size_t num_bytes = mbrtowc(&wc, string_end, remaining_bytes, &mbs);
156
- if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
157
-
158
- int width = wcwidth(wc);
159
- /* sometimes this seems to happen! maybe it's not bad...
160
- if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
161
- */
210
+ while((end_offset < slen) && (current_width < display_width)) {
211
+ size_t num_bytes, num_cols;
212
+ int err = calc_width(string, slen, end_offset, &num_bytes, &num_cols);
162
213
 
163
- if((current_width + width) > display_width) break; // have to stop here
214
+ if((current_width + num_cols) > (size_t)display_width) break; // have to stop here
164
215
 
165
- current_width += width;
166
- remaining_bytes -= num_bytes;
167
- string_end += num_bytes; // advance string pointer
216
+ current_width += num_cols;
217
+ end_offset += num_bytes;
168
218
  }
169
219
 
170
220
  /* determine right padding */
171
- char* pad_right = "";
172
- if((current_width < display_width) && (remaining_bytes > 0)) pad_right = pad_string;
221
+ const char* pad_right = "";
222
+ if((current_width < display_width) && (end_offset < slen)) pad_right = pad_string;
173
223
 
174
224
  // finally, construct a new string
175
- int bytesize = string_end - string;
225
+ int bytesize = end_offset - offset;
176
226
  int leftsize = strlen(pad_left);
177
227
  int rightsize = strlen(pad_right);
178
228
 
179
229
  char* new_string = calloc(bytesize + leftsize + rightsize + 1, sizeof(char));
180
230
  if(leftsize > 0) strcpy(new_string, pad_left);
181
- if(bytesize > 0) memcpy(new_string + leftsize, string, bytesize * sizeof(char));
231
+ if(bytesize > 0) memcpy(new_string + leftsize, string + offset, bytesize * sizeof(char));
182
232
  if(rightsize > 0) strcpy(new_string + leftsize + bytesize, pad_right);
183
233
 
184
- (new_string + bytesize + leftsize + rightsize)[0] = 0;
234
+ (new_string + bytesize + leftsize + rightsize)[0] = 0;
185
235
 
236
+ #ifdef HAVE_RUBY_ENCODING_H
186
237
  return rb_enc_str_new(new_string, bytesize + leftsize + rightsize, rb_enc_get(v_string));
238
+ #else
239
+ return rb_str_new(new_string, bytesize + leftsize + rightsize);
240
+ #endif
187
241
  }
188
242
 
189
243
  /*
190
244
  * A helper class for console-based programs that need to deal with non-ASCII
191
245
  * code. If you are writing a curses/ncurses program, or otherwise care about
192
- * the number of characters on the screen, this is crucial stuff.
246
+ * the display width of characters on the screen, this is crucial stuff.
193
247
  *
194
248
  * Provides:
195
249
  *
196
- * Console.display_width: get the number of display columns used by a string.
250
+ * Console.init_locale!: set the program's locale from the appropriate
251
+ * environment variables. (Ruby 1.8 programs must call this before calling any
252
+ * of the other methods. Ruby 1.9 programs can call it or skip it without
253
+ * effect.)
197
254
  *
198
- * Console.display_slice: get a substrig by display column offset and size.
255
+ * Console.display_width: calculates the display width of a string
256
+ *
257
+ * Console.display_slice: returns a substring according to display offset
258
+ * and display width parameters.
199
259
  *
200
260
  */
201
261
 
@@ -205,5 +265,5 @@ void Init_console() {
205
265
  cConsole = rb_define_class("Console", rb_cObject);
206
266
  rb_define_module_function(cConsole, "display_width", display_width, 1);
207
267
  rb_define_module_function(cConsole, "display_slice", display_slice, -1);
268
+ rb_define_module_function(cConsole, "init_locale!", init_locale, 0);
208
269
  }
209
-
@@ -29,11 +29,9 @@ class String
29
29
  ## Returns a substring according to display-based start and offset values. See
30
30
  ## Console.display_slice for what this means.
31
31
  ##
32
- ## Stupid rdoc is not interpreting this next directive, and I'm tired of wasting
33
- ## my life figuring out why.
34
- ##
35
32
  ## :call-seq:
36
33
  ## display_slice(start, offset=1, pad_string=" ")
37
34
  ##
35
+ ## (rdoc fail)
38
36
  def display_slice(*a); Console.display_slice self, *a end
39
37
  end
data/test/console.rb CHANGED
@@ -8,6 +8,8 @@
8
8
  require 'test/unit'
9
9
  require 'console'
10
10
 
11
+ Console.init_locale!
12
+
11
13
  class ConsoleTest < ::Test::Unit::TestCase
12
14
  def setup
13
15
  @s = "能吞aê"
@@ -57,8 +59,6 @@ class ConsoleTest < ::Test::Unit::TestCase
57
59
  end
58
60
 
59
61
  def test_slice_misaligned_start_offsets_get_padded
60
- s = "能吞aê"
61
-
62
62
  assert_equal "", Console.display_slice(@s, 0, 0)
63
63
  assert_equal " ", Console.display_slice(@s, 0, 1)
64
64
  assert_equal "能", Console.display_slice(@s, 0, 2)
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: console
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 13
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 2
8
8
  - 3
9
- version: 0.2.3
9
+ version: "0.3"
10
10
  platform: ruby
11
11
  authors:
12
12
  - William Morgan
@@ -14,11 +14,11 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-05-19 08:36:26 -04:00
17
+ date: 2011-01-17 20:18:21 -08:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
21
- description: Console is a helper class for displaying super-ASCII strings on the console. Console is needed when you want to mix two-column (e.g. Chinese) and single-column (e.g. ASCII) characters and know how much horizontal realestate the result takes on the screen. This is generally necessary when you want to have internationalization support in a console program. Console currently provides utility methods for determining the display width of a string, and for taking a substring based on display position and display width.
21
+ description: Console is a helper class for displaying super-ASCII strings on the console. Console is needed when you want to mix two-column (e.g. Chinese) and single-column (e.g. ASCII) characters and know how much horizontal realestate the result takes on the screen. This is generally necessary when you want to have internationalization support in a console-based program. Console currently provides utility methods for determining the display width of a string, and for taking a substring based on display position and display width.
22
22
  email: wmorgan-console@masanjin.net
23
23
  executables: []
24
24
 
@@ -43,25 +43,29 @@ rdoc_options: []
43
43
  require_paths:
44
44
  - lib
45
45
  required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
46
47
  requirements:
47
48
  - - ">="
48
49
  - !ruby/object:Gem::Version
50
+ hash: 3
49
51
  segments:
50
52
  - 0
51
53
  version: "0"
52
54
  required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
53
56
  requirements:
54
57
  - - ">="
55
58
  - !ruby/object:Gem::Version
59
+ hash: 3
56
60
  segments:
57
61
  - 0
58
62
  version: "0"
59
63
  requirements: []
60
64
 
61
65
  rubyforge_project:
62
- rubygems_version: 1.3.6
66
+ rubygems_version: 1.3.7
63
67
  signing_key:
64
68
  specification_version: 3
65
- summary: Console is a helper for displaying super-ASCII strings on the console.
69
+ summary: Console is a helper for properly handling super-ASCII strings on the console.
66
70
  test_files: []
67
71