console 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,203 @@
1
+ /*
2
+ * console.c -- C extension implementing the Ruby Console library
3
+ * Author: William Morgan (mailto: wmorgan-ruby-console@masanjin.net)
4
+ * Copyright: Copyright 2010 William Morgan
5
+ * License: same terms as Ruby itself
6
+ */
7
+
8
+ #define _XOPEN_SOURCE
9
+ #include <wchar.h>
10
+ #include <stdlib.h>
11
+ #include <ruby.h>
12
+ #include <ruby/encoding.h>
13
+
14
+ /*
15
+ * call-seq: display_width(string)
16
+ *
17
+ * Returns the display width of <code>string</code>, that is, the number of
18
+ * columns that the string will take up when printed to screen. Note that this
19
+ * is different from the number of characters (some characters take up one
20
+ * column, some (e.g. Chinese characters) take up two columns), and the number
21
+ * of bytes (e.g. UTF-8 is a multibyte encoding) in a string.
22
+ *
23
+ * For Ruby 1.8, the string is assumed to be in the current locale's encoding.
24
+ * If not, terrible things will happen.
25
+ *
26
+ * For Ruby 1.9, the string will be magically converted from whatever encoding
27
+ * it is in, into the current locale's encoding, for processing. This may fail.
28
+ */
29
+ static VALUE display_width(VALUE v_self, VALUE v_string) {
30
+ Check_Type(v_string, T_STRING);
31
+
32
+ /* for ruby 1.8, we assume the string is in your locale's CTYPE encoding
33
+ * already. if not, terrible things will happen.
34
+ *
35
+ * for ruby 1.9, we explicitly convert it to the locale's CTYPE encoding,
36
+ * like this:
37
+ */
38
+ #ifdef HAVE_RUBY_ENCODING_H
39
+ v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
40
+ #endif
41
+ char* string = RSTRING_PTR(v_string);
42
+
43
+ mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
44
+ long display_width = 0;
45
+ long remaining_bytes = RSTRING_LEN(v_string);
46
+ while(remaining_bytes > 0) {
47
+ wchar_t wc;
48
+ size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
49
+ if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
50
+
51
+ int width = wcwidth(wc);
52
+ if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character");
53
+
54
+ display_width += width;
55
+ remaining_bytes -= num_bytes;
56
+ string += num_bytes; // advance string pointer
57
+ }
58
+
59
+ return LONG2NUM(display_width);
60
+ }
61
+
62
+ static const char* default_pad_string = " ";
63
+
64
+ /*
65
+ * call-seq:
66
+ * slice(string, start_offset, display_width=1, pad_string=" ")
67
+ *
68
+ * Returns a slice of a string based on display width, rather than character or
69
+ * bytes. I.e, the <code>start_offset</code> and <code>display_width</code>
70
+ * offsets index the columns required to display the string, and not individual
71
+ * characters or bytes.
72
+ *
73
+ * This is useful if you want to display a part of a string on screen, as you
74
+ * can pull out a specific portion by its display size.
75
+ *
76
+ * Padding: slicing can truncate multi-column characters. If the slice
77
+ * truncates a character, the string will be padded with
78
+ * <code>pad_string</code>, on the left side, right side, or both, as
79
+ * necessary. If <code>pad_string</code> is <code>nil</code> then no padding
80
+ * will be done. <code>pad_string</code> should be a single-column
81
+ * string for this to make sense.
82
+ *
83
+ * For Ruby 1.8, the string is assumed to be in the current locale's encoding.
84
+ * If not, terrible things will happen.
85
+ *
86
+ * For Ruby 1.9, the string will be magically converted from whatever encoding
87
+ * it is in, into the current locale's encoding, for processing. This may fail.
88
+ *
89
+ * The returned string WILL be in the current locale encoding, regardless of the
90
+ * encoding of the original string.
91
+ */
92
+ static VALUE slice(int argc, VALUE *argv, VALUE v_self) {
93
+ VALUE v_string, v_display_start, v_display_width, v_pad_string;
94
+ rb_scan_args(argc, argv, "22", &v_string, &v_display_start, &v_display_width, &v_pad_string);
95
+ Check_Type(v_string, T_STRING);
96
+
97
+ /* try and mimic String#slice's argument handling as much as possible */
98
+ int display_start = NUM2INT(v_display_start);
99
+ if(display_start < 0) display_start = NUM2LONG(display_width(v_self, v_string)) + display_start; // negative start means from the end of the string
100
+ if(display_start < 0) return Qnil; // but if you go too far, you fail
101
+
102
+ int display_width;
103
+ if(argc < 3) display_width = 1; // default value just like String#slice (although it makes slightly less sense)
104
+ else display_width = NUM2INT(v_display_width);
105
+ if(display_width < 0) return Qnil; // you fail
106
+
107
+ char* pad_string;
108
+ if(argc < 4) pad_string = default_pad_string; // only fill in default if unspecified; nil is a valid value
109
+ else if(v_pad_string == Qnil) pad_string = "";
110
+ else pad_string = RSTRING_PTR(v_pad_string);
111
+
112
+ /* see comments in display_width() */
113
+ #ifdef HAVE_RUBY_ENCODING_H
114
+ v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
115
+ #endif
116
+ char* string = RSTRING_PTR(v_string);
117
+
118
+ mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
119
+ long remaining_bytes = RSTRING_LEN(v_string);
120
+
121
+ // first, advance the string pointer so that we've seen display_start width characters
122
+ long current_width = 0;
123
+ while((remaining_bytes > 0) && (current_width < display_start)) {
124
+ wchar_t wc;
125
+ size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
126
+ if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
127
+
128
+ int width = wcwidth(wc);
129
+ if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character");
130
+
131
+ current_width += width;
132
+ remaining_bytes -= num_bytes;
133
+ string += num_bytes; // advance string pointer
134
+ }
135
+
136
+ /* here's a weird behavior (to me!) of String#slice that we emulate:
137
+ * if the start point is the string length itself, you get an empty
138
+ * string back; if the start point is greater than that, you get nil.
139
+ */
140
+ if((current_width < display_start)) return Qnil;
141
+
142
+ /* determine left padding */
143
+ char* pad_left = "";
144
+ if((current_width > display_start) && (display_width > 0)) pad_left = pad_string;
145
+
146
+ // now, advance the string_end pointer so that we've seen an additional display_width width characters
147
+ char* string_end = string;
148
+ current_width -= display_start;
149
+ while((remaining_bytes > 0) && (current_width < display_width)) {
150
+ wchar_t wc;
151
+ size_t num_bytes = mbrtowc(&wc, string_end, remaining_bytes, &mbs);
152
+ if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
153
+
154
+ int width = wcwidth(wc);
155
+ if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character");
156
+
157
+ if((current_width + width) > display_width) break; // have to stop here
158
+
159
+ current_width += width;
160
+ remaining_bytes -= num_bytes;
161
+ string_end += num_bytes; // advance string pointer
162
+ }
163
+
164
+ /* determine right padding */
165
+ char* pad_right = "";
166
+ if((current_width < display_width) && (remaining_bytes > 0)) pad_right = pad_string;
167
+
168
+ // finally, construct a new string
169
+ int bytesize = string_end - string;
170
+ int leftsize = strlen(pad_left);
171
+ int rightsize = strlen(pad_right);
172
+
173
+ char* new_string = calloc(bytesize + leftsize + rightsize + 1, sizeof(char));
174
+ if(leftsize > 0) strcpy(new_string, pad_left);
175
+ if(bytesize > 0) memcpy(new_string + leftsize, string, bytesize * sizeof(char));
176
+ if(rightsize > 0) strcpy(new_string + leftsize + bytesize, pad_right);
177
+
178
+ (new_string + bytesize + leftsize + rightsize)[0] = 0;
179
+
180
+ return rb_enc_str_new(new_string, bytesize + leftsize + rightsize, rb_enc_get(v_string));
181
+ }
182
+
183
+ /*
184
+ * A helper class for console-based programs that need to deal with non-ASCII
185
+ * code. If you are writing a curses/ncurses program, or otherwise care about
186
+ * the number of characters on the screen, this is crucial stuff.
187
+ *
188
+ * Provides:
189
+ *
190
+ * Console.display_width: get the number of display columns used by a string.
191
+ *
192
+ * Console.slice: get a substrig by display column offset and size.
193
+ *
194
+ */
195
+
196
+ void Init_console() {
197
+ VALUE cConsole;
198
+
199
+ cConsole = rb_define_class("Console", rb_cObject);
200
+ rb_define_module_function(cConsole, "display_width", display_width, 1);
201
+ rb_define_module_function(cConsole, "slice", slice, -1);
202
+ }
203
+
@@ -0,0 +1,2 @@
1
# Generates the Makefile used to build the "console" extension.
require "mkmf"

create_makefile "console"
data/test/console.rb ADDED
@@ -0,0 +1,122 @@
1
+ # encoding: utf-8
2
+
3
+ ## test/console.rb -- unit tests for ruby Console library
4
+ ## Author: William Morgan (mailto: wmorgan-ruby-console@masanjin.net)
5
+ ## Copyright: Copyright 2010 William Morgan
6
+ ## License: same terms as Ruby itself
7
+
8
+ require 'test/unit'
9
+ require 'console'
10
+
11
## Exercises Console.slice and Console.display_width against a string mixing
## double-column (Chinese) and single-column (ASCII, accented) characters.
class ConsoleTest < ::Test::Unit::TestCase
  def setup
    ## six display columns: 能 and 吞 take two each, a and ê take one each
    @s = "能吞aê"
  end

  def test_slice_of_zero_width_is_empty_string
    assert_equal "", Console.slice(@s, 0, 0)
    assert_equal "", Console.slice(@s, 1, 0)
  end

  def test_slice_out_of_bounds_is_nil
    assert_nil Console.slice(@s, 100, 3)
    assert_nil Console.slice(@s, -100, 3)
  end

  def test_slice_with_negative_offset
    assert_equal "ê", Console.slice(@s, -1, 1)
    assert_equal "aê", Console.slice(@s, -2, 2)
    assert_equal "a", Console.slice(@s, -2, 1)
  end

  def test_slice_width_argument_defaults_to_1
    assert_equal "ê", Console.slice(@s, -1)
    assert_equal "a", Console.slice(@s, -2)
  end

  def test_slice_works_on_chinese_characters
    assert_equal "能", Console.slice(@s, 0, 2)
    assert_equal "能吞", Console.slice(@s, 0, 4)
    assert_equal "能吞a", Console.slice(@s, 0, 5)
    assert_equal "能吞aê", Console.slice(@s, 0, 6)
  end

  def test_slice_with_excessive_width_is_still_cool
    assert_equal "能吞aê", Console.slice(@s, 0, 100)
    assert_equal "吞aê", Console.slice(@s, 2, 100)
    assert_equal "aê", Console.slice(@s, 4, 100)
    assert_equal "ê", Console.slice(@s, 5, 100)
    assert_equal "", Console.slice(@s, 6, 100) # yep, we get a non-nil at this value
    assert_nil Console.slice(@s, 7, 100)
  end

  def test_slice_with_the_biggest_valid_start_offset_behaves_like_String_slice_does
    assert_equal "", Console.slice(@s, 6, 100)
    assert_equal "", Console.slice(@s, 6, 0)
    assert_nil Console.slice(@s, 6, -1)
  end

  def test_slice_misaligned_start_offsets_get_padded
    assert_equal "", Console.slice(@s, 0, 0)
    assert_equal " ", Console.slice(@s, 0, 1)
    assert_equal "能", Console.slice(@s, 0, 2)
    assert_equal "能 ", Console.slice(@s, 0, 3)

    assert_equal "", Console.slice(@s, 1, 0)
    assert_equal " ", Console.slice(@s, 1, 1)
    ## both neighbours are truncated here: one pad on the left, one on the right
    assert_equal "  ", Console.slice(@s, 1, 2)
    assert_equal " 吞", Console.slice(@s, 1, 3)

    assert_equal "", Console.slice(@s, 3, 0)
    assert_equal " ", Console.slice(@s, 3, 1)
    assert_equal " a", Console.slice(@s, 3, 2)
    assert_equal " aê", Console.slice(@s, 3, 3)
  end

  def test_slice_misaligned_start_offsets_get_padded_with_specified_character
    assert_equal "", Console.slice(@s, 0, 0, "X")
    assert_equal "X", Console.slice(@s, 0, 1, "X")
    assert_equal "XX", Console.slice(@s, 1, 2, "X")
  end

  def test_slice_fails_on_nonstrings
    assert_raises(TypeError) { Console.slice :potato, 1, 1 }
    assert_raises(TypeError) { Console.slice 3, 1, 1 }
  end

  def test_display_width_empty_string_is_zero
    assert_equal 0, Console.display_width("")
  end

  def test_display_width_works_on_ASCII_strings
    assert_equal 1, Console.display_width("a")
    assert_equal 1, Console.display_width(" ")
    assert_equal 6, Console.display_width("potato")
  end

  def test_display_width_works_on_accented_characters
    assert_equal 1, Console.display_width("ê")
    assert_equal 4, Console.display_width("êêêê")
  end

  def test_display_width_works_on_chinese_characters
    assert_equal 2, Console.display_width("能")
    assert_equal 4, Console.display_width("能吞")
  end

  def test_display_width_works_on_mixed_stuff
    assert_equal 2, Console.display_width("aê")
    assert_equal 5, Console.display_width("能吞a")
    assert_equal 6, Console.display_width("能吞aê")
    assert_equal 6, Console.display_width("aê能吞")
    assert_equal 6, Console.display_width("a能ê吞")
    assert_equal 6, Console.display_width("能aê吞")
  end

  def test_display_width_fails_on_nonstrings
    assert_raises(TypeError) { Console.display_width :potato }
    assert_raises(TypeError) { Console.display_width 3 }
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: console
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ version: "0.1"
9
+ platform: ruby
10
+ authors:
11
+ - William Morgan
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2010-05-10 21:59:22 -04:00
17
+ default_executable:
18
+ dependencies: []
19
+
20
+ description: Console is a helper for displaying super-ASCII strings on the console. It turns out that all these funny foreign characters not only take up strange numbers of bytes, they also take up strange numbers of columns on the screen when you print them. Console provides utility methods for determining the display width of a string, and for taking a substring in a display-width-centric manner.
21
+ email: wmorgan-console@masanjin.net
22
+ executables: []
23
+
24
+ extensions:
25
+ - ext/console/extconf.rb
26
+ extra_rdoc_files: []
27
+
28
+ files:
29
+ - ext/console/extconf.rb
30
+ - ext/console/console.c
31
+ - test/console.rb
32
+ has_rdoc: true
33
+ homepage: http://console.rubyforge.org
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options: []
38
+
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ segments:
53
+ - 0
54
+ version: "0"
55
+ requirements: []
56
+
57
+ rubyforge_project:
58
+ rubygems_version: 1.3.6
59
+ signing_key:
60
+ specification_version: 3
61
+ summary: Console is a helper for displaying super-ASCII strings on the console.
62
+ test_files: []
63
+