console 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/console/console.c +203 -0
- data/ext/console/extconf.rb +2 -0
- data/test/console.rb +122 -0
- metadata +63 -0
@@ -0,0 +1,203 @@
|
|
1
|
+
/*
|
2
|
+
* console.c -- C extension implementing the ruby Console library
|
3
|
+
* Author: William Morgan (mailto: wmorgan-ruby-console@masanjin.net)
|
4
|
+
* Copyright: Copyright 2010 William Morgan
|
5
|
+
* License: same terms as Ruby itself
|
6
|
+
*/
|
7
|
+
|
8
|
+
#define _XOPEN_SOURCE
|
9
|
+
#include <wchar.h>
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <ruby.h>
|
12
|
+
#include <ruby/encoding.h>
|
13
|
+
|
14
|
+
/*
|
15
|
+
* call-seq: display_width(string)
|
16
|
+
*
|
17
|
+
* Returns the display width of <code>string</code>, that is, the number of
|
18
|
+
* columns that the string will take up when printed to screen. Note that this
|
19
|
+
* is different from the number of characters (some characters take up one
|
20
|
+
* column, some (e.g. Chinese characters) take up two columns), and the number
|
21
|
+
* of bytes (e.g. UTF-8 is a multibyte encoding) in a string.
|
22
|
+
*
|
23
|
+
* For Ruby 1.8, the string is assumed to be in the current locale's encoding.
|
24
|
+
* If not, terrible things will happen.
|
25
|
+
*
|
26
|
+
* For Ruby 1.9, the string will be magically converted from whatever encoding
|
27
|
+
* it is in, into the current locale's encoding, for processing. This may fail.
|
28
|
+
*/
|
29
|
+
static VALUE display_width(VALUE v_self, VALUE v_string) {
|
30
|
+
Check_Type(v_string, T_STRING);
|
31
|
+
|
32
|
+
/* for ruby 1.8, we assume the string is in your locale's CTYPE encoding
|
33
|
+
* already. if not, terrible things will happen.
|
34
|
+
*
|
35
|
+
* for ruby 1.9, we explicitly convert it to the locale's CTYPE encoding,
|
36
|
+
* like this:
|
37
|
+
*/
|
38
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
39
|
+
v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
|
40
|
+
#endif
|
41
|
+
char* string = RSTRING_PTR(v_string);
|
42
|
+
|
43
|
+
mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
|
44
|
+
long display_width = 0;
|
45
|
+
long remaining_bytes = RSTRING_LEN(v_string);
|
46
|
+
while(remaining_bytes > 0) {
|
47
|
+
wchar_t wc;
|
48
|
+
size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
|
49
|
+
if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
|
50
|
+
|
51
|
+
int width = wcwidth(wc);
|
52
|
+
if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character");
|
53
|
+
|
54
|
+
display_width += width;
|
55
|
+
remaining_bytes -= num_bytes;
|
56
|
+
string += num_bytes; // advance string pointer
|
57
|
+
}
|
58
|
+
|
59
|
+
return LONG2NUM(display_width);
|
60
|
+
}
|
61
|
+
|
62
|
+
/* Padding that slice() uses when the caller passes no pad_string argument. */
static const char* default_pad_string = " ";
|
63
|
+
|
64
|
+
/*
|
65
|
+
* call-seq:
|
66
|
+
* slice(string, start_offset, display_width=1, pad_string=" ")
|
67
|
+
*
|
68
|
+
* Returns a slice of a string based on display width, rather than character or
|
69
|
+
* bytes. I.e, the <code>start_offset</code> and <code>display_width</code>
|
70
|
+
* offsets index the columns required to display the string, and not individual
|
71
|
+
* characters or bytes.
|
72
|
+
*
|
73
|
+
* This is useful if you want to display a part of a string on screen, as you
|
74
|
+
* can pull out a specific portion by its display size.
|
75
|
+
*
|
76
|
+
* Padding: slicing can truncate multi-column characters. If the slice
|
77
|
+
* truncates a character, the string will be padded with
|
78
|
+
* <code>pad_string</code>, on the left side, right side, or both, as
|
79
|
+
* necessary. If <code>pad_string</code> is <code>nil</code> then no padding
|
80
|
+
* will be done. <code>pad_string</code> should be a single-column
|
81
|
+
* string for this to make sense.
|
82
|
+
*
|
83
|
+
* For Ruby 1.8, the string is assumed to be in the current locale's encoding.
|
84
|
+
* If not, terrible things will happen.
|
85
|
+
*
|
86
|
+
* For Ruby 1.9, the string will be magically converted from whatever encoding
|
87
|
+
* it is in, into the current locale's encoding, for processing. This may fail.
|
88
|
+
*
|
89
|
+
* The returned string WILL be in the current locale encoding, regardless of the
|
90
|
+
* encoding of the original string.
|
91
|
+
*/
|
92
|
+
static VALUE slice(int argc, VALUE *argv, VALUE v_self) {
|
93
|
+
VALUE v_string, v_display_start, v_display_width, v_pad_string;
|
94
|
+
rb_scan_args(argc, argv, "22", &v_string, &v_display_start, &v_display_width, &v_pad_string);
|
95
|
+
Check_Type(v_string, T_STRING);
|
96
|
+
|
97
|
+
/* try and mimic String#slice's argument handling as much as possible */
|
98
|
+
int display_start = NUM2INT(v_display_start);
|
99
|
+
if(display_start < 0) display_start = NUM2LONG(display_width(v_self, v_string)) + display_start; // negative start means from the end of the string
|
100
|
+
if(display_start < 0) return Qnil; // but if you go too far, you fail
|
101
|
+
|
102
|
+
int display_width;
|
103
|
+
if(argc < 3) display_width = 1; // default value just like String#slice (although it makes slightly less sense)
|
104
|
+
else display_width = NUM2INT(v_display_width);
|
105
|
+
if(display_width < 0) return Qnil; // you fail
|
106
|
+
|
107
|
+
char* pad_string;
|
108
|
+
if(argc < 4) pad_string = default_pad_string; // only fill in default if unspecified; nil is a valid value
|
109
|
+
else if(v_pad_string == Qnil) pad_string = "";
|
110
|
+
else pad_string = RSTRING_PTR(v_pad_string);
|
111
|
+
|
112
|
+
/* see comments in display_width() */
|
113
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
114
|
+
v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
|
115
|
+
#endif
|
116
|
+
char* string = RSTRING_PTR(v_string);
|
117
|
+
|
118
|
+
mbstate_t mbs; memset(&mbs, 0, sizeof(mbs));
|
119
|
+
long remaining_bytes = RSTRING_LEN(v_string);
|
120
|
+
|
121
|
+
// first, advance the string pointer so that we've seen display_start width characters
|
122
|
+
long current_width = 0;
|
123
|
+
while((remaining_bytes > 0) && (current_width < display_start)) {
|
124
|
+
wchar_t wc;
|
125
|
+
size_t num_bytes = mbrtowc(&wc, string, remaining_bytes, &mbs);
|
126
|
+
if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
|
127
|
+
|
128
|
+
int width = wcwidth(wc);
|
129
|
+
if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character");
|
130
|
+
|
131
|
+
current_width += width;
|
132
|
+
remaining_bytes -= num_bytes;
|
133
|
+
string += num_bytes; // advance string pointer
|
134
|
+
}
|
135
|
+
|
136
|
+
/* here's a weird behavior (to me!) of String#slice that we emulate:
|
137
|
+
* if the start point is the string length itself, you get an empty
|
138
|
+
* string back; if the start point is greater than that, you get nil.
|
139
|
+
*/
|
140
|
+
if((current_width < display_start)) return Qnil;
|
141
|
+
|
142
|
+
/* determine left padding */
|
143
|
+
char* pad_left = "";
|
144
|
+
if((current_width > display_start) && (display_width > 0)) pad_left = pad_string;
|
145
|
+
|
146
|
+
// now, advance the string_end pointer so that we've seen an additional display_width width characters
|
147
|
+
char* string_end = string;
|
148
|
+
current_width -= display_start;
|
149
|
+
while((remaining_bytes > 0) && (current_width < display_width)) {
|
150
|
+
wchar_t wc;
|
151
|
+
size_t num_bytes = mbrtowc(&wc, string_end, remaining_bytes, &mbs);
|
152
|
+
if(num_bytes == 0) rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", RSTRING_LEN(v_string) - remaining_bytes);
|
153
|
+
|
154
|
+
int width = wcwidth(wc);
|
155
|
+
if(width == 0) rb_raise(rb_eArgError, "bad string: non-printable character");
|
156
|
+
|
157
|
+
if((current_width + width) > display_width) break; // have to stop here
|
158
|
+
|
159
|
+
current_width += width;
|
160
|
+
remaining_bytes -= num_bytes;
|
161
|
+
string_end += num_bytes; // advance string pointer
|
162
|
+
}
|
163
|
+
|
164
|
+
/* determine right padding */
|
165
|
+
char* pad_right = "";
|
166
|
+
if((current_width < display_width) && (remaining_bytes > 0)) pad_right = pad_string;
|
167
|
+
|
168
|
+
// finally, construct a new string
|
169
|
+
int bytesize = string_end - string;
|
170
|
+
int leftsize = strlen(pad_left);
|
171
|
+
int rightsize = strlen(pad_right);
|
172
|
+
|
173
|
+
char* new_string = calloc(bytesize + leftsize + rightsize + 1, sizeof(char));
|
174
|
+
if(leftsize > 0) strcpy(new_string, pad_left);
|
175
|
+
if(bytesize > 0) memcpy(new_string + leftsize, string, bytesize * sizeof(char));
|
176
|
+
if(rightsize > 0) strcpy(new_string + leftsize + bytesize, pad_right);
|
177
|
+
|
178
|
+
(new_string + bytesize + leftsize + rightsize)[0] = 0;
|
179
|
+
|
180
|
+
return rb_enc_str_new(new_string, bytesize + leftsize + rightsize, rb_enc_get(v_string));
|
181
|
+
}
|
182
|
+
|
183
|
+
/*
|
184
|
+
* A helper class for console-based programs that need to deal with non-ASCII
|
185
|
+
* code. If you are writing a curses/ncurses program, or otherwise care about
|
186
|
+
* the number of characters on the screen, this is crucial stuff.
|
187
|
+
*
|
188
|
+
* Provides:
|
189
|
+
*
|
190
|
+
* Console.display_width: get the number of display columns used by a string.
|
191
|
+
*
|
192
|
+
* Console.slice: get a substring by display column offset and size.
|
193
|
+
*
|
194
|
+
*/
|
195
|
+
|
196
|
+
void Init_console() {
|
197
|
+
VALUE cConsole;
|
198
|
+
|
199
|
+
cConsole = rb_define_class("Console", rb_cObject);
|
200
|
+
rb_define_module_function(cConsole, "display_width", display_width, 1);
|
201
|
+
rb_define_module_function(cConsole, "slice", slice, -1);
|
202
|
+
}
|
203
|
+
|
data/test/console.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
## test/console.rb -- unit tests for ruby Console library
|
4
|
+
## Author: William Morgan (mailto: wmorgan-ruby-console@masanjin.net)
|
5
|
+
## Copyright: Copyright 2010 William Morgan
|
6
|
+
## License: same terms as Ruby itself
|
7
|
+
|
8
|
+
require 'test/unit'
|
9
|
+
require 'console'
|
10
|
+
|
11
|
+
## Unit tests for Console.slice and Console.display_width, exercised on a
## string that mixes two-column (Chinese) and one-column characters.
class ConsoleTest < ::Test::Unit::TestCase
  def setup
    @s = "能吞aê"
  end

  def test_slice_of_zero_width_is_empty_string
    assert_equal "", Console.slice(@s, 0, 0)
    assert_equal "", Console.slice(@s, 1, 0)
  end

  def test_slice_out_of_bounds_is_nil
    assert_nil Console.slice(@s, 100, 3)
    assert_nil Console.slice(@s, -100, 3)
  end

  def test_slice_with_negative_offset
    assert_equal "ê", Console.slice(@s, -1, 1)
    assert_equal "aê", Console.slice(@s, -2, 2)
    assert_equal "a", Console.slice(@s, -2, 1)
  end

  def test_slice_width_argument_defaults_to_1
    assert_equal "ê", Console.slice(@s, -1)
    assert_equal "a", Console.slice(@s, -2)
  end

  def test_slice_works_on_chinese_characters
    assert_equal "能", Console.slice(@s, 0, 2)
    assert_equal "能吞", Console.slice(@s, 0, 4)
    assert_equal "能吞a", Console.slice(@s, 0, 5)
    assert_equal "能吞aê", Console.slice(@s, 0, 6)
  end

  def test_slice_with_excessive_width_is_still_cool
    assert_equal "能吞aê", Console.slice(@s, 0, 100)
    assert_equal "吞aê", Console.slice(@s, 2, 100)
    assert_equal "aê", Console.slice(@s, 4, 100)
    assert_equal "ê", Console.slice(@s, 5, 100)
    assert_equal "", Console.slice(@s, 6, 100) # yep, we get a non-nil at this value
    assert_nil Console.slice(@s, 7, 100)
  end

  def test_slice_with_the_biggest_valid_start_offset_behaves_like_String_slice_does
    assert_equal "", Console.slice(@s, 6, 100)
    assert_equal "", Console.slice(@s, 6, 0)
    assert_nil Console.slice(@s, 6, -1)
  end

  def test_slice_misaligned_start_offsets_get_padded
    assert_equal "", Console.slice(@s, 0, 0)
    assert_equal " ", Console.slice(@s, 0, 1)
    assert_equal "能", Console.slice(@s, 0, 2)
    assert_equal "能 ", Console.slice(@s, 0, 3)

    assert_equal "", Console.slice(@s, 1, 0)
    assert_equal " ", Console.slice(@s, 1, 1)
    # both neighbouring characters are cut in half, so two pad columns are
    # expected here (compare the "XX" assertion in the next test)
    assert_equal "  ", Console.slice(@s, 1, 2)
    assert_equal " 吞", Console.slice(@s, 1, 3)

    assert_equal "", Console.slice(@s, 3, 0)
    assert_equal " ", Console.slice(@s, 3, 1)
    assert_equal " a", Console.slice(@s, 3, 2)
    assert_equal " aê", Console.slice(@s, 3, 3)
  end

  def test_slice_misaligned_start_offsets_get_padded_with_specified_character
    assert_equal "", Console.slice(@s, 0, 0, "X")
    assert_equal "X", Console.slice(@s, 0, 1, "X")
    assert_equal "XX", Console.slice(@s, 1, 2, "X")
  end

  def test_slice_fails_on_nonstrings
    assert_raises(TypeError) { Console.slice :potato, 1, 1 }
    assert_raises(TypeError) { Console.slice 3, 1, 1 }
  end

  def test_display_width_empty_string_is_zero
    assert_equal 0, Console.display_width("")
  end

  def test_display_width_works_on_ASCII_strings
    assert_equal 1, Console.display_width("a")
    assert_equal 1, Console.display_width(" ")
    assert_equal 6, Console.display_width("potato")
  end

  def test_display_width_works_on_accented_characters
    assert_equal 1, Console.display_width("ê")
    assert_equal 4, Console.display_width("êêêê")
  end

  def test_display_width_works_on_chinese_characters
    assert_equal 2, Console.display_width("能")
    assert_equal 4, Console.display_width("能吞")
  end

  def test_display_width_works_on_mixed_stuff
    assert_equal 2, Console.display_width("aê")
    assert_equal 5, Console.display_width("能吞a")
    assert_equal 6, Console.display_width("能吞aê")
    assert_equal 6, Console.display_width("aê能吞")
    assert_equal 6, Console.display_width("a能ê吞")
    assert_equal 6, Console.display_width("能aê吞")
  end

  def test_display_width_fails_on_nonstrings
    assert_raises(TypeError) { Console.display_width :potato }
    assert_raises(TypeError) { Console.display_width 3 }
  end
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: console
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
version: "0.1"
|
9
|
+
platform: ruby
|
10
|
+
authors:
|
11
|
+
- William Morgan
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
|
16
|
+
date: 2010-05-10 21:59:22 -04:00
|
17
|
+
default_executable:
|
18
|
+
dependencies: []
|
19
|
+
|
20
|
+
description: Console is a helper for displaying super-ASCII strings on the console. It turns out that all these funny foreign characters not only take up strange numbers of bytes, they also take up strange numbers of columns on the screen when you print them. Console provides utility methods for determining the display width of a string, and for taking a substring in a display-width-centric manner.
|
21
|
+
email: wmorgan-console@masanjin.net
|
22
|
+
executables: []
|
23
|
+
|
24
|
+
extensions:
|
25
|
+
- ext/console/extconf.rb
|
26
|
+
extra_rdoc_files: []
|
27
|
+
|
28
|
+
files:
|
29
|
+
- ext/console/extconf.rb
|
30
|
+
- ext/console/console.c
|
31
|
+
- test/console.rb
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://console.rubyforge.org
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
segments:
|
53
|
+
- 0
|
54
|
+
version: "0"
|
55
|
+
requirements: []
|
56
|
+
|
57
|
+
rubyforge_project:
|
58
|
+
rubygems_version: 1.3.6
|
59
|
+
signing_key:
|
60
|
+
specification_version: 3
|
61
|
+
summary: Console is a helper for displaying super-ASCII strings on the console.
|
62
|
+
test_files: []
|
63
|
+
|