joker 0.0.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,8 @@
1
+ # 1.0.0 #
2
+
3
+ * Re-release as C extension
4
+
5
+
1
6
  # 0.0.1 #
2
7
 
3
8
  * Initial release
@@ -9,14 +9,19 @@ for Ruby.
9
9
 
10
10
  * Behaves much like Regexp
11
11
  * ` * ` and ` ? ` as wildcard characters
12
- * ` \ ` for escaping
13
- * `\a` matches `\a`, but not `a`
12
+ * ` \ ` for escaping:
13
+ ` \? ` matches ` ? `,
14
+ ` \* ` matches ` * `,
15
+ ` \[ ` matches ` [ `,
16
+ ` \] ` matches ` ] `,
17
+ * But for all other characters:
18
+ ` \a ` matches ` \a `, but not ` a `
14
19
  * Wildcards must always match whole string
20
+ (thus ` uiae ` will only match the string ` uiae `)
15
21
  * Wildcards can be case insensitive
16
22
 
17
23
  ## Installation ##
18
24
 
19
- gem install karottenreibe-joker --source http://gems.github.com
20
25
  gem install joker
21
26
 
22
27
  ## Usage ##
data/Rakefile CHANGED
@@ -1,20 +1,7 @@
1
- require 'jeweler'
2
1
 
3
- task :build => :gemspec
4
-
5
- Jeweler::Tasks.new do |gem|
6
- gem.name = 'joker'
7
- gem.summary = gem.description =
8
- 'Joker is a simple wildcard implementation that works much like Regexps'
9
- gem.email = 'karottenreibe@gmail.com'
10
- gem.homepage = 'http://karottenreibe.github.com/joker'
11
- gem.authors = ['Fabian Streitel']
12
- gem.rubyforge_project = 'k-gems'
13
- end
14
-
15
- Jeweler::RubyforgeTasks.new
16
-
17
- task :test do
18
- sh 'bacon -Ilib test/test_*.rb'
19
- end
2
+ load "tasks/jeweler.rake"
3
+ load "tasks/rdoc.rake"
4
+ load "tasks/c_extensions.rake"
5
+ load "tasks/test.rake"
6
+ load "tasks/shortcuts.rake"
20
7
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 1.0.0
@@ -0,0 +1,72 @@
1
+ #include <malloc.h>
2
+ #include "Joker.h"
3
+ #include "Wildcard.h"
4
+ #include "compile.h"
5
+ #include "match.h"
6
+
7
+
8
+ void Init_joker_native(void) // {{{1
9
+ {
10
+ class_Wildcard = rb_define_class("Wildcard", rb_cObject);
11
+ rb_define_singleton_method(class_Wildcard, "new", class_method_new, -1);
12
+ rb_define_method(class_Wildcard, "=~", instance_operator_match, 1);
13
+ rb_define_method(class_Wildcard, "===", instance_operator_match, 1);
14
+ }
15
+
16
+
17
+ VALUE instance_operator_match(self, object) // {{{1
18
+ VALUE self;
19
+ VALUE object;
20
+ {
21
+ Wildcard * wildcard;
22
+ const char * cstring;
23
+ long int length;
24
+ int casefold;
25
+
26
+ // check types and get the C representation of stuff
27
+ Check_Type(object, T_STRING);
28
+ Data_Get_Struct(self, Wildcard, wildcard);
29
+ cstring = rb_str2cstr(object, &length);
30
+ casefold = RTEST(rb_iv_get(self, "@casefold"));
31
+
32
+ // match and return the result
33
+ if (Wildcard_match(wildcard, cstring, length, casefold)) {
34
+ return Qtrue;
35
+ } else {
36
+ return Qfalse;
37
+ }
38
+ }
39
+
40
+
41
+ VALUE class_method_new(argc, argv, klass) // {{{1
42
+ int argc;
43
+ VALUE * argv;
44
+ VALUE klass;
45
+ {
46
+ VALUE wildcard_string;
47
+ VALUE casefold;
48
+ VALUE new_object;
49
+ Wildcard * new_wildcard;
50
+ const char * wildcard_cstring;
51
+ long int string_length;
52
+
53
+ // check arity and fill in defaults
54
+ rb_scan_args(argc, argv, "11", &wildcard_string, &casefold);
55
+ if (NIL_P(casefold)) {
56
+ casefold = Qfalse;
57
+ }
58
+
59
+ // get C representation of stuff and create Wildcard
60
+ wildcard_cstring = rb_str2cstr(wildcard_string, &string_length);
61
+ new_wildcard = Wildcard_compile(wildcard_cstring, string_length);
62
+ // wrap wildcard
63
+ new_object = Data_Wrap_Struct(klass, NULL, Wildcard_free, new_wildcard);
64
+
65
+ // set instance variables
66
+ rb_iv_set(new_object, "@casefold", casefold);
67
+ rb_iv_set(new_object, "@source", wildcard_string);
68
+
69
+ return new_object;
70
+ }
71
+
72
+
@@ -0,0 +1,53 @@
1
+ #ifndef JOKER_H_GUARD
2
+ #define JOKER_H_GUARD
3
+
4
+ #include "ruby.h"
5
+
6
+
7
+ VALUE class_Wildcard;
8
/* Legacy declaration, kept for compatibility; NOTE(review): no definition
 * of Init_joker is visible in this extension - confirm and remove. */
void Init_joker(void);
/* Entry point of the `joker_native` extension (defined in Joker.c). */
void Init_joker_native(void);
9
+
10
+
11
+ /*
12
+ * call-seq:
13
+ * Wildcard.new(wildcard_string, casefold = false) -> Wildcard
14
+ *
15
+ * Creates a new Wildcard from the given string.
16
+ * If casefold is true, the Wildcard will ignore case.
17
+ *
18
+ * Raises a SyntaxError if the given string could not
19
+ * be interpreted as a Wildcard.
20
+ *
21
+ * Issues warnings to the console if the given Wildcard
22
+ * was malformed.
23
+ *
24
+ */
25
+ VALUE class_method_new(int argc, VALUE * argv, VALUE klass);
26
+
27
+
28
+ /*
29
+ * call-seq:
30
+ * wildcard =~ 'string' -> true or false
31
+ * 'string' =~ wildcard -> true or false
32
+ * wildcard === 'string' -> true or false
33
+ *
34
+ * Matches the Wildcard against the given string.
35
+ *
36
+ * NOTE: Since a wildcard has to match the whole string,
37
+ * this method only returns true or false, not the position
38
+ * of the match.
39
+ *
40
+ * Wildcard['*fairy*'] =~ 'I love fairycake' #=> true
41
+ * 'I love fairycake' =~ Wildcard['*dairy*'] #=> false
42
+ *
43
+ * case 'I love fairycake'
44
+ * when Wildcard['*fairy*'] then puts 'fairy!'
45
+ * else puts 'no fairy...'
46
+ * end
47
+ *
48
+ */
49
+ VALUE instance_operator_match(VALUE self, VALUE string);
50
+
51
+
52
+ #endif
53
+
@@ -0,0 +1,22 @@
1
+ #include <malloc.h>
2
+ #include <stddef.h>
3
+ #include <Wildcard.h>
4
+
5
+
6
+
7
+ void Wildcard_free(wildcard) // {{{1
8
+ Wildcard * wildcard;
9
+ {
10
+ free(wildcard->first);
11
+ free(wildcard);
12
+ }
13
+
14
+
15
+ void Wildcard_enlarge(wildcard) // {{{1
16
+ Wildcard * wildcard;
17
+ {
18
+ wildcard->length += 2;
19
+ wildcard->first = realloc(wildcard->first, wildcard->length * sizeof(char));
20
+ wildcard->last = wildcard->first + wildcard->length - 2;
21
+ }
22
+
@@ -0,0 +1,40 @@
1
+ #ifndef WILDCARD_H_GUARD
2
+ #define WILDCARD_H_GUARD
3
+
4
+
5
+ /*
6
+ * The different kinds of Wildcard components.
7
+ *
8
+ */
9
+ typedef enum { // stored as the first char of every 2-char wildcard part
10
+ Kleene = 0, // `*` (the compiler collapses runs of `*` into one part)
11
+ Fixed = 1, // a literal character outside of [...]
12
+ Group = 2, // one character listed inside [...]
13
+ Wild = 3, // `?`
14
+ EOW = 4, // end of wildcard; only used in matching, never stored in a part
15
+ } WildcardType;
16
+
17
+ /*
18
+ * Represents a Wildcard internally.
19
+ *
20
+ */
21
+ typedef struct { // a compiled wildcard: a flat buffer of 2-char parts (type, data)
22
+ char * first; // The first Wildcard part (points to the first of its 2 chars); NULL while empty
23
+ char * last; // The last Wildcard part (points to the first of its 2 chars); NULL while empty
24
+ long int length; // How many chars there are in the buffer (not parts!), i.e. 2 per part
25
+ } Wildcard;
26
+
27
+
28
+ void Wildcard_free(Wildcard * wildcard);
29
+
30
+
31
+ /*
32
+ * Adds two additional characters at the end
33
+ * and adjusts all the pointers.
34
+ *
35
+ */
36
+ void Wildcard_enlarge(Wildcard * wildcard);
37
+
38
+
39
+ #endif
40
+
@@ -0,0 +1,158 @@
1
+ #include <malloc.h>
2
+ #include <stdio.h>
3
+ #include <stddef.h>
4
+ #include <string.h>
5
+ #include <string.h>
6
+ #include <ruby.h>
7
+ #include "compile.h"
8
+
9
+
10
/*
 * Maps an input character to its column in the compiler's transition
 * table.
 *
 * cchar - the character to classify
 *
 * Returns 0 for `\`, 1 for `[`, 2 for `]`, 3 for `*`, 4 for `?` and 5
 * for every other character.
 *
 * Written with a prototype instead of the old-style (K&R) parameter
 * list, which is obsolescent and no longer valid in C23.
 */
static int hash(const char cchar) // {{{1
{
    switch (cchar) {
        case '\\':
            return 0;
        case '[':
            return 1;
        case ']':
            return 2;
        case '*':
            return 3;
        case '?':
            return 4;
        default:
            return 5;
    }
}
28
+
29
+
30
+ static void push(type, cchar, wildcard) // {{{1
31
+ const WildcardType type;
32
+ const char cchar;
33
+ Wildcard * wildcard;
34
+ {
35
+ Wildcard_enlarge(wildcard);
36
+ *wildcard->last = (char) type;
37
+ *(wildcard->last + 1) = cchar;
38
+ }
39
+
40
+
41
+ static void do_transition(transition, input, state, wildcard) // {{{1
42
+ const char transition;
43
+ const char input;
44
+ int * state;
45
+ Wildcard * wildcard;
46
+ {
47
+ switch (transition) {
48
+ case 0:
49
+ *state = 1;
50
+ break;
51
+ case 1:
52
+ *state = 2;
53
+ break;
54
+ case 2:
55
+ push(Fixed, input, wildcard);
56
+ rb_warning("wildcard has `]' without escape");
57
+ break;
58
+ case 3:
59
+ // refactor ** --> *
60
+ if (wildcard->last == NULL || *wildcard->last != (char) Kleene) {
61
+ push(Kleene, '*', wildcard);
62
+ }
63
+ break;
64
+ case 4:
65
+ push(Wild, '?', wildcard);
66
+ break;
67
+ case 5:
68
+ push(Fixed, input, wildcard);
69
+ break;
70
+ case 6:
71
+ *state = -1;
72
+ break;
73
+ case 7:
74
+ *state = 0;
75
+ push(Fixed, input, wildcard);
76
+ break;
77
+ case 8:
78
+ *state = 0;
79
+ push(Fixed, '\\', wildcard);
80
+ push(Fixed, input, wildcard);
81
+ break;
82
+ case 9:
83
+ *state = -1;
84
+ push(Fixed, '\\', wildcard);
85
+ break;
86
+ case 10:
87
+ *state = 3;
88
+ break;
89
+ case 11:
90
+ push(Group, input, wildcard);
91
+ rb_warning("character class has `[' without escape");
92
+ break;
93
+ case 12:
94
+ *state = 0;
95
+ break;
96
+ case 13:
97
+ push(Group, input, wildcard);
98
+ break;
99
+ case 14:
100
+ *state = -1;
101
+ rb_raise(rb_eSyntaxError, "premature end of wildcard");
102
+ break;
103
+ case 15:
104
+ *state = 2;
105
+ push(Group, input, wildcard);
106
+ break;
107
+ case 16:
108
+ *state = 2;
109
+ push(Group, '\\', wildcard);
110
+ push(Group, input, wildcard);
111
+ break;
112
+ default:
113
+ rb_fatal("Wildcard compilation state machine failure. This is a bug in Joker!");
114
+ }
115
+ }
116
+
117
+
118
+ Wildcard * Wildcard_compile(cstring, len) // {{{1
119
+ const char * cstring;
120
+ const long int len;
121
+ {
122
+ // the table that maps (state x input) -> transition
123
+ const char transition_table[4][7] = {
124
+ // \ [ ] * ? any EOS
125
+ { 0, 1, 2, 3, 4, 5, 6},
126
+ { 7, 7, 7, 7, 7, 8, 9},
127
+ {10, 11, 12, 13, 13, 13, 14},
128
+ {15, 15, 15, 16, 16, 16, 14}
129
+ };
130
+ int state = 0;
131
+
132
+ Wildcard * wildcard;
133
+ long int p;
134
+ char input;
135
+ int hashed;
136
+ char transition;
137
+
138
+ wildcard = malloc(sizeof(Wildcard));
139
+ wildcard->length = 0;
140
+ wildcard->first = NULL;
141
+ wildcard->last = NULL;
142
+
143
+ // for each char:
144
+ for (p = 0; p < len; p++) {
145
+ // get the input, it's type and what transition to make
146
+ input = cstring[p];
147
+ hashed = hash(input);
148
+ transition = transition_table[state][hashed];
149
+ // and execute the transition
150
+ do_transition(transition, input, &state, wildcard);
151
+ }
152
+
153
+ // finally: execute the finishing transition
154
+ transition = transition_table[state][6];
155
+ do_transition(transition, '\0', &state, wildcard);
156
+ return wildcard;
157
+ }
158
+
@@ -0,0 +1,16 @@
1
+ #ifndef COMPILE_H_GUARD
2
+ #define COMPILE_H_GUARD
3
+
4
+ #include "Wildcard.h"
5
+
6
+
7
+ /*
8
+ * Compiles an input string to the internal Wildcard
9
+ * representation.
10
+ *
11
+ */
12
+ Wildcard * Wildcard_compile(const char * cstring, const long int len);
13
+
14
+
15
+ #endif
16
+
@@ -0,0 +1,6 @@
1
+ require 'mkmf' # Ruby's standard Makefile generator for C extensions
2
+
3
+ extension_name = 'joker_native' # Ruby calls Init_<extension_name> when loading the library
4
+ dir_config(extension_name)
5
+ create_makefile(extension_name) # writes the Makefile that builds the extension
6
+
@@ -0,0 +1,272 @@
1
+ #include <malloc.h>
2
+ #include <stdio.h>
3
+ #include <stddef.h>
4
+ #include <string.h>
5
+ #include <ruby.h>
6
+ #include <ctype.h>
7
+ #include "match.h"
8
+
9
+ #define SUCCESS_STATE 42
10
+ #define FAILURE_STATE 23
11
+
12
+
13
+ typedef struct { // one matching cursor; Wildcard_match runs one from each end
14
+ const char * input; // current position in the string being matched
15
+ const char * wildcard; // current wildcard part (first of its 2 chars)
16
+ const char * pushed_input; // input position saved by push(); NULL if nothing is saved
17
+ const char * pushed_wildcard; // wildcard position saved by push(); NULL if nothing is saved
18
+ int state; // row of the transition table, or SUCCESS_STATE / FAILURE_STATE
19
+ void (*inc)(const char **, int); // left_inc or right_inc: steps a pointer in this cursor's direction
20
+ } StateMachine;
21
+
22
+
23
+ typedef struct { // the complete state of one Wildcard_match call
24
+ StateMachine * left; // cursor that starts at the beginning and moves forward
25
+ StateMachine * right; // cursor that starts at the end and moves backward
26
+ StateMachine * active; // the cursor currently being advanced (left or right)
27
+ } MatchData;
28
+
29
+
30
/*
 * Step function of the left cursor: moves *pointer forward
 * (towards higher addresses) by offset chars.
 *
 * Written with a prototype instead of the old-style (K&R) parameter
 * list, which is obsolescent and no longer valid in C23.
 */
static void left_inc(const char ** pointer, int offset) // {{{1
{
    *pointer += offset;
}
36
+
37
+
38
/*
 * Step function of the right cursor: moves *pointer backward
 * (towards lower addresses) by offset chars.
 *
 * Written with a prototype instead of the old-style (K&R) parameter
 * list, which is obsolescent and no longer valid in C23.
 */
static void right_inc(const char ** pointer, int offset) // {{{1
{
    *pointer -= offset;
}
44
+
45
+ static int matches(type, data, input, eos, casefold) // {{{1
46
+ WildcardType type;
47
+ const char data;
48
+ const char input;
49
+ bool eos;
50
+ bool casefold;
51
+ {
52
+ switch(type) {
53
+ case Fixed:
54
+ case Group:
55
+ if (casefold) {
56
+ return !eos && tolower(input) == tolower(data);
57
+ } else {
58
+ return !eos && input == data;
59
+ }
60
+ case Wild:
61
+ return !eos && input != '\0';
62
+ case Kleene:
63
+ return 1;
64
+ case EOW:
65
+ return eos;
66
+ default:
67
+ rb_raise(rb_eSyntaxError, "corrupted wildcard");
68
+ return 0;
69
+ }
70
+ }
71
+
72
+
73
+ static bool eow(match_data) // {{{1
74
+ MatchData * match_data;
75
+ {
76
+ return match_data->left->wildcard == NULL || match_data->left->wildcard > match_data->right->wildcard;
77
+ }
78
+
79
+
80
+ static bool eos(match_data) // {{{1
81
+ MatchData * match_data;
82
+ {
83
+ return match_data->left->input > match_data->right->input;
84
+ }
85
+
86
+
87
+ static void push(match_data) // {{{1
88
+ MatchData * match_data;
89
+ {
90
+ StateMachine * sm;
91
+
92
+ if (!eos(match_data)) {
93
+ sm = match_data->active;
94
+ sm->pushed_input = sm->input;
95
+ sm->pushed_wildcard = sm->wildcard;
96
+ }
97
+ }
98
+
99
+
100
+ static void pull(match_data) // {{{1
101
+ MatchData * match_data;
102
+ {
103
+ StateMachine * sm;
104
+
105
+ sm = match_data->active;
106
+ if (sm->pushed_input == NULL) {
107
+ sm->state = FAILURE_STATE;
108
+ } else {
109
+ sm->input = sm->pushed_input;
110
+ sm->wildcard = sm->pushed_wildcard;
111
+ sm->pushed_input = NULL;
112
+ sm->pushed_wildcard = NULL;
113
+ (*sm->inc)(&sm->input, 1);
114
+
115
+ if (sm == match_data->left) {
116
+ match_data->active = match_data->right;
117
+ } else {
118
+ match_data->active = match_data->left;
119
+ }
120
+ }
121
+ }
122
+
123
+
124
+ static void do_transition(transition, match_data) // {{{1
125
+ const char transition;
126
+ MatchData * match_data;
127
+ {
128
+ StateMachine * sm;
129
+
130
+ sm = match_data->active;
131
+ switch (transition) {
132
+ case 0:
133
+ push(match_data);
134
+ (*sm->inc)(&sm->wildcard, 2);
135
+ break;
136
+ case 1:
137
+ sm->state = 1;
138
+ break;
139
+ case 2:
140
+ sm->state = 2;
141
+ break;
142
+ case 3:
143
+ sm->state = 4;
144
+ break;
145
+ case 4:
146
+ // does no exist
147
+ break;
148
+ case 5:
149
+ sm->state = SUCCESS_STATE;
150
+ break;
151
+ case 6:
152
+ sm->state = 0;
153
+ (*sm->inc)(&sm->wildcard, 2);
154
+ (*sm->inc)(&sm->input, 1);
155
+ break;
156
+ case 7:
157
+ sm->state = 0;
158
+ pull(match_data);
159
+ break;
160
+ case 8:
161
+ (*sm->inc)(&sm->wildcard, 2);
162
+ break;
163
+ case 9:
164
+ sm->state = 3;
165
+ (*sm->inc)(&sm->wildcard, 2);
166
+ (*sm->inc)(&sm->input, 1);
167
+ break;
168
+ case 10:
169
+ sm->state = 0;
170
+ break;
171
+ case 11:
172
+ (*sm->inc)(&sm->wildcard, 2);
173
+ break;
174
+ case 12:
175
+ sm->state = 0;
176
+ (*sm->inc)(&sm->wildcard, 2);
177
+ (*sm->inc)(&sm->input, 1);
178
+ break;
179
+ default:
180
+ rb_fatal("Wildcard matching state machine failure. This is a bug in Joker!");
181
+ }
182
+ }
183
+
184
+
185
+ bool Wildcard_match(wildcard, cstring, len, casefold) // {{{1
186
+ Wildcard * wildcard;
187
+ const char * cstring;
188
+ const long int len;
189
+ bool casefold;
190
+ {
191
+ // the table that maps (match x state x type) -> transition
192
+ const char transition_table[2][5][5] = {
193
+ // fail
194
+ {
195
+ // kleene, fixed, group, wild, EOW
196
+ { 0, 1, 2, 3, 7}, // basic
197
+ { 7, 7, 7, 7, 7}, // fixed
198
+ { 7, 7, 8, 7, 7}, // group
199
+ { 10, 10, 11, 10, 10}, // group_finish
200
+ { 7, 7, 7, 7, 7}, // wild
201
+ },
202
+
203
+ // match
204
+ {
205
+ // kleene, fixed, group, wild, EOW
206
+ { 0, 1, 2, 3, 5}, // basic
207
+ { 6, 6, 6, 6, 6}, // fixed
208
+ { 7, 7, 9, 7, 7}, // group
209
+ { 10, 10, 11, 10, 10}, // group_finish
210
+ { 12, 12, 12, 12, 12}, // wild
211
+ },
212
+ };
213
+
214
+ MatchData * match_data;
215
+ WildcardType type;
216
+ char data;
217
+ char input;
218
+ int match;
219
+ char transition;
220
+
221
+ match_data = malloc(sizeof(MatchData));
222
+ match_data->left = malloc(sizeof(StateMachine));
223
+ match_data->right = malloc(sizeof(StateMachine));
224
+ match_data->active = match_data->left;
225
+
226
+ match_data->left->input = cstring;
227
+ match_data->left->wildcard = wildcard->first;
228
+ match_data->left->pushed_input = NULL;
229
+ match_data->left->pushed_wildcard = NULL;
230
+ match_data->left->state = 0;
231
+ match_data->left->inc = left_inc;
232
+
233
+ match_data->right->input = cstring + len - 1;
234
+ match_data->right->wildcard = wildcard->last;
235
+ match_data->right->pushed_input = NULL;
236
+ match_data->right->pushed_wildcard = NULL;
237
+ match_data->right->state = 0;
238
+ match_data->right->inc = right_inc;
239
+
240
+ while (true) {
241
+ // get the data and it's type
242
+ if (eow(match_data)) {
243
+ type = EOW;
244
+ data = '\0';
245
+ } else {
246
+ type = (WildcardType) *match_data->active->wildcard;
247
+ data = *(match_data->active->wildcard + 1);
248
+ }
249
+
250
+ // get input, whether it matches the data and the transition to make
251
+ input = *match_data->active->input;
252
+ match = matches(type, data, input, eos(match_data), casefold);
253
+ transition = transition_table[match][(int)match_data->active->state][type];
254
+ // and execute the tansition
255
+ do_transition(transition, match_data);
256
+
257
+ // if the transition resulted in failure or success:
258
+ // clean up and terminate
259
+ if (match_data->active->state == SUCCESS_STATE) {
260
+ free(match_data->right);
261
+ free(match_data->left);
262
+ free(match_data);
263
+ return true;
264
+ } else if (match_data->active->state == FAILURE_STATE) {
265
+ free(match_data->right);
266
+ free(match_data->left);
267
+ free(match_data);
268
+ return false;
269
+ }
270
+ }
271
+ }
272
+