joker 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,8 @@
1
+ # 1.0.0 #
2
+
3
+ * Re-release as C extension
4
+
5
+
1
6
  # 0.0.1 #
2
7
 
3
8
  * Initial release
@@ -9,14 +9,19 @@ for Ruby.
9
9
 
10
10
  * Behaves much like Regexp
11
11
  * ` * ` and ` ? ` as wildcard characters
12
- * ` \ ` for escaping
13
- * `\a` matches `\a`, but not `a`
12
+ * ` \ ` for escaping:
13
+ ` \? ` matches ` ? `,
14
+ ` \* ` matches ` * `,
15
+ ` \[ ` matches ` [ `,
16
+ ` \] ` matches ` ] `,
17
+ * But for all other characters:
18
+ ` \a ` matches ` \a `, but not ` a `
14
19
  * Wildcards must always match whole string
20
+ (thus ` uiae ` will only match the string ` uiae `)
15
21
  * Wildcards can be case insensitive
16
22
 
17
23
  ## Installation ##
18
24
 
19
- gem install karottenreibe-joker --source http://gems.github.com
20
25
  gem install joker
21
26
 
22
27
  ## Usage ##
data/Rakefile CHANGED
@@ -1,20 +1,7 @@
1
- require 'jeweler'
2
1
 
3
- task :build => :gemspec
4
-
5
- Jeweler::Tasks.new do |gem|
6
- gem.name = 'joker'
7
- gem.summary = gem.description =
8
- 'Joker is a simple wildcard implementation that works much like Regexps'
9
- gem.email = 'karottenreibe@gmail.com'
10
- gem.homepage = 'http://karottenreibe.github.com/joker'
11
- gem.authors = ['Fabian Streitel']
12
- gem.rubyforge_project = 'k-gems'
13
- end
14
-
15
- Jeweler::RubyforgeTasks.new
16
-
17
- task :test do
18
- sh 'bacon -Ilib test/test_*.rb'
19
- end
2
+ load "tasks/jeweler.rake"
3
+ load "tasks/rdoc.rake"
4
+ load "tasks/c_extensions.rake"
5
+ load "tasks/test.rake"
6
+ load "tasks/shortcuts.rake"
20
7
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 1.0.0
@@ -0,0 +1,72 @@
1
+ #include <malloc.h>
2
+ #include "Joker.h"
3
+ #include "Wildcard.h"
4
+ #include "compile.h"
5
+ #include "match.h"
6
+
7
+
8
+ void Init_joker_native(void) // {{{1
9
+ {
10
+ class_Wildcard = rb_define_class("Wildcard", rb_cObject);
11
+ rb_define_singleton_method(class_Wildcard, "new", class_method_new, -1);
12
+ rb_define_method(class_Wildcard, "=~", instance_operator_match, 1);
13
+ rb_define_method(class_Wildcard, "===", instance_operator_match, 1);
14
+ }
15
+
16
+
17
+ VALUE instance_operator_match(self, object) // {{{1
18
+ VALUE self;
19
+ VALUE object;
20
+ {
21
+ Wildcard * wildcard;
22
+ const char * cstring;
23
+ long int length;
24
+ int casefold;
25
+
26
+ // check types and get the C representation of stuff
27
+ Check_Type(object, T_STRING);
28
+ Data_Get_Struct(self, Wildcard, wildcard);
29
+ cstring = rb_str2cstr(object, &length);
30
+ casefold = RTEST(rb_iv_get(self, "@casefold"));
31
+
32
+ // match and return the result
33
+ if (Wildcard_match(wildcard, cstring, length, casefold)) {
34
+ return Qtrue;
35
+ } else {
36
+ return Qfalse;
37
+ }
38
+ }
39
+
40
+
41
+ VALUE class_method_new(argc, argv, klass) // {{{1
42
+ int argc;
43
+ VALUE * argv;
44
+ VALUE klass;
45
+ {
46
+ VALUE wildcard_string;
47
+ VALUE casefold;
48
+ VALUE new_object;
49
+ Wildcard * new_wildcard;
50
+ const char * wildcard_cstring;
51
+ long int string_length;
52
+
53
+ // check arity and fill in defaults
54
+ rb_scan_args(argc, argv, "11", &wildcard_string, &casefold);
55
+ if (NIL_P(casefold)) {
56
+ casefold = Qfalse;
57
+ }
58
+
59
+ // get C representation of stuff and create Wildcard
60
+ wildcard_cstring = rb_str2cstr(wildcard_string, &string_length);
61
+ new_wildcard = Wildcard_compile(wildcard_cstring, string_length);
62
+ // wrap wildcard
63
+ new_object = Data_Wrap_Struct(klass, NULL, Wildcard_free, new_wildcard);
64
+
65
+ // set instance variables
66
+ rb_iv_set(new_object, "@casefold", casefold);
67
+ rb_iv_set(new_object, "@source", wildcard_string);
68
+
69
+ return new_object;
70
+ }
71
+
72
+
@@ -0,0 +1,53 @@
1
+ #ifndef JOKER_H_GUARD
2
+ #define JOKER_H_GUARD
3
+
4
+ #include "ruby.h"
5
+
6
+
7
+ VALUE class_Wildcard;
8
+ void Init_joker(void);
9
+
10
+
11
+ /*
12
+ * call-seq:
13
+ * Wildcard.new(wildcard_string, casefold = false) -> Wildcard
14
+ *
15
+ * Creates a new Wildcard from the given string.
16
+ * If casefold is true, the Wildcard will ignore case.
17
+ *
18
+ * Raises a SyntaxError if the given string could not
19
+ * be interpreted as a Wildcard.
20
+ *
21
+ * Issues warnings to the console if the given Wildcard
22
+ * was malformed.
23
+ *
24
+ */
25
+ VALUE class_method_new(int argc, VALUE * argv, VALUE klass);
26
+
27
+
28
+ /*
29
+ * call-seq:
30
+ * wildcard =~ 'string' -> true or false
31
+ * 'string' =~ wildcard -> true or false
32
+ * wildcard === 'string' -> true or false
33
+ *
34
+ * Matches the Wildcard against the given string.
35
+ *
36
+ * NOTE: Since a wildcard has to match the whole string,
37
+ * this method only returns true or false, not the position
38
+ * of the match.
39
+ *
40
+ * Wildcard['*fairy*'] =~ 'I love fairycake' #=> true
41
+ * 'I love fairycake' =~ Wildcard['*dairy*'] #=> false
42
+ *
43
+ * case 'I love fairycake'
44
+ * when Wildcard['*fairy*'] then puts 'fairy!'
45
+ * else puts 'no fairy...'
46
+ * end
47
+ *
48
+ */
49
+ VALUE instance_operator_match(VALUE self, VALUE string);
50
+
51
+
52
+ #endif
53
+
@@ -0,0 +1,22 @@
1
#include <malloc.h>
#include <stddef.h>
#include <stdlib.h>
#include <Wildcard.h>
4
+
5
+
6
+
7
+ void Wildcard_free(wildcard) // {{{1
8
+ Wildcard * wildcard;
9
+ {
10
+ free(wildcard->first);
11
+ free(wildcard);
12
+ }
13
+
14
+
15
+ void Wildcard_enlarge(wildcard) // {{{1
16
+ Wildcard * wildcard;
17
+ {
18
+ wildcard->length += 2;
19
+ wildcard->first = realloc(wildcard->first, wildcard->length * sizeof(char));
20
+ wildcard->last = wildcard->first + wildcard->length - 2;
21
+ }
22
+
@@ -0,0 +1,40 @@
1
+ #ifndef WILDCARD_H_GUARD
2
+ #define WILDCARD_H_GUARD
3
+
4
+
5
/*
 * Tags for the kinds of component a Wildcard is made of.
 *
 * The compiler stores the tag as the first char of every part, and the
 * matcher uses it as an index into its transition table, so the numeric
 * values are spelled out explicitly and must not change.
 */
typedef enum {
    Kleene = 0,
    Fixed  = 1,
    Group  = 2,
    Wild   = 3,
    EOW    = 4  // only used in matching
} WildcardType;
16
+
17
/*
 * Internal representation of a compiled Wildcard.
 *
 * The parts live in one contiguous char buffer, two chars per part:
 * the part's type tag followed by its data char.
 */
typedef struct {
    char * first;   // first part of the buffer (its type char)
    char * last;    // last part of the buffer (its type char, not the data char)
    long   length;  // number of chars in the buffer, i.e. twice the number of parts
} Wildcard;
26
+
27
+
28
+ void Wildcard_free(Wildcard * wildcard);
29
+
30
+
31
+ /*
32
+ * Adds two additional characters at the end
33
+ * and adjusts all the pointers.
34
+ *
35
+ */
36
+ void Wildcard_enlarge(Wildcard * wildcard);
37
+
38
+
39
+ #endif
40
+
@@ -0,0 +1,158 @@
1
+ #include <malloc.h>
2
+ #include <stdio.h>
3
+ #include <stddef.h>
4
+ #include <string.h>
5
+ #include <string.h>
6
+ #include <ruby.h>
7
+ #include "compile.h"
8
+
9
+
10
/*
 * Maps an input character onto its column in the compiler's transition
 * table.
 *
 * cchar - the character to classify
 *
 * Returns 0 for `\', 1 for `[', 2 for `]', 3 for `*', 4 for `?' and
 * 5 for every other character.
 */
static int hash(const char cchar) // {{{1
{
    // position in this list == column number
    static const char specials[] = { '\\', '[', ']', '*', '?' };
    const int special_count = (int) sizeof specials;
    int column;

    for (column = 0; column < special_count; column++) {
        if (specials[column] == cchar) {
            return column;
        }
    }

    return 5;
}
28
+
29
+
30
+ static void push(type, cchar, wildcard) // {{{1
31
+ const WildcardType type;
32
+ const char cchar;
33
+ Wildcard * wildcard;
34
+ {
35
+ Wildcard_enlarge(wildcard);
36
+ *wildcard->last = (char) type;
37
+ *(wildcard->last + 1) = cchar;
38
+ }
39
+
40
+
41
+ static void do_transition(transition, input, state, wildcard) // {{{1
42
+ const char transition;
43
+ const char input;
44
+ int * state;
45
+ Wildcard * wildcard;
46
+ {
47
+ switch (transition) {
48
+ case 0:
49
+ *state = 1;
50
+ break;
51
+ case 1:
52
+ *state = 2;
53
+ break;
54
+ case 2:
55
+ push(Fixed, input, wildcard);
56
+ rb_warning("wildcard has `]' without escape");
57
+ break;
58
+ case 3:
59
+ // refactor ** --> *
60
+ if (wildcard->last == NULL || *wildcard->last != (char) Kleene) {
61
+ push(Kleene, '*', wildcard);
62
+ }
63
+ break;
64
+ case 4:
65
+ push(Wild, '?', wildcard);
66
+ break;
67
+ case 5:
68
+ push(Fixed, input, wildcard);
69
+ break;
70
+ case 6:
71
+ *state = -1;
72
+ break;
73
+ case 7:
74
+ *state = 0;
75
+ push(Fixed, input, wildcard);
76
+ break;
77
+ case 8:
78
+ *state = 0;
79
+ push(Fixed, '\\', wildcard);
80
+ push(Fixed, input, wildcard);
81
+ break;
82
+ case 9:
83
+ *state = -1;
84
+ push(Fixed, '\\', wildcard);
85
+ break;
86
+ case 10:
87
+ *state = 3;
88
+ break;
89
+ case 11:
90
+ push(Group, input, wildcard);
91
+ rb_warning("character class has `[' without escape");
92
+ break;
93
+ case 12:
94
+ *state = 0;
95
+ break;
96
+ case 13:
97
+ push(Group, input, wildcard);
98
+ break;
99
+ case 14:
100
+ *state = -1;
101
+ rb_raise(rb_eSyntaxError, "premature end of wildcard");
102
+ break;
103
+ case 15:
104
+ *state = 2;
105
+ push(Group, input, wildcard);
106
+ break;
107
+ case 16:
108
+ *state = 2;
109
+ push(Group, '\\', wildcard);
110
+ push(Group, input, wildcard);
111
+ break;
112
+ default:
113
+ rb_fatal("Wildcard compilation state machine failure. This is a bug in Joker!");
114
+ }
115
+ }
116
+
117
+
118
+ Wildcard * Wildcard_compile(cstring, len) // {{{1
119
+ const char * cstring;
120
+ const long int len;
121
+ {
122
+ // the table that maps (state x input) -> transition
123
+ const char transition_table[4][7] = {
124
+ // \ [ ] * ? any EOS
125
+ { 0, 1, 2, 3, 4, 5, 6},
126
+ { 7, 7, 7, 7, 7, 8, 9},
127
+ {10, 11, 12, 13, 13, 13, 14},
128
+ {15, 15, 15, 16, 16, 16, 14}
129
+ };
130
+ int state = 0;
131
+
132
+ Wildcard * wildcard;
133
+ long int p;
134
+ char input;
135
+ int hashed;
136
+ char transition;
137
+
138
+ wildcard = malloc(sizeof(Wildcard));
139
+ wildcard->length = 0;
140
+ wildcard->first = NULL;
141
+ wildcard->last = NULL;
142
+
143
+ // for each char:
144
+ for (p = 0; p < len; p++) {
145
+ // get the input, it's type and what transition to make
146
+ input = cstring[p];
147
+ hashed = hash(input);
148
+ transition = transition_table[state][hashed];
149
+ // and execute the transition
150
+ do_transition(transition, input, &state, wildcard);
151
+ }
152
+
153
+ // finally: execute the finishing transition
154
+ transition = transition_table[state][6];
155
+ do_transition(transition, '\0', &state, wildcard);
156
+ return wildcard;
157
+ }
158
+
@@ -0,0 +1,16 @@
1
+ #ifndef COMPILE_H_GUARD
2
+ #define COMPILE_H_GUARD
3
+
4
+ #include "Wildcard.h"
5
+
6
+
7
+ /*
8
+ * Compiles an input string to the internal Wildcard
9
+ * representation.
10
+ *
11
+ */
12
+ Wildcard * Wildcard_compile(const char * cstring, const long int len);
13
+
14
+
15
+ #endif
16
+
@@ -0,0 +1,6 @@
1
require 'mkmf'

# Name of the native extension to build; the same name is used for the
# dir_config lookup and for the generated Makefile.
native_extension = 'joker_native'

dir_config(native_extension)
create_makefile(native_extension)
6
+
@@ -0,0 +1,272 @@
1
+ #include <malloc.h>
2
+ #include <stdio.h>
3
+ #include <stddef.h>
4
+ #include <string.h>
5
+ #include <ruby.h>
6
+ #include <ctype.h>
7
+ #include "match.h"
8
+
9
+ #define SUCCESS_STATE 42
10
+ #define FAILURE_STATE 23
11
+
12
+
13
/*
 * One of the two matching state machines (one walks from the left end,
 * the other from the right end).
 */
typedef struct {
    const char * input;             // current position in the string being matched
    const char * wildcard;          // current part of the compiled wildcard
    const char * pushed_input;      // saved input position, NULL when nothing is saved
    const char * pushed_wildcard;   // saved wildcard position belonging to pushed_input
    int state;                      // current state, or SUCCESS_STATE / FAILURE_STATE
    void (*inc)(const char **, int);    // advances a cursor in this machine's direction
} StateMachine;
21
+
22
+
23
+ typedef struct {
24
+ StateMachine * left;
25
+ StateMachine * right;
26
+ StateMachine * active;
27
+ } MatchData;
28
+
29
+
30
/*
 * Cursor step function of the left machine: moves the cursor towards
 * the end of the buffer.
 *
 * pointer - the cursor to move
 * offset  - how many chars to move it by
 */
static void left_inc(const char ** pointer, int offset) // {{{1
{
    *pointer = *pointer + offset;
}
36
+
37
+
38
/*
 * Cursor step function of the right machine: moves the cursor towards
 * the beginning of the buffer.
 *
 * pointer - the cursor to move
 * offset  - how many chars to move it by
 */
static void right_inc(const char ** pointer, int offset) // {{{1
{
    *pointer = *pointer - offset;
}
44
+
45
+ static int matches(type, data, input, eos, casefold) // {{{1
46
+ WildcardType type;
47
+ const char data;
48
+ const char input;
49
+ bool eos;
50
+ bool casefold;
51
+ {
52
+ switch(type) {
53
+ case Fixed:
54
+ case Group:
55
+ if (casefold) {
56
+ return !eos && tolower(input) == tolower(data);
57
+ } else {
58
+ return !eos && input == data;
59
+ }
60
+ case Wild:
61
+ return !eos && input != '\0';
62
+ case Kleene:
63
+ return 1;
64
+ case EOW:
65
+ return eos;
66
+ default:
67
+ rb_raise(rb_eSyntaxError, "corrupted wildcard");
68
+ return 0;
69
+ }
70
+ }
71
+
72
+
73
+ static bool eow(match_data) // {{{1
74
+ MatchData * match_data;
75
+ {
76
+ return match_data->left->wildcard == NULL || match_data->left->wildcard > match_data->right->wildcard;
77
+ }
78
+
79
+
80
+ static bool eos(match_data) // {{{1
81
+ MatchData * match_data;
82
+ {
83
+ return match_data->left->input > match_data->right->input;
84
+ }
85
+
86
+
87
+ static void push(match_data) // {{{1
88
+ MatchData * match_data;
89
+ {
90
+ StateMachine * sm;
91
+
92
+ if (!eos(match_data)) {
93
+ sm = match_data->active;
94
+ sm->pushed_input = sm->input;
95
+ sm->pushed_wildcard = sm->wildcard;
96
+ }
97
+ }
98
+
99
+
100
+ static void pull(match_data) // {{{1
101
+ MatchData * match_data;
102
+ {
103
+ StateMachine * sm;
104
+
105
+ sm = match_data->active;
106
+ if (sm->pushed_input == NULL) {
107
+ sm->state = FAILURE_STATE;
108
+ } else {
109
+ sm->input = sm->pushed_input;
110
+ sm->wildcard = sm->pushed_wildcard;
111
+ sm->pushed_input = NULL;
112
+ sm->pushed_wildcard = NULL;
113
+ (*sm->inc)(&sm->input, 1);
114
+
115
+ if (sm == match_data->left) {
116
+ match_data->active = match_data->right;
117
+ } else {
118
+ match_data->active = match_data->left;
119
+ }
120
+ }
121
+ }
122
+
123
+
124
+ static void do_transition(transition, match_data) // {{{1
125
+ const char transition;
126
+ MatchData * match_data;
127
+ {
128
+ StateMachine * sm;
129
+
130
+ sm = match_data->active;
131
+ switch (transition) {
132
+ case 0:
133
+ push(match_data);
134
+ (*sm->inc)(&sm->wildcard, 2);
135
+ break;
136
+ case 1:
137
+ sm->state = 1;
138
+ break;
139
+ case 2:
140
+ sm->state = 2;
141
+ break;
142
+ case 3:
143
+ sm->state = 4;
144
+ break;
145
+ case 4:
146
+ // does no exist
147
+ break;
148
+ case 5:
149
+ sm->state = SUCCESS_STATE;
150
+ break;
151
+ case 6:
152
+ sm->state = 0;
153
+ (*sm->inc)(&sm->wildcard, 2);
154
+ (*sm->inc)(&sm->input, 1);
155
+ break;
156
+ case 7:
157
+ sm->state = 0;
158
+ pull(match_data);
159
+ break;
160
+ case 8:
161
+ (*sm->inc)(&sm->wildcard, 2);
162
+ break;
163
+ case 9:
164
+ sm->state = 3;
165
+ (*sm->inc)(&sm->wildcard, 2);
166
+ (*sm->inc)(&sm->input, 1);
167
+ break;
168
+ case 10:
169
+ sm->state = 0;
170
+ break;
171
+ case 11:
172
+ (*sm->inc)(&sm->wildcard, 2);
173
+ break;
174
+ case 12:
175
+ sm->state = 0;
176
+ (*sm->inc)(&sm->wildcard, 2);
177
+ (*sm->inc)(&sm->input, 1);
178
+ break;
179
+ default:
180
+ rb_fatal("Wildcard matching state machine failure. This is a bug in Joker!");
181
+ }
182
+ }
183
+
184
+
185
+ bool Wildcard_match(wildcard, cstring, len, casefold) // {{{1
186
+ Wildcard * wildcard;
187
+ const char * cstring;
188
+ const long int len;
189
+ bool casefold;
190
+ {
191
+ // the table that maps (match x state x type) -> transition
192
+ const char transition_table[2][5][5] = {
193
+ // fail
194
+ {
195
+ // kleene, fixed, group, wild, EOW
196
+ { 0, 1, 2, 3, 7}, // basic
197
+ { 7, 7, 7, 7, 7}, // fixed
198
+ { 7, 7, 8, 7, 7}, // group
199
+ { 10, 10, 11, 10, 10}, // group_finish
200
+ { 7, 7, 7, 7, 7}, // wild
201
+ },
202
+
203
+ // match
204
+ {
205
+ // kleene, fixed, group, wild, EOW
206
+ { 0, 1, 2, 3, 5}, // basic
207
+ { 6, 6, 6, 6, 6}, // fixed
208
+ { 7, 7, 9, 7, 7}, // group
209
+ { 10, 10, 11, 10, 10}, // group_finish
210
+ { 12, 12, 12, 12, 12}, // wild
211
+ },
212
+ };
213
+
214
+ MatchData * match_data;
215
+ WildcardType type;
216
+ char data;
217
+ char input;
218
+ int match;
219
+ char transition;
220
+
221
+ match_data = malloc(sizeof(MatchData));
222
+ match_data->left = malloc(sizeof(StateMachine));
223
+ match_data->right = malloc(sizeof(StateMachine));
224
+ match_data->active = match_data->left;
225
+
226
+ match_data->left->input = cstring;
227
+ match_data->left->wildcard = wildcard->first;
228
+ match_data->left->pushed_input = NULL;
229
+ match_data->left->pushed_wildcard = NULL;
230
+ match_data->left->state = 0;
231
+ match_data->left->inc = left_inc;
232
+
233
+ match_data->right->input = cstring + len - 1;
234
+ match_data->right->wildcard = wildcard->last;
235
+ match_data->right->pushed_input = NULL;
236
+ match_data->right->pushed_wildcard = NULL;
237
+ match_data->right->state = 0;
238
+ match_data->right->inc = right_inc;
239
+
240
+ while (true) {
241
+ // get the data and it's type
242
+ if (eow(match_data)) {
243
+ type = EOW;
244
+ data = '\0';
245
+ } else {
246
+ type = (WildcardType) *match_data->active->wildcard;
247
+ data = *(match_data->active->wildcard + 1);
248
+ }
249
+
250
+ // get input, whether it matches the data and the transition to make
251
+ input = *match_data->active->input;
252
+ match = matches(type, data, input, eos(match_data), casefold);
253
+ transition = transition_table[match][(int)match_data->active->state][type];
254
+ // and execute the tansition
255
+ do_transition(transition, match_data);
256
+
257
+ // if the transition resulted in failure or success:
258
+ // clean up and terminate
259
+ if (match_data->active->state == SUCCESS_STATE) {
260
+ free(match_data->right);
261
+ free(match_data->left);
262
+ free(match_data);
263
+ return true;
264
+ } else if (match_data->active->state == FAILURE_STATE) {
265
+ free(match_data->right);
266
+ free(match_data->left);
267
+ free(match_data);
268
+ return false;
269
+ }
270
+ }
271
+ }
272
+