gr_string_escape 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +22 -0
- data/LICENSE +20 -0
- data/README.rdoc +17 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/ext/gr_string_escape/extconf.rb +4 -0
- data/ext/gr_string_escape/gr_string_escape.c +1162 -0
- data/gr_string_escape.gemspec +55 -0
- data/test/helper.rb +12 -0
- data/test/new_assertions.rb +21 -0
- data/test/test_gr_string_escape.rb +114 -0
- metadata +82 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Curtis Schofield
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= gr_string_escape
|
2
|
+
|
3
|
+
A Ruby C extension containing the code for Goodreads string parsing.
|
4
|
+
|
5
|
+
== Note on Patches/Pull Requests
|
6
|
+
|
7
|
+
* Fork the project.
|
8
|
+
* Make your feature addition or bug fix.
|
9
|
+
* Add tests for it. This is important so I don't break it in a
|
10
|
+
future version unintentionally.
|
11
|
+
* Commit, do not mess with rakefile, version, or history.
|
12
|
+
(If you want to have your own version, that is fine, but bump the version in a commit by itself so that I can ignore it when I pull.)
|
13
|
+
* Send me a pull request. Bonus points for topic branches.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2010 Curtis Schofield. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging tasks come from Jeweler. jeweler_tasks stays nil when Jeweler
# is not installed so the rest of this file can tell and degrade gracefully.
jeweler_tasks = nil
begin
  require 'jeweler'
  jeweler_tasks = Jeweler::Tasks.new do |gem|
    gem.name = "gr_string_escape"
    gem.summary = %Q{Goodreads string parser}
    gem.description = %Q{Code for Goodreads String Parsing}
    gem.email = "github.com@robotarmyma.de"
    gem.homepage = "http://github.com/robotarmy/gr_string_escape"
    gem.authors = ["Michael Economy","Curtis Schofield"]
    gem.extensions = FileList['ext/**/extconf.rb']
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler missing : \n gem install jeweler"
end

begin
  require 'rake/extensiontask'
rescue LoadError
  puts "rake-compiler missing : \n gem install rake-compiler"
end

# The compile task needs both optional dependencies: rake-compiler provides
# Rake::ExtensionTask (and CLEAN), Jeweler provides the gemspec. Without this
# guard a missing gem aborted the whole Rakefile right after printing the
# install hint above.
if jeweler_tasks && defined?(Rake::ExtensionTask)
  Rake::ExtensionTask.new('gr_string_escape', jeweler_tasks.gemspec)
  CLEAN.include 'lib/**/*.so'
end


require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only wired in when Jeweler loaded; this file does not
# define that task itself.
task :test => :check_dependencies if jeweler_tasks

task :default => [:compile,:test]

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "gr_string_escape #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.3.1
|
@@ -0,0 +1,1162 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
|
6
|
+
|
7
|
+
#define MAX_STACK_DEPTH 20
|
8
|
+
#define MAX_URL_PRINT_SIZE 40
|
9
|
+
#define MAX_ATTRIBUTES 10
|
10
|
+
#define DYNAMICS_INCREMENT 128
|
11
|
+
#define STRONG "strong"
|
12
|
+
#define EM "em"
|
13
|
+
#define FALSE 0
|
14
|
+
#define TRUE 1
|
15
|
+
#define EMPTY_STRING ""
|
16
|
+
|
17
|
+
static int id_push; // ruby thing -- presumably an interned method ID assigned during extension init (not visible here); NOTE(review): Ruby method IDs are of type ID, not int -- confirm
|
18
|
+
|
19
|
+
int input_size, output_size, max_output_size, absolute_url_size; // byte counts: length of input, bytes written to output so far, capacity of output (writers refuse to reach it), length of absolute_url
|
20
|
+
int position; // index into input where the construct currently being parsed starts
|
21
|
+
int ahead_position; // look-ahead cursor into input, advanced by the *_read functions
|
22
|
+
char **tag_stack; // names of the currently open html tags (copies made by push_to_tag_stack)
|
23
|
+
int tag_stack_size; // number of entries in tag_stack
|
24
|
+
int anchors_in_stack; // open <a> tags; url_read refuses to auto-link while this is > 0
|
25
|
+
int cap_count; // characters counted towards the cap; only grows while counting_cap is set
|
26
|
+
int writing_utf_8; //set to the size of the utf_8 char remaining (continuation bytes still expected by write_char)
|
27
|
+
int counting_cap; // TRUE while written text counts towards cap_count, FALSE while markup is being written
|
28
|
+
|
29
|
+
char* input; // text being parsed (input_size bytes)
|
30
|
+
char* absolute_url; // optional prefix written before site-relative hrefs; skipped when NULL
|
31
|
+
char* output; // destination buffer (max_output_size bytes)
|
32
|
+
|
33
|
+
// Lower-case a NUL-terminated string in place.
// Only the ASCII letters 'A'..'Z' are touched; all other bytes are kept.
static void downcase(char *string) {
  char *cursor = string;

  while (*cursor) {
    if (*cursor >= 'A' && *cursor <= 'Z') {
      *cursor += 32; // distance between upper and lower case in ASCII
    }
    cursor++;
  }
}
|
41
|
+
|
42
|
+
// Lower-case exactly string_size bytes of string in place.
// Unlike downcase() this does not stop at a NUL byte; only the ASCII
// letters 'A'..'Z' are changed. Nothing happens when string_size <= 0.
static void downcasen(char *string, int string_size) {
  char *cursor = string;
  int remaining;

  for (remaining = string_size; remaining > 0; remaining--, cursor++) {
    if (*cursor < 'A' || *cursor > 'Z') {
      continue;
    }
    *cursor += 32; // distance between upper and lower case in ASCII
  }
}
|
51
|
+
|
52
|
+
static void write_chars(char *chars) {
|
53
|
+
int size = strlen(chars);
|
54
|
+
if(output_size + size >= max_output_size) {
|
55
|
+
printf("Error: max_output_size is being exceeded\n");
|
56
|
+
return;
|
57
|
+
}
|
58
|
+
strncpy(output + output_size, chars, size);
|
59
|
+
output_size += size;
|
60
|
+
if(counting_cap) {
|
61
|
+
cap_count += size;
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
inline push_to_tag_stack(char *string, int string_size) {
|
66
|
+
char *new_string;
|
67
|
+
new_string = ALLOC_N(char, string_size + 1);
|
68
|
+
strcpy(new_string, string);
|
69
|
+
tag_stack[tag_stack_size++] = new_string;
|
70
|
+
}
|
71
|
+
|
72
|
+
inline write_nchars(char *chars, int chars_size) {
|
73
|
+
if(output_size + chars_size >= max_output_size) {
|
74
|
+
printf("Error: max_output_size is being exceeded\n");
|
75
|
+
return;
|
76
|
+
}
|
77
|
+
strncpy(output + output_size, chars, chars_size);
|
78
|
+
output_size += chars_size;
|
79
|
+
if(counting_cap) {
|
80
|
+
cap_count += chars_size;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
|
84
|
+
inline void write_char(char char_to_write) {
|
85
|
+
if(output_size + 1 >= max_output_size) {
|
86
|
+
printf("Error: max_output_size is being exceeded\n");
|
87
|
+
return;
|
88
|
+
}
|
89
|
+
|
90
|
+
output[output_size++] = char_to_write;
|
91
|
+
if (writing_utf_8) {
|
92
|
+
if(char_to_write & 0x80 && !(char_to_write & 0x40)) {
|
93
|
+
writing_utf_8 --;
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
writing_utf_8 = 0;
|
97
|
+
}
|
98
|
+
}
|
99
|
+
else {
|
100
|
+
if(char_to_write & 0x80) {
|
101
|
+
if (char_to_write & 0x40) {
|
102
|
+
writing_utf_8 ++;
|
103
|
+
if (char_to_write & 0x20) {
|
104
|
+
writing_utf_8 ++;
|
105
|
+
if (char_to_write & 0x10) {
|
106
|
+
writing_utf_8 ++;
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
}
|
111
|
+
}
|
112
|
+
if(counting_cap && !writing_utf_8) {
|
113
|
+
cap_count++;
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
|
118
|
+
// Append the NUL-terminated string chars to output, replacing the
// characters that are special in HTML text and double-quoted attribute
// values with their entities:
//   &  ->  &amp;     >  ->  &gt;     <  ->  &lt;     "  ->  &quot;
// Every other byte goes through write_char unchanged.
static void write_escaped_chars(char *chars) {
  int i;
  char c;

  for (i = 0; (c = chars[i]) != 0; i++) {
    switch(c) {
      case '&':
        write_chars("&amp;");
        break;
      case '>':
        write_chars("&gt;");
        break;
      case '<':
        write_chars("&lt;");
        break;
      case '"':
        write_chars("&quot;");
        break;
      default:
        write_char(c);
        break;
    }
  }
}
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
static void write_urlitized_chars(char *chars) {
|
146
|
+
int i = 0;
|
147
|
+
int wrote_underscore = FALSE;
|
148
|
+
char c;
|
149
|
+
for (i = 0; c = chars[i]; i++) {
|
150
|
+
if((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
|
151
|
+
(c >= '0' && c <= '9')) {
|
152
|
+
write_char(c);
|
153
|
+
wrote_underscore = FALSE;
|
154
|
+
}
|
155
|
+
else {
|
156
|
+
if (!wrote_underscore) {
|
157
|
+
wrote_underscore = TRUE;
|
158
|
+
write_char('_');
|
159
|
+
}
|
160
|
+
}
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
// Decide whether `string` may be emitted as the target of a link or image.
//
// Returns 0 (FALSE) when string is NULL, is at most 4 characters long, or
// starts with "java" in any letter case (which covers javascript: URLs);
// returns 1 (TRUE) otherwise.
//
// The prefix comparison skips what URL parsers strip before they look at
// the scheme: leading spaces and control characters, and tab / CR / LF
// anywhere inside it. Without that, values such as " javascript:..." or
// "ja\tvascript:..." slipped past the check.
static int valid_url(char * string){
  static const char blocked[] = "java";
  const size_t blocked_length = sizeof blocked - 1;
  const unsigned char *cursor = (const unsigned char *) string;
  size_t matched = 0;

  if (!string || strlen(string) <= 4) {
    return 0;
  }

  // skip leading spaces and control characters
  while (*cursor != 0 && *cursor <= 0x20) {
    cursor++;
  }

  // disallow javascript tags
  while (matched < blocked_length) {
    unsigned char c = *cursor;

    if (c == '\t' || c == '\n' || c == '\r') {
      cursor++;
      continue;
    }
    // (c | 0x20) folds an ASCII upper-case letter onto its lower-case form;
    // the terminating NUL never matches and ends the loop
    if ((c | 0x20) != (unsigned char) blocked[matched]) {
      break;
    }
    cursor++;
    matched++;
  }

  // valid unless the whole blocked prefix was found
  return matched != blocked_length;
}
|
186
|
+
|
187
|
+
|
188
|
+
// Look up key among the first num_keys entries of keys (exact, case
// sensitive comparison). Returns the index of the first match, or -1 when
// there is none.
static int attribute_find(char *key, char **keys, int num_keys) {
  int index = 0;

  while (index < num_keys) {
    if (!strcmp(key, keys[index])) {
      return index;
    }
    index++;
  }
  return -1;
}
|
200
|
+
|
201
|
+
static int html_parse(int start, int finish) {
|
202
|
+
int close_tag = FALSE;
|
203
|
+
int tag_closed = FALSE;
|
204
|
+
char *tag;
|
205
|
+
char *name;
|
206
|
+
char c;
|
207
|
+
int i;
|
208
|
+
char *attribute_keys[MAX_ATTRIBUTES];
|
209
|
+
char *attribute_values[MAX_ATTRIBUTES];
|
210
|
+
int num_attributes = 0;
|
211
|
+
int tag_size;
|
212
|
+
int name_size = 0;
|
213
|
+
|
214
|
+
tag = input + start;
|
215
|
+
tag_size = finish - start;
|
216
|
+
|
217
|
+
// printf("tag start: %c tag_size:%d\n", tag[0], tag_size);
|
218
|
+
if(tag_size <= 0) {
|
219
|
+
return FALSE;
|
220
|
+
}
|
221
|
+
|
222
|
+
// read all the whitespace and first slash "< / "
|
223
|
+
for(; tag_size > 0; tag++, tag_size--) {
|
224
|
+
int done = FALSE;
|
225
|
+
switch(c = tag[0]) {
|
226
|
+
case '/':
|
227
|
+
if (close_tag){
|
228
|
+
return FALSE;
|
229
|
+
}
|
230
|
+
else {
|
231
|
+
close_tag = TRUE;
|
232
|
+
}
|
233
|
+
break;
|
234
|
+
case ' ':
|
235
|
+
case '\t':
|
236
|
+
case '\n':
|
237
|
+
// ignore
|
238
|
+
break;
|
239
|
+
default:
|
240
|
+
done = TRUE;
|
241
|
+
break;
|
242
|
+
}
|
243
|
+
if(done){
|
244
|
+
break;
|
245
|
+
}
|
246
|
+
}
|
247
|
+
|
248
|
+
// read all the whitespace and last slash
|
249
|
+
for(; tag_size > 0; tag_size--) {
|
250
|
+
int done = FALSE;
|
251
|
+
switch(c = tag[tag_size - 1]) {
|
252
|
+
case '/':
|
253
|
+
if (tag_closed){
|
254
|
+
return FALSE;
|
255
|
+
}
|
256
|
+
else {
|
257
|
+
tag_closed = TRUE;
|
258
|
+
}
|
259
|
+
break;
|
260
|
+
case ' ':
|
261
|
+
case '\t':
|
262
|
+
case '\n':
|
263
|
+
// ignore
|
264
|
+
break;
|
265
|
+
default:
|
266
|
+
done = TRUE;
|
267
|
+
break;
|
268
|
+
}
|
269
|
+
if(done){
|
270
|
+
break;
|
271
|
+
}
|
272
|
+
}
|
273
|
+
|
274
|
+
if(tag_size == 0) {
|
275
|
+
return FALSE;
|
276
|
+
}
|
277
|
+
|
278
|
+
//read name
|
279
|
+
for(name_size = 0; name_size < tag_size; name_size++) {
|
280
|
+
int done = FALSE;
|
281
|
+
switch(tag[name_size]) {
|
282
|
+
case ' ':
|
283
|
+
case '\t':
|
284
|
+
case '\n':
|
285
|
+
done = TRUE;
|
286
|
+
break;
|
287
|
+
}
|
288
|
+
if(done) {
|
289
|
+
break;
|
290
|
+
}
|
291
|
+
}
|
292
|
+
name = ALLOCA_N(char, name_size + 1);
|
293
|
+
strncpy(name, tag, name_size);
|
294
|
+
name[name_size] = 0;
|
295
|
+
|
296
|
+
// printf("name_size %d tag name: %s\n", name_size, name);
|
297
|
+
|
298
|
+
tag_size -= name_size;
|
299
|
+
tag += name_size;
|
300
|
+
|
301
|
+
|
302
|
+
// read attributes
|
303
|
+
for(; tag_size > 0; tag_size--, tag++) {
|
304
|
+
char *key = tag;
|
305
|
+
char *value = NULL;
|
306
|
+
int key_size = 0;
|
307
|
+
int value_size = 0; // would you like fries with that?
|
308
|
+
int skip_value = FALSE;
|
309
|
+
|
310
|
+
// read key
|
311
|
+
for(; key_size < tag_size; key_size++) {
|
312
|
+
int done = FALSE;
|
313
|
+
c = tag[key_size];
|
314
|
+
switch(c) {
|
315
|
+
case ' ':
|
316
|
+
case '\t':
|
317
|
+
case '\n':
|
318
|
+
if (key_size > 0) {
|
319
|
+
done = TRUE;
|
320
|
+
}
|
321
|
+
else {
|
322
|
+
//ignore preceding whitespace
|
323
|
+
key++;
|
324
|
+
key_size--;
|
325
|
+
tag++;
|
326
|
+
tag_size--;
|
327
|
+
}
|
328
|
+
break;
|
329
|
+
case '=':
|
330
|
+
if (key_size > 0) {
|
331
|
+
done = TRUE;
|
332
|
+
// do not advance, equals still needs to be found
|
333
|
+
}
|
334
|
+
else {
|
335
|
+
return FALSE;
|
336
|
+
}
|
337
|
+
break;
|
338
|
+
case '\'':
|
339
|
+
case '"':
|
340
|
+
return FALSE;
|
341
|
+
}
|
342
|
+
if(done) {
|
343
|
+
break;
|
344
|
+
}
|
345
|
+
} // got key
|
346
|
+
key = ALLOCA_N(char, key_size + 1);
|
347
|
+
strncpy(key, tag, key_size);
|
348
|
+
key[key_size] = 0;
|
349
|
+
tag += key_size;
|
350
|
+
tag_size -= key_size;
|
351
|
+
|
352
|
+
for(;tag_size > 0; tag++, tag_size--) {
|
353
|
+
int done = FALSE;
|
354
|
+
switch(tag[0]) {
|
355
|
+
case ' ':
|
356
|
+
case '\t':
|
357
|
+
case '\n':
|
358
|
+
//ignore whitespace
|
359
|
+
break;
|
360
|
+
case '=':
|
361
|
+
done = TRUE;
|
362
|
+
tag++;
|
363
|
+
tag_size--;
|
364
|
+
break;
|
365
|
+
default:
|
366
|
+
skip_value = TRUE;
|
367
|
+
done = TRUE;
|
368
|
+
break;
|
369
|
+
}
|
370
|
+
if(done) {
|
371
|
+
break;
|
372
|
+
}
|
373
|
+
}
|
374
|
+
|
375
|
+
if(!skip_value) {
|
376
|
+
// read value
|
377
|
+
int started_reading_value = FALSE;
|
378
|
+
for(; value_size < tag_size; value_size++) {
|
379
|
+
int done = FALSE;
|
380
|
+
c = tag[value_size];
|
381
|
+
switch(c) {
|
382
|
+
case ' ':
|
383
|
+
case '\t':
|
384
|
+
case '\n':
|
385
|
+
if (started_reading_value) {
|
386
|
+
done = TRUE;
|
387
|
+
}
|
388
|
+
else {
|
389
|
+
//ignore preceding whitespace
|
390
|
+
value_size--;
|
391
|
+
tag++;
|
392
|
+
tag_size--;
|
393
|
+
}
|
394
|
+
break;
|
395
|
+
case '=':
|
396
|
+
return FALSE;
|
397
|
+
case '\'':
|
398
|
+
case '"':
|
399
|
+
if(!value) {
|
400
|
+
started_reading_value = TRUE;
|
401
|
+
tag++;
|
402
|
+
tag_size --;
|
403
|
+
while(value_size <= tag_size && tag[value_size] != c) {
|
404
|
+
value_size++;
|
405
|
+
}
|
406
|
+
if(tag[value_size] != c){
|
407
|
+
return FALSE;
|
408
|
+
}
|
409
|
+
done = TRUE;
|
410
|
+
}
|
411
|
+
else {
|
412
|
+
return FALSE;
|
413
|
+
}
|
414
|
+
break;
|
415
|
+
default:
|
416
|
+
if (!started_reading_value) {
|
417
|
+
started_reading_value = TRUE;
|
418
|
+
}
|
419
|
+
break;
|
420
|
+
}
|
421
|
+
if(done) {
|
422
|
+
break;
|
423
|
+
}
|
424
|
+
}
|
425
|
+
|
426
|
+
if(started_reading_value) {
|
427
|
+
value = ALLOCA_N(char, value_size + 1);
|
428
|
+
strncpy(value, tag, value_size);
|
429
|
+
value[value_size] = 0;
|
430
|
+
tag += value_size;
|
431
|
+
tag_size -= value_size;
|
432
|
+
}
|
433
|
+
}
|
434
|
+
|
435
|
+
if (key_size > 0) {
|
436
|
+
if (num_attributes >= MAX_ATTRIBUTES) {
|
437
|
+
break;
|
438
|
+
}
|
439
|
+
attribute_keys[num_attributes] = key;
|
440
|
+
if (value) {
|
441
|
+
attribute_values[num_attributes] = value;
|
442
|
+
}
|
443
|
+
else {
|
444
|
+
attribute_values[num_attributes] = EMPTY_STRING;
|
445
|
+
}
|
446
|
+
num_attributes ++;
|
447
|
+
}
|
448
|
+
}
|
449
|
+
|
450
|
+
for(i = 0; i< num_attributes; i++) {
|
451
|
+
downcase(attribute_keys[i]);
|
452
|
+
}
|
453
|
+
|
454
|
+
//clean tag!
|
455
|
+
downcasen(name, name_size);
|
456
|
+
if(strcmp(name, "b") == 0){
|
457
|
+
name = STRONG;
|
458
|
+
}
|
459
|
+
else if(strcmp(name, "i") == 0) {
|
460
|
+
name = EM;
|
461
|
+
}
|
462
|
+
|
463
|
+
if(close_tag) {
|
464
|
+
if(tag_stack_size == 0){
|
465
|
+
return FALSE;
|
466
|
+
}
|
467
|
+
if(strcmp(tag_stack[tag_stack_size - 1], name) == 0){
|
468
|
+
if(strcmp(name, "a")) {
|
469
|
+
anchors_in_stack--;
|
470
|
+
}
|
471
|
+
write_chars("</");
|
472
|
+
write_chars(tag_stack[--tag_stack_size]);
|
473
|
+
write_char('>');
|
474
|
+
}
|
475
|
+
}
|
476
|
+
else { //not a close tag
|
477
|
+
if(tag_stack_size >= MAX_STACK_DEPTH) {
|
478
|
+
return FALSE;
|
479
|
+
}
|
480
|
+
if(strcmp(EM, name) == 0 || strcmp(STRONG, name) == 0 ||
|
481
|
+
strcmp("s", name) == 0 || strcmp("u", name) == 0 ||
|
482
|
+
strcmp("p", name) == 0 || strcmp("blockquote", name) == 0 ||
|
483
|
+
strcmp("pre", name) == 0){
|
484
|
+
if(tag_closed) {
|
485
|
+
return FALSE;
|
486
|
+
}
|
487
|
+
if(num_attributes != 0) {
|
488
|
+
return FALSE;
|
489
|
+
}
|
490
|
+
push_to_tag_stack(name, name_size);
|
491
|
+
write_char('<');
|
492
|
+
write_chars(name);
|
493
|
+
write_char('>');
|
494
|
+
}
|
495
|
+
else if(strcmp("br", name) == 0) {
|
496
|
+
if(num_attributes != 0) {
|
497
|
+
return FALSE;
|
498
|
+
}
|
499
|
+
write_chars("<br/>");
|
500
|
+
}
|
501
|
+
else if(strcmp("a", name) == 0) {
|
502
|
+
int href_pos;
|
503
|
+
|
504
|
+
// printf("trying A tag\n");
|
505
|
+
href_pos = attribute_find("href", attribute_keys,
|
506
|
+
num_attributes);
|
507
|
+
if(href_pos < 0) {
|
508
|
+
return FALSE;
|
509
|
+
}
|
510
|
+
if(!valid_url(attribute_values[href_pos])) {
|
511
|
+
return FALSE;
|
512
|
+
}
|
513
|
+
|
514
|
+
write_chars("<a rel=\"nofollow\" target=\"_blank\" href=\"");
|
515
|
+
write_chars(attribute_values[href_pos]);
|
516
|
+
write_chars("\">");
|
517
|
+
|
518
|
+
push_to_tag_stack("a", 1);
|
519
|
+
anchors_in_stack++;
|
520
|
+
}
|
521
|
+
else if(strcmp("img", name) == 0) {
|
522
|
+
int src_pos, alt_pos, width_pos, height_pos;
|
523
|
+
|
524
|
+
src_pos = attribute_find("src", attribute_keys,
|
525
|
+
num_attributes);
|
526
|
+
alt_pos = attribute_find("alt", attribute_keys,
|
527
|
+
num_attributes);
|
528
|
+
width_pos = attribute_find("width", attribute_keys,
|
529
|
+
num_attributes);
|
530
|
+
height_pos = attribute_find("height", attribute_keys,
|
531
|
+
num_attributes);
|
532
|
+
if(src_pos < 0) {
|
533
|
+
return FALSE;
|
534
|
+
}
|
535
|
+
|
536
|
+
if(!valid_url(attribute_values[src_pos])) {
|
537
|
+
return FALSE;
|
538
|
+
}
|
539
|
+
|
540
|
+
write_chars("<img src=\"");
|
541
|
+
write_chars(attribute_values[src_pos]);
|
542
|
+
if (alt_pos >= 0){
|
543
|
+
write_chars("\" alt=\"");
|
544
|
+
write_chars(attribute_values[alt_pos]);
|
545
|
+
}
|
546
|
+
if (width_pos >= 0){
|
547
|
+
write_chars("\" width=\"");
|
548
|
+
write_chars(attribute_values[width_pos]);
|
549
|
+
}
|
550
|
+
if (height_pos >= 0){
|
551
|
+
write_chars("\" height=\"");
|
552
|
+
write_chars(attribute_values[height_pos]);
|
553
|
+
}
|
554
|
+
write_chars("\" class=\"escapedImg\"/>");
|
555
|
+
}
|
556
|
+
else {
|
557
|
+
return FALSE;
|
558
|
+
}
|
559
|
+
}
|
560
|
+
return TRUE;
|
561
|
+
}
|
562
|
+
|
563
|
+
static int gr_tag_parse() {
|
564
|
+
char *tag;
|
565
|
+
char *attributes[MAX_ATTRIBUTES];
|
566
|
+
int num_attributes = 0;
|
567
|
+
char *name;
|
568
|
+
int tag_size;
|
569
|
+
int name_size;
|
570
|
+
int i;
|
571
|
+
char *id; // attribute[1]
|
572
|
+
char *title;// attribute[2], defaulted to attr[0]
|
573
|
+
tag = input + position;
|
574
|
+
tag_size = (ahead_position - position) - 1;
|
575
|
+
|
576
|
+
if(tag_size < 3) {
|
577
|
+
return FALSE;
|
578
|
+
}
|
579
|
+
|
580
|
+
//read name
|
581
|
+
for(name_size = 0; name_size < tag_size; name_size++) {
|
582
|
+
if(tag[name_size] == ':') {
|
583
|
+
break;
|
584
|
+
}
|
585
|
+
}
|
586
|
+
name = ALLOCA_N(char, name_size + 1);
|
587
|
+
strncpy(name, tag, name_size);
|
588
|
+
name[name_size] = 0;
|
589
|
+
downcasen(name, name_size);
|
590
|
+
|
591
|
+
if(position + name_size + 1 > input_size) {
|
592
|
+
printf("Error: position + name_size + 1 > input_size\n");
|
593
|
+
return;
|
594
|
+
}
|
595
|
+
|
596
|
+
tag += name_size + 1;
|
597
|
+
tag_size -= name_size + 1;
|
598
|
+
while(tag_size > 0) {
|
599
|
+
if (num_attributes == MAX_ATTRIBUTES) {
|
600
|
+
return FALSE;
|
601
|
+
}
|
602
|
+
int attr_size;
|
603
|
+
char *attr;
|
604
|
+
for(attr_size = 0; attr_size < tag_size; attr_size++) {
|
605
|
+
if(tag[attr_size] == '|') {
|
606
|
+
break;
|
607
|
+
}
|
608
|
+
}
|
609
|
+
attr = ALLOCA_N(char, attr_size + 1);
|
610
|
+
strncpy(attr, tag, attr_size);
|
611
|
+
attr[attr_size] = 0;
|
612
|
+
tag += attr_size + 1;
|
613
|
+
tag_size -= attr_size + 1;
|
614
|
+
attributes[num_attributes++] = attr;
|
615
|
+
}
|
616
|
+
|
617
|
+
if (num_attributes < 1) {
|
618
|
+
return FALSE;
|
619
|
+
}
|
620
|
+
|
621
|
+
if (num_attributes >= 2) {
|
622
|
+
id = attributes[1];
|
623
|
+
}
|
624
|
+
else {
|
625
|
+
id = 0;
|
626
|
+
}
|
627
|
+
|
628
|
+
if (num_attributes >= 3) {
|
629
|
+
title = attributes[2];
|
630
|
+
}
|
631
|
+
else {
|
632
|
+
title = attributes[0];
|
633
|
+
}
|
634
|
+
|
635
|
+
if(strcmp("b", name) == 0 || strcmp("book", name) == 0){
|
636
|
+
if (id) {
|
637
|
+
counting_cap = FALSE;
|
638
|
+
write_chars("<a href=\"");
|
639
|
+
if (absolute_url) {
|
640
|
+
write_nchars(absolute_url, absolute_url_size);
|
641
|
+
}
|
642
|
+
write_chars("/book/show/");
|
643
|
+
write_chars(id);
|
644
|
+
write_char('.');
|
645
|
+
write_urlitized_chars(title);
|
646
|
+
write_chars("\" title=\"");
|
647
|
+
write_escaped_chars(title);
|
648
|
+
if (num_attributes >= 4) {
|
649
|
+
write_chars(" by ");
|
650
|
+
write_escaped_chars(attributes[3]);
|
651
|
+
}
|
652
|
+
write_chars("\">");
|
653
|
+
counting_cap = TRUE;
|
654
|
+
write_escaped_chars(attributes[0]);
|
655
|
+
counting_cap = FALSE;
|
656
|
+
write_chars("</a>");
|
657
|
+
counting_cap = TRUE;
|
658
|
+
}
|
659
|
+
else {
|
660
|
+
counting_cap = FALSE;
|
661
|
+
write_chars("<a href=\"");
|
662
|
+
if (absolute_url) {
|
663
|
+
write_nchars(absolute_url, absolute_url_size);
|
664
|
+
}
|
665
|
+
write_chars("/search/search?q=");
|
666
|
+
write_escaped_chars(attributes[0]);
|
667
|
+
write_chars("\" title=\"");
|
668
|
+
write_chars(title);
|
669
|
+
write_chars("\">");
|
670
|
+
counting_cap = TRUE;
|
671
|
+
write_escaped_chars(attributes[0]);
|
672
|
+
counting_cap = FALSE;
|
673
|
+
write_chars("</a>");
|
674
|
+
counting_cap = TRUE;
|
675
|
+
}
|
676
|
+
}
|
677
|
+
else if(strcmp("bc", name) == 0){
|
678
|
+
if (num_attributes >= 5) {
|
679
|
+
counting_cap = FALSE;
|
680
|
+
write_chars("<a href=\"");
|
681
|
+
if (absolute_url) {
|
682
|
+
write_nchars(absolute_url, absolute_url_size);
|
683
|
+
}
|
684
|
+
write_chars("/book/show/");
|
685
|
+
write_chars(id);
|
686
|
+
write_char('.');
|
687
|
+
write_urlitized_chars(title);
|
688
|
+
write_chars("\"><img src=\"");
|
689
|
+
write_chars(attributes[4]);
|
690
|
+
write_chars("\" title=\"");
|
691
|
+
write_escaped_chars(title);
|
692
|
+
if (num_attributes >= 4) {
|
693
|
+
write_chars(" by ");
|
694
|
+
write_escaped_chars(attributes[3]);
|
695
|
+
}
|
696
|
+
write_chars("\" alt=\"");
|
697
|
+
write_escaped_chars(title);
|
698
|
+
write_chars("\"/></a>");
|
699
|
+
counting_cap = TRUE;
|
700
|
+
}
|
701
|
+
else {
|
702
|
+
write_chars("[bookcover:");
|
703
|
+
write_escaped_chars(attributes[0]);
|
704
|
+
write_char(']');
|
705
|
+
}
|
706
|
+
}
|
707
|
+
else if(strcmp("a", name) == 0 || strcmp("author", name) == 0){
|
708
|
+
if (id) {
|
709
|
+
counting_cap = FALSE;
|
710
|
+
write_chars("<a href=\"");
|
711
|
+
if (absolute_url) {
|
712
|
+
write_nchars(absolute_url, absolute_url_size);
|
713
|
+
}
|
714
|
+
write_chars("/author/show/");
|
715
|
+
write_chars(id);
|
716
|
+
write_char('.');
|
717
|
+
write_urlitized_chars(title);
|
718
|
+
write_chars("\" title=\"");
|
719
|
+
write_escaped_chars(title);
|
720
|
+
write_chars("\">");
|
721
|
+
counting_cap = TRUE;
|
722
|
+
write_escaped_chars(attributes[0]);
|
723
|
+
counting_cap = FALSE;
|
724
|
+
write_chars("</a>");
|
725
|
+
counting_cap = TRUE;
|
726
|
+
}
|
727
|
+
else {
|
728
|
+
counting_cap = FALSE;
|
729
|
+
write_chars("<a href=\"");
|
730
|
+
if (absolute_url) {
|
731
|
+
write_nchars(absolute_url, absolute_url_size);
|
732
|
+
}
|
733
|
+
write_chars("/search/search?q=");
|
734
|
+
write_escaped_chars(attributes[0]);
|
735
|
+
write_chars("\" title=\"");
|
736
|
+
write_chars(title);
|
737
|
+
write_chars("\">");
|
738
|
+
counting_cap = TRUE;
|
739
|
+
write_escaped_chars(attributes[0]);
|
740
|
+
counting_cap = FALSE;
|
741
|
+
write_chars("</a>");
|
742
|
+
counting_cap = TRUE;
|
743
|
+
}
|
744
|
+
}
|
745
|
+
else if(strcmp("ai", name) == 0){
|
746
|
+
if (num_attributes >= 4) {
|
747
|
+
counting_cap = FALSE;
|
748
|
+
write_chars("<a href=\"");
|
749
|
+
if (absolute_url) {
|
750
|
+
write_nchars(absolute_url, absolute_url_size);
|
751
|
+
}
|
752
|
+
write_chars("/author/show/");
|
753
|
+
write_chars(id);
|
754
|
+
write_char('.');
|
755
|
+
write_urlitized_chars(title);
|
756
|
+
write_chars("\"><img src=\"");
|
757
|
+
write_chars(attributes[3]);
|
758
|
+
write_chars("\" title=\"");
|
759
|
+
write_escaped_chars(title);
|
760
|
+
write_chars("\" alt=\"");
|
761
|
+
write_escaped_chars(title);
|
762
|
+
write_chars("\"/></a>");
|
763
|
+
counting_cap = TRUE;
|
764
|
+
}
|
765
|
+
else {
|
766
|
+
write_chars("[authorimage:");
|
767
|
+
write_escaped_chars(attributes[0]);
|
768
|
+
write_char(']');
|
769
|
+
}
|
770
|
+
}
|
771
|
+
else {
|
772
|
+
return FALSE;
|
773
|
+
}
|
774
|
+
return TRUE;
|
775
|
+
}
|
776
|
+
|
777
|
+
static int html_read() {
|
778
|
+
ahead_position = position;
|
779
|
+
|
780
|
+
while(ahead_position < input_size) {
|
781
|
+
switch(input[ahead_position++]) {
|
782
|
+
case '<':
|
783
|
+
return FALSE;
|
784
|
+
case '>':
|
785
|
+
return html_parse(position, ahead_position - 1);
|
786
|
+
}
|
787
|
+
}
|
788
|
+
return FALSE;
|
789
|
+
}
|
790
|
+
|
791
|
+
static int gr_tag_read() {
|
792
|
+
ahead_position = position;
|
793
|
+
|
794
|
+
// reading name
|
795
|
+
while(ahead_position < input_size) {
|
796
|
+
switch(input[ahead_position++]) {
|
797
|
+
case '[':
|
798
|
+
return FALSE;
|
799
|
+
case ']':
|
800
|
+
return gr_tag_parse();
|
801
|
+
}
|
802
|
+
}
|
803
|
+
|
804
|
+
return FALSE;
|
805
|
+
}
|
806
|
+
|
807
|
+
static int url_read() {
|
808
|
+
char *url;
|
809
|
+
char *url_downcase;
|
810
|
+
char c;
|
811
|
+
int i;
|
812
|
+
int url_size;
|
813
|
+
int has_http = FALSE;
|
814
|
+
|
815
|
+
|
816
|
+
if(anchors_in_stack > 0) {
|
817
|
+
return FALSE;
|
818
|
+
}
|
819
|
+
|
820
|
+
ahead_position = position;
|
821
|
+
url_size = 0;
|
822
|
+
|
823
|
+
while(ahead_position < input_size) {
|
824
|
+
int done = FALSE;
|
825
|
+
switch(input[ahead_position++]) {
|
826
|
+
case '"':
|
827
|
+
case '<':
|
828
|
+
case '>':
|
829
|
+
return FALSE;
|
830
|
+
case ' ':
|
831
|
+
case '\n':
|
832
|
+
case '\t':
|
833
|
+
case '(':
|
834
|
+
case ')':
|
835
|
+
ahead_position--;
|
836
|
+
done = TRUE;
|
837
|
+
break;
|
838
|
+
}
|
839
|
+
if(done) {
|
840
|
+
break;
|
841
|
+
}
|
842
|
+
}
|
843
|
+
url_size = 1 + ahead_position - position;
|
844
|
+
|
845
|
+
if (url_size < 5) {
|
846
|
+
return FALSE;
|
847
|
+
}
|
848
|
+
|
849
|
+
url = ALLOCA_N(char, url_size + 1);
|
850
|
+
strncpy(url, input + position - 1, url_size);
|
851
|
+
url[url_size] = 0;
|
852
|
+
|
853
|
+
|
854
|
+
url_downcase = ALLOCA_N(char, url_size + 1);
|
855
|
+
strncpy(url_downcase, url, url_size);
|
856
|
+
downcasen(url_downcase, url_size);
|
857
|
+
|
858
|
+
|
859
|
+
if (strncmp(url_downcase, "http://", 7) == 0) {
|
860
|
+
has_http = TRUE;
|
861
|
+
}
|
862
|
+
else if (strncmp(url_downcase, "https://", 8) == 0) {
|
863
|
+
has_http = TRUE;
|
864
|
+
}
|
865
|
+
else {
|
866
|
+
//try and decide if the its a url without 'http' in front
|
867
|
+
int has_www = FALSE;
|
868
|
+
int last_dot = -1;
|
869
|
+
int done = TRUE;
|
870
|
+
int tld_size;
|
871
|
+
|
872
|
+
//does it start with www.?
|
873
|
+
if (strncmp(url_downcase, "www.", 4) == 0) {
|
874
|
+
has_www = TRUE;
|
875
|
+
i = 4;
|
876
|
+
last_dot = 3;
|
877
|
+
}
|
878
|
+
else {
|
879
|
+
i = 0;
|
880
|
+
}
|
881
|
+
|
882
|
+
// see if it starts with a properly formed domain name
|
883
|
+
for(; i < url_size; i++) {
|
884
|
+
c = url_downcase[i];
|
885
|
+
if (c == '.') {
|
886
|
+
//starting with a period is invalid
|
887
|
+
if(i == 0) {
|
888
|
+
return FALSE;
|
889
|
+
}
|
890
|
+
|
891
|
+
//two periods in a row is invalid!
|
892
|
+
if(last_dot + 1 == i){
|
893
|
+
return FALSE;
|
894
|
+
}
|
895
|
+
last_dot = i;
|
896
|
+
}
|
897
|
+
else if (c == '/') {
|
898
|
+
// a slash means we're no longer reading a domain name
|
899
|
+
break;
|
900
|
+
}
|
901
|
+
else if ((c >= 'a' && c <= 'z') || c == '-' ||
|
902
|
+
(c >= '0' && c <= '9')) {
|
903
|
+
// valid domain name characters
|
904
|
+
}
|
905
|
+
else {
|
906
|
+
// domains must be made up of those other characters
|
907
|
+
return FALSE;
|
908
|
+
}
|
909
|
+
}
|
910
|
+
|
911
|
+
if(last_dot == -1) { // no periods were found
|
912
|
+
return FALSE;
|
913
|
+
}
|
914
|
+
tld_size = (i - last_dot) - 1;
|
915
|
+
if (has_www) {
|
916
|
+
if (tld_size < 2){
|
917
|
+
return FALSE;
|
918
|
+
}
|
919
|
+
}
|
920
|
+
else {
|
921
|
+
char *tld;
|
922
|
+
if (tld_size != 3){
|
923
|
+
return FALSE;
|
924
|
+
}
|
925
|
+
tld = url + last_dot + 1;
|
926
|
+
if(strncmp(tld, "com", 3) != 0 &&
|
927
|
+
strncmp(tld, "net", 3) != 0 &&
|
928
|
+
strncmp(tld, "org", 3) != 0 &&
|
929
|
+
strncmp(tld, "gov", 3) != 0){
|
930
|
+
// not a tld we autogenerate for!
|
931
|
+
return FALSE;
|
932
|
+
}
|
933
|
+
}
|
934
|
+
|
935
|
+
}
|
936
|
+
|
937
|
+
//OK, now its probably ok to generate the url
|
938
|
+
counting_cap = FALSE;
|
939
|
+
write_chars("<a rel=\"nofollow\" target=\"_blank\" href=\"");
|
940
|
+
if (!has_http) {
|
941
|
+
write_chars("http://");
|
942
|
+
}
|
943
|
+
write_chars(url);
|
944
|
+
if (url_size > MAX_URL_PRINT_SIZE) {
|
945
|
+
write_chars("\" title=\"");
|
946
|
+
write_chars(url);
|
947
|
+
}
|
948
|
+
write_chars("\">");
|
949
|
+
counting_cap = TRUE;
|
950
|
+
for(i = 0; i < MAX_URL_PRINT_SIZE && i < url_size; i++) {
|
951
|
+
switch(c = url[i]) {
|
952
|
+
case '&':
|
953
|
+
write_chars("&");
|
954
|
+
break;
|
955
|
+
default:
|
956
|
+
write_char(c);
|
957
|
+
break;
|
958
|
+
}
|
959
|
+
}
|
960
|
+
if(i < url_size) {
|
961
|
+
write_chars("...");
|
962
|
+
}
|
963
|
+
counting_cap = FALSE;
|
964
|
+
write_chars("</a>");
|
965
|
+
counting_cap = TRUE;
|
966
|
+
return TRUE;
|
967
|
+
}
|
968
|
+
|
969
|
+
|
970
|
+
static int amp_read() {
|
971
|
+
int amp_escape_count = 0;
|
972
|
+
int poundsign = FALSE;
|
973
|
+
ahead_position = position;
|
974
|
+
|
975
|
+
// reading name
|
976
|
+
while(ahead_position < input_size) {
|
977
|
+
char c;
|
978
|
+
c = input[ahead_position++];
|
979
|
+
if (c == '#') {
|
980
|
+
if(amp_escape_count == 0) {
|
981
|
+
poundsign = TRUE;
|
982
|
+
}
|
983
|
+
else {
|
984
|
+
return FALSE;
|
985
|
+
}
|
986
|
+
}
|
987
|
+
else if(c >= '0' && c <= '9') {
|
988
|
+
}
|
989
|
+
else if((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
|
990
|
+
if (poundsign) {
|
991
|
+
return FALSE;
|
992
|
+
}
|
993
|
+
}
|
994
|
+
else if (c == ';') {
|
995
|
+
if (amp_escape_count >= 2){
|
996
|
+
write_nchars(input + position - 1, amp_escape_count + 2);
|
997
|
+
return TRUE;
|
998
|
+
}
|
999
|
+
else {
|
1000
|
+
return FALSE;
|
1001
|
+
}
|
1002
|
+
}
|
1003
|
+
else {
|
1004
|
+
return FALSE;
|
1005
|
+
}
|
1006
|
+
|
1007
|
+
if(amp_escape_count++ > 6) {
|
1008
|
+
return FALSE;
|
1009
|
+
}
|
1010
|
+
}
|
1011
|
+
|
1012
|
+
return FALSE;
|
1013
|
+
}
|
1014
|
+
|
1015
|
+
|
1016
|
+
// GrStringEscape#parse(string, cap, cap_string)
//
// Walks `r_string` byte by byte and builds an escaped copy in a scratch
// buffer, which is returned as a new Ruby string.
//
//   r_string     - input text (coerced with StringValue)
//   r_cap        - nil or 0 for no cap; otherwise parsing stops once cap_count
//                  reaches this value and cap_string is appended
//   r_cap_string - string appended when the cap is hit (must be a String)
//
// Per-character handling:
//   '&'   kept verbatim if amp_read() accepts it as a character reference,
//         otherwise emitted as "&amp;"
//   '>'   emitted as "&gt;"
//   '<'   handed to html_read(); emitted as "&lt;" if that declines
//   '['   handed to gr_tag_read(); emitted literally if that declines
//   '"'   emitted as "&quot;"
//   '\n'  emitted as "<br/>"
//   space, tab, '(' and ')' are copied and re-arm URL detection
//   anything else is offered to url_read() when URL detection is armed
//
// After the loop every entry still on tag_stack is closed with "</name>"
// and freed.
static VALUE t_parse(VALUE self, VALUE r_string, VALUE r_cap, VALUE r_cap_string) {
  char c;
  int try_url = TRUE;   // TRUE when the next ordinary character may start a URL
  int cap_at;
  char * cap_string;

  // reset the file-level parser state for this call
  tag_stack_size = 0;
  anchors_in_stack = 0;
  counting_cap = TRUE;
  cap_count = 0;
  writing_utf_8 = 0;
  position = ahead_position = 0;

  //de-ruby all the inputs!
  r_string = StringValue(r_string);
  input_size = RSTRING(r_string)->len;
  input = RSTRING(r_string)->ptr;

  if (NIL_P(r_cap)) {
    cap_at = 0;
  }
  else {
    cap_at = NUM2INT(r_cap);
  }

  cap_string = StringValuePtr(r_cap_string);

  // NOTE(review): the scratch buffer lives on the C stack and grows with the
  // input (10x + 128 bytes); very large inputs may exhaust the stack.
  max_output_size = input_size * 10 + 128;
  output = ALLOCA_N(char, max_output_size);
  output_size = 0;

  tag_stack = ALLOCA_N(char *, MAX_STACK_DEPTH);

  while(position < input_size) {

    if(cap_at && cap_count >= cap_at) {
      write_chars(cap_string);
      break;
    }
    switch(c = input[position++]) {
      case '&':
        // counting is switched off while the reference/escape is written and
        // the whole thing is then counted as a single character
        counting_cap = FALSE;
        if(amp_read()) {
          position = ahead_position;
        }
        else {
          write_chars("&amp;");
        }
        counting_cap = TRUE;
        try_url = FALSE;
        cap_count++;
        break;
      case '>':
        try_url = FALSE;
        write_chars("&gt;");
        break;
      case '<':
        counting_cap = FALSE;
        if(html_read()) {
          position = ahead_position;
          try_url = TRUE;
          counting_cap = TRUE;
        }
        else {
          counting_cap = TRUE;
          write_chars("&lt;");
          try_url = FALSE;
        }
        break;
      case '[':
        counting_cap = FALSE;
        if(gr_tag_read()) {
          position = ahead_position;
          try_url = TRUE;
          counting_cap = TRUE;
        }
        else {
          try_url = FALSE;
          counting_cap = TRUE;
          write_char('[');
        }
        break;
      case '"':
        try_url = FALSE;
        write_chars("&quot;");
        break;
      case '\n':
        write_chars("<br/>");
        try_url = TRUE;
        break;
      case ' ':
      case '\t':
      case '(':
      case ')':
        write_char(c);
        try_url = TRUE;
        break;
      default:
        if(try_url){
          if(url_read()) {
            position = ahead_position;
          }
          else {
            write_char(c);
          }
          // only the first character after a separator may start a URL
          try_url = FALSE;
        }
        else {
          write_char(c);
        }
        break;
    }
  }

  // close whatever is still open, innermost first
  while(tag_stack_size > 0) {
    char *item = tag_stack[--tag_stack_size];
    write_chars("</");
    write_chars(item);
    free(item);
    write_char('>');
  }
  return rb_str_new(output, output_size);
}
|
1138
|
+
|
1139
|
+
|
1140
|
+
// GrStringEscape#set_absolute_url(string)
//
// Stores a private heap copy of `r_string` in the file-level `absolute_url`
// and its byte length in `absolute_url_size`, releasing any previous copy.
// Returns the (StringValue-coerced) argument.
//
// The copy is made with memcpy so every byte of the Ruby string is kept, and
// one extra byte is allocated for a trailing NUL so the buffer is also safe
// to hand to C string functions. `absolute_url_size` does not count the NUL.
//
// NOTE(review): if the allocation fails the previously stored URL is left
// untouched and the call still returns `r_string`; consider raising instead.
static VALUE t_set_absolute_url(VALUE self, VALUE r_string) {
  long new_size;
  char *new_url;

  r_string = StringValue(r_string);
  new_size = RSTRING(r_string)->len;

  // allocate first, so a failure cannot leave the globals half-updated
  new_url = malloc(new_size + 1);
  if (new_url == NULL) {
    return r_string;
  }
  memcpy(new_url, RSTRING(r_string)->ptr, new_size);
  new_url[new_size] = '\0';

  free(absolute_url);   // free(NULL) is a no-op
  absolute_url = new_url;
  absolute_url_size = new_size;
  return r_string;
}
|
1153
|
+
|
1154
|
+
|
1155
|
+
VALUE cTest;
|
1156
|
+
|
1157
|
+
void Init_gr_string_escape() {
|
1158
|
+
cTest = rb_define_class("GrStringEscape", rb_cObject);
|
1159
|
+
rb_define_method(cTest, "parse", t_parse, 3);
|
1160
|
+
rb_define_method(cTest, "set_absolute_url", t_set_absolute_url, 1);
|
1161
|
+
id_push = rb_intern("push");
|
1162
|
+
}
|