adamh-html_namespacing 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/ext/html_namespacing/html_namespacing.c +136 -55
- data/html_namespacing.gemspec +2 -2
- data/test/c_extension_test.rb +1 -0
- metadata +2 -2
data/Rakefile
CHANGED
[hunk not captured in this extract]
data/ext/html_namespacing/html_namespacing.c
CHANGED
@@ -7,6 +7,8 @@
|
|
7
7
|
|
8
8
|
#include "html_namespacing.h"
|
9
9
|
|
10
|
+
#define WHITE_SPACE " \t\r\n"
|
11
|
+
|
10
12
|
static const char* const IGNORE_TAGS[] = {
|
11
13
|
"html",
|
12
14
|
"head",
|
@@ -59,6 +61,19 @@ utf8_char_bytes(const char *utf8)
|
|
59
61
|
return -1;
|
60
62
|
}
|
61
63
|
|
64
|
+
/*
|
65
|
+
* Copies num_bytes bytes from *src_p to *dest_p, doing no bounds checking.
|
66
|
+
*
|
67
|
+
* Advances *dest_p and *src_p by num_bytes.
|
68
|
+
*/
|
69
|
+
static void
|
70
|
+
copy_n_bytes_and_advance(char **dest_p, const char **src_p, size_t num_bytes)
|
71
|
+
{
|
72
|
+
memcpy(*dest_p, *src_p, num_bytes);
|
73
|
+
*dest_p += num_bytes;
|
74
|
+
*src_p += num_bytes;
|
75
|
+
}
|
76
|
+
|
62
77
|
/*
|
63
78
|
* Ensures *r is long enough to hold len chars (using realloc).
|
64
79
|
*
|
@@ -136,8 +151,8 @@ append_next_utf8_char(
|
|
136
151
|
int rv;
|
137
152
|
|
138
153
|
c_num_utf8_bytes = utf8_char_bytes(*src_p);
|
139
|
-
dest_p_offset = *dest_p - *dest;
|
140
154
|
new_dest = *dest;
|
155
|
+
dest_p_offset = *dest_p - *dest;
|
141
156
|
new_dest_len = dest_p_offset + c_num_utf8_bytes;
|
142
157
|
|
143
158
|
rv = ensure_string_length(
|
@@ -150,13 +165,26 @@ append_next_utf8_char(
|
|
150
165
|
*dest_p = new_dest + dest_p_offset;
|
151
166
|
}
|
152
167
|
|
153
|
-
|
154
|
-
*dest_p += c_num_utf8_bytes;
|
155
|
-
*src_p += c_num_utf8_bytes;
|
168
|
+
copy_n_bytes_and_advance(dest_p, src_p, c_num_utf8_bytes);
|
156
169
|
|
157
170
|
return 0;
|
158
171
|
}
|
159
172
|
|
173
|
+
/*
|
174
|
+
* Tries to copy s into dest, possibly reallocating.
|
175
|
+
*
|
176
|
+
* Arguments:
|
177
|
+
* - dest: Beginning of destination string. May be reallocated during copy.
|
178
|
+
* - dest_len: Amount of memory allocated to dest. May be increased during
|
179
|
+
* copy.
|
180
|
+
* - dest_p: Pointer to end of destination string (potentially 1 past the
|
181
|
+
* allocated length of dest).
|
182
|
+
* - s: Source string.
|
183
|
+
*
|
184
|
+
* Returns:
|
185
|
+
* - 0 on success. dest may be changed; dest_p will be incremented.
|
186
|
+
* - ENOMEM if reallocation failed. dest and dest_p will remain unchanged.
|
187
|
+
*/
|
160
188
|
static int
|
161
189
|
append_string(
|
162
190
|
char **dest,
|
@@ -165,16 +193,68 @@ append_string(
|
|
165
193
|
const char *s,
|
166
194
|
HtmlNamespacingAllocationStrategy allocation_strategy)
|
167
195
|
{
|
168
|
-
int
|
196
|
+
int len;
|
197
|
+
int rv;
|
198
|
+
size_t dest_p_offset;
|
199
|
+
size_t new_dest_len;
|
200
|
+
char *new_dest;
|
201
|
+
|
202
|
+
len = strlen(s);
|
203
|
+
new_dest = *dest;
|
204
|
+
dest_p_offset = *dest_p - *dest;
|
205
|
+
new_dest_len = dest_p_offset + len;
|
169
206
|
|
170
|
-
|
171
|
-
|
172
|
-
|
207
|
+
rv = ensure_string_length(
|
208
|
+
&new_dest, dest_len, new_dest_len, allocation_strategy);
|
209
|
+
if (rv == ENOMEM) {
|
210
|
+
return ENOMEM;
|
211
|
+
}
|
212
|
+
if (new_dest != *dest) {
|
213
|
+
*dest = new_dest;
|
214
|
+
*dest_p = new_dest + dest_p_offset;
|
173
215
|
}
|
174
216
|
|
175
|
-
|
217
|
+
strncpy(*dest_p, s, len);
|
218
|
+
*dest_p += len;
|
219
|
+
|
220
|
+
return 0;
|
176
221
|
};
|
177
222
|
|
223
|
+
static int
|
224
|
+
append_next_chars_until(
|
225
|
+
char **dest,
|
226
|
+
size_t *dest_len,
|
227
|
+
char **dest_p,
|
228
|
+
const char **src_p,
|
229
|
+
const char *until_chars,
|
230
|
+
HtmlNamespacingAllocationStrategy allocation_strategy)
|
231
|
+
{
|
232
|
+
size_t num_bytes;
|
233
|
+
size_t dest_p_offset;
|
234
|
+
size_t new_dest_len;
|
235
|
+
char *new_dest;
|
236
|
+
int rv;
|
237
|
+
|
238
|
+
num_bytes = strcspn(*src_p, until_chars);
|
239
|
+
new_dest = *dest;
|
240
|
+
dest_p_offset = *dest_p - *dest;
|
241
|
+
new_dest_len = dest_p_offset + num_bytes;
|
242
|
+
|
243
|
+
rv = ensure_string_length(
|
244
|
+
&new_dest, dest_len, new_dest_len, allocation_strategy);
|
245
|
+
if (rv == ENOMEM) {
|
246
|
+
return ENOMEM;
|
247
|
+
}
|
248
|
+
if (new_dest != *dest) {
|
249
|
+
*dest = new_dest;
|
250
|
+
*dest_p = new_dest + dest_p_offset;
|
251
|
+
}
|
252
|
+
|
253
|
+
copy_n_bytes_and_advance(dest_p, src_p, num_bytes);
|
254
|
+
|
255
|
+
return 0;
|
256
|
+
}
|
257
|
+
|
178
258
|
static int
|
179
259
|
append_end_of_string(
|
180
260
|
char **dest,
|
@@ -224,7 +304,7 @@ should_ignore_tag(const char *tag_name, size_t tag_len)
|
|
224
304
|
int i = 0;
|
225
305
|
const char *test_ignore;
|
226
306
|
|
227
|
-
for (i = 0; test_ignore = IGNORE_TAGS[i]; i++) {
|
307
|
+
for (i = 0; (test_ignore = IGNORE_TAGS[i]); i++) {
|
228
308
|
if (0 == strncmp(test_ignore, tag_name, tag_len)
|
229
309
|
&& strlen(test_ignore) == tag_len)
|
230
310
|
{
|
@@ -272,20 +352,21 @@ add_namespace_to_html_with_length_and_allocation_strategy(
|
|
272
352
|
{
|
273
353
|
|
274
354
|
#define APPEND_NEXT_CHAR() \
|
275
|
-
if (append_next_utf8_char(&r, &r_len, &r_p, &html, allocation_strategy) != 0) goto error/*;*/
|
355
|
+
if (*html && append_next_utf8_char(&r, &r_len, &r_p, &html, allocation_strategy) != 0) goto error/*;*/
|
276
356
|
#define APPEND_STRING(s) \
|
277
357
|
if (append_string(&r, &r_len, &r_p, s, allocation_strategy) != 0) goto error/*;*/
|
278
358
|
#define APPEND_END_OF_STRING() \
|
279
359
|
if (append_end_of_string(&r, &r_len, &r_p, allocation_strategy) != 0) goto error/*;*/
|
360
|
+
#define APPEND_NEXT_CHARS_UNTIL(chars) \
|
361
|
+
if (append_next_chars_until(&r, &r_len, &r_p, &html, chars, allocation_strategy) != 0) goto error/*;*/
|
280
362
|
|
281
363
|
unsigned int state;
|
282
364
|
char *r; /* Start of retval */
|
283
365
|
char *r_p; /* Pointer in retval */
|
284
366
|
size_t r_len; /* Length of retval */
|
285
367
|
const char *html_start;
|
286
|
-
const char *open_tag_name;
|
287
|
-
size_t open_tag_name_len;
|
288
|
-
char c;
|
368
|
+
const char *open_tag_name = NULL;
|
369
|
+
size_t open_tag_name_len = 0;
|
289
370
|
size_t num_chars_remaining;
|
290
371
|
int num_tags_open;
|
291
372
|
int open_tag_attribute_is_class_attribute;
|
@@ -310,10 +391,9 @@ add_namespace_to_html_with_length_and_allocation_strategy(
|
|
310
391
|
num_chars_remaining = html_len - (html - html_start);
|
311
392
|
if (num_chars_remaining <= 0) break;
|
312
393
|
|
313
|
-
c = *html;
|
314
394
|
switch (state) {
|
315
395
|
case PARSE_NORMAL:
|
316
|
-
if (
|
396
|
+
if (*html == '<') {
|
317
397
|
APPEND_NEXT_CHAR();
|
318
398
|
if (num_chars_remaining >= 9
|
319
399
|
&& 0 == strncmp("![CDATA[", html, 8)) {
|
@@ -334,20 +414,16 @@ add_namespace_to_html_with_length_and_allocation_strategy(
|
|
334
414
|
state = PARSE_OPEN_TAG_NAME;
|
335
415
|
}
|
336
416
|
} else {
|
337
|
-
|
417
|
+
APPEND_NEXT_CHARS_UNTIL("<");
|
338
418
|
}
|
339
419
|
break;
|
340
420
|
case PARSE_OPEN_TAG_NAME:
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
}
|
345
|
-
if (c != '>' && c != '/') {
|
346
|
-
APPEND_NEXT_CHAR();
|
347
|
-
}
|
421
|
+
APPEND_NEXT_CHARS_UNTIL(WHITE_SPACE ">/");
|
422
|
+
open_tag_name_len = html - open_tag_name;
|
423
|
+
state = PARSE_OPEN_TAG;
|
348
424
|
break;
|
349
425
|
case PARSE_OPEN_TAG:
|
350
|
-
if (
|
426
|
+
if (*html == '/' || *html == '>') {
|
351
427
|
if (num_tags_open == 0 && !open_tag_had_class_attribute
|
352
428
|
&& !should_ignore_tag(open_tag_name, open_tag_name_len)) {
|
353
429
|
APPEND_STRING(" class=\"");
|
@@ -358,14 +434,15 @@ add_namespace_to_html_with_length_and_allocation_strategy(
|
|
358
434
|
open_tag_had_class_attribute = 0;
|
359
435
|
open_tag_attribute_value = NULL;
|
360
436
|
|
361
|
-
if (
|
437
|
+
if (*html == '/') {
|
362
438
|
APPEND_STRING(" ");
|
363
439
|
state = PARSE_EMPTY_TAG;
|
364
440
|
} else {
|
365
441
|
num_tags_open++;
|
366
442
|
state = PARSE_NORMAL;
|
367
443
|
}
|
368
|
-
|
444
|
+
APPEND_NEXT_CHAR();
|
445
|
+
} else if (!char_is_whitespace(*html)) {
|
369
446
|
if (num_chars_remaining >= 5
|
370
447
|
&& 0 == strncmp(html, "class", 5)) {
|
371
448
|
open_tag_attribute_is_class_attribute = 1;
|
@@ -374,68 +451,72 @@ add_namespace_to_html_with_length_and_allocation_strategy(
|
|
374
451
|
open_tag_attribute_is_class_attribute = 0;
|
375
452
|
}
|
376
453
|
state = PARSE_OPEN_TAG_ATTRIBUTE_NAME;
|
454
|
+
} else {
|
455
|
+
APPEND_NEXT_CHAR();
|
377
456
|
}
|
378
|
-
APPEND_NEXT_CHAR();
|
379
457
|
break;
|
380
458
|
case PARSE_OPEN_TAG_ATTRIBUTE_NAME:
|
381
|
-
|
382
|
-
|
383
|
-
}
|
459
|
+
APPEND_NEXT_CHARS_UNTIL("=");
|
460
|
+
state = PARSE_OPEN_TAG_ATTRIBUTE_EQUALS;
|
384
461
|
APPEND_NEXT_CHAR();
|
385
462
|
break;
|
386
463
|
case PARSE_OPEN_TAG_ATTRIBUTE_EQUALS:
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
}
|
464
|
+
APPEND_NEXT_CHARS_UNTIL("'\"");
|
465
|
+
open_tag_attribute_value = html;
|
466
|
+
state = PARSE_OPEN_TAG_ATTRIBUTE_VALUE;
|
391
467
|
APPEND_NEXT_CHAR();
|
392
468
|
break;
|
393
469
|
case PARSE_OPEN_TAG_ATTRIBUTE_VALUE:
|
470
|
+
APPEND_NEXT_CHARS_UNTIL("'\"");
|
394
471
|
/* open_tag_attribute_value is either ' or " */
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
APPEND_STRING(" ");
|
399
|
-
APPEND_STRING(ns);
|
400
|
-
}
|
401
|
-
open_tag_attribute_is_class_attribute = 0;
|
402
|
-
state = PARSE_OPEN_TAG;
|
472
|
+
while (*html != *open_tag_attribute_value) {
|
473
|
+
APPEND_NEXT_CHAR();
|
474
|
+
APPEND_NEXT_CHARS_UNTIL("'\"");
|
403
475
|
}
|
476
|
+
if (open_tag_attribute_is_class_attribute
|
477
|
+
&& num_tags_open == 0) {
|
478
|
+
APPEND_STRING(" ");
|
479
|
+
APPEND_STRING(ns);
|
480
|
+
}
|
481
|
+
open_tag_attribute_is_class_attribute = 0;
|
482
|
+
state = PARSE_OPEN_TAG;
|
404
483
|
APPEND_NEXT_CHAR();
|
405
484
|
break;
|
406
485
|
case PARSE_CLOSE_TAG:
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
}
|
486
|
+
APPEND_NEXT_CHARS_UNTIL(">");
|
487
|
+
num_tags_open--;
|
488
|
+
open_tag_attribute_value = NULL;
|
489
|
+
state = PARSE_NORMAL;
|
412
490
|
APPEND_NEXT_CHAR();
|
413
491
|
break;
|
414
492
|
case PARSE_EMPTY_TAG:
|
415
493
|
case PARSE_XML_DECL:
|
416
494
|
case PARSE_DOCTYPE:
|
417
|
-
|
418
|
-
|
419
|
-
}
|
495
|
+
APPEND_NEXT_CHARS_UNTIL(">");
|
496
|
+
state = PARSE_NORMAL;
|
420
497
|
APPEND_NEXT_CHAR();
|
421
498
|
break;
|
422
499
|
case PARSE_COMMENT:
|
423
|
-
APPEND_NEXT_CHAR();
|
424
|
-
|
500
|
+
APPEND_NEXT_CHAR(); /* at least one */
|
501
|
+
APPEND_NEXT_CHARS_UNTIL("-");
|
502
|
+
if (*html == '-' && num_chars_remaining >= 3
|
425
503
|
&& 0 == strncmp("->", html, 2)) {
|
426
504
|
APPEND_NEXT_CHAR();
|
427
505
|
APPEND_NEXT_CHAR();
|
428
506
|
state = PARSE_NORMAL;
|
429
507
|
}
|
508
|
+
/* else loop... */
|
430
509
|
break;
|
431
510
|
case PARSE_CDATA:
|
432
|
-
APPEND_NEXT_CHAR();
|
433
|
-
|
511
|
+
APPEND_NEXT_CHAR(); /* at least one */
|
512
|
+
APPEND_NEXT_CHARS_UNTIL("]");
|
513
|
+
if (*html == ']' && num_chars_remaining >= 3
|
434
514
|
&& 0 == strncmp("]>", html, 2)) {
|
435
515
|
APPEND_NEXT_CHAR();
|
436
516
|
APPEND_NEXT_CHAR();
|
437
517
|
state = PARSE_NORMAL;
|
438
518
|
}
|
519
|
+
/* else loop... */
|
439
520
|
break;
|
440
521
|
default:
|
441
522
|
assert(0);
|
data/html_namespacing.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{html_namespacing}
|
5
|
-
s.version = "0.0.3"
|
5
|
+
s.version = "0.0.4"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Adam Hooper"]
|
9
|
-
s.date = %q{2009-
|
9
|
+
s.date = %q{2009-07-07}
|
10
10
|
s.description = %q{Inserts "class=" attributes within snippets of HTML so CSS and JavaScript can use automatic scopes}
|
11
11
|
s.email = %q{adam@adamhooper.com}
|
12
12
|
s.extensions = ["ext/html_namespacing/extconf.rb"]
|
data/test/c_extension_test.rb
CHANGED
@@ -33,6 +33,7 @@ class CExtensionTest < Test::Unit::TestCase
|
|
33
33
|
self.define_test('works with utf-8', '<div class="𝞪">𝟂</div>', '𝞺', '<div class="𝞪 𝞺">𝟂</div>')
|
34
34
|
self.define_test('empty tag with existing class=', '<span class="foo"/>', 'bar', '<span class="foo bar" />')
|
35
35
|
self.define_test('works with newlines in tag', "<div\n\nclass\n\n=\n\n'foo'\n\n>bar</div>", 'baz', "<div\n\nclass\n\n=\n\n'foo baz'\n\n>bar</div>")
|
36
|
+
self.define_test('works with "\'" within \'"\' attributes', '<div title="Adam\'s House" class="foo">bar</div>', 'baz', '<div title="Adam\'s House" class="foo baz">bar</div>')
|
36
37
|
self.define_test('ignores XML prolog', '<?xml version="1.0"?><div>foo</div>', 'X', '<?xml version="1.0"?><div class="X">foo</div>')
|
37
38
|
self.define_test('ignores DOCTYPE', '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><div>foo</div>', 'X', '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><div class="X">foo</div>')
|
38
39
|
self.define_test('ignores CDATA', '<![CDATA[ignore <div class="foo">]] </div>]]><div>foo</div>', 'X', '<![CDATA[ignore <div class="foo">]] </div>]]><div class="X">foo</div>')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adamh-html_namespacing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Hooper
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-07-07 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|