hpricot 0.8.2 → 0.8.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,7 +10,6 @@ import org.jruby.RubyModule;
10
10
  import org.jruby.runtime.CallbackFactory;
11
11
  import org.jruby.runtime.builtin.IRubyObject;
12
12
  import org.jruby.runtime.load.BasicLibraryService;
13
- import org.jruby.util.collections.IntHashMap;
14
13
 
15
14
  public class FastXsService implements BasicLibraryService {
16
15
 
@@ -477,6 +476,100 @@ class Entities {
477
476
  int value(String name);
478
477
  }
479
478
 
479
+ // Very limited IntHashMap - it only supports get() and put()
480
+ private static class IntHashMap {
481
+ private transient Entry table[];
482
+
483
+ private transient int count;
484
+
485
+
486
+ private int threshold;
487
+
488
+ private final float loadFactor;
489
+
490
+ private static class Entry {
491
+ final int hash;
492
+ final int key;
493
+ Object value;
494
+ Entry next;
495
+
496
+ protected Entry(int hash, int key, Object value, Entry next) {
497
+ this.hash = hash;
498
+ this.key = key;
499
+ this.value = value;
500
+ this.next = next;
501
+ }
502
+ }
503
+
504
+ public IntHashMap() {
505
+ this.loadFactor = 0.75f;
506
+ table = new Entry[20];
507
+ threshold = (int) (20 * 0.75f);
508
+ }
509
+
510
+ public Object get(int key) {
511
+ Entry tab[] = table;
512
+ int hash = key;
513
+ int index = (hash & 0x7FFFFFFF) % tab.length;
514
+ for (Entry e = tab[index]; e != null; e = e.next) {
515
+ if (e.hash == hash) {
516
+ return e.value;
517
+ }
518
+ }
519
+ return null;
520
+ }
521
+
522
+ protected void rehash() {
523
+ int oldCapacity = table.length;
524
+ Entry oldMap[] = table;
525
+
526
+ int newCapacity = oldCapacity * 2 + 1;
527
+ Entry newMap[] = new Entry[newCapacity];
528
+
529
+ threshold = (int) (newCapacity * loadFactor);
530
+ table = newMap;
531
+
532
+ for (int i = oldCapacity; i-- > 0;) {
533
+ for (Entry old = oldMap[i]; old != null;) {
534
+ Entry e = old;
535
+ old = old.next;
536
+
537
+ int index = (e.hash & 0x7FFFFFFF) % newCapacity;
538
+ e.next = newMap[index];
539
+ newMap[index] = e;
540
+ }
541
+ }
542
+ }
543
+
544
+ public Object put(int key, Object value) {
545
+ // Makes sure the key is not already in the hashtable.
546
+ Entry tab[] = table;
547
+ int hash = key;
548
+ int index = (hash & 0x7FFFFFFF) % tab.length;
549
+ for (Entry e = tab[index]; e != null; e = e.next) {
550
+ if (e.hash == hash) {
551
+ Object old = e.value;
552
+ e.value = value;
553
+ return old;
554
+ }
555
+ }
556
+
557
+ if (count >= threshold) {
558
+ // Rehash the table if the threshold is exceeded
559
+ rehash();
560
+
561
+ tab = table;
562
+ index = (hash & 0x7FFFFFFF) % tab.length;
563
+ }
564
+
565
+ // Creates the new entry.
566
+ Entry e = new Entry(hash, key, value, tab[index]);
567
+ tab[index] = e;
568
+ count++;
569
+ return null;
570
+ }
571
+ }
572
+
480
573
  static class PrimitiveEntityMap implements EntityMap {
481
574
  private Map mapNameToValue = new HashMap();
482
575
 
@@ -1,8 +1,12 @@
1
- #define VERSION "0.1"
2
-
3
1
  #include <ruby.h>
4
2
  #include <assert.h>
5
- /* #include <stdio.h> */
3
+
4
+ #ifdef HAVE_RUBY_ENCODING_H
5
+ #include <ruby/encoding.h>
6
+ # define ASSOCIATE_INDEX(s,enc) rb_enc_associate_index((s), rb_enc_to_index(enc))
7
+ #else
8
+ # define ASSOCIATE_INDEX(s,enc)
9
+ #endif
6
10
 
7
11
  #ifndef RARRAY_LEN
8
12
  #define RARRAY_LEN(arr) RARRAY(arr)->len
@@ -72,11 +76,6 @@ static const int cp_1252[] = {
72
76
  n = cp_1252[n - 128]; \
73
77
  } while(0)
74
78
 
75
- #define return_const_len(x) do { \
76
- memcpy(buf, x, sizeof(x) - 1); \
77
- return (sizeof(x) - 1); \
78
- } while (0)
79
-
80
79
  static inline size_t bytes_for(int n)
81
80
  {
82
81
  if (n < 1000)
@@ -91,18 +90,24 @@ static inline size_t bytes_for(int n)
91
90
  return sizeof("&#9999999;") - 1;
92
91
  }
93
92
 
94
- static long escape(char *buf, int n)
93
+ static size_t escape(char *buf, int n)
95
94
  {
95
+
96
+ #define return_const_len(x) do { \
97
+ memcpy(buf, x, sizeof(x) - 1); \
98
+ return (sizeof(x) - 1); \
99
+ } while (0)
100
+
96
101
  /* handle ASCII first */
97
102
  if (likely(n < 128)) {
98
- if (likely(n >= 0x20 || n == 0x9 || n == 0xA || n == 0xD)) {
99
- if (unlikely(n == 34))
103
+ if (likely(n >= 0x20 || n == '\t' || n == '\n' || n == '\r')) {
104
+ if (unlikely(n == '"'))
100
105
  return_const_len("&quot;");
101
- if (unlikely(n == 38))
106
+ if (unlikely(n == '&'))
102
107
  return_const_len("&amp;");
103
- if (unlikely(n == 60))
108
+ if (unlikely(n == '<'))
104
109
  return_const_len("&lt;");
105
- if (unlikely(n == 62))
110
+ if (unlikely(n == '>'))
106
111
  return_const_len("&gt;");
107
112
  buf[0] = (char)n;
108
113
  return 1;
@@ -112,16 +117,18 @@ static long escape(char *buf, int n)
112
117
  return 1;
113
118
  }
114
119
 
120
+ #undef return_const_len
121
+
115
122
  CP_1252_ESCAPE(n);
116
123
 
117
124
  if (VALID_VALUE(n)) {
118
125
  /* return snprintf(buf, sizeof("&#1114111;"), "&#%i;", n); */
119
- RUBY_EXTERN const char ruby_digitmap[];
120
- int rv = 3; /* &#; */
126
+ static const char digitmap[] = "0123456789";
127
+ size_t rv = sizeof("&#;") - 1;
121
128
  buf += bytes_for(n);
122
129
  *--buf = ';';
123
130
  do {
124
- *--buf = ruby_digitmap[(int)(n % 10)];
131
+ *--buf = digitmap[(int)(n % 10)];
125
132
  ++rv;
126
133
  } while (n /= 10);
127
134
  *--buf = '#';
@@ -132,27 +139,6 @@ static long escape(char *buf, int n)
132
139
  return 1;
133
140
  }
134
141
 
135
- #undef return_const_len
136
-
137
- static long escaped_len(int n)
138
- {
139
- if (likely(n < 128)) {
140
- if (unlikely(n == 34))
141
- return (sizeof("&quot;") - 1);
142
- if (unlikely(n == 38))
143
- return (sizeof("&amp;") - 1);
144
- if (unlikely(n == 60 || n == 62))
145
- return (sizeof("&gt;") - 1);
146
- return 1;
147
- }
148
-
149
- CP_1252_ESCAPE(n);
150
-
151
- if (VALID_VALUE(n))
152
- return bytes_for(n);
153
- return 1;
154
- }
155
-
156
142
  static VALUE unpack_utf8(VALUE self)
157
143
  {
158
144
  return rb_funcall(self, unpack_id, 1, U_fmt);
@@ -163,28 +149,49 @@ static VALUE unpack_uchar(VALUE self)
163
149
  return rb_funcall(self, unpack_id, 1, C_fmt);
164
150
  }
165
151
 
166
- VALUE fast_xs(VALUE self)
152
+ /*
153
+ * escapes strings for XML
154
+ * The double-quote (") character is translated to "&quot;"
155
+ */
156
+ static VALUE fast_xs(VALUE self)
167
157
  {
168
158
  long i;
169
- struct RArray *array;
170
- char *s, *c;
171
- long s_len = 0;
159
+ VALUE array;
160
+ char *c;
161
+ size_t s_len;
172
162
  VALUE *tmp;
163
+ VALUE rv;
164
+
165
+ array = rb_rescue(unpack_utf8, self, unpack_uchar, self);
166
+
167
+ for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array);
168
+ --i >= 0;
169
+ tmp++) {
170
+ int n = NUM2INT(*tmp);
171
+ if (likely(n < 128)) {
172
+ if (unlikely(n == '"'))
173
+ s_len += (sizeof("&quot;") - 2);
174
+ if (unlikely(n == '&'))
175
+ s_len += (sizeof("&amp;") - 2);
176
+ if (unlikely(n == '>' || n == '<'))
177
+ s_len += (sizeof("&gt;") - 2);
178
+ continue;
179
+ }
173
180
 
174
- array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
181
+ CP_1252_ESCAPE(n);
175
182
 
176
- tmp = RARRAY_PTR(array);
177
- for (i = RARRAY_LEN(array); --i >= 0; tmp++)
178
- s_len += escaped_len(NUM2INT(*tmp));
183
+ if (VALID_VALUE(n))
184
+ s_len += bytes_for(n) - 1;
185
+ }
179
186
 
180
- c = s = alloca(s_len + 1);
187
+ rv = rb_str_new(NULL, s_len);
188
+ ASSOCIATE_INDEX(rv, rb_default_external_encoding());
189
+ c = RSTRING_PTR(rv);
181
190
 
182
- tmp = RARRAY_PTR(array);
183
- for (i = RARRAY_LEN(array); --i >= 0; tmp++)
191
+ for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++)
184
192
  c += escape(c, NUM2INT(*tmp));
185
193
 
186
- *c = '\0';
187
- return rb_str_new(s, s_len);
194
+ return rv;
188
195
  }
189
196
 
190
197
  void Init_fast_xs(void)
@@ -193,7 +200,9 @@ void Init_fast_xs(void)
193
200
 
194
201
  unpack_id = rb_intern("unpack");
195
202
  U_fmt = rb_str_new("U*", 2);
203
+ ASSOCIATE_INDEX(U_fmt, rb_ascii8bit_encoding());
196
204
  C_fmt = rb_str_new("C*", 2);
205
+ ASSOCIATE_INDEX(C_fmt, rb_ascii8bit_encoding());
197
206
  rb_global_variable(&U_fmt);
198
207
  rb_global_variable(&C_fmt);
199
208
 
@@ -1,3 +1,4 @@
1
+
1
2
  // line 1 "hpricot_css.java.rl"
2
3
  import java.io.IOException;
3
4
 
@@ -81,7 +82,7 @@ public class HpricotCss {
81
82
  }
82
83
 
83
84
 
84
- // line 85 "HpricotCss.java"
85
+ // line 86 "HpricotCss.java"
85
86
  private static byte[] init__hpricot_css_actions_0()
86
87
  {
87
88
  return new byte [] {
@@ -279,7 +280,7 @@ private static short[] init__hpricot_css_index_offsets_0()
279
280
  private static final short _hpricot_css_index_offsets[] = init__hpricot_css_index_offsets_0();
280
281
 
281
282
 
282
- private static byte[] init__hpricot_css_trans_targs_wi_0()
283
+ private static byte[] init__hpricot_css_trans_targs_0()
283
284
  {
284
285
  return new byte [] {
285
286
  1, 89, 1, 87, 90, 3, 90, 4, 90, 90, 90, 5,
@@ -353,14 +354,22 @@ private static byte[] init__hpricot_css_trans_targs_wi_0()
353
354
  93, 124, 9, 93, 93, 93, 10, 11, 12, 87, 93, 8,
354
355
  93, 9, 93, 93, 93, 10, 11, 12, 87, 93, 8, 93,
355
356
  126, 9, 93, 93, 93, 10, 11, 12, 87, 93, 8, 93,
356
- 9, 93, 93, 93, 10, 11, 12, 87, 0
357
+ 9, 93, 93, 93, 10, 11, 12, 87, 87, 87, 87, 87,
358
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
359
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
360
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
361
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
362
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
363
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
364
+ 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
365
+ 87, 87, 0
357
366
  };
358
367
  }
359
368
 
360
- private static final byte _hpricot_css_trans_targs_wi[] = init__hpricot_css_trans_targs_wi_0();
369
+ private static final byte _hpricot_css_trans_targs[] = init__hpricot_css_trans_targs_0();
361
370
 
362
371
 
363
- private static byte[] init__hpricot_css_trans_actions_wi_0()
372
+ private static byte[] init__hpricot_css_trans_actions_0()
364
373
  {
365
374
  return new byte [] {
366
375
  0, 0, 0, 33, 99, 1, 99, 1, 99, 99, 99, 1,
@@ -434,11 +443,19 @@ private static byte[] init__hpricot_css_trans_actions_wi_0()
434
443
  79, 79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0,
435
444
  79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0, 79,
436
445
  79, 0, 79, 79, 79, 0, 0, 0, 58, 79, 0, 79,
437
- 0, 79, 79, 79, 0, 0, 0, 58, 0
446
+ 0, 79, 79, 79, 0, 0, 0, 58, 33, 35, 35, 35,
447
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
448
+ 31, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
449
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 29, 29,
450
+ 27, 27, 27, 27, 27, 27, 27, 27, 25, 25, 25, 23,
451
+ 21, 52, 19, 58, 58, 55, 67, 67, 64, 67, 67, 67,
452
+ 67, 67, 67, 64, 67, 67, 67, 67, 67, 61, 67, 67,
453
+ 67, 67, 67, 67, 67, 67, 67, 17, 17, 58, 58, 58,
454
+ 58, 58, 0
438
455
  };
439
456
  }
440
457
 
441
- private static final byte _hpricot_css_trans_actions_wi[] = init__hpricot_css_trans_actions_wi_0();
458
+ private static final byte _hpricot_css_trans_actions[] = init__hpricot_css_trans_actions_0();
442
459
 
443
460
 
444
461
  private static byte[] init__hpricot_css_to_state_actions_0()
@@ -484,17 +501,17 @@ private static final byte _hpricot_css_from_state_actions[] = init__hpricot_css_
484
501
  private static short[] init__hpricot_css_eof_trans_0()
485
502
  {
486
503
  return new short [] {
487
- 0, 1, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11,
488
- 11, 0, 11, 11, 11, 11, 11, 0, 39, 11, 11, 11,
489
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
490
- 11, 11, 11, 11, 11, 11, 63, 63, 67, 67, 67, 67,
491
- 67, 67, 67, 67, 0, 0, 0, 0, 0, 0, 0, 105,
492
- 105, 0, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504
+ 0, 861, 0, 898, 898, 898, 898, 898, 898, 898, 898, 898,
505
+ 898, 0, 898, 898, 898, 898, 898, 0, 877, 898, 898, 898,
506
+ 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898, 898,
507
+ 898, 898, 898, 898, 898, 898, 900, 900, 908, 908, 908, 908,
508
+ 908, 908, 908, 908, 0, 0, 0, 0, 0, 0, 0, 911,
509
+ 911, 0, 911, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493
510
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494
- 0, 0, 0, 0, 150, 151, 152, 156, 157, 157, 163, 167,
495
- 167, 174, 167, 167, 167, 167, 167, 167, 174, 167, 167, 167,
496
- 167, 167, 193, 167, 167, 167, 167, 167, 167, 167, 167, 167,
497
- 201, 201, 157, 157, 157, 157, 157
511
+ 0, 0, 0, 0, 912, 913, 914, 915, 950, 950, 918, 943,
512
+ 943, 928, 943, 943, 943, 943, 943, 943, 928, 943, 943, 943,
513
+ 943, 943, 934, 943, 943, 943, 943, 943, 943, 943, 943, 943,
514
+ 945, 945, 950, 950, 950, 950, 950
498
515
  };
499
516
  }
500
517
 
@@ -506,21 +523,23 @@ static final int hpricot_css_error = 0;
506
523
 
507
524
  static final int hpricot_css_en_main = 87;
508
525
 
526
+
509
527
  // line 147 "hpricot_css.java.rl"
510
528
 
511
529
 
512
530
  public IRubyObject scan() {
513
531
 
514
- // line 515 "HpricotCss.java"
532
+ // line 533 "HpricotCss.java"
515
533
  {
516
534
  cs = hpricot_css_start;
517
535
  ts = -1;
518
536
  te = -1;
519
537
  act = 0;
520
538
  }
539
+
521
540
  // line 151 "hpricot_css.java.rl"
522
541
 
523
- // line 524 "HpricotCss.java"
542
+ // line 543 "HpricotCss.java"
524
543
  {
525
544
  int _klen;
526
545
  int _trans = 0;
@@ -546,10 +565,10 @@ case 1:
546
565
  while ( _nacts-- > 0 ) {
547
566
  switch ( _hpricot_css_actions[_acts++] ) {
548
567
  case 6:
549
- // line 1 "hpricot_css.java.rl"
568
+ // line 1 "NONE"
550
569
  {ts = p;}
551
570
  break;
552
- // line 553 "HpricotCss.java"
571
+ // line 572 "HpricotCss.java"
553
572
  }
554
573
  }
555
574
 
@@ -603,10 +622,10 @@ case 1:
603
622
  } while (false);
604
623
 
605
624
  case 3:
606
- cs = _hpricot_css_trans_targs_wi[_trans];
625
+ cs = _hpricot_css_trans_targs[_trans];
607
626
 
608
- if ( _hpricot_css_trans_actions_wi[_trans] != 0 ) {
609
- _acts = _hpricot_css_trans_actions_wi[_trans];
627
+ if ( _hpricot_css_trans_actions[_trans] != 0 ) {
628
+ _acts = _hpricot_css_trans_actions[_trans];
610
629
  _nacts = (int) _hpricot_css_actions[_acts++];
611
630
  while ( _nacts-- > 0 )
612
631
  {
@@ -641,7 +660,7 @@ case 3:
641
660
  }
642
661
  break;
643
662
  case 7:
644
- // line 1 "hpricot_css.java.rl"
663
+ // line 1 "NONE"
645
664
  {te = p+1;}
646
665
  break;
647
666
  case 8:
@@ -753,7 +772,7 @@ case 3:
753
772
  {{p = ((te))-1;}}
754
773
  break;
755
774
  case 35:
756
- // line 1 "hpricot_css.java.rl"
775
+ // line 1 "NONE"
757
776
  { switch( act ) {
758
777
  case 0:
759
778
  {{cs = 0; _goto_targ = 2; if (true) continue _goto;}}
@@ -776,11 +795,10 @@ case 3:
776
795
  case 9:
777
796
  {{p = ((te))-1;} FILTER("PSUEDO"); }
778
797
  break;
779
- default: break;
780
798
  }
781
799
  }
782
800
  break;
783
- // line 784 "HpricotCss.java"
801
+ // line 802 "HpricotCss.java"
784
802
  }
785
803
  }
786
804
  }
@@ -791,14 +809,14 @@ case 2:
791
809
  while ( _nacts-- > 0 ) {
792
810
  switch ( _hpricot_css_actions[_acts++] ) {
793
811
  case 4:
794
- // line 1 "hpricot_css.java.rl"
812
+ // line 1 "NONE"
795
813
  {ts = -1;}
796
814
  break;
797
815
  case 5:
798
- // line 1 "hpricot_css.java.rl"
816
+ // line 1 "NONE"
799
817
  {act = 0;}
800
818
  break;
801
- // line 802 "HpricotCss.java"
819
+ // line 820 "HpricotCss.java"
802
820
  }
803
821
  }
804
822
 
@@ -824,6 +842,7 @@ case 5:
824
842
  }
825
843
  break; }
826
844
  }
845
+
827
846
  // line 152 "hpricot_css.java.rl"
828
847
 
829
848
  return focus;