ox-bundlecachetest 2.14.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +751 -0
  3. data/LICENSE +21 -0
  4. data/README.md +351 -0
  5. data/ext/ox/attr.h +78 -0
  6. data/ext/ox/base64.c +105 -0
  7. data/ext/ox/base64.h +18 -0
  8. data/ext/ox/buf.h +162 -0
  9. data/ext/ox/builder.c +948 -0
  10. data/ext/ox/cache.c +351 -0
  11. data/ext/ox/cache.h +21 -0
  12. data/ext/ox/cache8.c +106 -0
  13. data/ext/ox/cache8.h +23 -0
  14. data/ext/ox/dump.c +1260 -0
  15. data/ext/ox/err.c +46 -0
  16. data/ext/ox/err.h +36 -0
  17. data/ext/ox/extconf.rb +47 -0
  18. data/ext/ox/gen_load.c +342 -0
  19. data/ext/ox/hash_load.c +309 -0
  20. data/ext/ox/helper.h +84 -0
  21. data/ext/ox/intern.c +157 -0
  22. data/ext/ox/intern.h +25 -0
  23. data/ext/ox/obj_load.c +809 -0
  24. data/ext/ox/ox.c +1649 -0
  25. data/ext/ox/ox.h +245 -0
  26. data/ext/ox/parse.c +1197 -0
  27. data/ext/ox/sax.c +1570 -0
  28. data/ext/ox/sax.h +69 -0
  29. data/ext/ox/sax_as.c +270 -0
  30. data/ext/ox/sax_buf.c +209 -0
  31. data/ext/ox/sax_buf.h +204 -0
  32. data/ext/ox/sax_hint.c +207 -0
  33. data/ext/ox/sax_hint.h +40 -0
  34. data/ext/ox/sax_stack.h +113 -0
  35. data/ext/ox/slotcache.c +158 -0
  36. data/ext/ox/slotcache.h +19 -0
  37. data/ext/ox/special.c +390 -0
  38. data/ext/ox/special.h +14 -0
  39. data/ext/ox/type.h +39 -0
  40. data/lib/ox/bag.rb +103 -0
  41. data/lib/ox/cdata.rb +10 -0
  42. data/lib/ox/comment.rb +11 -0
  43. data/lib/ox/doctype.rb +11 -0
  44. data/lib/ox/document.rb +28 -0
  45. data/lib/ox/element.rb +464 -0
  46. data/lib/ox/error.rb +25 -0
  47. data/lib/ox/hasattrs.rb +54 -0
  48. data/lib/ox/instruct.rb +34 -0
  49. data/lib/ox/node.rb +23 -0
  50. data/lib/ox/raw.rb +12 -0
  51. data/lib/ox/sax.rb +97 -0
  52. data/lib/ox/version.rb +4 -0
  53. data/lib/ox/xmlrpc_adapter.rb +33 -0
  54. data/lib/ox.rb +79 -0
  55. metadata +128 -0
@@ -0,0 +1,158 @@
1
+ /* slotcache.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ */
5
+
6
+ #include "slotcache.h"
7
+
8
+ #include <errno.h>
9
+ #include <stdarg.h>
10
+ #include <stdint.h>
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+ #include <strings.h>
15
+
16
+ struct _slotCache {
17
+ /* The key is a length byte followed by the key as a string. If the key is longer than 254 characters then the
18
+ length is 255. The key can be for a premature value and in that case the length byte is greater than the length
19
+ of the key. */
20
+ char *key;
21
+ VALUE value;
22
+ struct _slotCache *slots[16];
23
+ };
24
+
25
+ static void slot_print(SlotCache cache, unsigned int depth);
26
+
27
+ static char *form_key(const char *s) {
28
+ size_t len = strlen(s);
29
+ char *d = ALLOC_N(char, len + 2);
30
+
31
+ *(uint8_t *)d = (255 <= len) ? 255 : len;
32
+ memcpy(d + 1, s, len + 1);
33
+
34
+ return d;
35
+ }
36
+
37
+ void slot_cache_new(SlotCache *cache) {
38
+ *cache = ALLOC(struct _slotCache);
39
+ (*cache)->key = 0;
40
+ (*cache)->value = Qundef;
41
+ memset((*cache)->slots, 0, sizeof((*cache)->slots));
42
+ }
43
+
44
+ VALUE
45
+ slot_cache_get(SlotCache cache, const char *key, VALUE **slot, const char **keyp) {
46
+ unsigned char *k = (unsigned char *)key;
47
+ SlotCache *cp;
48
+
49
+ for (; '\0' != *k; k++) {
50
+ cp = cache->slots + (unsigned int)(*k >> 4); /* upper 4 bits */
51
+ if (0 == *cp) {
52
+ slot_cache_new(cp);
53
+ }
54
+ cache = *cp;
55
+ cp = cache->slots + (unsigned int)(*k & 0x0F); /* lower 4 bits */
56
+ if (0 == *cp) { /* nothing on this tree so set key and value as a premature key/value pair */
57
+ slot_cache_new(cp);
58
+ cache = *cp;
59
+ cache->key = form_key(key);
60
+ break;
61
+ } else {
62
+ int depth = (int)(k - (unsigned char *)key + 1);
63
+
64
+ cache = *cp;
65
+
66
+ if ('\0' == *(k + 1)) { /* exact match */
67
+ if (0 == cache->key) { /* nothing in this spot so take it */
68
+ cache->key = form_key(key);
69
+ break;
70
+ } else if ((depth == *cache->key || 255 < depth) && 0 == strcmp(key, cache->key + 1)) { /* match */
71
+ break;
72
+ } else { /* have to move the current premature key/value deeper */
73
+ unsigned char *ck = (unsigned char *)(cache->key + depth + 1);
74
+ SlotCache orig = *cp;
75
+
76
+ cp = (*cp)->slots + (*ck >> 4);
77
+ slot_cache_new(cp);
78
+ cp = (*cp)->slots + (*ck & 0x0F);
79
+ slot_cache_new(cp);
80
+ (*cp)->key = cache->key;
81
+ (*cp)->value = cache->value;
82
+ orig->key = form_key(key);
83
+ orig->value = Qundef;
84
+ }
85
+ } else { /* not exact match but on the path */
86
+ if (0 != cache->key) { /* there is a key/value here already */
87
+ if (depth == *cache->key || (255 <= depth && 0 == strncmp(cache->key, key, depth) &&
88
+ '\0' == cache->key[depth])) { /* key belongs here */
89
+ continue;
90
+ } else {
91
+ unsigned char *ck = (unsigned char *)(cache->key + depth + 1);
92
+ SlotCache orig = *cp;
93
+
94
+ cp = (*cp)->slots + (*ck >> 4);
95
+ slot_cache_new(cp);
96
+ cp = (*cp)->slots + (*ck & 0x0F);
97
+ slot_cache_new(cp);
98
+ (*cp)->key = cache->key;
99
+ (*cp)->value = cache->value;
100
+ orig->key = 0;
101
+ orig->value = Qundef;
102
+ }
103
+ }
104
+ }
105
+ }
106
+ }
107
+ *slot = &cache->value;
108
+ if (0 != keyp) {
109
+ if (0 == cache->key) {
110
+ printf("*** Error: failed to set the key for '%s'\n", key);
111
+ *keyp = 0;
112
+ } else {
113
+ *keyp = cache->key + 1;
114
+ }
115
+ }
116
+ return cache->value;
117
+ }
118
+
119
+ void slot_cache_print(SlotCache cache) {
120
+ /*printf("-------------------------------------------\n");*/
121
+ slot_print(cache, 0);
122
+ }
123
+
124
+ static void slot_print(SlotCache c, unsigned int depth) {
125
+ char indent[256];
126
+ SlotCache *cp;
127
+ unsigned int i;
128
+
129
+ if (sizeof(indent) - 1 < depth) {
130
+ depth = ((int)sizeof(indent) - 1);
131
+ }
132
+ memset(indent, ' ', depth);
133
+ indent[depth] = '\0';
134
+ for (i = 0, cp = c->slots; i < 16; i++, cp++) {
135
+ if (0 == *cp) {
136
+ /*printf("%s%02u:\n", indent, i);*/
137
+ } else {
138
+ if (0 == (*cp)->key && Qundef == (*cp)->value) {
139
+ printf("%s%02u:\n", indent, i);
140
+ } else {
141
+ const char *vs;
142
+ const char *clas;
143
+
144
+ if (Qundef == (*cp)->value) {
145
+ vs = "undefined";
146
+ clas = "";
147
+ } else {
148
+ VALUE rs = rb_String((*cp)->value);
149
+
150
+ vs = StringValuePtr(rs);
151
+ clas = rb_class2name(rb_obj_class((*cp)->value));
152
+ }
153
+ printf("%s%02u: %s = %s (%s)\n", indent, i, (*cp)->key, vs, clas);
154
+ }
155
+ slot_print(*cp, depth + 2);
156
+ }
157
+ }
158
+ }
@@ -0,0 +1,19 @@
1
+ /* slotcache.h
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ */
5
+
6
+ #ifndef SLOT_CACHE_H
7
+ #define SLOT_CACHE_H
8
+
9
+ #include "ruby.h"
10
+
11
+ typedef struct _slotCache *SlotCache;
12
+
13
+ extern void slot_cache_new(SlotCache *cache);
14
+
15
+ extern VALUE slot_cache_get(SlotCache cache, const char *key, VALUE **slot, const char **keyp);
16
+
17
+ extern void slot_cache_print(SlotCache cache);
18
+
19
+ #endif /* SLOT_CACHE_H */
data/ext/ox/special.c ADDED
@@ -0,0 +1,390 @@
1
+ /* special.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ */
5
+
6
+ #include "special.h"
7
+
8
+ #include <stdbool.h>
9
+ #include <string.h>
10
+
11
+ /*
12
+ u0000..u007F 00000000000000xxxxxxx 0xxxxxxx
13
+ u0080..u07FF 0000000000yyyyyxxxxxx 110yyyyy 10xxxxxx
14
+ u0800..uD7FF, uE000..uFFFF 00000zzzzyyyyyyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
15
+ u10000..u10FFFF uuuzzzzzzyyyyyyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
16
+ */
17
/* Write the UTF-8 encoding of the code point u at text and return the position just past the last byte
 * written. No terminator is appended.
 *
 * Code points up to U+10FFFF, excluding the surrogate range U+D800..U+DFFF, are encoded as 1 to 4 bytes.
 * Any other value is assumed to already hold UTF-8 bytes packed into the integer: its bytes are copied
 * most significant first, skipping leading zero bytes.
 */
char *ox_ucs_to_utf8_chars(char *text, uint64_t u) {
    if (u < 0x80ULL) {
        /* 0xxxxxxx */
        *text++ = (char)u;
    } else if (u < 0x800ULL) {
        /* 110yyyyy 10xxxxxx */
        *text++ = (char)(0xC0ULL | ((u >> 6) & 0x1FULL));
        *text++ = (char)(0x80ULL | (u & 0x3FULL));
    } else if (u < 0xD800ULL || (0xE000ULL <= u && u < 0x10000ULL)) {
        /* 1110zzzz 10yyyyyy 10xxxxxx */
        *text++ = (char)(0xE0ULL | ((u >> 12) & 0x0FULL));
        *text++ = (char)(0x80ULL | ((u >> 6) & 0x3FULL));
        *text++ = (char)(0x80ULL | (u & 0x3FULL));
    } else if (0x10000ULL <= u && u < 0x110000ULL) {
        /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
        *text++ = (char)(0xF0ULL | ((u >> 18) & 0x07ULL));
        *text++ = (char)(0x80ULL | ((u >> 12) & 0x3FULL));
        *text++ = (char)(0x80ULL | ((u >> 6) & 0x3FULL));
        *text++ = (char)(0x80ULL | (u & 0x3FULL));
    } else {
        /* not a valid code point: treat the value as raw, already encoded bytes */
        int shift = 56;

        /* skip leading zero bytes */
        while (0 <= shift && 0 == ((u >> shift) & 0xFFULL)) {
            shift -= 8;
        }
        /* copy everything from the first non-zero byte on, embedded zero bytes included */
        for (; 0 <= shift; shift -= 8) {
            *text++ = (char)(unsigned char)((u >> shift) & 0xFFULL);
        }
    }
    return text;
}
54
+
55
/* Number of hash buckets, and the mask that reduces a hash to a bucket index. */
#define BUCKET_SIZE 256
#define BUCKET_MASK (BUCKET_SIZE - 1)

/* One named entity. key and code are given by the initializers of the entity table; next and hash are
 * filled in when the entry is linked into the cache. */
typedef struct _slot {
    const char   *key;  /* entity name */
    uint64_t      code; /* value handed to ox_ucs_to_utf8_chars() for this entity */
    struct _slot *next; /* next entry in the same bucket */
    uint64_t      hash; /* hash of key as computed by calc_hash() */
} *Slot;

/* Hash table of entities; each bucket heads a singly linked list of slots. */
typedef struct _cache {
    Slot buckets[BUCKET_SIZE];
} *Cache;

static struct _cache entity_cache;
/* Set once the table has been filled by cache_init(). */
static bool inited = false;
71
+
72
+ // HTML entities such as &amp;. This table holds the named character entities of HTML 4 plus &apos;; it is not the complete HTML 5 list.
73
+ static struct _slot entities[] = {
74
+ {"AElig", 198}, // latin capital letter AE
75
+ {"Aacute", 193}, // latin capital letter A with acute
76
+ {"Acirc", 194}, // latin capital letter A with circumflex
77
+ {"Agrave", 192}, // latin capital letter A with grave
78
+ {"Alpha", 913}, // greek capital letter alpha, U+0391
79
+ {"Aring", 197}, // latin capital letter A with ring above
80
+ {"Atilde", 195}, // latin capital letter A with tilde
81
+ {"Auml", 196}, // latin capital letter A with diaeresis
82
+ {"Beta", 914}, // greek capital letter beta, U+0392
83
+ {"Ccedil", 199}, // latin capital letter C with cedilla
84
+ {"Chi", 935}, // greek capital letter chi, U+03A7
85
+ {"Dagger", 8225}, // double dagger, U+2021 ISOpub
86
+ {"Delta", 916}, // greek capital letter delta
87
+ {"ETH", 208}, // latin capital letter ETH, U+00D0 ISOlat1
88
+ {"Eacute", 201}, // latin capital letter E with acute
89
+ {"Ecirc", 202}, // latin capital letter E with circumflex
90
+ {"Egrave", 200}, // latin capital letter E with grave
91
+ {"Epsilon", 917}, // greek capital letter epsilon, U+0395
92
+ {"Eta", 919}, // greek capital letter eta, U+0397
93
+ {"Euml", 203}, // latin capital letter E with diaeresis
94
+ {"Gamma", 915}, // greek capital letter gamma
95
+ {"Iacute", 205}, // latin capital letter I with acute
96
+ {"Icirc", 206}, // latin capital letter I with circumflex
97
+ {"Igrave", 204}, // latin capital letter I with grave
98
+ {"Iota", 921}, // greek capital letter iota, U+0399
99
+ {"Iuml", 207}, // latin capital letter I with diaeresis
100
+ {"Kappa", 922}, // greek capital letter kappa, U+039A
101
+ {"Lambda", 923}, // greek capital letter lambda
102
+ {"Mu", 924}, // greek capital letter mu, U+039C
103
+ {"Ntilde", 209}, // latin capital letter N with tilde
104
+ {"Nu", 925}, // greek capital letter nu, U+039D
105
+ {"OElig", 338}, // - latin capital ligature OE
106
+ {"Oacute", 211}, // latin capital letter O with acute
107
+ {"Ocirc", 212}, // latin capital letter O with circumflex
108
+ {"Ograve", 210}, // latin capital letter O with grave
109
+ {"Omega", 937}, // greek capital letter omega
110
+ {"Omicron", 927}, // greek capital letter omicron, U+039F
111
+ {"Oslash", 216}, // latin capital letter O with stroke
112
+ {"Otilde", 213}, // latin capital letter O with tilde
113
+ {"Ouml", 214}, // latin capital letter O with diaeresis
114
+ {"Phi", 934}, // greek capital letter phi
115
+ {"Pi", 928}, // greek capital letter pi, U+03A0 ISOgrk3
116
+ {"Prime", 8243}, // double prime = seconds = inches
117
+ {"Psi", 936}, // greek capital letter psi
118
+ {"Rho", 929}, // greek capital letter rho, U+03A1
119
+ {"Scaron", 352}, // - latin capital letter S with caron
120
+ {"Sigma", 931}, // greek capital letter sigma
121
+ {"THORN", 222}, // latin capital letter THORN
122
+ {"Tau", 932}, // greek capital letter tau, U+03A4
123
+ {"Theta", 920}, // greek capital letter theta
124
+ {"Uacute", 218}, // latin capital letter U with acute
125
+ {"Ucirc", 219}, // latin capital letter U with circumflex
126
+ {"Ugrave", 217}, // latin capital letter U with grave
127
+ {"Upsilon", 933}, // greek capital letter upsilon
128
+ {"Uuml", 220}, // latin capital letter U with diaeresis
129
+ {"Xi", 926}, // greek capital letter xi, U+039E ISOgrk3
130
+ {"Yacute", 221}, // latin capital letter Y with acute
131
+ {"Yuml", 376}, // - latin capital letter Y with diaeresis
132
+ {"Zeta", 918}, // greek capital letter zeta, U+0396
133
+ {"aacute", 225}, // latin small letter a with acute
134
+ {"acirc", 226}, // latin small letter a with circumflex
135
+ {"acute", 180}, // acute accent = spacing acute
136
+ {"aelig", 230}, // latin small letter ae
137
+ {"agrave", 224}, // latin small letter a with grave
138
+ {"alefsym", 8501}, // alef symbol = first transfinite cardinal
139
+ {"alpha", 945}, // greek small letter alpha
140
+ {"amp", 38}, // -- ampersand, U+0026 ISOnum
141
+ {"and", 8743}, // logical and = wedge, U+2227 ISOtech
142
+ {"ang", 8736}, // angle, U+2220 ISOamso
143
+ {"apos", 39}, // -- single quote
144
+ {"aring", 229}, // latin small letter a with ring above
145
+ {"asymp", 8776}, // almost equal to = asymptotic to
146
+ {"atilde", 227}, // latin small letter a with tilde
147
+ {"auml", 228}, // latin small letter a with diaeresis
148
+ {"bdquo", 8222}, // double low-9 quotation mark, U+201E NEW
149
+ {"beta", 946}, // greek small letter beta, U+03B2 ISOgrk3
150
+ {"brvbar", 166}, // broken bar = broken vertical bar
151
+ {"bull", 8226}, // bullet = black small circle
152
+ {"cap", 8745}, // intersection = cap, U+2229 ISOtech
153
+ {"ccedil", 231}, // latin small letter c with cedilla
154
+ {"cedil", 184}, // cedilla = spacing cedilla, U+00B8 ISOdia
155
+ {"cent", 162}, // cent sign, U+00A2 ISOnum
156
+ {"chi", 967}, // greek small letter chi, U+03C7 ISOgrk3
157
+ {"circ", 710}, // - modifier letter circumflex accent
158
+ {"clubs", 9827}, // black club suit = shamrock
159
+ {"cong", 8773}, // approximately equal to, U+2245 ISOtech
160
+ {"copy", 169}, // copyright sign, U+00A9 ISOnum
161
+ {"crarr", 8629}, // downwards arrow with corner leftwards
162
+ {"cup", 8746}, // union = cup, U+222A ISOtech
163
+ {"curren", 164}, // currency sign, U+00A4 ISOnum
164
+ {"dArr", 8659}, // downwards double arrow, U+21D3 ISOamsa
165
+ {"dagger", 8224}, // dagger, U+2020 ISOpub
166
+ {"darr", 8595}, // downwards arrow, U+2193 ISOnum
167
+ {"deg", 176}, // degree sign, U+00B0 ISOnum
168
+ {"delta", 948}, // greek small letter delta
169
+ {"diams", 9830}, // black diamond suit, U+2666 ISOpub
170
+ {"divide", 247}, // division sign, U+00F7 ISOnum
171
+ {"eacute", 233}, // latin small letter e with acute
172
+ {"ecirc", 234}, // latin small letter e with circumflex
173
+ {"egrave", 232}, // latin small letter e with grave
174
+ {"empty", 8709}, // empty set = null set = diameter
175
+ {"emsp", 8195}, // em space, U+2003 ISOpub
176
+ {"ensp", 8194}, // en space, U+2002 ISOpub
177
+ {"epsilon", 949}, // greek small letter epsilon
178
+ {"equiv", 8801}, // identical to, U+2261 ISOtech
179
+ {"eta", 951}, // greek small letter eta, U+03B7 ISOgrk3
180
+ {"eth", 240}, // latin small letter eth, U+00F0 ISOlat1
181
+ {"euml", 235}, // latin small letter e with diaeresis
182
+ {"euro", 8364}, // - euro sign, U+20AC NEW
183
+ {"exist", 8707}, // there exists, U+2203 ISOtech
184
+ {"fnof", 402}, // latin small f with hook = function
185
+ {"forall", 8704}, // for all, U+2200 ISOtech
186
+ {"frac12", 189}, // vulgar fraction one half
187
+ {"frac14", 188}, // vulgar fraction one quarter
188
+ {"frac34", 190}, // vulgar fraction three quarters
189
+ {"frasl", 8260}, // fraction slash, U+2044 NEW
190
+ {"gamma", 947}, // greek small letter gamma
191
+ {"ge", 8805}, // greater-than or equal to
192
+ {"gt", 62}, // -- greater-than sign, U+003E ISOnum
193
+ {"hArr", 8660}, // left right double arrow
194
+ {"harr", 8596}, // left right arrow, U+2194 ISOamsa
195
+ {"hearts", 9829}, // black heart suit = valentine
196
+ {"hellip", 8230}, // horizontal ellipsis = three dot leader
197
+ {"iacute", 237}, // latin small letter i with acute
198
+ {"icirc", 238}, // latin small letter i with circumflex
199
+ {"iexcl", 161}, // inverted exclamation mark, U+00A1 ISOnum
200
+ {"igrave", 236}, // latin small letter i with grave
201
+ {"image", 8465}, // blackletter capital I = imaginary part
202
+ {"infin", 8734}, // infinity, U+221E ISOtech
203
+ {"int", 8747}, // integral, U+222B ISOtech
204
+ {"iota", 953}, // greek small letter iota, U+03B9 ISOgrk3
205
+ {"iquest", 191}, // inverted question mark
206
+ {"isin", 8712}, // element of, U+2208 ISOtech
207
+ {"iuml", 239}, // latin small letter i with diaeresis
208
+ {"kappa", 954}, // greek small letter kappa
209
+ {"lArr", 8656}, // leftwards double arrow, U+21D0 ISOtech
210
+ {"lambda", 955}, // greek small letter lambda
211
+ {"lang", 9001}, // left-pointing angle bracket = bra
212
+ {"laquo", 171}, // left-pointing double angle quotation mark
213
+ {"larr", 8592}, // leftwards arrow, U+2190 ISOnum
214
+ {"lceil", 8968}, // left ceiling = apl upstile
215
+ {"ldquo", 8220}, // left double quotation mark
216
+ {"le", 8804}, // less-than or equal to, U+2264 ISOtech
217
+ {"lfloor", 8970}, // left floor = apl downstile
218
+ {"lowast", 8727}, // asterisk operator, U+2217 ISOtech
219
+ {"loz", 9674}, // lozenge, U+25CA ISOpub
220
+ {"lrm", 8206}, // left-to-right mark, U+200E NEW RFC 2070
221
+ {"lsaquo", 8249}, // single left-pointing angle quotation mark
222
+ {"lsquo", 8216}, // left single quotation mark
223
+ {"lt", 60}, // -- less-than sign, U+003C ISOnum
224
+ {"macr", 175}, // macron = spacing macron = overline
225
+ {"mdash", 8212}, // em dash, U+2014 ISOpub
226
+ {"micro", 181}, // micro sign, U+00B5 ISOnum
227
+ {"middot", 183}, // middle dot = Georgian comma
228
+ {"minus", 8722}, // minus sign, U+2212 ISOtech
229
+ {"mu", 956}, // greek small letter mu, U+03BC ISOgrk3
230
+ {"nabla", 8711}, // nabla = backward difference
231
+ {"nbsp", 160}, // no-break space = non-breaking space
232
+ {"ndash", 8211}, // en dash, U+2013 ISOpub
233
+ {"ne", 8800}, // not equal to, U+2260 ISOtech
234
+ {"ni", 8715}, // contains as member, U+220B ISOtech
235
+ {"not", 172}, // not sign, U+00AC ISOnum
236
+ {"notin", 8713}, // not an element of, U+2209 ISOtech
237
+ {"nsub", 8836}, // not a subset of, U+2284 ISOamsn
238
+ {"ntilde", 241}, // latin small letter n with tilde
239
+ {"nu", 957}, // greek small letter nu, U+03BD ISOgrk3
240
+ {"oacute", 243}, // latin small letter o with acute
241
+ {"ocirc", 244}, // latin small letter o with circumflex
242
+ {"oelig", 339}, // - latin small ligature oe, U+0153 ISOlat2
243
+ {"ograve", 242}, // latin small letter o with grave
244
+ {"oline", 8254}, // overline = spacing overscore
245
+ {"omega", 969}, // greek small letter omega
246
+ {"omicron", 959}, // greek small letter omicron, U+03BF NEW
247
+ {"oplus", 8853}, // circled plus = direct sum
248
+ {"or", 8744}, // logical or = vee, U+2228 ISOtech
249
+ {"ordf", 170}, // feminine ordinal indicator, U+00AA ISOnum
250
+ {"ordm", 186}, // masculine ordinal indicator
251
+ {"oslash", 248}, // latin small letter o with stroke
252
+ {"otilde", 245}, // latin small letter o with tilde
253
+ {"otimes", 8855}, // circled times = vector product
254
+ {"ouml", 246}, // latin small letter o with diaeresis
255
+ {"para", 182}, // pilcrow sign = paragraph sign
256
+ {"part", 8706}, // partial differential, U+2202 ISOtech
257
+ {"permil", 8240}, // per mille sign, U+2030 ISOtech
258
+ {"perp", 8869}, // up tack = orthogonal to = perpendicular
259
+ {"phi", 966}, // greek small letter phi, U+03C6 ISOgrk3
260
+ {"pi", 960}, // greek small letter pi, U+03C0 ISOgrk3
261
+ {"piv", 982}, // greek pi symbol, U+03D6 ISOgrk3
262
+ {"plusmn", 177}, // plus-minus sign = plus-or-minus sign
263
+ {"pound", 163}, // pound sign, U+00A3 ISOnum
264
+ {"prime", 8242}, // prime = minutes = feet, U+2032 ISOtech
265
+ {"prod", 8719}, // n-ary product = product sign
266
+ {"prop", 8733}, // proportional to, U+221D ISOtech
267
+ {"psi", 968}, // greek small letter psi, U+03C8 ISOgrk3
268
+ {"quot", 34}, // -- quotation mark = APL quote
269
+ {"rArr", 8658}, // rightwards double arrow
270
+ {"radic", 8730}, // square root = radical sign
271
+ {"rang", 9002}, // right-pointing angle bracket = ket
272
+ {"raquo", 187}, // right-pointing double angle quotation mark
273
+ {"rarr", 8594}, // rightwards arrow, U+2192 ISOnum
274
+ {"rceil", 8969}, // right ceiling, U+2309 ISOamsc
275
+ {"rdquo", 8221}, // right double quotation mark
276
+ {"real", 8476}, // blackletter capital R = real part symbol
277
+ {"reg", 174}, // registered sign = registered trade mark sign
278
+ {"rfloor", 8971}, // right floor, U+230B ISOamsc
279
+ {"rho", 961}, // greek small letter rho, U+03C1 ISOgrk3
280
+ {"rlm", 8207}, // right-to-left mark, U+200F NEW RFC 2070
281
+ {"rsaquo", 8250}, // single right-pointing angle quotation mark
282
+ {"rsquo", 8217}, // right single quotation mark
283
+ {"sbquo", 8218}, // single low-9 quotation mark, U+201A NEW
284
+ {"scaron", 353}, // - latin small letter s with caron
285
+ {"sdot", 8901}, // dot operator, U+22C5 ISOamsb
286
+ {"sect", 167}, // section sign, U+00A7 ISOnum
287
+ {"shy", 173}, // soft hyphen = discretionary hyphen
288
+ {"sigma", 963}, // greek small letter sigma
289
+ {"sigmaf", 962}, // greek small letter final sigma
290
+ {"sim", 8764}, // tilde operator = varies with = similar to
291
+ {"spades", 9824}, // black spade suit, U+2660 ISOpub
292
+ {"sub", 8834}, // subset of, U+2282 ISOtech
293
+ {"sube", 8838}, // subset of or equal to, U+2286 ISOtech
294
+ {"sum", 8721}, // n-ary sumation, U+2211 ISOamsb
295
+ {"sup", 8835}, // superset of, U+2283 ISOtech
296
+ {"sup1", 185}, // superscript one = superscript digit one
297
+ {"sup2", 178}, // superscript two = superscript digit two
298
+ {"sup3", 179}, // superscript three = superscript digit three
299
+ {"supe", 8839}, // superset of or equal to
300
+ {"szlig", 223}, // latin small letter sharp s = ess-zed
301
+ {"tau", 964}, // greek small letter tau, U+03C4 ISOgrk3
302
+ {"there4", 8756}, // therefore, U+2234 ISOtech
303
+ {"theta", 952}, // greek small letter theta
304
+ {"thetasym", 977}, // greek small letter theta symbol
305
+ {"thinsp", 8201}, // thin space, U+2009 ISOpub
306
+ {"thorn", 254}, // latin small letter thorn
307
+ {"tilde", 732}, // - small tilde, U+02DC ISOdia
308
+ {"times", 215}, // multiplication sign, U+00D7 ISOnum
309
+ {"trade", 8482}, // trade mark sign, U+2122 ISOnum
310
+ {"uArr", 8657}, // upwards double arrow, U+21D1 ISOamsa
311
+ {"uacute", 250}, // latin small letter u with acute
312
+ {"uarr", 8593}, // upwards arrow, U+2191 ISOnum-->
313
+ {"ucirc", 251}, // latin small letter u with circumflex
314
+ {"ugrave", 249}, // latin small letter u with grave
315
+ {"uml", 168}, // diaeresis = spacing diaeresis
316
+ {"upsih", 978}, // greek upsilon with hook symbol
317
+ {"upsilon", 965}, // greek small letter upsilon
318
+ {"uuml", 252}, // latin small letter u with diaeresis
319
+ {"weierp", 8472}, // script capital P = power set
320
+ {"xi", 958}, // greek small letter xi, U+03BE ISOgrk3
321
+ {"yacute", 253}, // latin small letter y with acute
322
+ {"yen", 165}, // yen sign = yuan sign, U+00A5 ISOnum
323
+ {"yuml", 255}, // latin small letter y with diaeresis
324
+ {"zeta", 950}, // greek small letter zeta, U+03B6 ISOgrk3
325
+ {"zwj", 8205}, // zero width joiner, U+200D NEW RFC 2070
326
+ {"zwnj", 8204}, // zero width non-joiner
327
+ {NULL, 0},
328
+ };
329
+
330
/* Hash a NUL-terminated key. Every byte is folded with 0x20 first, so ASCII letters hash the same in upper
 * and lower case. A NULL or empty key hashes to 0. */
static uint64_t calc_hash(const char *key) {
    const uint8_t *p = (const uint8_t *)key;
    uint64_t       h = 0;

    if (NULL == p) {
        return 0;
    }
    while (0 != *p) {
        /* narrow to most used range of 0x4D (77) in size */
        int digit = (*p | 0x20) - 0x2D;

        h = 77 * h + digit;
        p++;
    }
    return h;
}
343
+
344
+ static Slot *get_bucketp(uint64_t h) {
345
+ return entity_cache.buckets + (BUCKET_MASK & (h ^ (h << 5) ^ (h >> 7)));
346
+ }
347
+
348
+ static void cache_set(Slot s) {
349
+ int64_t h = calc_hash(s->key);
350
+ Slot *bucket = get_bucketp(h);
351
+
352
+ s->hash = h;
353
+ s->next = *bucket;
354
+ *bucket = s;
355
+ }
356
+
357
+ static Slot cache_get(const char *key) {
358
+ int64_t h = calc_hash(key);
359
+ Slot *bucket = get_bucketp(h);
360
+ Slot s;
361
+
362
+ for (s = *bucket; NULL != s; s = s->next) {
363
+ if (h == (int64_t)s->hash && 0 == strcasecmp(s->key, key)) {
364
+ return s;
365
+ }
366
+ }
367
+ return NULL;
368
+ }
369
+
370
+ static void cache_init(void) {
371
+ Slot e = entities;
372
+
373
+ memset(&entity_cache, 0, sizeof(struct _cache));
374
+ for (; NULL != e->key; e++) {
375
+ cache_set(e);
376
+ }
377
+ inited = true;
378
+ }
379
+
380
+ char *ox_entity_lookup(char *text, const char *key) {
381
+ Slot s = entities;
382
+
383
+ if (!inited) {
384
+ cache_init();
385
+ }
386
+ if (NULL == (s = cache_get(key))) {
387
+ return NULL;
388
+ }
389
+ return ox_ucs_to_utf8_chars(text, s->code);
390
+ }
data/ext/ox/special.h ADDED
@@ -0,0 +1,14 @@
1
+ /* special.h
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ */
5
+
6
+ #ifndef OX_SPECIAL_H
7
+ #define OX_SPECIAL_H
8
+
9
+ #include <stdint.h>
10
+
11
/* Write the UTF-8 encoding of u at text; returns the position just past the last byte written. */
char *ox_ucs_to_utf8_chars(char *text, uint64_t u);

/* Write the UTF-8 encoding of the named entity key at text; returns the position just past the last byte
 * written, or NULL when the name is not known. */
char *ox_entity_lookup(char *text, const char *key);
13
+
14
+ #endif /* OX_SPECIAL_H */
data/ext/ox/type.h ADDED
@@ -0,0 +1,39 @@
1
+ /* type.h
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ */
5
+
6
+ #ifndef OX_TYPE_H
7
+ #define OX_TYPE_H
8
+
9
/* Single character type codes. Apart from NoCode every code is an ASCII letter; the enumerators are listed
 * in order of their character value. */
typedef enum {
    NoCode         = 0,
    BigDecimalCode = 'B',
    DateCode       = 'D',
    ArrayCode      = 'a',
    String64Code   = 'b', /* base64 encoded String */
    ClassCode      = 'c',
    Symbol64Code   = 'd', /* base64 encoded Symbol */
    ExceptionCode  = 'e',
    FloatCode      = 'f',
    RegexpCode     = 'g',
    HashCode       = 'h',
    FixnumCode     = 'i',
    BignumCode     = 'j',
    KeyCode        = 'k', /* indicates the value is a hash key, kind of a hack */
    RationalCode   = 'l',
    SymbolCode     = 'm',
    FalseClassCode = 'n',
    ObjectCode     = 'o',
    RefCode        = 'p',
    RangeCode      = 'r',
    StringCode     = 's',
    TimeCode       = 't',
    StructCode     = 'u',
    ComplexCode    = 'v',
    RawCode        = 'x',
    TrueClassCode  = 'y',
    NilClassCode   = 'z',
} Type;
38
+
39
+ #endif /* OX_TYPE_H */