ox 2.11.0 → 2.13.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +46 -0
- data/README.md +55 -7
- data/ext/ox/builder.c +13 -7
- data/ext/ox/dump.c +31 -25
- data/ext/ox/extconf.rb +16 -34
- data/ext/ox/gen_load.c +18 -96
- data/ext/ox/hash_load.c +62 -26
- data/ext/ox/obj_load.c +8 -45
- data/ext/ox/ox.c +59 -48
- data/ext/ox/ox.h +33 -38
- data/ext/ox/parse.c +59 -67
- data/ext/ox/sax.c +84 -134
- data/ext/ox/sax.h +2 -4
- data/ext/ox/sax_as.c +2 -6
- data/ext/ox/sax_buf.c +1 -1
- data/ext/ox/special.c +346 -0
- data/ext/ox/special.h +1 -0
- data/lib/ox/element.rb +1 -0
- data/lib/ox/version.rb +1 -1
- metadata +7 -8
- data/ext/ox/encode.h +0 -26
data/ext/ox/sax.h
CHANGED
@@ -33,10 +33,8 @@ typedef struct _saxDrive {
|
|
33
33
|
int blocked;
|
34
34
|
bool abort;
|
35
35
|
struct _has has;
|
36
|
-
#if
|
36
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
37
37
|
rb_encoding *encoding;
|
38
|
-
#elif HAS_PRIVATE_ENCODING
|
39
|
-
VALUE encoding;
|
40
38
|
#else
|
41
39
|
const char *encoding;
|
42
40
|
#endif
|
@@ -46,7 +44,7 @@ extern void ox_collapse_return(char *str);
|
|
46
44
|
extern void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options);
|
47
45
|
extern void ox_sax_drive_cleanup(SaxDrive dr);
|
48
46
|
extern void ox_sax_drive_error(SaxDrive dr, const char *msg);
|
49
|
-
extern int ox_sax_collapse_special(SaxDrive dr, char *str,
|
47
|
+
extern int ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col);
|
50
48
|
|
51
49
|
extern VALUE ox_sax_value_class;
|
52
50
|
|
data/ext/ox/sax_as.c
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
#include <stdio.h>
|
9
9
|
#include <strings.h>
|
10
10
|
#include <sys/types.h>
|
11
|
-
#if
|
11
|
+
#if HAVE_SYS_UIO_H
|
12
12
|
#include <sys/uio.h>
|
13
13
|
#endif
|
14
14
|
#include <unistd.h>
|
@@ -136,14 +136,10 @@ sax_value_as_s(VALUE self) {
|
|
136
136
|
break;
|
137
137
|
}
|
138
138
|
rs = rb_str_new2(dr->buf.str);
|
139
|
-
#if
|
139
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
140
140
|
if (0 != dr->encoding) {
|
141
141
|
rb_enc_associate(rs, dr->encoding);
|
142
142
|
}
|
143
|
-
#elif HAS_PRIVATE_ENCODING
|
144
|
-
if (Qnil != dr->encoding) {
|
145
|
-
rb_funcall(rs, ox_force_encoding_id, 1, dr->encoding);
|
146
|
-
}
|
147
143
|
#endif
|
148
144
|
return rs;
|
149
145
|
}
|
data/ext/ox/sax_buf.c
CHANGED
data/ext/ox/special.c
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
* All rights reserved.
|
4
4
|
*/
|
5
5
|
|
6
|
+
#include <string.h>
|
7
|
+
#include <stdbool.h>
|
8
|
+
|
6
9
|
#include "special.h"
|
7
10
|
|
8
11
|
/*
|
@@ -49,3 +52,346 @@ ox_ucs_to_utf8_chars(char *text, uint64_t u) {
|
|
49
52
|
}
|
50
53
|
return text;
|
51
54
|
}
|
55
|
+
|
56
|
+
#define BUCKET_SIZE 256
|
57
|
+
#define BUCKET_MASK 255
|
58
|
+
|
59
|
+
/* One entry of the entity table. It pairs an entity name with its numeric
 * code and also serves as a node in a hash-bucket chain. The first two
 * fields must stay in this order: the entities[] table initializes key and
 * code positionally and leaves the rest zeroed. */
typedef struct _slot {
    const char      *key;   /* entity name as listed in entities[]; NULL marks the end of the table */
    uint64_t        code;   /* numeric code the name maps to */
    struct _slot    *next;  /* next slot in the same bucket chain, set by cache_set() */
    uint64_t        hash;   /* calc_hash(key), set by cache_set() */
} *Slot;
|
65
|
+
|
66
|
+
typedef struct _cache {
|
67
|
+
Slot buckets[BUCKET_SIZE];
|
68
|
+
} *Cache;
|
69
|
+
|
70
|
+
static struct _cache entity_cache;
|
71
|
+
static bool inited = false;
|
72
|
+
|
73
|
+
// HTML entities such as &amp;. This is a complete list from the HTML 5 spec.
|
74
|
+
static struct _slot entities[] = {
|
75
|
+
{ "AElig", 198 }, // latin capital letter AE
|
76
|
+
{ "Aacute", 193 }, // latin capital letter A with acute
|
77
|
+
{ "Acirc", 194 }, // latin capital letter A with circumflex
|
78
|
+
{ "Agrave", 192 }, // latin capital letter A with grave
|
79
|
+
{ "Alpha", 913 }, // greek capital letter alpha, U+0391
|
80
|
+
{ "Aring", 197 }, // latin capital letter A with ring above
|
81
|
+
{ "Atilde", 195 }, // latin capital letter A with tilde
|
82
|
+
{ "Auml", 196 }, // latin capital letter A with diaeresis
|
83
|
+
{ "Beta", 914 }, // greek capital letter beta, U+0392
|
84
|
+
{ "Ccedil", 199 }, // latin capital letter C with cedilla
|
85
|
+
{ "Chi", 935 }, // greek capital letter chi, U+03A7
|
86
|
+
{ "Dagger", 8225 }, // double dagger, U+2021 ISOpub
|
87
|
+
{ "Delta", 916 }, // greek capital letter delta
|
88
|
+
{ "ETH", 208 }, // latin capital letter ETH, U+00D0 ISOlat1
|
89
|
+
{ "Eacute", 201 }, // latin capital letter E with acute
|
90
|
+
{ "Ecirc", 202 }, // latin capital letter E with circumflex
|
91
|
+
{ "Egrave", 200 }, // latin capital letter E with grave
|
92
|
+
{ "Epsilon", 917 }, // greek capital letter epsilon, U+0395
|
93
|
+
{ "Eta", 919 }, // greek capital letter eta, U+0397
|
94
|
+
{ "Euml", 203 }, // latin capital letter E with diaeresis
|
95
|
+
{ "Gamma", 915 }, // greek capital letter gamma
|
96
|
+
{ "Iacute", 205 }, // latin capital letter I with acute
|
97
|
+
{ "Icirc", 206 }, // latin capital letter I with circumflex
|
98
|
+
{ "Igrave", 204 }, // latin capital letter I with grave
|
99
|
+
{ "Iota", 921 }, // greek capital letter iota, U+0399
|
100
|
+
{ "Iuml", 207 }, // latin capital letter I with diaeresis
|
101
|
+
{ "Kappa", 922 }, // greek capital letter kappa, U+039A
|
102
|
+
{ "Lambda", 923 }, // greek capital letter lambda
|
103
|
+
{ "Mu", 924 }, // greek capital letter mu, U+039C
|
104
|
+
{ "Ntilde", 209 }, // latin capital letter N with tilde
|
105
|
+
{ "Nu", 925 }, // greek capital letter nu, U+039D
|
106
|
+
{ "OElig", 338 }, // - latin capital ligature OE
|
107
|
+
{ "Oacute", 211 }, // latin capital letter O with acute
|
108
|
+
{ "Ocirc", 212 }, // latin capital letter O with circumflex
|
109
|
+
{ "Ograve", 210 }, // latin capital letter O with grave
|
110
|
+
{ "Omega", 937 }, // greek capital letter omega
|
111
|
+
{ "Omicron", 927 }, // greek capital letter omicron, U+039F
|
112
|
+
{ "Oslash", 216 }, // latin capital letter O with stroke
|
113
|
+
{ "Otilde", 213 }, // latin capital letter O with tilde
|
114
|
+
{ "Ouml", 214 }, // latin capital letter O with diaeresis
|
115
|
+
{ "Phi", 934 }, // greek capital letter phi
|
116
|
+
{ "Pi", 928 }, // greek capital letter pi, U+03A0 ISOgrk3
|
117
|
+
{ "Prime", 8243 }, // double prime = seconds = inches
|
118
|
+
{ "Psi", 936 }, // greek capital letter psi
|
119
|
+
{ "Rho", 929 }, // greek capital letter rho, U+03A1
|
120
|
+
{ "Scaron", 352 }, // - latin capital letter S with caron
|
121
|
+
{ "Sigma", 931 }, // greek capital letter sigma
|
122
|
+
{ "THORN", 222 }, // latin capital letter THORN
|
123
|
+
{ "Tau", 932 }, // greek capital letter tau, U+03A4
|
124
|
+
{ "Theta", 920 }, // greek capital letter theta
|
125
|
+
{ "Uacute", 218 }, // latin capital letter U with acute
|
126
|
+
{ "Ucirc", 219 }, // latin capital letter U with circumflex
|
127
|
+
{ "Ugrave", 217 }, // latin capital letter U with grave
|
128
|
+
{ "Upsilon", 933 }, // greek capital letter upsilon
|
129
|
+
{ "Uuml", 220 }, // latin capital letter U with diaeresis
|
130
|
+
{ "Xi", 926 }, // greek capital letter xi, U+039E ISOgrk3
|
131
|
+
{ "Yacute", 221 }, // latin capital letter Y with acute
|
132
|
+
{ "Yuml", 376 }, // - latin capital letter Y with diaeresis
|
133
|
+
{ "Zeta", 918 }, // greek capital letter zeta, U+0396
|
134
|
+
{ "aacute", 225 }, // latin small letter a with acute
|
135
|
+
{ "acirc", 226 }, // latin small letter a with circumflex
|
136
|
+
{ "acute", 180 }, // acute accent = spacing acute
|
137
|
+
{ "aelig", 230 }, // latin small letter ae
|
138
|
+
{ "agrave", 224 }, // latin small letter a with grave
|
139
|
+
{ "alefsym", 8501 },// alef symbol = first transfinite cardinal
|
140
|
+
{ "alpha", 945 }, // greek small letter alpha
|
141
|
+
{ "amp", 38 }, // -- ampersand, U+0026 ISOnum
|
142
|
+
{ "and", 8743 }, // logical and = wedge, U+2227 ISOtech
|
143
|
+
{ "ang", 8736 }, // angle, U+2220 ISOamso
|
144
|
+
{ "apos", 39 }, // -- single quote
|
145
|
+
{ "aring", 229 }, // latin small letter a with ring above
|
146
|
+
{ "asymp", 8776 }, // almost equal to = asymptotic to
|
147
|
+
{ "atilde", 227 }, // latin small letter a with tilde
|
148
|
+
{ "auml", 228 }, // latin small letter a with diaeresis
|
149
|
+
{ "bdquo", 8222 }, // double low-9 quotation mark, U+201E NEW
|
150
|
+
{ "beta", 946 }, // greek small letter beta, U+03B2 ISOgrk3
|
151
|
+
{ "brvbar", 166 }, // broken bar = broken vertical bar
|
152
|
+
{ "bull", 8226 }, // bullet = black small circle
|
153
|
+
{ "cap", 8745 }, // intersection = cap, U+2229 ISOtech
|
154
|
+
{ "ccedil", 231 }, // latin small letter c with cedilla
|
155
|
+
{ "cedil", 184 }, // cedilla = spacing cedilla, U+00B8 ISOdia
|
156
|
+
{ "cent", 162 }, // cent sign, U+00A2 ISOnum
|
157
|
+
{ "chi", 967 }, // greek small letter chi, U+03C7 ISOgrk3
|
158
|
+
{ "circ", 710 }, // - modifier letter circumflex accent
|
159
|
+
{ "clubs", 9827 }, // black club suit = shamrock
|
160
|
+
{ "cong", 8773 }, // approximately equal to, U+2245 ISOtech
|
161
|
+
{ "copy", 169 }, // copyright sign, U+00A9 ISOnum
|
162
|
+
{ "crarr", 8629 }, // downwards arrow with corner leftwards
|
163
|
+
{ "cup", 8746 }, // union = cup, U+222A ISOtech
|
164
|
+
{ "curren", 164 }, // currency sign, U+00A4 ISOnum
|
165
|
+
{ "dArr", 8659 }, // downwards double arrow, U+21D3 ISOamsa
|
166
|
+
{ "dagger", 8224 }, // dagger, U+2020 ISOpub
|
167
|
+
{ "darr", 8595 }, // downwards arrow, U+2193 ISOnum
|
168
|
+
{ "deg", 176 }, // degree sign, U+00B0 ISOnum
|
169
|
+
{ "delta", 948 }, // greek small letter delta
|
170
|
+
{ "diams", 9830 }, // black diamond suit, U+2666 ISOpub
|
171
|
+
{ "divide", 247 }, // division sign, U+00F7 ISOnum
|
172
|
+
{ "eacute", 233 }, // latin small letter e with acute
|
173
|
+
{ "ecirc", 234 }, // latin small letter e with circumflex
|
174
|
+
{ "egrave", 232 }, // latin small letter e with grave
|
175
|
+
{ "empty", 8709 }, // empty set = null set = diameter
|
176
|
+
{ "emsp", 8195 }, // em space, U+2003 ISOpub
|
177
|
+
{ "ensp", 8194 }, // en space, U+2002 ISOpub
|
178
|
+
{ "epsilon", 949 }, // greek small letter epsilon
|
179
|
+
{ "equiv", 8801 }, // identical to, U+2261 ISOtech
|
180
|
+
{ "eta", 951 }, // greek small letter eta, U+03B7 ISOgrk3
|
181
|
+
{ "eth", 240 }, // latin small letter eth, U+00F0 ISOlat1
|
182
|
+
{ "euml", 235 }, // latin small letter e with diaeresis
|
183
|
+
{ "euro", 8364 }, // - euro sign, U+20AC NEW
|
184
|
+
{ "exist", 8707 }, // there exists, U+2203 ISOtech
|
185
|
+
{ "fnof", 402 }, // latin small f with hook = function
|
186
|
+
{ "forall", 8704 }, // for all, U+2200 ISOtech
|
187
|
+
{ "frac12", 189 }, // vulgar fraction one half
|
188
|
+
{ "frac14", 188 }, // vulgar fraction one quarter
|
189
|
+
{ "frac34", 190 }, // vulgar fraction three quarters
|
190
|
+
{ "frasl", 8260 }, // fraction slash, U+2044 NEW
|
191
|
+
{ "gamma", 947 }, // greek small letter gamma
|
192
|
+
{ "ge", 8805 }, // greater-than or equal to
|
193
|
+
{ "gt", 62 }, // -- greater-than sign, U+003E ISOnum
|
194
|
+
{ "hArr", 8660 }, // left right double arrow
|
195
|
+
{ "harr", 8596 }, // left right arrow, U+2194 ISOamsa
|
196
|
+
{ "hearts", 9829 }, // black heart suit = valentine
|
197
|
+
{ "hellip", 8230 }, // horizontal ellipsis = three dot leader
|
198
|
+
{ "iacute", 237 }, // latin small letter i with acute
|
199
|
+
{ "icirc", 238 }, // latin small letter i with circumflex
|
200
|
+
{ "iexcl", 161 }, // inverted exclamation mark, U+00A1 ISOnum
|
201
|
+
{ "igrave", 236 }, // latin small letter i with grave
|
202
|
+
{ "image", 8465 }, // blackletter capital I = imaginary part
|
203
|
+
{ "infin", 8734 }, // infinity, U+221E ISOtech
|
204
|
+
{ "int", 8747 }, // integral, U+222B ISOtech
|
205
|
+
{ "iota", 953 }, // greek small letter iota, U+03B9 ISOgrk3
|
206
|
+
{ "iquest", 191 }, // inverted question mark
|
207
|
+
{ "isin", 8712 }, // element of, U+2208 ISOtech
|
208
|
+
{ "iuml", 239 }, // latin small letter i with diaeresis
|
209
|
+
{ "kappa", 954 }, // greek small letter kappa
|
210
|
+
{ "lArr", 8656 }, // leftwards double arrow, U+21D0 ISOtech
|
211
|
+
{ "lambda", 955 }, // greek small letter lambda
|
212
|
+
{ "lang", 9001 }, // left-pointing angle bracket = bra
|
213
|
+
{ "laquo", 171 }, // left-pointing double angle quotation mark
|
214
|
+
{ "larr", 8592 }, // leftwards arrow, U+2190 ISOnum
|
215
|
+
{ "lceil", 8968 }, // left ceiling = apl upstile
|
216
|
+
{ "ldquo", 8220 }, // left double quotation mark
|
217
|
+
{ "le", 8804 }, // less-than or equal to, U+2264 ISOtech
|
218
|
+
{ "lfloor", 8970 }, // left floor = apl downstile
|
219
|
+
{ "lowast", 8727 }, // asterisk operator, U+2217 ISOtech
|
220
|
+
{ "loz", 9674 }, // lozenge, U+25CA ISOpub
|
221
|
+
{ "lrm", 8206 }, // left-to-right mark, U+200E NEW RFC 2070
|
222
|
+
{ "lsaquo", 8249 }, // single left-pointing angle quotation mark
|
223
|
+
{ "lsquo", 8216 }, // left single quotation mark
|
224
|
+
{ "lt", 60 }, // -- less-than sign, U+003C ISOnum
|
225
|
+
{ "macr", 175 }, // macron = spacing macron = overline
|
226
|
+
{ "mdash", 8212 }, // em dash, U+2014 ISOpub
|
227
|
+
{ "micro", 181 }, // micro sign, U+00B5 ISOnum
|
228
|
+
{ "middot", 183 }, // middle dot = Georgian comma
|
229
|
+
{ "minus", 8722 }, // minus sign, U+2212 ISOtech
|
230
|
+
{ "mu", 956 }, // greek small letter mu, U+03BC ISOgrk3
|
231
|
+
{ "nabla", 8711 }, // nabla = backward difference
|
232
|
+
{ "nbsp", 160 }, // no-break space = non-breaking space
|
233
|
+
{ "ndash", 8211 }, // en dash, U+2013 ISOpub
|
234
|
+
{ "ne", 8800 }, // not equal to, U+2260 ISOtech
|
235
|
+
{ "ni", 8715 }, // contains as member, U+220B ISOtech
|
236
|
+
{ "not", 172 }, // not sign, U+00AC ISOnum
|
237
|
+
{ "notin", 8713 }, // not an element of, U+2209 ISOtech
|
238
|
+
{ "nsub", 8836 }, // not a subset of, U+2284 ISOamsn
|
239
|
+
{ "ntilde", 241 }, // latin small letter n with tilde
|
240
|
+
{ "nu", 957 }, // greek small letter nu, U+03BD ISOgrk3
|
241
|
+
{ "oacute", 243 }, // latin small letter o with acute
|
242
|
+
{ "ocirc", 244 }, // latin small letter o with circumflex
|
243
|
+
{ "oelig", 339 }, // - latin small ligature oe, U+0153 ISOlat2
|
244
|
+
{ "ograve", 242 }, // latin small letter o with grave
|
245
|
+
{ "oline", 8254 }, // overline = spacing overscore
|
246
|
+
{ "omega", 969 }, // greek small letter omega
|
247
|
+
{ "omicron", 959 }, // greek small letter omicron, U+03BF NEW
|
248
|
+
{ "oplus", 8853 }, // circled plus = direct sum
|
249
|
+
{ "or", 8744 }, // logical or = vee, U+2228 ISOtech
|
250
|
+
{ "ordf", 170 }, // feminine ordinal indicator, U+00AA ISOnum
|
251
|
+
{ "ordm", 186 }, // masculine ordinal indicator
|
252
|
+
{ "oslash", 248 }, // latin small letter o with stroke
|
253
|
+
{ "otilde", 245 }, // latin small letter o with tilde
|
254
|
+
{ "otimes", 8855 }, // circled times = vector product
|
255
|
+
{ "ouml", 246 }, // latin small letter o with diaeresis
|
256
|
+
{ "para", 182 }, // pilcrow sign = paragraph sign
|
257
|
+
{ "part", 8706 }, // partial differential, U+2202 ISOtech
|
258
|
+
{ "permil", 8240 }, // per mille sign, U+2030 ISOtech
|
259
|
+
{ "perp", 8869 }, // up tack = orthogonal to = perpendicular
|
260
|
+
{ "phi", 966 }, // greek small letter phi, U+03C6 ISOgrk3
|
261
|
+
{ "pi", 960 }, // greek small letter pi, U+03C0 ISOgrk3
|
262
|
+
{ "piv", 982 }, // greek pi symbol, U+03D6 ISOgrk3
|
263
|
+
{ "plusmn", 177 }, // plus-minus sign = plus-or-minus sign
|
264
|
+
{ "pound", 163 }, // pound sign, U+00A3 ISOnum
|
265
|
+
{ "prime", 8242 }, // prime = minutes = feet, U+2032 ISOtech
|
266
|
+
{ "prod", 8719 }, // n-ary product = product sign
|
267
|
+
{ "prop", 8733 }, // proportional to, U+221D ISOtech
|
268
|
+
{ "psi", 968 }, // greek small letter psi, U+03C8 ISOgrk3
|
269
|
+
{ "quot", 34 }, // -- quotation mark = APL quote
|
270
|
+
{ "rArr", 8658 }, // rightwards double arrow
|
271
|
+
{ "radic", 8730 }, // square root = radical sign
|
272
|
+
{ "rang", 9002 }, // right-pointing angle bracket = ket
|
273
|
+
{ "raquo", 187 }, // right-pointing double angle quotation mark
|
274
|
+
{ "rarr", 8594 }, // rightwards arrow, U+2192 ISOnum
|
275
|
+
{ "rceil", 8969 }, // right ceiling, U+2309 ISOamsc
|
276
|
+
{ "rdquo", 8221 }, // right double quotation mark
|
277
|
+
{ "real", 8476 }, // blackletter capital R = real part symbol
|
278
|
+
{ "reg", 174 }, // registered sign = registered trade mark sign
|
279
|
+
{ "rfloor", 8971 }, // right floor, U+230B ISOamsc
|
280
|
+
{ "rho", 961 }, // greek small letter rho, U+03C1 ISOgrk3
|
281
|
+
{ "rlm", 8207 }, // right-to-left mark, U+200F NEW RFC 2070
|
282
|
+
{ "rsaquo", 8250 }, // single right-pointing angle quotation mark
|
283
|
+
{ "rsquo", 8217 }, // right single quotation mark
|
284
|
+
{ "sbquo", 8218 }, // single low-9 quotation mark, U+201A NEW
|
285
|
+
{ "scaron", 353 }, // - latin small letter s with caron
|
286
|
+
{ "sdot", 8901 }, // dot operator, U+22C5 ISOamsb
|
287
|
+
{ "sect", 167 }, // section sign, U+00A7 ISOnum
|
288
|
+
{ "shy", 173 }, // soft hyphen = discretionary hyphen
|
289
|
+
{ "sigma", 963 }, // greek small letter sigma
|
290
|
+
{ "sigmaf", 962 }, // greek small letter final sigma
|
291
|
+
{ "sim", 8764 }, // tilde operator = varies with = similar to
|
292
|
+
{ "spades", 9824 }, // black spade suit, U+2660 ISOpub
|
293
|
+
{ "sub", 8834 }, // subset of, U+2282 ISOtech
|
294
|
+
{ "sube", 8838 }, // subset of or equal to, U+2286 ISOtech
|
295
|
+
{ "sum", 8721 }, // n-ary sumation, U+2211 ISOamsb
|
296
|
+
{ "sup", 8835 }, // superset of, U+2283 ISOtech
|
297
|
+
{ "sup1", 185 }, // superscript one = superscript digit one
|
298
|
+
{ "sup2", 178 }, // superscript two = superscript digit two
|
299
|
+
{ "sup3", 179 }, // superscript three = superscript digit three
|
300
|
+
{ "supe", 8839 }, // superset of or equal to
|
301
|
+
{ "szlig", 223 }, // latin small letter sharp s = ess-zed
|
302
|
+
{ "tau", 964 }, // greek small letter tau, U+03C4 ISOgrk3
|
303
|
+
{ "there4", 8756 }, // therefore, U+2234 ISOtech
|
304
|
+
{ "theta", 952 }, // greek small letter theta
|
305
|
+
{ "thetasym", 977 },// greek small letter theta symbol
|
306
|
+
{ "thinsp", 8201 }, // thin space, U+2009 ISOpub
|
307
|
+
{ "thorn", 254 }, // latin small letter thorn
|
308
|
+
{ "tilde", 732 }, // - small tilde, U+02DC ISOdia
|
309
|
+
{ "times", 215 }, // multiplication sign, U+00D7 ISOnum
|
310
|
+
{ "trade", 8482 }, // trade mark sign, U+2122 ISOnum
|
311
|
+
{ "uArr", 8657 }, // upwards double arrow, U+21D1 ISOamsa
|
312
|
+
{ "uacute", 250 }, // latin small letter u with acute
|
313
|
+
{ "uarr", 8593 }, // upwards arrow, U+2191 ISOnum-->
|
314
|
+
{ "ucirc", 251 }, // latin small letter u with circumflex
|
315
|
+
{ "ugrave", 249 }, // latin small letter u with grave
|
316
|
+
{ "uml", 168 }, // diaeresis = spacing diaeresis
|
317
|
+
{ "upsih", 978 }, // greek upsilon with hook symbol
|
318
|
+
{ "upsilon", 965 }, // greek small letter upsilon
|
319
|
+
{ "uuml", 252 }, // latin small letter u with diaeresis
|
320
|
+
{ "weierp", 8472 }, // script capital P = power set
|
321
|
+
{ "xi", 958 }, // greek small letter xi, U+03BE ISOgrk3
|
322
|
+
{ "yacute", 253 }, // latin small letter y with acute
|
323
|
+
{ "yen", 165 }, // yen sign = yuan sign, U+00A5 ISOnum
|
324
|
+
{ "yuml", 255 }, // latin small letter y with diaeresis
|
325
|
+
{ "zeta", 950 }, // greek small letter zeta, U+03B6 ISOgrk3
|
326
|
+
{ "zwj", 8205 }, // zero width joiner, U+200D NEW RFC 2070
|
327
|
+
{ "zwnj", 8204 }, // zero width non-joiner
|
328
|
+
{ NULL, 0 },
|
329
|
+
};
|
330
|
+
|
331
|
+
// Folds a NUL-terminated name into a 64-bit hash.
//
// Every byte has bit 0x20 set before it is mixed in, which lower-cases ASCII
// letters, so names that differ only in letter case produce the same hash.
// A NULL or empty key hashes to 0. Arithmetic is unsigned and wraps.
static uint64_t
calc_hash(const char *key) {
    const uint8_t   *p = (const uint8_t*)key;
    uint64_t        sum = 0;

    if (NULL == p) {
        return 0;
    }
    while (0 != *p) {
        // narrow to most used range of 0x4D (77) in size
        int folded = (*p | 0x20) - 0x2D;

        sum = 77 * sum + folded;
        p++;
    }
    return sum;
}
|
345
|
+
|
346
|
+
static Slot*
|
347
|
+
get_bucketp(uint64_t h) {
|
348
|
+
return entity_cache.buckets + (BUCKET_MASK & (h ^ (h << 5) ^ (h >> 7)));
|
349
|
+
}
|
350
|
+
|
351
|
+
static void
|
352
|
+
cache_set(Slot s) {
|
353
|
+
int64_t h = calc_hash(s->key);
|
354
|
+
Slot *bucket = get_bucketp(h);
|
355
|
+
|
356
|
+
s->hash = h;
|
357
|
+
s->next = *bucket;
|
358
|
+
*bucket = s;
|
359
|
+
}
|
360
|
+
|
361
|
+
static Slot
|
362
|
+
cache_get(const char *key) {
|
363
|
+
int64_t h = calc_hash(key);
|
364
|
+
Slot *bucket = get_bucketp(h);
|
365
|
+
Slot s;
|
366
|
+
|
367
|
+
for (s = *bucket; NULL != s; s = s->next) {
|
368
|
+
if (h == (int64_t)s->hash && 0 == strcasecmp(s->key, key)) {
|
369
|
+
return s;
|
370
|
+
}
|
371
|
+
}
|
372
|
+
return NULL;
|
373
|
+
}
|
374
|
+
|
375
|
+
static void
|
376
|
+
cache_init() {
|
377
|
+
Slot e = entities;
|
378
|
+
|
379
|
+
memset(&entity_cache, 0, sizeof(struct _cache));
|
380
|
+
for (; NULL != e->key; e++) {
|
381
|
+
cache_set(e);
|
382
|
+
}
|
383
|
+
inited = true;
|
384
|
+
}
|
385
|
+
|
386
|
+
char*
|
387
|
+
ox_entity_lookup(char *text, const char *key) {
|
388
|
+
Slot s = entities;
|
389
|
+
|
390
|
+
if (!inited) {
|
391
|
+
cache_init();
|
392
|
+
}
|
393
|
+
if (NULL == (s = cache_get(key))) {
|
394
|
+
return NULL;
|
395
|
+
}
|
396
|
+
return ox_ucs_to_utf8_chars(text, s->code);
|
397
|
+
}
|
data/ext/ox/special.h
CHANGED
data/lib/ox/element.rb
CHANGED
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,19 +1,19 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.11.0
|
4
|
+
version: 2.13.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ohler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: "A fast XML parser and object serializer that uses only standard C lib.\n
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
description: "A fast XML parser and object serializer that uses only standard C lib.\n\nOptimized
|
14
|
+
XML (Ox), as the name implies was written to provide speed optimized\nXML handling.
|
15
|
+
It was designed to be an alternative to Nokogiri and other Ruby\nXML parsers for
|
16
|
+
generic XML parsing and as an alternative to Marshal for Object\nserialization. "
|
17
17
|
email: peter@ohler.com
|
18
18
|
executables: []
|
19
19
|
extensions:
|
@@ -35,7 +35,6 @@ files:
|
|
35
35
|
- ext/ox/cache8.c
|
36
36
|
- ext/ox/cache8.h
|
37
37
|
- ext/ox/dump.c
|
38
|
-
- ext/ox/encode.h
|
39
38
|
- ext/ox/err.c
|
40
39
|
- ext/ox/err.h
|
41
40
|
- ext/ox/extconf.rb
|
@@ -99,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
98
|
- !ruby/object:Gem::Version
|
100
99
|
version: '0'
|
101
100
|
requirements: []
|
102
|
-
rubygems_version: 3.
|
101
|
+
rubygems_version: 3.1.2
|
103
102
|
signing_key:
|
104
103
|
specification_version: 4
|
105
104
|
summary: A fast XML parser and object serializer.
|