ctags.rb 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/Gemfile +2 -0
  2. data/Rakefile +23 -0
  3. data/ctags.rb.gemspec +23 -0
  4. data/ext/.gitignore +3 -0
  5. data/ext/extconf.rb +15 -0
  6. data/ext/vendor/exuberant-ctags/.gitignore +6 -0
  7. data/ext/vendor/exuberant-ctags/.indent.pro +31 -0
  8. data/ext/vendor/exuberant-ctags/COPYING +340 -0
  9. data/ext/vendor/exuberant-ctags/EXTENDING.html +386 -0
  10. data/ext/vendor/exuberant-ctags/FAQ +371 -0
  11. data/ext/vendor/exuberant-ctags/INSTALL +215 -0
  12. data/ext/vendor/exuberant-ctags/INSTALL.oth +73 -0
  13. data/ext/vendor/exuberant-ctags/MAINTAINERS +88 -0
  14. data/ext/vendor/exuberant-ctags/Makefile.in +222 -0
  15. data/ext/vendor/exuberant-ctags/NEWS +871 -0
  16. data/ext/vendor/exuberant-ctags/README +73 -0
  17. data/ext/vendor/exuberant-ctags/ant.c +42 -0
  18. data/ext/vendor/exuberant-ctags/argproc.c +505 -0
  19. data/ext/vendor/exuberant-ctags/args.c +274 -0
  20. data/ext/vendor/exuberant-ctags/args.h +63 -0
  21. data/ext/vendor/exuberant-ctags/asm.c +387 -0
  22. data/ext/vendor/exuberant-ctags/asp.c +328 -0
  23. data/ext/vendor/exuberant-ctags/awk.c +81 -0
  24. data/ext/vendor/exuberant-ctags/basic.c +203 -0
  25. data/ext/vendor/exuberant-ctags/beta.c +321 -0
  26. data/ext/vendor/exuberant-ctags/c.c +2932 -0
  27. data/ext/vendor/exuberant-ctags/cobol.c +50 -0
  28. data/ext/vendor/exuberant-ctags/config.h.in +277 -0
  29. data/ext/vendor/exuberant-ctags/configure +7704 -0
  30. data/ext/vendor/exuberant-ctags/configure.ac +532 -0
  31. data/ext/vendor/exuberant-ctags/ctags.1 +1186 -0
  32. data/ext/vendor/exuberant-ctags/ctags.h +28 -0
  33. data/ext/vendor/exuberant-ctags/ctags.html +2087 -0
  34. data/ext/vendor/exuberant-ctags/ctags.spec +40 -0
  35. data/ext/vendor/exuberant-ctags/debug.c +113 -0
  36. data/ext/vendor/exuberant-ctags/debug.h +70 -0
  37. data/ext/vendor/exuberant-ctags/descrip.mms +68 -0
  38. data/ext/vendor/exuberant-ctags/dosbatch.c +42 -0
  39. data/ext/vendor/exuberant-ctags/e_amiga.h +24 -0
  40. data/ext/vendor/exuberant-ctags/e_djgpp.h +47 -0
  41. data/ext/vendor/exuberant-ctags/e_mac.h +143 -0
  42. data/ext/vendor/exuberant-ctags/e_msoft.h +76 -0
  43. data/ext/vendor/exuberant-ctags/e_os2.h +37 -0
  44. data/ext/vendor/exuberant-ctags/e_qdos.h +34 -0
  45. data/ext/vendor/exuberant-ctags/e_riscos.h +58 -0
  46. data/ext/vendor/exuberant-ctags/e_vms.h +31 -0
  47. data/ext/vendor/exuberant-ctags/eiffel.c +1352 -0
  48. data/ext/vendor/exuberant-ctags/entry.c +847 -0
  49. data/ext/vendor/exuberant-ctags/entry.h +103 -0
  50. data/ext/vendor/exuberant-ctags/erlang.c +189 -0
  51. data/ext/vendor/exuberant-ctags/flex.c +2243 -0
  52. data/ext/vendor/exuberant-ctags/fortran.c +2197 -0
  53. data/ext/vendor/exuberant-ctags/general.h +127 -0
  54. data/ext/vendor/exuberant-ctags/get.c +669 -0
  55. data/ext/vendor/exuberant-ctags/get.h +50 -0
  56. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/all-wcprops +47 -0
  57. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/entries +112 -0
  58. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/README.txt.svn-base +5 -0
  59. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regcomp.c.svn-base +3818 -0
  60. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex.c.svn-base +74 -0
  61. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex.h.svn-base +575 -0
  62. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex_internal.c.svn-base +1713 -0
  63. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex_internal.h.svn-base +773 -0
  64. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regexec.c.svn-base +4338 -0
  65. data/ext/vendor/exuberant-ctags/gnu_regex/README.txt +5 -0
  66. data/ext/vendor/exuberant-ctags/gnu_regex/regcomp.c +3818 -0
  67. data/ext/vendor/exuberant-ctags/gnu_regex/regex.c +74 -0
  68. data/ext/vendor/exuberant-ctags/gnu_regex/regex.h +575 -0
  69. data/ext/vendor/exuberant-ctags/gnu_regex/regex_internal.c +1713 -0
  70. data/ext/vendor/exuberant-ctags/gnu_regex/regex_internal.h +773 -0
  71. data/ext/vendor/exuberant-ctags/gnu_regex/regexec.c +4338 -0
  72. data/ext/vendor/exuberant-ctags/html.c +49 -0
  73. data/ext/vendor/exuberant-ctags/jscript.c +1572 -0
  74. data/ext/vendor/exuberant-ctags/keyword.c +258 -0
  75. data/ext/vendor/exuberant-ctags/keyword.h +34 -0
  76. data/ext/vendor/exuberant-ctags/lisp.c +139 -0
  77. data/ext/vendor/exuberant-ctags/lregex.c +704 -0
  78. data/ext/vendor/exuberant-ctags/lua.c +133 -0
  79. data/ext/vendor/exuberant-ctags/mac.c +273 -0
  80. data/ext/vendor/exuberant-ctags/magic.diff +21 -0
  81. data/ext/vendor/exuberant-ctags/main.c +584 -0
  82. data/ext/vendor/exuberant-ctags/main.h +32 -0
  83. data/ext/vendor/exuberant-ctags/maintainer.mak +476 -0
  84. data/ext/vendor/exuberant-ctags/make.c +217 -0
  85. data/ext/vendor/exuberant-ctags/matlab.c +44 -0
  86. data/ext/vendor/exuberant-ctags/mk_bc3.mak +46 -0
  87. data/ext/vendor/exuberant-ctags/mk_bc5.mak +49 -0
  88. data/ext/vendor/exuberant-ctags/mk_djg.mak +18 -0
  89. data/ext/vendor/exuberant-ctags/mk_manx.mak +65 -0
  90. data/ext/vendor/exuberant-ctags/mk_mingw.mak +31 -0
  91. data/ext/vendor/exuberant-ctags/mk_mpw.mak +130 -0
  92. data/ext/vendor/exuberant-ctags/mk_mvc.mak +40 -0
  93. data/ext/vendor/exuberant-ctags/mk_os2.mak +104 -0
  94. data/ext/vendor/exuberant-ctags/mk_qdos.mak +100 -0
  95. data/ext/vendor/exuberant-ctags/mk_sas.mak +63 -0
  96. data/ext/vendor/exuberant-ctags/mkinstalldirs +40 -0
  97. data/ext/vendor/exuberant-ctags/ocaml.c +1842 -0
  98. data/ext/vendor/exuberant-ctags/options.c +1842 -0
  99. data/ext/vendor/exuberant-ctags/options.h +155 -0
  100. data/ext/vendor/exuberant-ctags/parse.c +677 -0
  101. data/ext/vendor/exuberant-ctags/parse.h +129 -0
  102. data/ext/vendor/exuberant-ctags/parsers.h +63 -0
  103. data/ext/vendor/exuberant-ctags/pascal.c +267 -0
  104. data/ext/vendor/exuberant-ctags/perl.c +382 -0
  105. data/ext/vendor/exuberant-ctags/php.c +237 -0
  106. data/ext/vendor/exuberant-ctags/python.c +771 -0
  107. data/ext/vendor/exuberant-ctags/qdos.c +106 -0
  108. data/ext/vendor/exuberant-ctags/read.c +569 -0
  109. data/ext/vendor/exuberant-ctags/read.h +116 -0
  110. data/ext/vendor/exuberant-ctags/readtags.c +959 -0
  111. data/ext/vendor/exuberant-ctags/readtags.h +252 -0
  112. data/ext/vendor/exuberant-ctags/rexx.c +39 -0
  113. data/ext/vendor/exuberant-ctags/routines.c +891 -0
  114. data/ext/vendor/exuberant-ctags/routines.h +134 -0
  115. data/ext/vendor/exuberant-ctags/ruby.c +408 -0
  116. data/ext/vendor/exuberant-ctags/scheme.c +111 -0
  117. data/ext/vendor/exuberant-ctags/sh.c +115 -0
  118. data/ext/vendor/exuberant-ctags/slang.c +41 -0
  119. data/ext/vendor/exuberant-ctags/sml.c +212 -0
  120. data/ext/vendor/exuberant-ctags/sort.c +230 -0
  121. data/ext/vendor/exuberant-ctags/sort.h +32 -0
  122. data/ext/vendor/exuberant-ctags/source.mak +122 -0
  123. data/ext/vendor/exuberant-ctags/sql.c +2112 -0
  124. data/ext/vendor/exuberant-ctags/strlist.c +281 -0
  125. data/ext/vendor/exuberant-ctags/strlist.h +54 -0
  126. data/ext/vendor/exuberant-ctags/tcl.c +116 -0
  127. data/ext/vendor/exuberant-ctags/tex.c +524 -0
  128. data/ext/vendor/exuberant-ctags/verilog.c +340 -0
  129. data/ext/vendor/exuberant-ctags/vhdl.c +835 -0
  130. data/ext/vendor/exuberant-ctags/vim.c +636 -0
  131. data/ext/vendor/exuberant-ctags/vstring.c +232 -0
  132. data/ext/vendor/exuberant-ctags/vstring.h +85 -0
  133. data/ext/vendor/exuberant-ctags/yacc.c +40 -0
  134. data/lib/ctags/exuberant.rb +45 -0
  135. data/lib/ctags/version.rb +3 -0
  136. data/lib/ctags.rb +6 -0
  137. data/test/test_ctags.rb +24 -0
  138. metadata +233 -0
@@ -0,0 +1,1713 @@
1
+ /* Extended regular expression matching and search library.
2
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3
+ This file is part of the GNU C Library.
4
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
+
6
+ The GNU C Library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ The GNU C Library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with the GNU C Library; if not, write to the Free
18
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19
+ 02111-1307 USA. */
20
+
21
+ static void re_string_construct_common (const char *str, int len,
22
+ re_string_t *pstr,
23
+ RE_TRANSLATE_TYPE trans, int icase,
24
+ const re_dfa_t *dfa) internal_function;
25
+ static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26
+ const re_node_set *nodes,
27
+ unsigned int hash) internal_function;
28
+ static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29
+ const re_node_set *nodes,
30
+ unsigned int context,
31
+ unsigned int hash) internal_function;
32
+
33
+ /* Functions for string operation. */
34
+
35
+ /* This function allocate the buffers. It is necessary to call
36
+ re_string_reconstruct before using the object. */
37
+
38
+ static reg_errcode_t
39
+ internal_function
40
+ re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
41
+ RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
42
+ {
43
+ reg_errcode_t ret;
44
+ int init_buf_len;
45
+
46
+ /* Ensure at least one character fits into the buffers. */
47
+ if (init_len < dfa->mb_cur_max)
48
+ init_len = dfa->mb_cur_max;
49
+ init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
50
+ re_string_construct_common (str, len, pstr, trans, icase, dfa);
51
+
52
+ ret = re_string_realloc_buffers (pstr, init_buf_len);
53
+ if (BE (ret != REG_NOERROR, 0))
54
+ return ret;
55
+
56
+ pstr->word_char = dfa->word_char;
57
+ pstr->word_ops_used = dfa->word_ops_used;
58
+ pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
59
+ pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
60
+ pstr->valid_raw_len = pstr->valid_len;
61
+ return REG_NOERROR;
62
+ }
63
+
64
+ /* This function allocate the buffers, and initialize them. */
65
+
66
+ static reg_errcode_t
67
+ internal_function
68
+ re_string_construct (re_string_t *pstr, const char *str, int len,
69
+ RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
70
+ {
71
+ reg_errcode_t ret;
72
+ memset (pstr, '\0', sizeof (re_string_t));
73
+ re_string_construct_common (str, len, pstr, trans, icase, dfa);
74
+
75
+ if (len > 0)
76
+ {
77
+ ret = re_string_realloc_buffers (pstr, len + 1);
78
+ if (BE (ret != REG_NOERROR, 0))
79
+ return ret;
80
+ }
81
+ pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
82
+
83
+ if (icase)
84
+ {
85
+ #ifdef RE_ENABLE_I18N
86
+ if (dfa->mb_cur_max > 1)
87
+ {
88
+ while (1)
89
+ {
90
+ ret = build_wcs_upper_buffer (pstr);
91
+ if (BE (ret != REG_NOERROR, 0))
92
+ return ret;
93
+ if (pstr->valid_raw_len >= len)
94
+ break;
95
+ if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
96
+ break;
97
+ ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
98
+ if (BE (ret != REG_NOERROR, 0))
99
+ return ret;
100
+ }
101
+ }
102
+ else
103
+ #endif /* RE_ENABLE_I18N */
104
+ build_upper_buffer (pstr);
105
+ }
106
+ else
107
+ {
108
+ #ifdef RE_ENABLE_I18N
109
+ if (dfa->mb_cur_max > 1)
110
+ build_wcs_buffer (pstr);
111
+ else
112
+ #endif /* RE_ENABLE_I18N */
113
+ {
114
+ if (trans != NULL)
115
+ re_string_translate_buffer (pstr);
116
+ else
117
+ {
118
+ pstr->valid_len = pstr->bufs_len;
119
+ pstr->valid_raw_len = pstr->bufs_len;
120
+ }
121
+ }
122
+ }
123
+
124
+ return REG_NOERROR;
125
+ }
126
+
127
+ /* Helper functions for re_string_allocate, and re_string_construct. */
128
+
129
+ static reg_errcode_t
130
+ internal_function
131
+ re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
132
+ {
133
+ #ifdef RE_ENABLE_I18N
134
+ if (pstr->mb_cur_max > 1)
135
+ {
136
+ wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
137
+ if (BE (new_wcs == NULL, 0))
138
+ return REG_ESPACE;
139
+ pstr->wcs = new_wcs;
140
+ if (pstr->offsets != NULL)
141
+ {
142
+ int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
143
+ if (BE (new_offsets == NULL, 0))
144
+ return REG_ESPACE;
145
+ pstr->offsets = new_offsets;
146
+ }
147
+ }
148
+ #endif /* RE_ENABLE_I18N */
149
+ if (pstr->mbs_allocated)
150
+ {
151
+ unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
152
+ new_buf_len);
153
+ if (BE (new_mbs == NULL, 0))
154
+ return REG_ESPACE;
155
+ pstr->mbs = new_mbs;
156
+ }
157
+ pstr->bufs_len = new_buf_len;
158
+ return REG_NOERROR;
159
+ }
160
+
161
+
162
+ static void
163
+ internal_function
164
+ re_string_construct_common (const char *str, int len, re_string_t *pstr,
165
+ RE_TRANSLATE_TYPE trans, int icase,
166
+ const re_dfa_t *dfa)
167
+ {
168
+ pstr->raw_mbs = (const unsigned char *) str;
169
+ pstr->len = len;
170
+ pstr->raw_len = len;
171
+ pstr->trans = trans;
172
+ pstr->icase = icase ? 1 : 0;
173
+ pstr->mbs_allocated = (trans != NULL || icase);
174
+ pstr->mb_cur_max = dfa->mb_cur_max;
175
+ pstr->is_utf8 = dfa->is_utf8;
176
+ pstr->map_notascii = dfa->map_notascii;
177
+ pstr->stop = pstr->len;
178
+ pstr->raw_stop = pstr->stop;
179
+ }
180
+
181
+ #ifdef RE_ENABLE_I18N
182
+
183
+ /* Build wide character buffer PSTR->WCS.
184
+ If the byte sequence of the string are:
185
+ <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
186
+ Then wide character buffer will be:
187
+ <wc1> , WEOF , <wc2> , WEOF , <wc3>
188
+ We use WEOF for padding, they indicate that the position isn't
189
+ a first byte of a multibyte character.
190
+
191
+ Note that this function assumes PSTR->VALID_LEN elements are already
192
+ built and starts from PSTR->VALID_LEN. */
193
+
194
+ static void
195
+ internal_function
196
+ build_wcs_buffer (re_string_t *pstr)
197
+ {
198
+ #ifdef _LIBC
199
+ unsigned char buf[MB_LEN_MAX];
200
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
201
+ #else
202
+ unsigned char buf[64];
203
+ #endif
204
+ mbstate_t prev_st;
205
+ int byte_idx, end_idx, remain_len;
206
+ size_t mbclen;
207
+
208
+ /* Build the buffers from pstr->valid_len to either pstr->len or
209
+ pstr->bufs_len. */
210
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
211
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
212
+ {
213
+ wchar_t wc;
214
+ const char *p;
215
+
216
+ remain_len = end_idx - byte_idx;
217
+ prev_st = pstr->cur_state;
218
+ /* Apply the translation if we need. */
219
+ if (BE (pstr->trans != NULL, 0))
220
+ {
221
+ int i, ch;
222
+
223
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
224
+ {
225
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
226
+ buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
227
+ }
228
+ p = (const char *) buf;
229
+ }
230
+ else
231
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
232
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
233
+ if (BE (mbclen == (size_t) -2, 0))
234
+ {
235
+ /* The buffer doesn't have enough space, finish to build. */
236
+ pstr->cur_state = prev_st;
237
+ break;
238
+ }
239
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
240
+ {
241
+ /* We treat these cases as a singlebyte character. */
242
+ mbclen = 1;
243
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
244
+ if (BE (pstr->trans != NULL, 0))
245
+ wc = pstr->trans[wc];
246
+ pstr->cur_state = prev_st;
247
+ }
248
+
249
+ /* Write wide character and padding. */
250
+ pstr->wcs[byte_idx++] = wc;
251
+ /* Write paddings. */
252
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
253
+ pstr->wcs[byte_idx++] = WEOF;
254
+ }
255
+ pstr->valid_len = byte_idx;
256
+ pstr->valid_raw_len = byte_idx;
257
+ }
258
+
259
+ /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
260
+ but for REG_ICASE. */
261
+
262
+ static reg_errcode_t
263
+ internal_function
264
+ build_wcs_upper_buffer (re_string_t *pstr)
265
+ {
266
+ mbstate_t prev_st;
267
+ int src_idx, byte_idx, end_idx, remain_len;
268
+ size_t mbclen;
269
+ #ifdef _LIBC
270
+ char buf[MB_LEN_MAX];
271
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
272
+ #else
273
+ char buf[64];
274
+ #endif
275
+
276
+ byte_idx = pstr->valid_len;
277
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
278
+
279
+ /* The following optimization assumes that ASCII characters can be
280
+ mapped to wide characters with a simple cast. */
281
+ if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
282
+ {
283
+ while (byte_idx < end_idx)
284
+ {
285
+ wchar_t wc;
286
+
287
+ if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
288
+ && mbsinit (&pstr->cur_state))
289
+ {
290
+ /* In case of a singlebyte character. */
291
+ pstr->mbs[byte_idx]
292
+ = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
293
+ /* The next step uses the assumption that wchar_t is encoded
294
+ ASCII-safe: all ASCII values can be converted like this. */
295
+ pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
296
+ ++byte_idx;
297
+ continue;
298
+ }
299
+
300
+ remain_len = end_idx - byte_idx;
301
+ prev_st = pstr->cur_state;
302
+ mbclen = __mbrtowc (&wc,
303
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
304
+ + byte_idx), remain_len, &pstr->cur_state);
305
+ if (BE (mbclen + 2 > 2, 1))
306
+ {
307
+ wchar_t wcu = wc;
308
+ if (iswlower (wc))
309
+ {
310
+ size_t mbcdlen;
311
+
312
+ wcu = towupper (wc);
313
+ mbcdlen = wcrtomb (buf, wcu, &prev_st);
314
+ if (BE (mbclen == mbcdlen, 1))
315
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
316
+ else
317
+ {
318
+ src_idx = byte_idx;
319
+ goto offsets_needed;
320
+ }
321
+ }
322
+ else
323
+ memcpy (pstr->mbs + byte_idx,
324
+ pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
325
+ pstr->wcs[byte_idx++] = wcu;
326
+ /* Write paddings. */
327
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
328
+ pstr->wcs[byte_idx++] = WEOF;
329
+ }
330
+ else if (mbclen == (size_t) -1 || mbclen == 0)
331
+ {
332
+ /* It is an invalid character or '\0'. Just use the byte. */
333
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
334
+ pstr->mbs[byte_idx] = ch;
335
+ /* And also cast it to wide char. */
336
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
337
+ if (BE (mbclen == (size_t) -1, 0))
338
+ pstr->cur_state = prev_st;
339
+ }
340
+ else
341
+ {
342
+ /* The buffer doesn't have enough space, finish to build. */
343
+ pstr->cur_state = prev_st;
344
+ break;
345
+ }
346
+ }
347
+ pstr->valid_len = byte_idx;
348
+ pstr->valid_raw_len = byte_idx;
349
+ return REG_NOERROR;
350
+ }
351
+ else
352
+ for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
353
+ {
354
+ wchar_t wc;
355
+ const char *p;
356
+ offsets_needed:
357
+ remain_len = end_idx - byte_idx;
358
+ prev_st = pstr->cur_state;
359
+ if (BE (pstr->trans != NULL, 0))
360
+ {
361
+ int i, ch;
362
+
363
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
364
+ {
365
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
366
+ buf[i] = pstr->trans[ch];
367
+ }
368
+ p = (const char *) buf;
369
+ }
370
+ else
371
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
372
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
373
+ if (BE (mbclen + 2 > 2, 1))
374
+ {
375
+ wchar_t wcu = wc;
376
+ if (iswlower (wc))
377
+ {
378
+ size_t mbcdlen;
379
+
380
+ wcu = towupper (wc);
381
+ mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
382
+ if (BE (mbclen == mbcdlen, 1))
383
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
384
+ else if (mbcdlen != (size_t) -1)
385
+ {
386
+ size_t i;
387
+
388
+ if (byte_idx + mbcdlen > pstr->bufs_len)
389
+ {
390
+ pstr->cur_state = prev_st;
391
+ break;
392
+ }
393
+
394
+ if (pstr->offsets == NULL)
395
+ {
396
+ pstr->offsets = re_malloc (int, pstr->bufs_len);
397
+
398
+ if (pstr->offsets == NULL)
399
+ return REG_ESPACE;
400
+ }
401
+ if (!pstr->offsets_needed)
402
+ {
403
+ for (i = 0; i < (size_t) byte_idx; ++i)
404
+ pstr->offsets[i] = i;
405
+ pstr->offsets_needed = 1;
406
+ }
407
+
408
+ memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
409
+ pstr->wcs[byte_idx] = wcu;
410
+ pstr->offsets[byte_idx] = src_idx;
411
+ for (i = 1; i < mbcdlen; ++i)
412
+ {
413
+ pstr->offsets[byte_idx + i]
414
+ = src_idx + (i < mbclen ? i : mbclen - 1);
415
+ pstr->wcs[byte_idx + i] = WEOF;
416
+ }
417
+ pstr->len += mbcdlen - mbclen;
418
+ if (pstr->raw_stop > src_idx)
419
+ pstr->stop += mbcdlen - mbclen;
420
+ end_idx = (pstr->bufs_len > pstr->len)
421
+ ? pstr->len : pstr->bufs_len;
422
+ byte_idx += mbcdlen;
423
+ src_idx += mbclen;
424
+ continue;
425
+ }
426
+ else
427
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
428
+ }
429
+ else
430
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
431
+
432
+ if (BE (pstr->offsets_needed != 0, 0))
433
+ {
434
+ size_t i;
435
+ for (i = 0; i < mbclen; ++i)
436
+ pstr->offsets[byte_idx + i] = src_idx + i;
437
+ }
438
+ src_idx += mbclen;
439
+
440
+ pstr->wcs[byte_idx++] = wcu;
441
+ /* Write paddings. */
442
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
443
+ pstr->wcs[byte_idx++] = WEOF;
444
+ }
445
+ else if (mbclen == (size_t) -1 || mbclen == 0)
446
+ {
447
+ /* It is an invalid character or '\0'. Just use the byte. */
448
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
449
+
450
+ if (BE (pstr->trans != NULL, 0))
451
+ ch = pstr->trans [ch];
452
+ pstr->mbs[byte_idx] = ch;
453
+
454
+ if (BE (pstr->offsets_needed != 0, 0))
455
+ pstr->offsets[byte_idx] = src_idx;
456
+ ++src_idx;
457
+
458
+ /* And also cast it to wide char. */
459
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
460
+ if (BE (mbclen == (size_t) -1, 0))
461
+ pstr->cur_state = prev_st;
462
+ }
463
+ else
464
+ {
465
+ /* The buffer doesn't have enough space, finish to build. */
466
+ pstr->cur_state = prev_st;
467
+ break;
468
+ }
469
+ }
470
+ pstr->valid_len = byte_idx;
471
+ pstr->valid_raw_len = src_idx;
472
+ return REG_NOERROR;
473
+ }
474
+
475
+ /* Skip characters until the index becomes greater than NEW_RAW_IDX.
476
+ Return the index. */
477
+
478
+ static int
479
+ internal_function
480
+ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
481
+ {
482
+ mbstate_t prev_st;
483
+ int rawbuf_idx;
484
+ size_t mbclen;
485
+ wchar_t wc = WEOF;
486
+
487
+ /* Skip the characters which are not necessary to check. */
488
+ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
489
+ rawbuf_idx < new_raw_idx;)
490
+ {
491
+ int remain_len;
492
+ remain_len = pstr->len - rawbuf_idx;
493
+ prev_st = pstr->cur_state;
494
+ mbclen = __mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
495
+ remain_len, &pstr->cur_state);
496
+ if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
497
+ {
498
+ /* We treat these cases as a single byte character. */
499
+ if (mbclen == 0 || remain_len == 0)
500
+ wc = L'\0';
501
+ else
502
+ wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
503
+ mbclen = 1;
504
+ pstr->cur_state = prev_st;
505
+ }
506
+ /* Then proceed the next character. */
507
+ rawbuf_idx += mbclen;
508
+ }
509
+ *last_wc = (wint_t) wc;
510
+ return rawbuf_idx;
511
+ }
512
+ #endif /* RE_ENABLE_I18N */
513
+
514
+ /* Build the buffer PSTR->MBS, and apply the translation if we need.
515
+ This function is used in case of REG_ICASE. */
516
+
517
+ static void
518
+ internal_function
519
+ build_upper_buffer (re_string_t *pstr)
520
+ {
521
+ int char_idx, end_idx;
522
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
523
+
524
+ for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
525
+ {
526
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
527
+ if (BE (pstr->trans != NULL, 0))
528
+ ch = pstr->trans[ch];
529
+ if (islower (ch))
530
+ pstr->mbs[char_idx] = toupper (ch);
531
+ else
532
+ pstr->mbs[char_idx] = ch;
533
+ }
534
+ pstr->valid_len = char_idx;
535
+ pstr->valid_raw_len = char_idx;
536
+ }
537
+
538
+ /* Apply TRANS to the buffer in PSTR. */
539
+
540
+ static void
541
+ internal_function
542
+ re_string_translate_buffer (re_string_t *pstr)
543
+ {
544
+ int buf_idx, end_idx;
545
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
546
+
547
+ for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
548
+ {
549
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
550
+ pstr->mbs[buf_idx] = pstr->trans[ch];
551
+ }
552
+
553
+ pstr->valid_len = buf_idx;
554
+ pstr->valid_raw_len = buf_idx;
555
+ }
556
+
557
+ /* This function re-construct the buffers.
558
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
559
+ convert to upper case in case of REG_ICASE, apply translation. */
560
+
561
+ static reg_errcode_t
562
+ internal_function
563
+ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
564
+ {
565
+ int offset = idx - pstr->raw_mbs_idx;
566
+ if (BE (offset < 0, 0))
567
+ {
568
+ /* Reset buffer. */
569
+ #ifdef RE_ENABLE_I18N
570
+ if (pstr->mb_cur_max > 1)
571
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
572
+ #endif /* RE_ENABLE_I18N */
573
+ pstr->len = pstr->raw_len;
574
+ pstr->stop = pstr->raw_stop;
575
+ pstr->valid_len = 0;
576
+ pstr->raw_mbs_idx = 0;
577
+ pstr->valid_raw_len = 0;
578
+ pstr->offsets_needed = 0;
579
+ pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
580
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
581
+ if (!pstr->mbs_allocated)
582
+ pstr->mbs = (unsigned char *) pstr->raw_mbs;
583
+ offset = idx;
584
+ }
585
+
586
+ if (BE (offset != 0, 1))
587
+ {
588
+ /* Should the already checked characters be kept? */
589
+ if (BE (offset < pstr->valid_raw_len, 1))
590
+ {
591
+ /* Yes, move them to the front of the buffer. */
592
+ #ifdef RE_ENABLE_I18N
593
+ if (BE (pstr->offsets_needed, 0))
594
+ {
595
+ int low = 0, high = pstr->valid_len, mid;
596
+ do
597
+ {
598
+ mid = (high + low) / 2;
599
+ if (pstr->offsets[mid] > offset)
600
+ high = mid;
601
+ else if (pstr->offsets[mid] < offset)
602
+ low = mid + 1;
603
+ else
604
+ break;
605
+ }
606
+ while (low < high);
607
+ if (pstr->offsets[mid] < offset)
608
+ ++mid;
609
+ pstr->tip_context = re_string_context_at (pstr, mid - 1,
610
+ eflags);
611
+ /* This can be quite complicated, so handle specially
612
+ only the common and easy case where the character with
613
+ different length representation of lower and upper
614
+ case is present at or after offset. */
615
+ if (pstr->valid_len > offset
616
+ && mid == offset && pstr->offsets[mid] == offset)
617
+ {
618
+ memmove (pstr->wcs, pstr->wcs + offset,
619
+ (pstr->valid_len - offset) * sizeof (wint_t));
620
+ memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
621
+ pstr->valid_len -= offset;
622
+ pstr->valid_raw_len -= offset;
623
+ for (low = 0; low < pstr->valid_len; low++)
624
+ pstr->offsets[low] = pstr->offsets[low + offset] - offset;
625
+ }
626
+ else
627
+ {
628
+ /* Otherwise, just find out how long the partial multibyte
629
+ character at offset is and fill it with WEOF/255. */
630
+ pstr->len = pstr->raw_len - idx + offset;
631
+ pstr->stop = pstr->raw_stop - idx + offset;
632
+ pstr->offsets_needed = 0;
633
+ while (mid > 0 && pstr->offsets[mid - 1] == offset)
634
+ --mid;
635
+ while (mid < pstr->valid_len)
636
+ if (pstr->wcs[mid] != WEOF)
637
+ break;
638
+ else
639
+ ++mid;
640
+ if (mid == pstr->valid_len)
641
+ pstr->valid_len = 0;
642
+ else
643
+ {
644
+ pstr->valid_len = pstr->offsets[mid] - offset;
645
+ if (pstr->valid_len)
646
+ {
647
+ for (low = 0; low < pstr->valid_len; ++low)
648
+ pstr->wcs[low] = WEOF;
649
+ memset (pstr->mbs, 255, pstr->valid_len);
650
+ }
651
+ }
652
+ pstr->valid_raw_len = pstr->valid_len;
653
+ }
654
+ }
655
+ else
656
+ #endif
657
+ {
658
+ pstr->tip_context = re_string_context_at (pstr, offset - 1,
659
+ eflags);
660
+ #ifdef RE_ENABLE_I18N
661
+ if (pstr->mb_cur_max > 1)
662
+ memmove (pstr->wcs, pstr->wcs + offset,
663
+ (pstr->valid_len - offset) * sizeof (wint_t));
664
+ #endif /* RE_ENABLE_I18N */
665
+ if (BE (pstr->mbs_allocated, 0))
666
+ memmove (pstr->mbs, pstr->mbs + offset,
667
+ pstr->valid_len - offset);
668
+ pstr->valid_len -= offset;
669
+ pstr->valid_raw_len -= offset;
670
+ #if DEBUG
671
+ assert (pstr->valid_len > 0);
672
+ #endif
673
+ }
674
+ }
675
+ else
676
+ {
677
+ /* No, skip all characters until IDX. */
678
+ int prev_valid_len = pstr->valid_len;
679
+
680
+ #ifdef RE_ENABLE_I18N
681
+ if (BE (pstr->offsets_needed, 0))
682
+ {
683
+ pstr->len = pstr->raw_len - idx + offset;
684
+ pstr->stop = pstr->raw_stop - idx + offset;
685
+ pstr->offsets_needed = 0;
686
+ }
687
+ #endif
688
+ pstr->valid_len = 0;
689
+ #ifdef RE_ENABLE_I18N
690
+ if (pstr->mb_cur_max > 1)
691
+ {
692
+ int wcs_idx;
693
+ wint_t wc = WEOF;
694
+
695
+ if (pstr->is_utf8)
696
+ {
697
+ const unsigned char *raw, *p, *q, *end;
698
+
699
+ /* Special case UTF-8. Multi-byte chars start with any
700
+ byte other than 0x80 - 0xbf. */
701
+ raw = pstr->raw_mbs + pstr->raw_mbs_idx;
702
+ end = raw + (offset - pstr->mb_cur_max);
703
+ if (end < pstr->raw_mbs)
704
+ end = pstr->raw_mbs;
705
+ p = raw + offset - 1;
706
+ #ifdef _LIBC
707
+ /* We know the wchar_t encoding is UCS4, so for the simple
708
+ case, ASCII characters, skip the conversion step. */
709
+ if (isascii (*p) && BE (pstr->trans == NULL, 1))
710
+ {
711
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
712
+ /* pstr->valid_len = 0; */
713
+ wc = (wchar_t) *p;
714
+ }
715
+ else
716
+ #endif
717
+ for (; p >= end; --p)
718
+ if ((*p & 0xc0) != 0x80)
719
+ {
720
+ mbstate_t cur_state;
721
+ wchar_t wc2;
722
+ int mlen = raw + pstr->len - p;
723
+ unsigned char buf[6];
724
+ size_t mbclen;
725
+
726
+ q = p;
727
+ if (BE (pstr->trans != NULL, 0))
728
+ {
729
+ int i = mlen < 6 ? mlen : 6;
730
+ while (--i >= 0)
731
+ buf[i] = pstr->trans[p[i]];
732
+ q = buf;
733
+ }
734
+ /* XXX Don't use mbrtowc, we know which conversion
735
+ to use (UTF-8 -> UCS4). */
736
+ memset (&cur_state, 0, sizeof (cur_state));
737
+ mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
738
+ &cur_state);
739
+ if (raw + offset - p <= mbclen
740
+ && mbclen < (size_t) -2)
741
+ {
742
+ memset (&pstr->cur_state, '\0',
743
+ sizeof (mbstate_t));
744
+ pstr->valid_len = mbclen - (raw + offset - p);
745
+ wc = wc2;
746
+ }
747
+ break;
748
+ }
749
+ }
750
+
751
+ if (wc == WEOF)
752
+ pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
753
+ if (wc == WEOF)
754
+ pstr->tip_context
755
+ = re_string_context_at (pstr, prev_valid_len - 1, eflags);
756
+ else
757
+ pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
758
+ && IS_WIDE_WORD_CHAR (wc))
759
+ ? CONTEXT_WORD
760
+ : ((IS_WIDE_NEWLINE (wc)
761
+ && pstr->newline_anchor)
762
+ ? CONTEXT_NEWLINE : 0));
763
+ if (BE (pstr->valid_len, 0))
764
+ {
765
+ for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
766
+ pstr->wcs[wcs_idx] = WEOF;
767
+ if (pstr->mbs_allocated)
768
+ memset (pstr->mbs, 255, pstr->valid_len);
769
+ }
770
+ pstr->valid_raw_len = pstr->valid_len;
771
+ }
772
+ else
773
+ #endif /* RE_ENABLE_I18N */
774
+ {
775
+ int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
776
+ pstr->valid_raw_len = 0;
777
+ if (pstr->trans)
778
+ c = pstr->trans[c];
779
+ pstr->tip_context = (bitset_contain (pstr->word_char, c)
780
+ ? CONTEXT_WORD
781
+ : ((IS_NEWLINE (c) && pstr->newline_anchor)
782
+ ? CONTEXT_NEWLINE : 0));
783
+ }
784
+ }
785
+ if (!BE (pstr->mbs_allocated, 0))
786
+ pstr->mbs += offset;
787
+ }
788
+ pstr->raw_mbs_idx = idx;
789
+ pstr->len -= offset;
790
+ pstr->stop -= offset;
791
+
792
+ /* Then build the buffers. */
793
+ #ifdef RE_ENABLE_I18N
794
+ if (pstr->mb_cur_max > 1)
795
+ {
796
+ if (pstr->icase)
797
+ {
798
+ reg_errcode_t ret = build_wcs_upper_buffer (pstr);
799
+ if (BE (ret != REG_NOERROR, 0))
800
+ return ret;
801
+ }
802
+ else
803
+ build_wcs_buffer (pstr);
804
+ }
805
+ else
806
+ #endif /* RE_ENABLE_I18N */
807
+ if (BE (pstr->mbs_allocated, 0))
808
+ {
809
+ if (pstr->icase)
810
+ build_upper_buffer (pstr);
811
+ else if (pstr->trans != NULL)
812
+ re_string_translate_buffer (pstr);
813
+ }
814
+ else
815
+ pstr->valid_len = pstr->len;
816
+
817
+ pstr->cur_idx = 0;
818
+ return REG_NOERROR;
819
+ }
820
+
821
+ static unsigned char
822
+ internal_function __attribute ((pure))
823
+ re_string_peek_byte_case (const re_string_t *pstr, int idx)
824
+ {
825
+ int ch, off;
826
+
827
+ /* Handle the common (easiest) cases first. */
828
+ if (BE (!pstr->mbs_allocated, 1))
829
+ return re_string_peek_byte (pstr, idx);
830
+
831
+ #ifdef RE_ENABLE_I18N
832
+ if (pstr->mb_cur_max > 1
833
+ && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
834
+ return re_string_peek_byte (pstr, idx);
835
+ #endif
836
+
837
+ off = pstr->cur_idx + idx;
838
+ #ifdef RE_ENABLE_I18N
839
+ if (pstr->offsets_needed)
840
+ off = pstr->offsets[off];
841
+ #endif
842
+
843
+ ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
844
+
845
+ #ifdef RE_ENABLE_I18N
846
+ /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
847
+ this function returns CAPITAL LETTER I instead of first byte of
848
+ DOTLESS SMALL LETTER I. The latter would confuse the parser,
849
+ since peek_byte_case doesn't advance cur_idx in any way. */
850
+ if (pstr->offsets_needed && !isascii (ch))
851
+ return re_string_peek_byte (pstr, idx);
852
+ #endif
853
+
854
+ return ch;
855
+ }
856
+
857
+ static unsigned char
858
+ internal_function __attribute ((pure))
859
+ re_string_fetch_byte_case (re_string_t *pstr)
860
+ {
861
+ if (BE (!pstr->mbs_allocated, 1))
862
+ return re_string_fetch_byte (pstr);
863
+
864
+ #ifdef RE_ENABLE_I18N
865
+ if (pstr->offsets_needed)
866
+ {
867
+ int off, ch;
868
+
869
+ /* For tr_TR.UTF-8 [[:islower:]] there is
870
+ [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
871
+ in that case the whole multi-byte character and return
872
+ the original letter. On the other side, with
873
+ [[: DOTLESS SMALL LETTER I return [[:I, as doing
874
+ anything else would complicate things too much. */
875
+
876
+ if (!re_string_first_byte (pstr, pstr->cur_idx))
877
+ return re_string_fetch_byte (pstr);
878
+
879
+ off = pstr->offsets[pstr->cur_idx];
880
+ ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
881
+
882
+ if (! isascii (ch))
883
+ return re_string_fetch_byte (pstr);
884
+
885
+ re_string_skip_bytes (pstr,
886
+ re_string_char_size_at (pstr, pstr->cur_idx));
887
+ return ch;
888
+ }
889
+ #endif
890
+
891
+ return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
892
+ }
893
+
894
+ static void
895
+ internal_function
896
+ re_string_destruct (re_string_t *pstr)
897
+ {
898
+ #ifdef RE_ENABLE_I18N
899
+ re_free (pstr->wcs);
900
+ re_free (pstr->offsets);
901
+ #endif /* RE_ENABLE_I18N */
902
+ if (pstr->mbs_allocated)
903
+ re_free (pstr->mbs);
904
+ }
905
+
906
+ /* Return the context at IDX in INPUT. */
907
+
908
+ static unsigned int
909
+ internal_function
910
+ re_string_context_at (const re_string_t *input, int idx, int eflags)
911
+ {
912
+ int c;
913
+ if (BE (idx < 0, 0))
914
+ /* In this case, we use the value stored in input->tip_context,
915
+ since we can't know the character in input->mbs[-1] here. */
916
+ return input->tip_context;
917
+ if (BE (idx == input->len, 0))
918
+ return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
919
+ : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
920
+ #ifdef RE_ENABLE_I18N
921
+ if (input->mb_cur_max > 1)
922
+ {
923
+ wint_t wc;
924
+ int wc_idx = idx;
925
+ while(input->wcs[wc_idx] == WEOF)
926
+ {
927
+ #ifdef DEBUG
928
+ /* It must not happen. */
929
+ assert (wc_idx >= 0);
930
+ #endif
931
+ --wc_idx;
932
+ if (wc_idx < 0)
933
+ return input->tip_context;
934
+ }
935
+ wc = input->wcs[wc_idx];
936
+ if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
937
+ return CONTEXT_WORD;
938
+ return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
939
+ ? CONTEXT_NEWLINE : 0);
940
+ }
941
+ else
942
+ #endif
943
+ {
944
+ c = re_string_byte_at (input, idx);
945
+ if (bitset_contain (input->word_char, c))
946
+ return CONTEXT_WORD;
947
+ return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
948
+ }
949
+ }
950
+
951
+ /* Functions for set operation. */
952
+
953
+ static reg_errcode_t
954
+ internal_function
955
+ re_node_set_alloc (re_node_set *set, int size)
956
+ {
957
+ set->alloc = size;
958
+ set->nelem = 0;
959
+ set->elems = re_malloc (int, size);
960
+ if (BE (set->elems == NULL, 0))
961
+ return REG_ESPACE;
962
+ return REG_NOERROR;
963
+ }
964
+
965
+ static reg_errcode_t
966
+ internal_function
967
+ re_node_set_init_1 (re_node_set *set, int elem)
968
+ {
969
+ set->alloc = 1;
970
+ set->nelem = 1;
971
+ set->elems = re_malloc (int, 1);
972
+ if (BE (set->elems == NULL, 0))
973
+ {
974
+ set->alloc = set->nelem = 0;
975
+ return REG_ESPACE;
976
+ }
977
+ set->elems[0] = elem;
978
+ return REG_NOERROR;
979
+ }
980
+
981
+ static reg_errcode_t
982
+ internal_function
983
+ re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
984
+ {
985
+ set->alloc = 2;
986
+ set->elems = re_malloc (int, 2);
987
+ if (BE (set->elems == NULL, 0))
988
+ return REG_ESPACE;
989
+ if (elem1 == elem2)
990
+ {
991
+ set->nelem = 1;
992
+ set->elems[0] = elem1;
993
+ }
994
+ else
995
+ {
996
+ set->nelem = 2;
997
+ if (elem1 < elem2)
998
+ {
999
+ set->elems[0] = elem1;
1000
+ set->elems[1] = elem2;
1001
+ }
1002
+ else
1003
+ {
1004
+ set->elems[0] = elem2;
1005
+ set->elems[1] = elem1;
1006
+ }
1007
+ }
1008
+ return REG_NOERROR;
1009
+ }
1010
+
1011
+ static reg_errcode_t
1012
+ internal_function
1013
+ re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1014
+ {
1015
+ dest->nelem = src->nelem;
1016
+ if (src->nelem > 0)
1017
+ {
1018
+ dest->alloc = dest->nelem;
1019
+ dest->elems = re_malloc (int, dest->alloc);
1020
+ if (BE (dest->elems == NULL, 0))
1021
+ {
1022
+ dest->alloc = dest->nelem = 0;
1023
+ return REG_ESPACE;
1024
+ }
1025
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1026
+ }
1027
+ else
1028
+ re_node_set_init_empty (dest);
1029
+ return REG_NOERROR;
1030
+ }
1031
+
1032
+ /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1033
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1034
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1035
+
1036
+ static reg_errcode_t
1037
+ internal_function
1038
+ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1039
+ const re_node_set *src2)
1040
+ {
1041
+ int i1, i2, is, id, delta, sbase;
1042
+ if (src1->nelem == 0 || src2->nelem == 0)
1043
+ return REG_NOERROR;
1044
+
1045
+ /* We need dest->nelem + 2 * elems_in_intersection; this is a
1046
+ conservative estimate. */
1047
+ if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1048
+ {
1049
+ int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1050
+ int *new_elems = re_realloc (dest->elems, int, new_alloc);
1051
+ if (BE (new_elems == NULL, 0))
1052
+ return REG_ESPACE;
1053
+ dest->elems = new_elems;
1054
+ dest->alloc = new_alloc;
1055
+ }
1056
+
1057
+ /* Find the items in the intersection of SRC1 and SRC2, and copy
1058
+ into the top of DEST those that are not already in DEST itself. */
1059
+ sbase = dest->nelem + src1->nelem + src2->nelem;
1060
+ i1 = src1->nelem - 1;
1061
+ i2 = src2->nelem - 1;
1062
+ id = dest->nelem - 1;
1063
+ for (;;)
1064
+ {
1065
+ if (src1->elems[i1] == src2->elems[i2])
1066
+ {
1067
+ /* Try to find the item in DEST. Maybe we could binary search? */
1068
+ while (id >= 0 && dest->elems[id] > src1->elems[i1])
1069
+ --id;
1070
+
1071
+ if (id < 0 || dest->elems[id] != src1->elems[i1])
1072
+ dest->elems[--sbase] = src1->elems[i1];
1073
+
1074
+ if (--i1 < 0 || --i2 < 0)
1075
+ break;
1076
+ }
1077
+
1078
+ /* Lower the highest of the two items. */
1079
+ else if (src1->elems[i1] < src2->elems[i2])
1080
+ {
1081
+ if (--i2 < 0)
1082
+ break;
1083
+ }
1084
+ else
1085
+ {
1086
+ if (--i1 < 0)
1087
+ break;
1088
+ }
1089
+ }
1090
+
1091
+ id = dest->nelem - 1;
1092
+ is = dest->nelem + src1->nelem + src2->nelem - 1;
1093
+ delta = is - sbase + 1;
1094
+
1095
+ /* Now copy. When DELTA becomes zero, the remaining
1096
+ DEST elements are already in place; this is more or
1097
+ less the same loop that is in re_node_set_merge. */
1098
+ dest->nelem += delta;
1099
+ if (delta > 0 && id >= 0)
1100
+ for (;;)
1101
+ {
1102
+ if (dest->elems[is] > dest->elems[id])
1103
+ {
1104
+ /* Copy from the top. */
1105
+ dest->elems[id + delta--] = dest->elems[is--];
1106
+ if (delta == 0)
1107
+ break;
1108
+ }
1109
+ else
1110
+ {
1111
+ /* Slide from the bottom. */
1112
+ dest->elems[id + delta] = dest->elems[id];
1113
+ if (--id < 0)
1114
+ break;
1115
+ }
1116
+ }
1117
+
1118
+ /* Copy remaining SRC elements. */
1119
+ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1120
+
1121
+ return REG_NOERROR;
1122
+ }
1123
+
1124
+ /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1125
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1126
+
1127
+ static reg_errcode_t
1128
+ internal_function
1129
+ re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1130
+ const re_node_set *src2)
1131
+ {
1132
+ int i1, i2, id;
1133
+ if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1134
+ {
1135
+ dest->alloc = src1->nelem + src2->nelem;
1136
+ dest->elems = re_malloc (int, dest->alloc);
1137
+ if (BE (dest->elems == NULL, 0))
1138
+ return REG_ESPACE;
1139
+ }
1140
+ else
1141
+ {
1142
+ if (src1 != NULL && src1->nelem > 0)
1143
+ return re_node_set_init_copy (dest, src1);
1144
+ else if (src2 != NULL && src2->nelem > 0)
1145
+ return re_node_set_init_copy (dest, src2);
1146
+ else
1147
+ re_node_set_init_empty (dest);
1148
+ return REG_NOERROR;
1149
+ }
1150
+ for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1151
+ {
1152
+ if (src1->elems[i1] > src2->elems[i2])
1153
+ {
1154
+ dest->elems[id++] = src2->elems[i2++];
1155
+ continue;
1156
+ }
1157
+ if (src1->elems[i1] == src2->elems[i2])
1158
+ ++i2;
1159
+ dest->elems[id++] = src1->elems[i1++];
1160
+ }
1161
+ if (i1 < src1->nelem)
1162
+ {
1163
+ memcpy (dest->elems + id, src1->elems + i1,
1164
+ (src1->nelem - i1) * sizeof (int));
1165
+ id += src1->nelem - i1;
1166
+ }
1167
+ else if (i2 < src2->nelem)
1168
+ {
1169
+ memcpy (dest->elems + id, src2->elems + i2,
1170
+ (src2->nelem - i2) * sizeof (int));
1171
+ id += src2->nelem - i2;
1172
+ }
1173
+ dest->nelem = id;
1174
+ return REG_NOERROR;
1175
+ }
1176
+
1177
+ /* Calculate the union set of the sets DEST and SRC. And store it to
1178
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1179
+
1180
+ static reg_errcode_t
1181
+ internal_function
1182
+ re_node_set_merge (re_node_set *dest, const re_node_set *src)
1183
+ {
1184
+ int is, id, sbase, delta;
1185
+ if (src == NULL || src->nelem == 0)
1186
+ return REG_NOERROR;
1187
+ if (dest->alloc < 2 * src->nelem + dest->nelem)
1188
+ {
1189
+ int new_alloc = 2 * (src->nelem + dest->alloc);
1190
+ int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1191
+ if (BE (new_buffer == NULL, 0))
1192
+ return REG_ESPACE;
1193
+ dest->elems = new_buffer;
1194
+ dest->alloc = new_alloc;
1195
+ }
1196
+
1197
+ if (BE (dest->nelem == 0, 0))
1198
+ {
1199
+ dest->nelem = src->nelem;
1200
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1201
+ return REG_NOERROR;
1202
+ }
1203
+
1204
+ /* Copy into the top of DEST the items of SRC that are not
1205
+ found in DEST. Maybe we could binary search in DEST? */
1206
+ for (sbase = dest->nelem + 2 * src->nelem,
1207
+ is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1208
+ {
1209
+ if (dest->elems[id] == src->elems[is])
1210
+ is--, id--;
1211
+ else if (dest->elems[id] < src->elems[is])
1212
+ dest->elems[--sbase] = src->elems[is--];
1213
+ else /* if (dest->elems[id] > src->elems[is]) */
1214
+ --id;
1215
+ }
1216
+
1217
+ if (is >= 0)
1218
+ {
1219
+ /* If DEST is exhausted, the remaining items of SRC must be unique. */
1220
+ sbase -= is + 1;
1221
+ memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1222
+ }
1223
+
1224
+ id = dest->nelem - 1;
1225
+ is = dest->nelem + 2 * src->nelem - 1;
1226
+ delta = is - sbase + 1;
1227
+ if (delta == 0)
1228
+ return REG_NOERROR;
1229
+
1230
+ /* Now copy. When DELTA becomes zero, the remaining
1231
+ DEST elements are already in place. */
1232
+ dest->nelem += delta;
1233
+ for (;;)
1234
+ {
1235
+ if (dest->elems[is] > dest->elems[id])
1236
+ {
1237
+ /* Copy from the top. */
1238
+ dest->elems[id + delta--] = dest->elems[is--];
1239
+ if (delta == 0)
1240
+ break;
1241
+ }
1242
+ else
1243
+ {
1244
+ /* Slide from the bottom. */
1245
+ dest->elems[id + delta] = dest->elems[id];
1246
+ if (--id < 0)
1247
+ {
1248
+ /* Copy remaining SRC elements. */
1249
+ memcpy (dest->elems, dest->elems + sbase,
1250
+ delta * sizeof (int));
1251
+ break;
1252
+ }
1253
+ }
1254
+ }
1255
+
1256
+ return REG_NOERROR;
1257
+ }
1258
+
1259
+ /* Insert the new element ELEM to the re_node_set* SET.
1260
+ SET should not already have ELEM.
1261
+ return -1 if an error is occured, return 1 otherwise. */
1262
+
1263
+ static int
1264
+ internal_function
1265
+ re_node_set_insert (re_node_set *set, int elem)
1266
+ {
1267
+ int idx;
1268
+ /* In case the set is empty. */
1269
+ if (set->alloc == 0)
1270
+ {
1271
+ if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1272
+ return 1;
1273
+ else
1274
+ return -1;
1275
+ }
1276
+
1277
+ if (BE (set->nelem, 0) == 0)
1278
+ {
1279
+ /* We already guaranteed above that set->alloc != 0. */
1280
+ set->elems[0] = elem;
1281
+ ++set->nelem;
1282
+ return 1;
1283
+ }
1284
+
1285
+ /* Realloc if we need. */
1286
+ if (set->alloc == set->nelem)
1287
+ {
1288
+ int *new_elems;
1289
+ set->alloc = set->alloc * 2;
1290
+ new_elems = re_realloc (set->elems, int, set->alloc);
1291
+ if (BE (new_elems == NULL, 0))
1292
+ return -1;
1293
+ set->elems = new_elems;
1294
+ }
1295
+
1296
+ /* Move the elements which follows the new element. Test the
1297
+ first element separately to skip a check in the inner loop. */
1298
+ if (elem < set->elems[0])
1299
+ {
1300
+ idx = 0;
1301
+ for (idx = set->nelem; idx > 0; idx--)
1302
+ set->elems[idx] = set->elems[idx - 1];
1303
+ }
1304
+ else
1305
+ {
1306
+ for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1307
+ set->elems[idx] = set->elems[idx - 1];
1308
+ }
1309
+
1310
+ /* Insert the new element. */
1311
+ set->elems[idx] = elem;
1312
+ ++set->nelem;
1313
+ return 1;
1314
+ }
1315
+
1316
+ /* Insert the new element ELEM to the re_node_set* SET.
1317
+ SET should not already have any element greater than or equal to ELEM.
1318
+ Return -1 if an error is occured, return 1 otherwise. */
1319
+
1320
+ static int
1321
+ internal_function
1322
+ re_node_set_insert_last (re_node_set *set, int elem)
1323
+ {
1324
+ /* Realloc if we need. */
1325
+ if (set->alloc == set->nelem)
1326
+ {
1327
+ int *new_elems;
1328
+ set->alloc = (set->alloc + 1) * 2;
1329
+ new_elems = re_realloc (set->elems, int, set->alloc);
1330
+ if (BE (new_elems == NULL, 0))
1331
+ return -1;
1332
+ set->elems = new_elems;
1333
+ }
1334
+
1335
+ /* Insert the new element. */
1336
+ set->elems[set->nelem++] = elem;
1337
+ return 1;
1338
+ }
1339
+
1340
+ /* Compare two node sets SET1 and SET2.
1341
+ return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1342
+
1343
+ static int
1344
+ internal_function __attribute ((pure))
1345
+ re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1346
+ {
1347
+ int i;
1348
+ if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1349
+ return 0;
1350
+ for (i = set1->nelem ; --i >= 0 ; )
1351
+ if (set1->elems[i] != set2->elems[i])
1352
+ return 0;
1353
+ return 1;
1354
+ }
1355
+
1356
+ /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1357
+
1358
+ static int
1359
+ internal_function __attribute ((pure))
1360
+ re_node_set_contains (const re_node_set *set, int elem)
1361
+ {
1362
+ unsigned int idx, right, mid;
1363
+ if (set->nelem <= 0)
1364
+ return 0;
1365
+
1366
+ /* Binary search the element. */
1367
+ idx = 0;
1368
+ right = set->nelem - 1;
1369
+ while (idx < right)
1370
+ {
1371
+ mid = (idx + right) / 2;
1372
+ if (set->elems[mid] < elem)
1373
+ idx = mid + 1;
1374
+ else
1375
+ right = mid;
1376
+ }
1377
+ return set->elems[idx] == elem ? idx + 1 : 0;
1378
+ }
1379
+
1380
+ static void
1381
+ internal_function
1382
+ re_node_set_remove_at (re_node_set *set, int idx)
1383
+ {
1384
+ if (idx < 0 || idx >= set->nelem)
1385
+ return;
1386
+ --set->nelem;
1387
+ for (; idx < set->nelem; idx++)
1388
+ set->elems[idx] = set->elems[idx + 1];
1389
+ }
1390
+
1391
+
1392
+ /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1393
+ Or return -1, if an error will be occured. */
1394
+
1395
+ static int
1396
+ internal_function
1397
+ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1398
+ {
1399
+ int type = token.type;
1400
+ if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1401
+ {
1402
+ size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1403
+ int *new_nexts, *new_indices;
1404
+ re_node_set *new_edests, *new_eclosures;
1405
+ re_token_t *new_nodes;
1406
+
1407
+ /* Avoid overflows. */
1408
+ if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
1409
+ return -1;
1410
+
1411
+ new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1412
+ if (BE (new_nodes == NULL, 0))
1413
+ return -1;
1414
+ dfa->nodes = new_nodes;
1415
+ new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1416
+ new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1417
+ new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1418
+ new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1419
+ if (BE (new_nexts == NULL || new_indices == NULL
1420
+ || new_edests == NULL || new_eclosures == NULL, 0))
1421
+ return -1;
1422
+ dfa->nexts = new_nexts;
1423
+ dfa->org_indices = new_indices;
1424
+ dfa->edests = new_edests;
1425
+ dfa->eclosures = new_eclosures;
1426
+ dfa->nodes_alloc = new_nodes_alloc;
1427
+ }
1428
+ dfa->nodes[dfa->nodes_len] = token;
1429
+ dfa->nodes[dfa->nodes_len].constraint = 0;
1430
+ #ifdef RE_ENABLE_I18N
1431
+ dfa->nodes[dfa->nodes_len].accept_mb =
1432
+ (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1433
+ #endif
1434
+ dfa->nexts[dfa->nodes_len] = -1;
1435
+ re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1436
+ re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1437
+ return dfa->nodes_len++;
1438
+ }
1439
+
1440
+ static inline unsigned int
1441
+ internal_function
1442
+ calc_state_hash (const re_node_set *nodes, unsigned int context)
1443
+ {
1444
+ unsigned int hash = nodes->nelem + context;
1445
+ int i;
1446
+ for (i = 0 ; i < nodes->nelem ; i++)
1447
+ hash += nodes->elems[i];
1448
+ return hash;
1449
+ }
1450
+
1451
+ /* Search for the state whose node_set is equivalent to NODES.
1452
+ Return the pointer to the state, if we found it in the DFA.
1453
+ Otherwise create the new one and return it. In case of an error
1454
+ return NULL and set the error code in ERR.
1455
+ Note: - We assume NULL as the invalid state, then it is possible that
1456
+ return value is NULL and ERR is REG_NOERROR.
1457
+ - We never return non-NULL value in case of any errors, it is for
1458
+ optimization. */
1459
+
1460
+ static re_dfastate_t *
1461
+ internal_function
1462
+ re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1463
+ const re_node_set *nodes)
1464
+ {
1465
+ unsigned int hash;
1466
+ re_dfastate_t *new_state;
1467
+ struct re_state_table_entry *spot;
1468
+ int i;
1469
+ if (BE (nodes->nelem == 0, 0))
1470
+ {
1471
+ *err = REG_NOERROR;
1472
+ return NULL;
1473
+ }
1474
+ hash = calc_state_hash (nodes, 0);
1475
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
1476
+
1477
+ for (i = 0 ; i < spot->num ; i++)
1478
+ {
1479
+ re_dfastate_t *state = spot->array[i];
1480
+ if (hash != state->hash)
1481
+ continue;
1482
+ if (re_node_set_compare (&state->nodes, nodes))
1483
+ return state;
1484
+ }
1485
+
1486
+ /* There are no appropriate state in the dfa, create the new one. */
1487
+ new_state = create_ci_newstate (dfa, nodes, hash);
1488
+ if (BE (new_state == NULL, 0))
1489
+ *err = REG_ESPACE;
1490
+
1491
+ return new_state;
1492
+ }
1493
+
1494
+ /* Search for the state whose node_set is equivalent to NODES and
1495
+ whose context is equivalent to CONTEXT.
1496
+ Return the pointer to the state, if we found it in the DFA.
1497
+ Otherwise create the new one and return it. In case of an error
1498
+ return NULL and set the error code in ERR.
1499
+ Note: - We assume NULL as the invalid state, then it is possible that
1500
+ return value is NULL and ERR is REG_NOERROR.
1501
+ - We never return non-NULL value in case of any errors, it is for
1502
+ optimization. */
1503
+
1504
+ static re_dfastate_t *
1505
+ internal_function
1506
+ re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1507
+ const re_node_set *nodes, unsigned int context)
1508
+ {
1509
+ unsigned int hash;
1510
+ re_dfastate_t *new_state;
1511
+ struct re_state_table_entry *spot;
1512
+ int i;
1513
+ if (nodes->nelem == 0)
1514
+ {
1515
+ *err = REG_NOERROR;
1516
+ return NULL;
1517
+ }
1518
+ hash = calc_state_hash (nodes, context);
1519
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
1520
+
1521
+ for (i = 0 ; i < spot->num ; i++)
1522
+ {
1523
+ re_dfastate_t *state = spot->array[i];
1524
+ if (state->hash == hash
1525
+ && state->context == context
1526
+ && re_node_set_compare (state->entrance_nodes, nodes))
1527
+ return state;
1528
+ }
1529
+ /* There are no appropriate state in `dfa', create the new one. */
1530
+ new_state = create_cd_newstate (dfa, nodes, context, hash);
1531
+ if (BE (new_state == NULL, 0))
1532
+ *err = REG_ESPACE;
1533
+
1534
+ return new_state;
1535
+ }
1536
+
1537
+ /* Finish initialization of the new state NEWSTATE, and using its hash value
1538
+ HASH put in the appropriate bucket of DFA's state table. Return value
1539
+ indicates the error code if failed. */
1540
+
1541
+ static reg_errcode_t
1542
+ register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1543
+ unsigned int hash)
1544
+ {
1545
+ struct re_state_table_entry *spot;
1546
+ reg_errcode_t err;
1547
+ int i;
1548
+
1549
+ newstate->hash = hash;
1550
+ err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1551
+ if (BE (err != REG_NOERROR, 0))
1552
+ return REG_ESPACE;
1553
+ for (i = 0; i < newstate->nodes.nelem; i++)
1554
+ {
1555
+ int elem = newstate->nodes.elems[i];
1556
+ if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1557
+ re_node_set_insert_last (&newstate->non_eps_nodes, elem);
1558
+ }
1559
+
1560
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
1561
+ if (BE (spot->alloc <= spot->num, 0))
1562
+ {
1563
+ int new_alloc = 2 * spot->num + 2;
1564
+ re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1565
+ new_alloc);
1566
+ if (BE (new_array == NULL, 0))
1567
+ return REG_ESPACE;
1568
+ spot->array = new_array;
1569
+ spot->alloc = new_alloc;
1570
+ }
1571
+ spot->array[spot->num++] = newstate;
1572
+ return REG_NOERROR;
1573
+ }
1574
+
1575
+ static void
1576
+ free_state (re_dfastate_t *state)
1577
+ {
1578
+ re_node_set_free (&state->non_eps_nodes);
1579
+ re_node_set_free (&state->inveclosure);
1580
+ if (state->entrance_nodes != &state->nodes)
1581
+ {
1582
+ re_node_set_free (state->entrance_nodes);
1583
+ re_free (state->entrance_nodes);
1584
+ }
1585
+ re_node_set_free (&state->nodes);
1586
+ re_free (state->word_trtable);
1587
+ re_free (state->trtable);
1588
+ re_free (state);
1589
+ }
1590
+
1591
+ /* Create the new state which is independ of contexts.
1592
+ Return the new state if succeeded, otherwise return NULL. */
1593
+
1594
+ static re_dfastate_t *
1595
+ internal_function
1596
+ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1597
+ unsigned int hash)
1598
+ {
1599
+ int i;
1600
+ reg_errcode_t err;
1601
+ re_dfastate_t *newstate;
1602
+
1603
+ newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1604
+ if (BE (newstate == NULL, 0))
1605
+ return NULL;
1606
+ err = re_node_set_init_copy (&newstate->nodes, nodes);
1607
+ if (BE (err != REG_NOERROR, 0))
1608
+ {
1609
+ re_free (newstate);
1610
+ return NULL;
1611
+ }
1612
+
1613
+ newstate->entrance_nodes = &newstate->nodes;
1614
+ for (i = 0 ; i < nodes->nelem ; i++)
1615
+ {
1616
+ re_token_t *node = dfa->nodes + nodes->elems[i];
1617
+ re_token_type_t type = node->type;
1618
+ if (type == CHARACTER && !node->constraint)
1619
+ continue;
1620
+ #ifdef RE_ENABLE_I18N
1621
+ newstate->accept_mb |= node->accept_mb;
1622
+ #endif /* RE_ENABLE_I18N */
1623
+
1624
+ /* If the state has the halt node, the state is a halt state. */
1625
+ if (type == END_OF_RE)
1626
+ newstate->halt = 1;
1627
+ else if (type == OP_BACK_REF)
1628
+ newstate->has_backref = 1;
1629
+ else if (type == ANCHOR || node->constraint)
1630
+ newstate->has_constraint = 1;
1631
+ }
1632
+ err = register_state (dfa, newstate, hash);
1633
+ if (BE (err != REG_NOERROR, 0))
1634
+ {
1635
+ free_state (newstate);
1636
+ newstate = NULL;
1637
+ }
1638
+ return newstate;
1639
+ }
1640
+
1641
+ /* Create the new state which is depend on the context CONTEXT.
1642
+ Return the new state if succeeded, otherwise return NULL. */
1643
+
1644
+ static re_dfastate_t *
1645
+ internal_function
1646
+ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1647
+ unsigned int context, unsigned int hash)
1648
+ {
1649
+ int i, nctx_nodes = 0;
1650
+ reg_errcode_t err;
1651
+ re_dfastate_t *newstate;
1652
+
1653
+ newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1654
+ if (BE (newstate == NULL, 0))
1655
+ return NULL;
1656
+ err = re_node_set_init_copy (&newstate->nodes, nodes);
1657
+ if (BE (err != REG_NOERROR, 0))
1658
+ {
1659
+ re_free (newstate);
1660
+ return NULL;
1661
+ }
1662
+
1663
+ newstate->context = context;
1664
+ newstate->entrance_nodes = &newstate->nodes;
1665
+
1666
+ for (i = 0 ; i < nodes->nelem ; i++)
1667
+ {
1668
+ re_token_t *node = dfa->nodes + nodes->elems[i];
1669
+ re_token_type_t type = node->type;
1670
+ unsigned int constraint = node->constraint;
1671
+
1672
+ if (type == CHARACTER && !constraint)
1673
+ continue;
1674
+ #ifdef RE_ENABLE_I18N
1675
+ newstate->accept_mb |= node->accept_mb;
1676
+ #endif /* RE_ENABLE_I18N */
1677
+
1678
+ /* If the state has the halt node, the state is a halt state. */
1679
+ if (type == END_OF_RE)
1680
+ newstate->halt = 1;
1681
+ else if (type == OP_BACK_REF)
1682
+ newstate->has_backref = 1;
1683
+
1684
+ if (constraint)
1685
+ {
1686
+ if (newstate->entrance_nodes == &newstate->nodes)
1687
+ {
1688
+ newstate->entrance_nodes = re_malloc (re_node_set, 1);
1689
+ if (BE (newstate->entrance_nodes == NULL, 0))
1690
+ {
1691
+ free_state (newstate);
1692
+ return NULL;
1693
+ }
1694
+ re_node_set_init_copy (newstate->entrance_nodes, nodes);
1695
+ nctx_nodes = 0;
1696
+ newstate->has_constraint = 1;
1697
+ }
1698
+
1699
+ if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1700
+ {
1701
+ re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1702
+ ++nctx_nodes;
1703
+ }
1704
+ }
1705
+ }
1706
+ err = register_state (dfa, newstate, hash);
1707
+ if (BE (err != REG_NOERROR, 0))
1708
+ {
1709
+ free_state (newstate);
1710
+ newstate = NULL;
1711
+ }
1712
+ return newstate;
1713
+ }