ctags.rb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/Gemfile +2 -0
  2. data/Rakefile +23 -0
  3. data/ctags.rb.gemspec +23 -0
  4. data/ext/.gitignore +3 -0
  5. data/ext/extconf.rb +15 -0
  6. data/ext/vendor/exuberant-ctags/.gitignore +6 -0
  7. data/ext/vendor/exuberant-ctags/.indent.pro +31 -0
  8. data/ext/vendor/exuberant-ctags/COPYING +340 -0
  9. data/ext/vendor/exuberant-ctags/EXTENDING.html +386 -0
  10. data/ext/vendor/exuberant-ctags/FAQ +371 -0
  11. data/ext/vendor/exuberant-ctags/INSTALL +215 -0
  12. data/ext/vendor/exuberant-ctags/INSTALL.oth +73 -0
  13. data/ext/vendor/exuberant-ctags/MAINTAINERS +88 -0
  14. data/ext/vendor/exuberant-ctags/Makefile.in +222 -0
  15. data/ext/vendor/exuberant-ctags/NEWS +871 -0
  16. data/ext/vendor/exuberant-ctags/README +73 -0
  17. data/ext/vendor/exuberant-ctags/ant.c +42 -0
  18. data/ext/vendor/exuberant-ctags/argproc.c +505 -0
  19. data/ext/vendor/exuberant-ctags/args.c +274 -0
  20. data/ext/vendor/exuberant-ctags/args.h +63 -0
  21. data/ext/vendor/exuberant-ctags/asm.c +387 -0
  22. data/ext/vendor/exuberant-ctags/asp.c +328 -0
  23. data/ext/vendor/exuberant-ctags/awk.c +81 -0
  24. data/ext/vendor/exuberant-ctags/basic.c +203 -0
  25. data/ext/vendor/exuberant-ctags/beta.c +321 -0
  26. data/ext/vendor/exuberant-ctags/c.c +2932 -0
  27. data/ext/vendor/exuberant-ctags/cobol.c +50 -0
  28. data/ext/vendor/exuberant-ctags/config.h.in +277 -0
  29. data/ext/vendor/exuberant-ctags/configure +7704 -0
  30. data/ext/vendor/exuberant-ctags/configure.ac +532 -0
  31. data/ext/vendor/exuberant-ctags/ctags.1 +1186 -0
  32. data/ext/vendor/exuberant-ctags/ctags.h +28 -0
  33. data/ext/vendor/exuberant-ctags/ctags.html +2087 -0
  34. data/ext/vendor/exuberant-ctags/ctags.spec +40 -0
  35. data/ext/vendor/exuberant-ctags/debug.c +113 -0
  36. data/ext/vendor/exuberant-ctags/debug.h +70 -0
  37. data/ext/vendor/exuberant-ctags/descrip.mms +68 -0
  38. data/ext/vendor/exuberant-ctags/dosbatch.c +42 -0
  39. data/ext/vendor/exuberant-ctags/e_amiga.h +24 -0
  40. data/ext/vendor/exuberant-ctags/e_djgpp.h +47 -0
  41. data/ext/vendor/exuberant-ctags/e_mac.h +143 -0
  42. data/ext/vendor/exuberant-ctags/e_msoft.h +76 -0
  43. data/ext/vendor/exuberant-ctags/e_os2.h +37 -0
  44. data/ext/vendor/exuberant-ctags/e_qdos.h +34 -0
  45. data/ext/vendor/exuberant-ctags/e_riscos.h +58 -0
  46. data/ext/vendor/exuberant-ctags/e_vms.h +31 -0
  47. data/ext/vendor/exuberant-ctags/eiffel.c +1352 -0
  48. data/ext/vendor/exuberant-ctags/entry.c +847 -0
  49. data/ext/vendor/exuberant-ctags/entry.h +103 -0
  50. data/ext/vendor/exuberant-ctags/erlang.c +189 -0
  51. data/ext/vendor/exuberant-ctags/flex.c +2243 -0
  52. data/ext/vendor/exuberant-ctags/fortran.c +2197 -0
  53. data/ext/vendor/exuberant-ctags/general.h +127 -0
  54. data/ext/vendor/exuberant-ctags/get.c +669 -0
  55. data/ext/vendor/exuberant-ctags/get.h +50 -0
  56. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/all-wcprops +47 -0
  57. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/entries +112 -0
  58. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/README.txt.svn-base +5 -0
  59. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regcomp.c.svn-base +3818 -0
  60. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex.c.svn-base +74 -0
  61. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex.h.svn-base +575 -0
  62. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex_internal.c.svn-base +1713 -0
  63. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regex_internal.h.svn-base +773 -0
  64. data/ext/vendor/exuberant-ctags/gnu_regex/.svn/text-base/regexec.c.svn-base +4338 -0
  65. data/ext/vendor/exuberant-ctags/gnu_regex/README.txt +5 -0
  66. data/ext/vendor/exuberant-ctags/gnu_regex/regcomp.c +3818 -0
  67. data/ext/vendor/exuberant-ctags/gnu_regex/regex.c +74 -0
  68. data/ext/vendor/exuberant-ctags/gnu_regex/regex.h +575 -0
  69. data/ext/vendor/exuberant-ctags/gnu_regex/regex_internal.c +1713 -0
  70. data/ext/vendor/exuberant-ctags/gnu_regex/regex_internal.h +773 -0
  71. data/ext/vendor/exuberant-ctags/gnu_regex/regexec.c +4338 -0
  72. data/ext/vendor/exuberant-ctags/html.c +49 -0
  73. data/ext/vendor/exuberant-ctags/jscript.c +1572 -0
  74. data/ext/vendor/exuberant-ctags/keyword.c +258 -0
  75. data/ext/vendor/exuberant-ctags/keyword.h +34 -0
  76. data/ext/vendor/exuberant-ctags/lisp.c +139 -0
  77. data/ext/vendor/exuberant-ctags/lregex.c +704 -0
  78. data/ext/vendor/exuberant-ctags/lua.c +133 -0
  79. data/ext/vendor/exuberant-ctags/mac.c +273 -0
  80. data/ext/vendor/exuberant-ctags/magic.diff +21 -0
  81. data/ext/vendor/exuberant-ctags/main.c +584 -0
  82. data/ext/vendor/exuberant-ctags/main.h +32 -0
  83. data/ext/vendor/exuberant-ctags/maintainer.mak +476 -0
  84. data/ext/vendor/exuberant-ctags/make.c +217 -0
  85. data/ext/vendor/exuberant-ctags/matlab.c +44 -0
  86. data/ext/vendor/exuberant-ctags/mk_bc3.mak +46 -0
  87. data/ext/vendor/exuberant-ctags/mk_bc5.mak +49 -0
  88. data/ext/vendor/exuberant-ctags/mk_djg.mak +18 -0
  89. data/ext/vendor/exuberant-ctags/mk_manx.mak +65 -0
  90. data/ext/vendor/exuberant-ctags/mk_mingw.mak +31 -0
  91. data/ext/vendor/exuberant-ctags/mk_mpw.mak +130 -0
  92. data/ext/vendor/exuberant-ctags/mk_mvc.mak +40 -0
  93. data/ext/vendor/exuberant-ctags/mk_os2.mak +104 -0
  94. data/ext/vendor/exuberant-ctags/mk_qdos.mak +100 -0
  95. data/ext/vendor/exuberant-ctags/mk_sas.mak +63 -0
  96. data/ext/vendor/exuberant-ctags/mkinstalldirs +40 -0
  97. data/ext/vendor/exuberant-ctags/ocaml.c +1842 -0
  98. data/ext/vendor/exuberant-ctags/options.c +1842 -0
  99. data/ext/vendor/exuberant-ctags/options.h +155 -0
  100. data/ext/vendor/exuberant-ctags/parse.c +677 -0
  101. data/ext/vendor/exuberant-ctags/parse.h +129 -0
  102. data/ext/vendor/exuberant-ctags/parsers.h +63 -0
  103. data/ext/vendor/exuberant-ctags/pascal.c +267 -0
  104. data/ext/vendor/exuberant-ctags/perl.c +382 -0
  105. data/ext/vendor/exuberant-ctags/php.c +237 -0
  106. data/ext/vendor/exuberant-ctags/python.c +771 -0
  107. data/ext/vendor/exuberant-ctags/qdos.c +106 -0
  108. data/ext/vendor/exuberant-ctags/read.c +569 -0
  109. data/ext/vendor/exuberant-ctags/read.h +116 -0
  110. data/ext/vendor/exuberant-ctags/readtags.c +959 -0
  111. data/ext/vendor/exuberant-ctags/readtags.h +252 -0
  112. data/ext/vendor/exuberant-ctags/rexx.c +39 -0
  113. data/ext/vendor/exuberant-ctags/routines.c +891 -0
  114. data/ext/vendor/exuberant-ctags/routines.h +134 -0
  115. data/ext/vendor/exuberant-ctags/ruby.c +408 -0
  116. data/ext/vendor/exuberant-ctags/scheme.c +111 -0
  117. data/ext/vendor/exuberant-ctags/sh.c +115 -0
  118. data/ext/vendor/exuberant-ctags/slang.c +41 -0
  119. data/ext/vendor/exuberant-ctags/sml.c +212 -0
  120. data/ext/vendor/exuberant-ctags/sort.c +230 -0
  121. data/ext/vendor/exuberant-ctags/sort.h +32 -0
  122. data/ext/vendor/exuberant-ctags/source.mak +122 -0
  123. data/ext/vendor/exuberant-ctags/sql.c +2112 -0
  124. data/ext/vendor/exuberant-ctags/strlist.c +281 -0
  125. data/ext/vendor/exuberant-ctags/strlist.h +54 -0
  126. data/ext/vendor/exuberant-ctags/tcl.c +116 -0
  127. data/ext/vendor/exuberant-ctags/tex.c +524 -0
  128. data/ext/vendor/exuberant-ctags/verilog.c +340 -0
  129. data/ext/vendor/exuberant-ctags/vhdl.c +835 -0
  130. data/ext/vendor/exuberant-ctags/vim.c +636 -0
  131. data/ext/vendor/exuberant-ctags/vstring.c +232 -0
  132. data/ext/vendor/exuberant-ctags/vstring.h +85 -0
  133. data/ext/vendor/exuberant-ctags/yacc.c +40 -0
  134. data/lib/ctags/exuberant.rb +45 -0
  135. data/lib/ctags/version.rb +3 -0
  136. data/lib/ctags.rb +6 -0
  137. data/test/test_ctags.rb +24 -0
  138. metadata +233 -0
@@ -0,0 +1,1713 @@
1
+ /* Extended regular expression matching and search library.
2
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3
+ This file is part of the GNU C Library.
4
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
+
6
+ The GNU C Library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ The GNU C Library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with the GNU C Library; if not, write to the Free
18
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19
+ 02111-1307 USA. */
20
+
21
+ static void re_string_construct_common (const char *str, int len,
22
+ re_string_t *pstr,
23
+ RE_TRANSLATE_TYPE trans, int icase,
24
+ const re_dfa_t *dfa) internal_function;
25
+ static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26
+ const re_node_set *nodes,
27
+ unsigned int hash) internal_function;
28
+ static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29
+ const re_node_set *nodes,
30
+ unsigned int context,
31
+ unsigned int hash) internal_function;
32
+
33
+ /* Functions for string operation. */
34
+
35
+ /* This function allocate the buffers. It is necessary to call
36
+ re_string_reconstruct before using the object. */
37
+
38
+ static reg_errcode_t
39
+ internal_function
40
+ re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
41
+ RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
42
+ {
43
+ reg_errcode_t ret;
44
+ int init_buf_len;
45
+
46
+ /* Ensure at least one character fits into the buffers. */
47
+ if (init_len < dfa->mb_cur_max)
48
+ init_len = dfa->mb_cur_max;
49
+ init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
50
+ re_string_construct_common (str, len, pstr, trans, icase, dfa);
51
+
52
+ ret = re_string_realloc_buffers (pstr, init_buf_len);
53
+ if (BE (ret != REG_NOERROR, 0))
54
+ return ret;
55
+
56
+ pstr->word_char = dfa->word_char;
57
+ pstr->word_ops_used = dfa->word_ops_used;
58
+ pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
59
+ pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
60
+ pstr->valid_raw_len = pstr->valid_len;
61
+ return REG_NOERROR;
62
+ }
63
+
64
+ /* This function allocate the buffers, and initialize them. */
65
+
66
+ static reg_errcode_t
67
+ internal_function
68
+ re_string_construct (re_string_t *pstr, const char *str, int len,
69
+ RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
70
+ {
71
+ reg_errcode_t ret;
72
+ memset (pstr, '\0', sizeof (re_string_t));
73
+ re_string_construct_common (str, len, pstr, trans, icase, dfa);
74
+
75
+ if (len > 0)
76
+ {
77
+ ret = re_string_realloc_buffers (pstr, len + 1);
78
+ if (BE (ret != REG_NOERROR, 0))
79
+ return ret;
80
+ }
81
+ pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
82
+
83
+ if (icase)
84
+ {
85
+ #ifdef RE_ENABLE_I18N
86
+ if (dfa->mb_cur_max > 1)
87
+ {
88
+ while (1)
89
+ {
90
+ ret = build_wcs_upper_buffer (pstr);
91
+ if (BE (ret != REG_NOERROR, 0))
92
+ return ret;
93
+ if (pstr->valid_raw_len >= len)
94
+ break;
95
+ if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
96
+ break;
97
+ ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
98
+ if (BE (ret != REG_NOERROR, 0))
99
+ return ret;
100
+ }
101
+ }
102
+ else
103
+ #endif /* RE_ENABLE_I18N */
104
+ build_upper_buffer (pstr);
105
+ }
106
+ else
107
+ {
108
+ #ifdef RE_ENABLE_I18N
109
+ if (dfa->mb_cur_max > 1)
110
+ build_wcs_buffer (pstr);
111
+ else
112
+ #endif /* RE_ENABLE_I18N */
113
+ {
114
+ if (trans != NULL)
115
+ re_string_translate_buffer (pstr);
116
+ else
117
+ {
118
+ pstr->valid_len = pstr->bufs_len;
119
+ pstr->valid_raw_len = pstr->bufs_len;
120
+ }
121
+ }
122
+ }
123
+
124
+ return REG_NOERROR;
125
+ }
126
+
127
+ /* Helper functions for re_string_allocate and re_string_construct. */
128
+
129
+ static reg_errcode_t
130
+ internal_function
131
+ re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
132
+ {
133
+ #ifdef RE_ENABLE_I18N
134
+ if (pstr->mb_cur_max > 1)
135
+ {
136
+ wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
137
+ if (BE (new_wcs == NULL, 0))
138
+ return REG_ESPACE;
139
+ pstr->wcs = new_wcs;
140
+ if (pstr->offsets != NULL)
141
+ {
142
+ int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
143
+ if (BE (new_offsets == NULL, 0))
144
+ return REG_ESPACE;
145
+ pstr->offsets = new_offsets;
146
+ }
147
+ }
148
+ #endif /* RE_ENABLE_I18N */
149
+ if (pstr->mbs_allocated)
150
+ {
151
+ unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
152
+ new_buf_len);
153
+ if (BE (new_mbs == NULL, 0))
154
+ return REG_ESPACE;
155
+ pstr->mbs = new_mbs;
156
+ }
157
+ pstr->bufs_len = new_buf_len;
158
+ return REG_NOERROR;
159
+ }
160
+
161
+
162
+ static void
163
+ internal_function
164
+ re_string_construct_common (const char *str, int len, re_string_t *pstr,
165
+ RE_TRANSLATE_TYPE trans, int icase,
166
+ const re_dfa_t *dfa)
167
+ {
168
+ pstr->raw_mbs = (const unsigned char *) str;
169
+ pstr->len = len;
170
+ pstr->raw_len = len;
171
+ pstr->trans = trans;
172
+ pstr->icase = icase ? 1 : 0;
173
+ pstr->mbs_allocated = (trans != NULL || icase);
174
+ pstr->mb_cur_max = dfa->mb_cur_max;
175
+ pstr->is_utf8 = dfa->is_utf8;
176
+ pstr->map_notascii = dfa->map_notascii;
177
+ pstr->stop = pstr->len;
178
+ pstr->raw_stop = pstr->stop;
179
+ }
180
+
181
+ #ifdef RE_ENABLE_I18N
182
+
183
+ /* Build wide character buffer PSTR->WCS.
184
+ If the byte sequence of the string are:
185
+ <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
186
+ Then wide character buffer will be:
187
+ <wc1> , WEOF , <wc2> , WEOF , <wc3>
188
+ We use WEOF for padding, they indicate that the position isn't
189
+ a first byte of a multibyte character.
190
+
191
+ Note that this function assumes PSTR->VALID_LEN elements are already
192
+ built and starts from PSTR->VALID_LEN. */
193
+
194
+ static void
195
+ internal_function
196
+ build_wcs_buffer (re_string_t *pstr)
197
+ {
198
+ #ifdef _LIBC
199
+ unsigned char buf[MB_LEN_MAX];
200
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
201
+ #else
202
+ unsigned char buf[64];
203
+ #endif
204
+ mbstate_t prev_st;
205
+ int byte_idx, end_idx, remain_len;
206
+ size_t mbclen;
207
+
208
+ /* Build the buffers from pstr->valid_len to either pstr->len or
209
+ pstr->bufs_len. */
210
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
211
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
212
+ {
213
+ wchar_t wc;
214
+ const char *p;
215
+
216
+ remain_len = end_idx - byte_idx;
217
+ prev_st = pstr->cur_state;
218
+ /* Apply the translation if we need. */
219
+ if (BE (pstr->trans != NULL, 0))
220
+ {
221
+ int i, ch;
222
+
223
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
224
+ {
225
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
226
+ buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
227
+ }
228
+ p = (const char *) buf;
229
+ }
230
+ else
231
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
232
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
233
+ if (BE (mbclen == (size_t) -2, 0))
234
+ {
235
+ /* The buffer doesn't have enough space, finish to build. */
236
+ pstr->cur_state = prev_st;
237
+ break;
238
+ }
239
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
240
+ {
241
+ /* We treat these cases as a singlebyte character. */
242
+ mbclen = 1;
243
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
244
+ if (BE (pstr->trans != NULL, 0))
245
+ wc = pstr->trans[wc];
246
+ pstr->cur_state = prev_st;
247
+ }
248
+
249
+ /* Write wide character and padding. */
250
+ pstr->wcs[byte_idx++] = wc;
251
+ /* Write paddings. */
252
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
253
+ pstr->wcs[byte_idx++] = WEOF;
254
+ }
255
+ pstr->valid_len = byte_idx;
256
+ pstr->valid_raw_len = byte_idx;
257
+ }
258
+
259
+ /* Build the wide character buffer PSTR->WCS like build_wcs_buffer,
260
+ but for REG_ICASE: wide characters for which iswlower holds are replaced by their towupper form (plain ASCII bytes go through toupper on the fast path), and the converted bytes are stored in PSTR->MBS. Conversion resumes at PSTR->VALID_LEN and stops at the smaller of PSTR->LEN and PSTR->BUFS_LEN. Returns REG_ESPACE if PSTR->OFFSETS cannot be allocated, REG_NOERROR otherwise. */
261
+
262
+ static reg_errcode_t
263
+ internal_function
264
+ build_wcs_upper_buffer (re_string_t *pstr)
265
+ {
266
+ mbstate_t prev_st;
267
+ int src_idx, byte_idx, end_idx, remain_len; /* src_idx indexes the raw bytes, byte_idx the converted buffers */
268
+ size_t mbclen;
269
+ #ifdef _LIBC
270
+ char buf[MB_LEN_MAX];
271
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
272
+ #else
273
+ char buf[64];
274
+ #endif
275
+
276
+ byte_idx = pstr->valid_len;
277
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
278
+
279
+ /* The following optimization assumes that ASCII characters can be
280
+ mapped to wide characters with a simple cast. */
281
+ if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) /* fast path: raw and converted indices are identical */
282
+ {
283
+ while (byte_idx < end_idx)
284
+ {
285
+ wchar_t wc;
286
+
287
+ if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
288
+ && mbsinit (&pstr->cur_state))
289
+ {
290
+ /* In case of a singlebyte character. */
291
+ pstr->mbs[byte_idx]
292
+ = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
293
+ /* The next step uses the assumption that wchar_t is encoded
294
+ ASCII-safe: all ASCII values can be converted like this. */
295
+ pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
296
+ ++byte_idx;
297
+ continue;
298
+ }
299
+
300
+ remain_len = end_idx - byte_idx;
301
+ prev_st = pstr->cur_state; /* saved so the state can be restored if the conversion is abandoned */
302
+ mbclen = __mbrtowc (&wc,
303
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
304
+ + byte_idx), remain_len, &pstr->cur_state);
305
+ if (BE (mbclen + 2 > 2, 1)) /* true unless mbclen is 0, (size_t) -1 or (size_t) -2 */
306
+ {
307
+ wchar_t wcu = wc;
308
+ if (iswlower (wc))
309
+ {
310
+ size_t mbcdlen;
311
+
312
+ wcu = towupper (wc);
313
+ mbcdlen = wcrtomb (buf, wcu, &prev_st);
314
+ if (BE (mbclen == mbcdlen, 1))
315
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
316
+ else
317
+ {
318
+ src_idx = byte_idx;
319
+ goto offsets_needed; /* wcrtomb did not return the same length as the source character: continue in the general loop below */
320
+ }
321
+ }
322
+ else
323
+ memcpy (pstr->mbs + byte_idx,
324
+ pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
325
+ pstr->wcs[byte_idx++] = wcu;
326
+ /* Write paddings. */
327
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
328
+ pstr->wcs[byte_idx++] = WEOF;
329
+ }
330
+ else if (mbclen == (size_t) -1 || mbclen == 0)
331
+ {
332
+ /* It is an invalid character or '\0'. Just use the byte. */
333
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
334
+ pstr->mbs[byte_idx] = ch;
335
+ /* And also cast it to wide char. */
336
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
337
+ if (BE (mbclen == (size_t) -1, 0))
338
+ pstr->cur_state = prev_st;
339
+ }
340
+ else
341
+ {
342
+ /* The buffer doesn't have enough space, finish to build. */
343
+ pstr->cur_state = prev_st;
344
+ break;
345
+ }
346
+ }
347
+ pstr->valid_len = byte_idx;
348
+ pstr->valid_raw_len = byte_idx;
349
+ return REG_NOERROR;
350
+ }
351
+ else
352
+ for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
353
+ {
354
+ wchar_t wc;
355
+ const char *p;
356
+ offsets_needed: /* also entered by goto from the fast path above, with src_idx == byte_idx */
357
+ remain_len = end_idx - byte_idx;
358
+ prev_st = pstr->cur_state;
359
+ if (BE (pstr->trans != NULL, 0))
360
+ {
361
+ int i, ch;
362
+
363
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
364
+ {
365
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
366
+ buf[i] = pstr->trans[ch];
367
+ }
368
+ p = (const char *) buf;
369
+ }
370
+ else
371
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
372
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
373
+ if (BE (mbclen + 2 > 2, 1)) /* true unless mbclen is 0, (size_t) -1 or (size_t) -2 */
374
+ {
375
+ wchar_t wcu = wc;
376
+ if (iswlower (wc))
377
+ {
378
+ size_t mbcdlen;
379
+
380
+ wcu = towupper (wc);
381
+ mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
382
+ if (BE (mbclen == mbcdlen, 1))
383
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
384
+ else if (mbcdlen != (size_t) -1)
385
+ {
386
+ size_t i;
387
+
388
+ if (byte_idx + mbcdlen > pstr->bufs_len) /* the converted character would not fit in the buffer: stop here */
389
+ {
390
+ pstr->cur_state = prev_st;
391
+ break;
392
+ }
393
+
394
+ if (pstr->offsets == NULL)
395
+ {
396
+ pstr->offsets = re_malloc (int, pstr->bufs_len);
397
+
398
+ if (pstr->offsets == NULL)
399
+ return REG_ESPACE;
400
+ }
401
+ if (!pstr->offsets_needed)
402
+ {
403
+ for (i = 0; i < (size_t) byte_idx; ++i) /* everything converted so far maps one-to-one */
404
+ pstr->offsets[i] = i;
405
+ pstr->offsets_needed = 1;
406
+ }
407
+
408
+ memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
409
+ pstr->wcs[byte_idx] = wcu;
410
+ pstr->offsets[byte_idx] = src_idx;
411
+ for (i = 1; i < mbcdlen; ++i)
412
+ {
413
+ pstr->offsets[byte_idx + i]
414
+ = src_idx + (i < mbclen ? i : mbclen - 1);
415
+ pstr->wcs[byte_idx + i] = WEOF;
416
+ }
417
+ pstr->len += mbcdlen - mbclen; /* the converted string length changes by the size difference */
418
+ if (pstr->raw_stop > src_idx)
419
+ pstr->stop += mbcdlen - mbclen;
420
+ end_idx = (pstr->bufs_len > pstr->len)
421
+ ? pstr->len : pstr->bufs_len;
422
+ byte_idx += mbcdlen;
423
+ src_idx += mbclen;
424
+ continue;
425
+ }
426
+ else
427
+ memcpy (pstr->mbs + byte_idx, p, mbclen); /* wcrtomb failed: keep the source bytes */
428
+ }
429
+ else
430
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
431
+
432
+ if (BE (pstr->offsets_needed != 0, 0))
433
+ {
434
+ size_t i;
435
+ for (i = 0; i < mbclen; ++i)
436
+ pstr->offsets[byte_idx + i] = src_idx + i;
437
+ }
438
+ src_idx += mbclen;
439
+
440
+ pstr->wcs[byte_idx++] = wcu;
441
+ /* Write paddings. */
442
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
443
+ pstr->wcs[byte_idx++] = WEOF;
444
+ }
445
+ else if (mbclen == (size_t) -1 || mbclen == 0)
446
+ {
447
+ /* It is an invalid character or '\0'. Just use the byte. */
448
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
449
+
450
+ if (BE (pstr->trans != NULL, 0))
451
+ ch = pstr->trans [ch];
452
+ pstr->mbs[byte_idx] = ch;
453
+
454
+ if (BE (pstr->offsets_needed != 0, 0))
455
+ pstr->offsets[byte_idx] = src_idx;
456
+ ++src_idx;
457
+
458
+ /* And also cast it to wide char. */
459
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
460
+ if (BE (mbclen == (size_t) -1, 0))
461
+ pstr->cur_state = prev_st;
462
+ }
463
+ else
464
+ {
465
+ /* The buffer doesn't have enough space, finish to build. */
466
+ pstr->cur_state = prev_st;
467
+ break;
468
+ }
469
+ }
470
+ pstr->valid_len = byte_idx;
471
+ pstr->valid_raw_len = src_idx; /* in this path the raw and converted indices are tracked separately */
472
+ return REG_NOERROR;
473
+ }
474
+
475
+ /* Skip characters until the index becomes greater than NEW_RAW_IDX.
476
+ Return the index. */
477
+
478
+ static int
479
+ internal_function
480
+ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
481
+ {
482
+ mbstate_t prev_st;
483
+ int rawbuf_idx;
484
+ size_t mbclen;
485
+ wchar_t wc = WEOF;
486
+
487
+ /* Skip the characters which are not necessary to check. */
488
+ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
489
+ rawbuf_idx < new_raw_idx;)
490
+ {
491
+ int remain_len;
492
+ remain_len = pstr->len - rawbuf_idx;
493
+ prev_st = pstr->cur_state;
494
+ mbclen = __mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
495
+ remain_len, &pstr->cur_state);
496
+ if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
497
+ {
498
+ /* We treat these cases as a single byte character. */
499
+ if (mbclen == 0 || remain_len == 0)
500
+ wc = L'\0';
501
+ else
502
+ wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
503
+ mbclen = 1;
504
+ pstr->cur_state = prev_st;
505
+ }
506
+ /* Then proceed the next character. */
507
+ rawbuf_idx += mbclen;
508
+ }
509
+ *last_wc = (wint_t) wc;
510
+ return rawbuf_idx;
511
+ }
512
+ #endif /* RE_ENABLE_I18N */
513
+
514
+ /* Build the buffer PSTR->MBS, and apply the translation if we need.
515
+ This function is used in case of REG_ICASE. */
516
+
517
+ static void
518
+ internal_function
519
+ build_upper_buffer (re_string_t *pstr)
520
+ {
521
+ int char_idx, end_idx;
522
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
523
+
524
+ for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
525
+ {
526
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
527
+ if (BE (pstr->trans != NULL, 0))
528
+ ch = pstr->trans[ch];
529
+ if (islower (ch))
530
+ pstr->mbs[char_idx] = toupper (ch);
531
+ else
532
+ pstr->mbs[char_idx] = ch;
533
+ }
534
+ pstr->valid_len = char_idx;
535
+ pstr->valid_raw_len = char_idx;
536
+ }
537
+
538
+ /* Apply TRANS to the buffer in PSTR. */
539
+
540
+ static void
541
+ internal_function
542
+ re_string_translate_buffer (re_string_t *pstr)
543
+ {
544
+ int buf_idx, end_idx;
545
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
546
+
547
+ for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
548
+ {
549
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
550
+ pstr->mbs[buf_idx] = pstr->trans[ch];
551
+ }
552
+
553
+ pstr->valid_len = buf_idx;
554
+ pstr->valid_raw_len = buf_idx;
555
+ }
556
+
557
+ /* This function re-construct the buffers.
558
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
559
+ convert to upper case in case of REG_ICASE, apply translation. */
560
+
561
+ static reg_errcode_t
562
+ internal_function
563
+ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
564
+ {
565
+ int offset = idx - pstr->raw_mbs_idx;
566
+ if (BE (offset < 0, 0))
567
+ {
568
+ /* Reset buffer. */
569
+ #ifdef RE_ENABLE_I18N
570
+ if (pstr->mb_cur_max > 1)
571
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
572
+ #endif /* RE_ENABLE_I18N */
573
+ pstr->len = pstr->raw_len;
574
+ pstr->stop = pstr->raw_stop;
575
+ pstr->valid_len = 0;
576
+ pstr->raw_mbs_idx = 0;
577
+ pstr->valid_raw_len = 0;
578
+ pstr->offsets_needed = 0;
579
+ pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
580
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
581
+ if (!pstr->mbs_allocated)
582
+ pstr->mbs = (unsigned char *) pstr->raw_mbs;
583
+ offset = idx;
584
+ }
585
+
586
+ if (BE (offset != 0, 1))
587
+ {
588
+ /* Should the already checked characters be kept? */
589
+ if (BE (offset < pstr->valid_raw_len, 1))
590
+ {
591
+ /* Yes, move them to the front of the buffer. */
592
+ #ifdef RE_ENABLE_I18N
593
+ if (BE (pstr->offsets_needed, 0))
594
+ {
595
+ int low = 0, high = pstr->valid_len, mid;
596
+ do
597
+ {
598
+ mid = (high + low) / 2;
599
+ if (pstr->offsets[mid] > offset)
600
+ high = mid;
601
+ else if (pstr->offsets[mid] < offset)
602
+ low = mid + 1;
603
+ else
604
+ break;
605
+ }
606
+ while (low < high);
607
+ if (pstr->offsets[mid] < offset)
608
+ ++mid;
609
+ pstr->tip_context = re_string_context_at (pstr, mid - 1,
610
+ eflags);
611
+ /* This can be quite complicated, so handle specially
612
+ only the common and easy case where the character with
613
+ different length representation of lower and upper
614
+ case is present at or after offset. */
615
+ if (pstr->valid_len > offset
616
+ && mid == offset && pstr->offsets[mid] == offset)
617
+ {
618
+ memmove (pstr->wcs, pstr->wcs + offset,
619
+ (pstr->valid_len - offset) * sizeof (wint_t));
620
+ memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
621
+ pstr->valid_len -= offset;
622
+ pstr->valid_raw_len -= offset;
623
+ for (low = 0; low < pstr->valid_len; low++)
624
+ pstr->offsets[low] = pstr->offsets[low + offset] - offset;
625
+ }
626
+ else
627
+ {
628
+ /* Otherwise, just find out how long the partial multibyte
629
+ character at offset is and fill it with WEOF/255. */
630
+ pstr->len = pstr->raw_len - idx + offset;
631
+ pstr->stop = pstr->raw_stop - idx + offset;
632
+ pstr->offsets_needed = 0;
633
+ while (mid > 0 && pstr->offsets[mid - 1] == offset)
634
+ --mid;
635
+ while (mid < pstr->valid_len)
636
+ if (pstr->wcs[mid] != WEOF)
637
+ break;
638
+ else
639
+ ++mid;
640
+ if (mid == pstr->valid_len)
641
+ pstr->valid_len = 0;
642
+ else
643
+ {
644
+ pstr->valid_len = pstr->offsets[mid] - offset;
645
+ if (pstr->valid_len)
646
+ {
647
+ for (low = 0; low < pstr->valid_len; ++low)
648
+ pstr->wcs[low] = WEOF;
649
+ memset (pstr->mbs, 255, pstr->valid_len);
650
+ }
651
+ }
652
+ pstr->valid_raw_len = pstr->valid_len;
653
+ }
654
+ }
655
+ else
656
+ #endif
657
+ {
658
+ pstr->tip_context = re_string_context_at (pstr, offset - 1,
659
+ eflags);
660
+ #ifdef RE_ENABLE_I18N
661
+ if (pstr->mb_cur_max > 1)
662
+ memmove (pstr->wcs, pstr->wcs + offset,
663
+ (pstr->valid_len - offset) * sizeof (wint_t));
664
+ #endif /* RE_ENABLE_I18N */
665
+ if (BE (pstr->mbs_allocated, 0))
666
+ memmove (pstr->mbs, pstr->mbs + offset,
667
+ pstr->valid_len - offset);
668
+ pstr->valid_len -= offset;
669
+ pstr->valid_raw_len -= offset;
670
+ #if DEBUG
671
+ assert (pstr->valid_len > 0);
672
+ #endif
673
+ }
674
+ }
675
+ else
676
+ {
677
+ /* No, skip all characters until IDX. */
678
+ int prev_valid_len = pstr->valid_len;
679
+
680
+ #ifdef RE_ENABLE_I18N
681
+ if (BE (pstr->offsets_needed, 0))
682
+ {
683
+ pstr->len = pstr->raw_len - idx + offset;
684
+ pstr->stop = pstr->raw_stop - idx + offset;
685
+ pstr->offsets_needed = 0;
686
+ }
687
+ #endif
688
+ pstr->valid_len = 0;
689
+ #ifdef RE_ENABLE_I18N
690
+ if (pstr->mb_cur_max > 1)
691
+ {
692
+ int wcs_idx;
693
+ wint_t wc = WEOF;
694
+
695
+ if (pstr->is_utf8)
696
+ {
697
+ const unsigned char *raw, *p, *q, *end;
698
+
699
+ /* Special case UTF-8. Multi-byte chars start with any
700
+ byte other than 0x80 - 0xbf. */
701
+ raw = pstr->raw_mbs + pstr->raw_mbs_idx;
702
+ end = raw + (offset - pstr->mb_cur_max);
703
+ if (end < pstr->raw_mbs)
704
+ end = pstr->raw_mbs;
705
+ p = raw + offset - 1;
706
+ #ifdef _LIBC
707
+ /* We know the wchar_t encoding is UCS4, so for the simple
708
+ case, ASCII characters, skip the conversion step. */
709
+ if (isascii (*p) && BE (pstr->trans == NULL, 1))
710
+ {
711
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
712
+ /* pstr->valid_len = 0; */
713
+ wc = (wchar_t) *p;
714
+ }
715
+ else
716
+ #endif
717
+ for (; p >= end; --p)
718
+ if ((*p & 0xc0) != 0x80)
719
+ {
720
+ mbstate_t cur_state;
721
+ wchar_t wc2;
722
+ int mlen = raw + pstr->len - p;
723
+ unsigned char buf[6];
724
+ size_t mbclen;
725
+
726
+ q = p;
727
+ if (BE (pstr->trans != NULL, 0))
728
+ {
729
+ int i = mlen < 6 ? mlen : 6;
730
+ while (--i >= 0)
731
+ buf[i] = pstr->trans[p[i]];
732
+ q = buf;
733
+ }
734
+ /* XXX Don't use mbrtowc, we know which conversion
735
+ to use (UTF-8 -> UCS4). */
736
+ memset (&cur_state, 0, sizeof (cur_state));
737
+ mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
738
+ &cur_state);
739
+ if (raw + offset - p <= mbclen
740
+ && mbclen < (size_t) -2)
741
+ {
742
+ memset (&pstr->cur_state, '\0',
743
+ sizeof (mbstate_t));
744
+ pstr->valid_len = mbclen - (raw + offset - p);
745
+ wc = wc2;
746
+ }
747
+ break;
748
+ }
749
+ }
750
+
751
+ if (wc == WEOF)
752
+ pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
753
+ if (wc == WEOF)
754
+ pstr->tip_context
755
+ = re_string_context_at (pstr, prev_valid_len - 1, eflags);
756
+ else
757
+ pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
758
+ && IS_WIDE_WORD_CHAR (wc))
759
+ ? CONTEXT_WORD
760
+ : ((IS_WIDE_NEWLINE (wc)
761
+ && pstr->newline_anchor)
762
+ ? CONTEXT_NEWLINE : 0));
763
+ if (BE (pstr->valid_len, 0))
764
+ {
765
+ for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
766
+ pstr->wcs[wcs_idx] = WEOF;
767
+ if (pstr->mbs_allocated)
768
+ memset (pstr->mbs, 255, pstr->valid_len);
769
+ }
770
+ pstr->valid_raw_len = pstr->valid_len;
771
+ }
772
+ else
773
+ #endif /* RE_ENABLE_I18N */
774
+ {
775
+ int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
776
+ pstr->valid_raw_len = 0;
777
+ if (pstr->trans)
778
+ c = pstr->trans[c];
779
+ pstr->tip_context = (bitset_contain (pstr->word_char, c)
780
+ ? CONTEXT_WORD
781
+ : ((IS_NEWLINE (c) && pstr->newline_anchor)
782
+ ? CONTEXT_NEWLINE : 0));
783
+ }
784
+ }
785
+ if (!BE (pstr->mbs_allocated, 0))
786
+ pstr->mbs += offset;
787
+ }
788
+ pstr->raw_mbs_idx = idx;
789
+ pstr->len -= offset;
790
+ pstr->stop -= offset;
791
+
792
+ /* Then build the buffers. */
793
+ #ifdef RE_ENABLE_I18N
794
+ if (pstr->mb_cur_max > 1)
795
+ {
796
+ if (pstr->icase)
797
+ {
798
+ reg_errcode_t ret = build_wcs_upper_buffer (pstr);
799
+ if (BE (ret != REG_NOERROR, 0))
800
+ return ret;
801
+ }
802
+ else
803
+ build_wcs_buffer (pstr);
804
+ }
805
+ else
806
+ #endif /* RE_ENABLE_I18N */
807
+ if (BE (pstr->mbs_allocated, 0))
808
+ {
809
+ if (pstr->icase)
810
+ build_upper_buffer (pstr);
811
+ else if (pstr->trans != NULL)
812
+ re_string_translate_buffer (pstr);
813
+ }
814
+ else
815
+ pstr->valid_len = pstr->len;
816
+
817
+ pstr->cur_idx = 0;
818
+ return REG_NOERROR;
819
+ }
820
+
821
+ static unsigned char
822
+ internal_function __attribute ((pure))
823
+ re_string_peek_byte_case (const re_string_t *pstr, int idx)
824
+ {
825
+ int ch, off;
826
+
827
+ /* Handle the common (easiest) cases first. */
828
+ if (BE (!pstr->mbs_allocated, 1))
829
+ return re_string_peek_byte (pstr, idx);
830
+
831
+ #ifdef RE_ENABLE_I18N
832
+ if (pstr->mb_cur_max > 1
833
+ && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
834
+ return re_string_peek_byte (pstr, idx);
835
+ #endif
836
+
837
+ off = pstr->cur_idx + idx;
838
+ #ifdef RE_ENABLE_I18N
839
+ if (pstr->offsets_needed)
840
+ off = pstr->offsets[off];
841
+ #endif
842
+
843
+ ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
844
+
845
+ #ifdef RE_ENABLE_I18N
846
+ /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
847
+ this function returns CAPITAL LETTER I instead of first byte of
848
+ DOTLESS SMALL LETTER I. The latter would confuse the parser,
849
+ since peek_byte_case doesn't advance cur_idx in any way. */
850
+ if (pstr->offsets_needed && !isascii (ch))
851
+ return re_string_peek_byte (pstr, idx);
852
+ #endif
853
+
854
+ return ch;
855
+ }
856
+
857
+ static unsigned char
858
+ internal_function __attribute ((pure))
859
+ re_string_fetch_byte_case (re_string_t *pstr)
860
+ {
861
+ if (BE (!pstr->mbs_allocated, 1))
862
+ return re_string_fetch_byte (pstr);
863
+
864
+ #ifdef RE_ENABLE_I18N
865
+ if (pstr->offsets_needed)
866
+ {
867
+ int off, ch;
868
+
869
+ /* For tr_TR.UTF-8 [[:islower:]] there is
870
+ [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
871
+ in that case the whole multi-byte character and return
872
+ the original letter. On the other side, with
873
+ [[: DOTLESS SMALL LETTER I return [[:I, as doing
874
+ anything else would complicate things too much. */
875
+
876
+ if (!re_string_first_byte (pstr, pstr->cur_idx))
877
+ return re_string_fetch_byte (pstr);
878
+
879
+ off = pstr->offsets[pstr->cur_idx];
880
+ ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
881
+
882
+ if (! isascii (ch))
883
+ return re_string_fetch_byte (pstr);
884
+
885
+ re_string_skip_bytes (pstr,
886
+ re_string_char_size_at (pstr, pstr->cur_idx));
887
+ return ch;
888
+ }
889
+ #endif
890
+
891
+ return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
892
+ }
893
+
894
+ static void
895
+ internal_function
896
+ re_string_destruct (re_string_t *pstr)
897
+ {
898
+ #ifdef RE_ENABLE_I18N
899
+ re_free (pstr->wcs);
900
+ re_free (pstr->offsets);
901
+ #endif /* RE_ENABLE_I18N */
902
+ if (pstr->mbs_allocated)
903
+ re_free (pstr->mbs);
904
+ }
905
+
906
+ /* Return the context at IDX in INPUT. */
907
+
908
+ static unsigned int
909
+ internal_function
910
+ re_string_context_at (const re_string_t *input, int idx, int eflags)
911
+ {
912
+ int c;
913
+ if (BE (idx < 0, 0))
914
+ /* In this case, we use the value stored in input->tip_context,
915
+ since we can't know the character in input->mbs[-1] here. */
916
+ return input->tip_context;
917
+ if (BE (idx == input->len, 0))
918
+ return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
919
+ : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
920
+ #ifdef RE_ENABLE_I18N
921
+ if (input->mb_cur_max > 1)
922
+ {
923
+ wint_t wc;
924
+ int wc_idx = idx;
925
+ while(input->wcs[wc_idx] == WEOF)
926
+ {
927
+ #ifdef DEBUG
928
+ /* It must not happen. */
929
+ assert (wc_idx >= 0);
930
+ #endif
931
+ --wc_idx;
932
+ if (wc_idx < 0)
933
+ return input->tip_context;
934
+ }
935
+ wc = input->wcs[wc_idx];
936
+ if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
937
+ return CONTEXT_WORD;
938
+ return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
939
+ ? CONTEXT_NEWLINE : 0);
940
+ }
941
+ else
942
+ #endif
943
+ {
944
+ c = re_string_byte_at (input, idx);
945
+ if (bitset_contain (input->word_char, c))
946
+ return CONTEXT_WORD;
947
+ return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
948
+ }
949
+ }
950
+
951
+ /* Functions for set operations. */
952
+
953
+ static reg_errcode_t
954
+ internal_function
955
+ re_node_set_alloc (re_node_set *set, int size)
956
+ {
957
+ set->alloc = size;
958
+ set->nelem = 0;
959
+ set->elems = re_malloc (int, size);
960
+ if (BE (set->elems == NULL, 0))
961
+ return REG_ESPACE;
962
+ return REG_NOERROR;
963
+ }
964
+
965
+ static reg_errcode_t
966
+ internal_function
967
+ re_node_set_init_1 (re_node_set *set, int elem)
968
+ {
969
+ set->alloc = 1;
970
+ set->nelem = 1;
971
+ set->elems = re_malloc (int, 1);
972
+ if (BE (set->elems == NULL, 0))
973
+ {
974
+ set->alloc = set->nelem = 0;
975
+ return REG_ESPACE;
976
+ }
977
+ set->elems[0] = elem;
978
+ return REG_NOERROR;
979
+ }
980
+
981
+ static reg_errcode_t
982
+ internal_function
983
+ re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
984
+ {
985
+ set->alloc = 2;
986
+ set->elems = re_malloc (int, 2);
987
+ if (BE (set->elems == NULL, 0))
988
+ return REG_ESPACE;
989
+ if (elem1 == elem2)
990
+ {
991
+ set->nelem = 1;
992
+ set->elems[0] = elem1;
993
+ }
994
+ else
995
+ {
996
+ set->nelem = 2;
997
+ if (elem1 < elem2)
998
+ {
999
+ set->elems[0] = elem1;
1000
+ set->elems[1] = elem2;
1001
+ }
1002
+ else
1003
+ {
1004
+ set->elems[0] = elem2;
1005
+ set->elems[1] = elem1;
1006
+ }
1007
+ }
1008
+ return REG_NOERROR;
1009
+ }
1010
+
1011
+ static reg_errcode_t
1012
+ internal_function
1013
+ re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1014
+ {
1015
+ dest->nelem = src->nelem;
1016
+ if (src->nelem > 0)
1017
+ {
1018
+ dest->alloc = dest->nelem;
1019
+ dest->elems = re_malloc (int, dest->alloc);
1020
+ if (BE (dest->elems == NULL, 0))
1021
+ {
1022
+ dest->alloc = dest->nelem = 0;
1023
+ return REG_ESPACE;
1024
+ }
1025
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1026
+ }
1027
+ else
1028
+ re_node_set_init_empty (dest);
1029
+ return REG_NOERROR;
1030
+ }
1031
+
1032
+ /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1033
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1034
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1035
+
1036
+ static reg_errcode_t
1037
+ internal_function
1038
+ re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1039
+ const re_node_set *src2)
1040
+ {
1041
+ int i1, i2, is, id, delta, sbase;
1042
+ if (src1->nelem == 0 || src2->nelem == 0)
1043
+ return REG_NOERROR;
1044
+
1045
+ /* We need dest->nelem + 2 * elems_in_intersection; this is a
1046
+ conservative estimate. */
1047
+ if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1048
+ {
1049
+ int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1050
+ int *new_elems = re_realloc (dest->elems, int, new_alloc);
1051
+ if (BE (new_elems == NULL, 0))
1052
+ return REG_ESPACE;
1053
+ dest->elems = new_elems;
1054
+ dest->alloc = new_alloc;
1055
+ }
1056
+
1057
+ /* Find the items in the intersection of SRC1 and SRC2, and copy
1058
+ into the top of DEST those that are not already in DEST itself. */
1059
+ sbase = dest->nelem + src1->nelem + src2->nelem;
1060
+ i1 = src1->nelem - 1;
1061
+ i2 = src2->nelem - 1;
1062
+ id = dest->nelem - 1;
1063
+ for (;;)
1064
+ {
1065
+ if (src1->elems[i1] == src2->elems[i2])
1066
+ {
1067
+ /* Try to find the item in DEST. Maybe we could binary search? */
1068
+ while (id >= 0 && dest->elems[id] > src1->elems[i1])
1069
+ --id;
1070
+
1071
+ if (id < 0 || dest->elems[id] != src1->elems[i1])
1072
+ dest->elems[--sbase] = src1->elems[i1];
1073
+
1074
+ if (--i1 < 0 || --i2 < 0)
1075
+ break;
1076
+ }
1077
+
1078
+ /* Lower the highest of the two items. */
1079
+ else if (src1->elems[i1] < src2->elems[i2])
1080
+ {
1081
+ if (--i2 < 0)
1082
+ break;
1083
+ }
1084
+ else
1085
+ {
1086
+ if (--i1 < 0)
1087
+ break;
1088
+ }
1089
+ }
1090
+
1091
+ id = dest->nelem - 1;
1092
+ is = dest->nelem + src1->nelem + src2->nelem - 1;
1093
+ delta = is - sbase + 1;
1094
+
1095
+ /* Now copy. When DELTA becomes zero, the remaining
1096
+ DEST elements are already in place; this is more or
1097
+ less the same loop that is in re_node_set_merge. */
1098
+ dest->nelem += delta;
1099
+ if (delta > 0 && id >= 0)
1100
+ for (;;)
1101
+ {
1102
+ if (dest->elems[is] > dest->elems[id])
1103
+ {
1104
+ /* Copy from the top. */
1105
+ dest->elems[id + delta--] = dest->elems[is--];
1106
+ if (delta == 0)
1107
+ break;
1108
+ }
1109
+ else
1110
+ {
1111
+ /* Slide from the bottom. */
1112
+ dest->elems[id + delta] = dest->elems[id];
1113
+ if (--id < 0)
1114
+ break;
1115
+ }
1116
+ }
1117
+
1118
+ /* Copy remaining SRC elements. */
1119
+ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1120
+
1121
+ return REG_NOERROR;
1122
+ }
1123
+
1124
+ /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1125
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1126
+
1127
+ static reg_errcode_t
1128
+ internal_function
1129
+ re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1130
+ const re_node_set *src2)
1131
+ {
1132
+ int i1, i2, id;
1133
+ if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1134
+ {
1135
+ dest->alloc = src1->nelem + src2->nelem;
1136
+ dest->elems = re_malloc (int, dest->alloc);
1137
+ if (BE (dest->elems == NULL, 0))
1138
+ return REG_ESPACE;
1139
+ }
1140
+ else
1141
+ {
1142
+ if (src1 != NULL && src1->nelem > 0)
1143
+ return re_node_set_init_copy (dest, src1);
1144
+ else if (src2 != NULL && src2->nelem > 0)
1145
+ return re_node_set_init_copy (dest, src2);
1146
+ else
1147
+ re_node_set_init_empty (dest);
1148
+ return REG_NOERROR;
1149
+ }
1150
+ for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1151
+ {
1152
+ if (src1->elems[i1] > src2->elems[i2])
1153
+ {
1154
+ dest->elems[id++] = src2->elems[i2++];
1155
+ continue;
1156
+ }
1157
+ if (src1->elems[i1] == src2->elems[i2])
1158
+ ++i2;
1159
+ dest->elems[id++] = src1->elems[i1++];
1160
+ }
1161
+ if (i1 < src1->nelem)
1162
+ {
1163
+ memcpy (dest->elems + id, src1->elems + i1,
1164
+ (src1->nelem - i1) * sizeof (int));
1165
+ id += src1->nelem - i1;
1166
+ }
1167
+ else if (i2 < src2->nelem)
1168
+ {
1169
+ memcpy (dest->elems + id, src2->elems + i2,
1170
+ (src2->nelem - i2) * sizeof (int));
1171
+ id += src2->nelem - i2;
1172
+ }
1173
+ dest->nelem = id;
1174
+ return REG_NOERROR;
1175
+ }
1176
+
1177
+ /* Calculate the union set of the sets DEST and SRC. And store it to
1178
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1179
+
1180
+ static reg_errcode_t
1181
+ internal_function
1182
+ re_node_set_merge (re_node_set *dest, const re_node_set *src)
1183
+ {
1184
+ int is, id, sbase, delta;
1185
+ if (src == NULL || src->nelem == 0)
1186
+ return REG_NOERROR;
1187
+ if (dest->alloc < 2 * src->nelem + dest->nelem)
1188
+ {
1189
+ int new_alloc = 2 * (src->nelem + dest->alloc);
1190
+ int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1191
+ if (BE (new_buffer == NULL, 0))
1192
+ return REG_ESPACE;
1193
+ dest->elems = new_buffer;
1194
+ dest->alloc = new_alloc;
1195
+ }
1196
+
1197
+ if (BE (dest->nelem == 0, 0))
1198
+ {
1199
+ dest->nelem = src->nelem;
1200
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1201
+ return REG_NOERROR;
1202
+ }
1203
+
1204
+ /* Copy into the top of DEST the items of SRC that are not
1205
+ found in DEST. Maybe we could binary search in DEST? */
1206
+ for (sbase = dest->nelem + 2 * src->nelem,
1207
+ is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1208
+ {
1209
+ if (dest->elems[id] == src->elems[is])
1210
+ is--, id--;
1211
+ else if (dest->elems[id] < src->elems[is])
1212
+ dest->elems[--sbase] = src->elems[is--];
1213
+ else /* if (dest->elems[id] > src->elems[is]) */
1214
+ --id;
1215
+ }
1216
+
1217
+ if (is >= 0)
1218
+ {
1219
+ /* If DEST is exhausted, the remaining items of SRC must be unique. */
1220
+ sbase -= is + 1;
1221
+ memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1222
+ }
1223
+
1224
+ id = dest->nelem - 1;
1225
+ is = dest->nelem + 2 * src->nelem - 1;
1226
+ delta = is - sbase + 1;
1227
+ if (delta == 0)
1228
+ return REG_NOERROR;
1229
+
1230
+ /* Now copy. When DELTA becomes zero, the remaining
1231
+ DEST elements are already in place. */
1232
+ dest->nelem += delta;
1233
+ for (;;)
1234
+ {
1235
+ if (dest->elems[is] > dest->elems[id])
1236
+ {
1237
+ /* Copy from the top. */
1238
+ dest->elems[id + delta--] = dest->elems[is--];
1239
+ if (delta == 0)
1240
+ break;
1241
+ }
1242
+ else
1243
+ {
1244
+ /* Slide from the bottom. */
1245
+ dest->elems[id + delta] = dest->elems[id];
1246
+ if (--id < 0)
1247
+ {
1248
+ /* Copy remaining SRC elements. */
1249
+ memcpy (dest->elems, dest->elems + sbase,
1250
+ delta * sizeof (int));
1251
+ break;
1252
+ }
1253
+ }
1254
+ }
1255
+
1256
+ return REG_NOERROR;
1257
+ }
1258
+
1259
+ /* Insert the new element ELEM to the re_node_set* SET.
1260
+ SET should not already have ELEM.
1261
+ return -1 if an error is occured, return 1 otherwise. */
1262
+
1263
+ static int
1264
+ internal_function
1265
+ re_node_set_insert (re_node_set *set, int elem)
1266
+ {
1267
+ int idx;
1268
+ /* In case the set is empty. */
1269
+ if (set->alloc == 0)
1270
+ {
1271
+ if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1272
+ return 1;
1273
+ else
1274
+ return -1;
1275
+ }
1276
+
1277
+ if (BE (set->nelem, 0) == 0)
1278
+ {
1279
+ /* We already guaranteed above that set->alloc != 0. */
1280
+ set->elems[0] = elem;
1281
+ ++set->nelem;
1282
+ return 1;
1283
+ }
1284
+
1285
+ /* Realloc if we need. */
1286
+ if (set->alloc == set->nelem)
1287
+ {
1288
+ int *new_elems;
1289
+ set->alloc = set->alloc * 2;
1290
+ new_elems = re_realloc (set->elems, int, set->alloc);
1291
+ if (BE (new_elems == NULL, 0))
1292
+ return -1;
1293
+ set->elems = new_elems;
1294
+ }
1295
+
1296
+ /* Move the elements which follows the new element. Test the
1297
+ first element separately to skip a check in the inner loop. */
1298
+ if (elem < set->elems[0])
1299
+ {
1300
+ idx = 0;
1301
+ for (idx = set->nelem; idx > 0; idx--)
1302
+ set->elems[idx] = set->elems[idx - 1];
1303
+ }
1304
+ else
1305
+ {
1306
+ for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1307
+ set->elems[idx] = set->elems[idx - 1];
1308
+ }
1309
+
1310
+ /* Insert the new element. */
1311
+ set->elems[idx] = elem;
1312
+ ++set->nelem;
1313
+ return 1;
1314
+ }
1315
+
1316
+ /* Insert the new element ELEM to the re_node_set* SET.
1317
+ SET should not already have any element greater than or equal to ELEM.
1318
+ Return -1 if an error is occured, return 1 otherwise. */
1319
+
1320
+ static int
1321
+ internal_function
1322
+ re_node_set_insert_last (re_node_set *set, int elem)
1323
+ {
1324
+ /* Realloc if we need. */
1325
+ if (set->alloc == set->nelem)
1326
+ {
1327
+ int *new_elems;
1328
+ set->alloc = (set->alloc + 1) * 2;
1329
+ new_elems = re_realloc (set->elems, int, set->alloc);
1330
+ if (BE (new_elems == NULL, 0))
1331
+ return -1;
1332
+ set->elems = new_elems;
1333
+ }
1334
+
1335
+ /* Insert the new element. */
1336
+ set->elems[set->nelem++] = elem;
1337
+ return 1;
1338
+ }
1339
+
1340
+ /* Compare two node sets SET1 and SET2.
1341
+ return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1342
+
1343
+ static int
1344
+ internal_function __attribute ((pure))
1345
+ re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1346
+ {
1347
+ int i;
1348
+ if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1349
+ return 0;
1350
+ for (i = set1->nelem ; --i >= 0 ; )
1351
+ if (set1->elems[i] != set2->elems[i])
1352
+ return 0;
1353
+ return 1;
1354
+ }
1355
+
1356
+ /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1357
+
1358
+ static int
1359
+ internal_function __attribute ((pure))
1360
+ re_node_set_contains (const re_node_set *set, int elem)
1361
+ {
1362
+ unsigned int idx, right, mid;
1363
+ if (set->nelem <= 0)
1364
+ return 0;
1365
+
1366
+ /* Binary search the element. */
1367
+ idx = 0;
1368
+ right = set->nelem - 1;
1369
+ while (idx < right)
1370
+ {
1371
+ mid = (idx + right) / 2;
1372
+ if (set->elems[mid] < elem)
1373
+ idx = mid + 1;
1374
+ else
1375
+ right = mid;
1376
+ }
1377
+ return set->elems[idx] == elem ? idx + 1 : 0;
1378
+ }
1379
+
1380
+ static void
1381
+ internal_function
1382
+ re_node_set_remove_at (re_node_set *set, int idx)
1383
+ {
1384
+ if (idx < 0 || idx >= set->nelem)
1385
+ return;
1386
+ --set->nelem;
1387
+ for (; idx < set->nelem; idx++)
1388
+ set->elems[idx] = set->elems[idx + 1];
1389
+ }
1390
+
1391
+
1392
+ /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1393
+ Or return -1, if an error will be occured. */
1394
+
1395
+ static int
1396
+ internal_function
1397
+ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1398
+ {
1399
+ int type = token.type;
1400
+ if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1401
+ {
1402
+ size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1403
+ int *new_nexts, *new_indices;
1404
+ re_node_set *new_edests, *new_eclosures;
1405
+ re_token_t *new_nodes;
1406
+
1407
+ /* Avoid overflows. */
1408
+ if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
1409
+ return -1;
1410
+
1411
+ new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1412
+ if (BE (new_nodes == NULL, 0))
1413
+ return -1;
1414
+ dfa->nodes = new_nodes;
1415
+ new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1416
+ new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1417
+ new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1418
+ new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1419
+ if (BE (new_nexts == NULL || new_indices == NULL
1420
+ || new_edests == NULL || new_eclosures == NULL, 0))
1421
+ return -1;
1422
+ dfa->nexts = new_nexts;
1423
+ dfa->org_indices = new_indices;
1424
+ dfa->edests = new_edests;
1425
+ dfa->eclosures = new_eclosures;
1426
+ dfa->nodes_alloc = new_nodes_alloc;
1427
+ }
1428
+ dfa->nodes[dfa->nodes_len] = token;
1429
+ dfa->nodes[dfa->nodes_len].constraint = 0;
1430
+ #ifdef RE_ENABLE_I18N
1431
+ dfa->nodes[dfa->nodes_len].accept_mb =
1432
+ (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1433
+ #endif
1434
+ dfa->nexts[dfa->nodes_len] = -1;
1435
+ re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1436
+ re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1437
+ return dfa->nodes_len++;
1438
+ }
1439
+
1440
+ static inline unsigned int
1441
+ internal_function
1442
+ calc_state_hash (const re_node_set *nodes, unsigned int context)
1443
+ {
1444
+ unsigned int hash = nodes->nelem + context;
1445
+ int i;
1446
+ for (i = 0 ; i < nodes->nelem ; i++)
1447
+ hash += nodes->elems[i];
1448
+ return hash;
1449
+ }
1450
+
1451
+ /* Search for the state whose node_set is equivalent to NODES.
1452
+ Return the pointer to the state, if we found it in the DFA.
1453
+ Otherwise create the new one and return it. In case of an error
1454
+ return NULL and set the error code in ERR.
1455
+ Note: - We assume NULL as the invalid state, then it is possible that
1456
+ return value is NULL and ERR is REG_NOERROR.
1457
+ - We never return non-NULL value in case of any errors, it is for
1458
+ optimization. */
1459
+
1460
+ static re_dfastate_t *
1461
+ internal_function
1462
+ re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1463
+ const re_node_set *nodes)
1464
+ {
1465
+ unsigned int hash;
1466
+ re_dfastate_t *new_state;
1467
+ struct re_state_table_entry *spot;
1468
+ int i;
1469
+ if (BE (nodes->nelem == 0, 0))
1470
+ {
1471
+ *err = REG_NOERROR;
1472
+ return NULL;
1473
+ }
1474
+ hash = calc_state_hash (nodes, 0);
1475
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
1476
+
1477
+ for (i = 0 ; i < spot->num ; i++)
1478
+ {
1479
+ re_dfastate_t *state = spot->array[i];
1480
+ if (hash != state->hash)
1481
+ continue;
1482
+ if (re_node_set_compare (&state->nodes, nodes))
1483
+ return state;
1484
+ }
1485
+
1486
+ /* There are no appropriate state in the dfa, create the new one. */
1487
+ new_state = create_ci_newstate (dfa, nodes, hash);
1488
+ if (BE (new_state == NULL, 0))
1489
+ *err = REG_ESPACE;
1490
+
1491
+ return new_state;
1492
+ }
1493
+
1494
+ /* Search for the state whose node_set is equivalent to NODES and
1495
+ whose context is equivalent to CONTEXT.
1496
+ Return the pointer to the state, if we found it in the DFA.
1497
+ Otherwise create the new one and return it. In case of an error
1498
+ return NULL and set the error code in ERR.
1499
+ Note: - We assume NULL as the invalid state, then it is possible that
1500
+ return value is NULL and ERR is REG_NOERROR.
1501
+ - We never return non-NULL value in case of any errors, it is for
1502
+ optimization. */
1503
+
1504
+ static re_dfastate_t *
1505
+ internal_function
1506
+ re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1507
+ const re_node_set *nodes, unsigned int context)
1508
+ {
1509
+ unsigned int hash;
1510
+ re_dfastate_t *new_state;
1511
+ struct re_state_table_entry *spot;
1512
+ int i;
1513
+ if (nodes->nelem == 0)
1514
+ {
1515
+ *err = REG_NOERROR;
1516
+ return NULL;
1517
+ }
1518
+ hash = calc_state_hash (nodes, context);
1519
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
1520
+
1521
+ for (i = 0 ; i < spot->num ; i++)
1522
+ {
1523
+ re_dfastate_t *state = spot->array[i];
1524
+ if (state->hash == hash
1525
+ && state->context == context
1526
+ && re_node_set_compare (state->entrance_nodes, nodes))
1527
+ return state;
1528
+ }
1529
+ /* There are no appropriate state in `dfa', create the new one. */
1530
+ new_state = create_cd_newstate (dfa, nodes, context, hash);
1531
+ if (BE (new_state == NULL, 0))
1532
+ *err = REG_ESPACE;
1533
+
1534
+ return new_state;
1535
+ }
1536
+
1537
+ /* Finish initialization of the new state NEWSTATE, and using its hash value
1538
+ HASH put in the appropriate bucket of DFA's state table. Return value
1539
+ indicates the error code if failed. */
1540
+
1541
+ static reg_errcode_t
1542
+ register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1543
+ unsigned int hash)
1544
+ {
1545
+ struct re_state_table_entry *spot;
1546
+ reg_errcode_t err;
1547
+ int i;
1548
+
1549
+ newstate->hash = hash;
1550
+ err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1551
+ if (BE (err != REG_NOERROR, 0))
1552
+ return REG_ESPACE;
1553
+ for (i = 0; i < newstate->nodes.nelem; i++)
1554
+ {
1555
+ int elem = newstate->nodes.elems[i];
1556
+ if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1557
+ re_node_set_insert_last (&newstate->non_eps_nodes, elem);
1558
+ }
1559
+
1560
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
1561
+ if (BE (spot->alloc <= spot->num, 0))
1562
+ {
1563
+ int new_alloc = 2 * spot->num + 2;
1564
+ re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1565
+ new_alloc);
1566
+ if (BE (new_array == NULL, 0))
1567
+ return REG_ESPACE;
1568
+ spot->array = new_array;
1569
+ spot->alloc = new_alloc;
1570
+ }
1571
+ spot->array[spot->num++] = newstate;
1572
+ return REG_NOERROR;
1573
+ }
1574
+
1575
+ static void
1576
+ free_state (re_dfastate_t *state)
1577
+ {
1578
+ re_node_set_free (&state->non_eps_nodes);
1579
+ re_node_set_free (&state->inveclosure);
1580
+ if (state->entrance_nodes != &state->nodes)
1581
+ {
1582
+ re_node_set_free (state->entrance_nodes);
1583
+ re_free (state->entrance_nodes);
1584
+ }
1585
+ re_node_set_free (&state->nodes);
1586
+ re_free (state->word_trtable);
1587
+ re_free (state->trtable);
1588
+ re_free (state);
1589
+ }
1590
+
1591
+ /* Create the new state which is independ of contexts.
1592
+ Return the new state if succeeded, otherwise return NULL. */
1593
+
1594
+ static re_dfastate_t *
1595
+ internal_function
1596
+ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1597
+ unsigned int hash)
1598
+ {
1599
+ int i;
1600
+ reg_errcode_t err;
1601
+ re_dfastate_t *newstate;
1602
+
1603
+ newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1604
+ if (BE (newstate == NULL, 0))
1605
+ return NULL;
1606
+ err = re_node_set_init_copy (&newstate->nodes, nodes);
1607
+ if (BE (err != REG_NOERROR, 0))
1608
+ {
1609
+ re_free (newstate);
1610
+ return NULL;
1611
+ }
1612
+
1613
+ newstate->entrance_nodes = &newstate->nodes;
1614
+ for (i = 0 ; i < nodes->nelem ; i++)
1615
+ {
1616
+ re_token_t *node = dfa->nodes + nodes->elems[i];
1617
+ re_token_type_t type = node->type;
1618
+ if (type == CHARACTER && !node->constraint)
1619
+ continue;
1620
+ #ifdef RE_ENABLE_I18N
1621
+ newstate->accept_mb |= node->accept_mb;
1622
+ #endif /* RE_ENABLE_I18N */
1623
+
1624
+ /* If the state has the halt node, the state is a halt state. */
1625
+ if (type == END_OF_RE)
1626
+ newstate->halt = 1;
1627
+ else if (type == OP_BACK_REF)
1628
+ newstate->has_backref = 1;
1629
+ else if (type == ANCHOR || node->constraint)
1630
+ newstate->has_constraint = 1;
1631
+ }
1632
+ err = register_state (dfa, newstate, hash);
1633
+ if (BE (err != REG_NOERROR, 0))
1634
+ {
1635
+ free_state (newstate);
1636
+ newstate = NULL;
1637
+ }
1638
+ return newstate;
1639
+ }
1640
+
1641
+ /* Create the new state which is depend on the context CONTEXT.
1642
+ Return the new state if succeeded, otherwise return NULL. */
1643
+
1644
+ static re_dfastate_t *
1645
+ internal_function
1646
+ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1647
+ unsigned int context, unsigned int hash)
1648
+ {
1649
+ int i, nctx_nodes = 0;
1650
+ reg_errcode_t err;
1651
+ re_dfastate_t *newstate;
1652
+
1653
+ newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1654
+ if (BE (newstate == NULL, 0))
1655
+ return NULL;
1656
+ err = re_node_set_init_copy (&newstate->nodes, nodes);
1657
+ if (BE (err != REG_NOERROR, 0))
1658
+ {
1659
+ re_free (newstate);
1660
+ return NULL;
1661
+ }
1662
+
1663
+ newstate->context = context;
1664
+ newstate->entrance_nodes = &newstate->nodes;
1665
+
1666
+ for (i = 0 ; i < nodes->nelem ; i++)
1667
+ {
1668
+ re_token_t *node = dfa->nodes + nodes->elems[i];
1669
+ re_token_type_t type = node->type;
1670
+ unsigned int constraint = node->constraint;
1671
+
1672
+ if (type == CHARACTER && !constraint)
1673
+ continue;
1674
+ #ifdef RE_ENABLE_I18N
1675
+ newstate->accept_mb |= node->accept_mb;
1676
+ #endif /* RE_ENABLE_I18N */
1677
+
1678
+ /* If the state has the halt node, the state is a halt state. */
1679
+ if (type == END_OF_RE)
1680
+ newstate->halt = 1;
1681
+ else if (type == OP_BACK_REF)
1682
+ newstate->has_backref = 1;
1683
+
1684
+ if (constraint)
1685
+ {
1686
+ if (newstate->entrance_nodes == &newstate->nodes)
1687
+ {
1688
+ newstate->entrance_nodes = re_malloc (re_node_set, 1);
1689
+ if (BE (newstate->entrance_nodes == NULL, 0))
1690
+ {
1691
+ free_state (newstate);
1692
+ return NULL;
1693
+ }
1694
+ re_node_set_init_copy (newstate->entrance_nodes, nodes);
1695
+ nctx_nodes = 0;
1696
+ newstate->has_constraint = 1;
1697
+ }
1698
+
1699
+ if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1700
+ {
1701
+ re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1702
+ ++nctx_nodes;
1703
+ }
1704
+ }
1705
+ }
1706
+ err = register_state (dfa, newstate, hash);
1707
+ if (BE (err != REG_NOERROR, 0))
1708
+ {
1709
+ free_state (newstate);
1710
+ newstate = NULL;
1711
+ }
1712
+ return newstate;
1713
+ }