passenger 4.0.27 → 4.0.28

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of passenger might be problematic. Click here for more details.

Files changed (156) hide show
  1. data.tar.gz.asc +7 -7
  2. data/.gitignore +1 -0
  3. data/NEWS +22 -0
  4. data/build/preprocessor.rb +10 -0
  5. data/build/rpm.rb +74 -65
  6. data/debian.template/rules.template +8 -0
  7. data/dev/copy_boost_headers.rb +11 -2
  8. data/doc/Users guide Apache.idmap.txt +161 -145
  9. data/doc/Users guide Apache.txt +12 -1
  10. data/doc/Users guide Nginx.idmap.txt +142 -126
  11. data/doc/Users guide Nginx.txt +14 -1
  12. data/doc/Users guide Standalone.txt +1 -0
  13. data/doc/users_guide_snippets/environment_variables.txt +1 -1
  14. data/doc/users_guide_snippets/installation.txt +2 -0
  15. data/doc/users_guide_snippets/tips.txt +118 -0
  16. data/ext/apache2/Configuration.cpp +0 -6
  17. data/ext/apache2/Configuration.hpp +0 -5
  18. data/ext/apache2/ConfigurationCommands.cpp +7 -0
  19. data/ext/apache2/ConfigurationFields.hpp +2 -0
  20. data/ext/apache2/ConfigurationSetters.cpp +24 -0
  21. data/ext/apache2/CreateDirConfig.cpp +1 -0
  22. data/ext/apache2/Hooks.cpp +0 -1
  23. data/ext/apache2/MergeDirConfig.cpp +7 -0
  24. data/ext/apache2/SetHeaders.cpp +5 -1
  25. data/ext/boost/cregex.hpp +39 -0
  26. data/ext/boost/libs/regex/src/c_regex_traits.cpp +193 -0
  27. data/ext/boost/libs/regex/src/cpp_regex_traits.cpp +117 -0
  28. data/ext/boost/libs/regex/src/cregex.cpp +660 -0
  29. data/ext/boost/libs/regex/src/instances.cpp +32 -0
  30. data/ext/boost/libs/regex/src/internals.hpp +35 -0
  31. data/ext/boost/libs/regex/src/posix_api.cpp +296 -0
  32. data/ext/boost/libs/regex/src/regex.cpp +227 -0
  33. data/ext/boost/libs/regex/src/regex_debug.cpp +59 -0
  34. data/ext/boost/libs/regex/src/regex_raw_buffer.cpp +72 -0
  35. data/ext/boost/libs/regex/src/regex_traits_defaults.cpp +692 -0
  36. data/ext/boost/libs/regex/src/static_mutex.cpp +179 -0
  37. data/ext/boost/libs/regex/src/wc_regex_traits.cpp +301 -0
  38. data/ext/boost/libs/regex/src/wide_posix_api.cpp +315 -0
  39. data/ext/boost/libs/regex/src/winstances.cpp +35 -0
  40. data/ext/boost/regex.h +100 -0
  41. data/ext/boost/regex.hpp +37 -0
  42. data/ext/boost/regex/concepts.hpp +1128 -0
  43. data/ext/boost/regex/config.hpp +435 -0
  44. data/ext/boost/regex/config/borland.hpp +72 -0
  45. data/ext/boost/regex/config/cwchar.hpp +207 -0
  46. data/ext/boost/regex/mfc.hpp +190 -0
  47. data/ext/boost/regex/pattern_except.hpp +100 -0
  48. data/ext/boost/regex/pending/object_cache.hpp +165 -0
  49. data/ext/boost/regex/pending/static_mutex.hpp +179 -0
  50. data/ext/boost/regex/pending/unicode_iterator.hpp +776 -0
  51. data/ext/boost/regex/regex_traits.hpp +35 -0
  52. data/ext/boost/regex/user.hpp +93 -0
  53. data/ext/boost/regex/v4/basic_regex.hpp +782 -0
  54. data/ext/boost/regex/v4/basic_regex_creator.hpp +1571 -0
  55. data/ext/boost/regex/v4/basic_regex_parser.hpp +2874 -0
  56. data/ext/boost/regex/v4/c_regex_traits.hpp +211 -0
  57. data/ext/boost/regex/v4/char_regex_traits.hpp +81 -0
  58. data/ext/boost/regex/v4/cpp_regex_traits.hpp +1099 -0
  59. data/ext/boost/regex/v4/cregex.hpp +330 -0
  60. data/ext/boost/regex/v4/error_type.hpp +59 -0
  61. data/ext/boost/regex/v4/fileiter.hpp +455 -0
  62. data/ext/boost/regex/v4/instances.hpp +222 -0
  63. data/ext/boost/regex/v4/iterator_category.hpp +91 -0
  64. data/ext/boost/regex/v4/iterator_traits.hpp +135 -0
  65. data/ext/boost/regex/v4/match_flags.hpp +138 -0
  66. data/ext/boost/regex/v4/match_results.hpp +702 -0
  67. data/ext/boost/regex/v4/mem_block_cache.hpp +99 -0
  68. data/ext/boost/regex/v4/perl_matcher.hpp +587 -0
  69. data/ext/boost/regex/v4/perl_matcher_common.hpp +996 -0
  70. data/ext/boost/regex/v4/perl_matcher_non_recursive.hpp +1642 -0
  71. data/ext/boost/regex/v4/perl_matcher_recursive.hpp +991 -0
  72. data/ext/boost/regex/v4/primary_transform.hpp +146 -0
  73. data/ext/boost/regex/v4/protected_call.hpp +81 -0
  74. data/ext/boost/regex/v4/regbase.hpp +180 -0
  75. data/ext/boost/regex/v4/regex.hpp +202 -0
  76. data/ext/boost/regex/v4/regex_format.hpp +1156 -0
  77. data/ext/boost/regex/v4/regex_fwd.hpp +73 -0
  78. data/ext/boost/regex/v4/regex_grep.hpp +155 -0
  79. data/ext/boost/regex/v4/regex_iterator.hpp +201 -0
  80. data/ext/boost/regex/v4/regex_match.hpp +382 -0
  81. data/ext/boost/regex/v4/regex_merge.hpp +93 -0
  82. data/ext/boost/regex/v4/regex_raw_buffer.hpp +210 -0
  83. data/ext/boost/regex/v4/regex_replace.hpp +99 -0
  84. data/ext/boost/regex/v4/regex_search.hpp +217 -0
  85. data/ext/boost/regex/v4/regex_split.hpp +172 -0
  86. data/ext/boost/regex/v4/regex_token_iterator.hpp +342 -0
  87. data/ext/boost/regex/v4/regex_traits.hpp +189 -0
  88. data/ext/boost/regex/v4/regex_traits_defaults.hpp +371 -0
  89. data/ext/boost/regex/v4/regex_workaround.hpp +232 -0
  90. data/ext/boost/regex/v4/states.hpp +301 -0
  91. data/ext/boost/regex/v4/sub_match.hpp +512 -0
  92. data/ext/boost/regex/v4/syntax_type.hpp +105 -0
  93. data/ext/boost/regex/v4/u32regex_iterator.hpp +193 -0
  94. data/ext/boost/regex/v4/u32regex_token_iterator.hpp +377 -0
  95. data/ext/boost/regex/v4/w32_regex_traits.hpp +741 -0
  96. data/ext/boost/regex_fwd.hpp +33 -0
  97. data/ext/common/AgentsStarter.h +0 -11
  98. data/ext/common/ApplicationPool2/Common.h +1 -7
  99. data/ext/common/ApplicationPool2/DirectSpawner.h +3 -3
  100. data/ext/common/ApplicationPool2/Group.h +166 -69
  101. data/ext/common/ApplicationPool2/Implementation.cpp +55 -10
  102. data/ext/common/ApplicationPool2/Options.h +45 -10
  103. data/ext/common/ApplicationPool2/PipeWatcher.h +1 -2
  104. data/ext/common/ApplicationPool2/Pool.h +29 -7
  105. data/ext/common/ApplicationPool2/Process.h +22 -3
  106. data/ext/common/ApplicationPool2/Session.h +1 -0
  107. data/ext/common/ApplicationPool2/SmartSpawner.h +5 -10
  108. data/ext/common/ApplicationPool2/Spawner.h +10 -15
  109. data/ext/common/ApplicationPool2/SuperGroup.h +10 -9
  110. data/ext/common/Constants.h +1 -3
  111. data/ext/common/Hooks.h +193 -0
  112. data/ext/common/Logging.cpp +67 -2
  113. data/ext/common/Logging.h +23 -1
  114. data/ext/common/Utils.cpp +0 -21
  115. data/ext/common/Utils.h +0 -42
  116. data/ext/common/Utils/CachedFileStat.hpp +1 -1
  117. data/ext/common/Utils/StrIntUtils.h +61 -14
  118. data/ext/common/Utils/StringMap.h +4 -0
  119. data/ext/common/agents/HelperAgent/AgentOptions.h +4 -4
  120. data/ext/common/agents/HelperAgent/Main.cpp +2 -3
  121. data/ext/common/agents/HelperAgent/RequestHandler.h +65 -2
  122. data/ext/common/agents/LoggingAgent/FilterSupport.h +3 -1
  123. data/ext/common/agents/Watchdog/Main.cpp +8 -72
  124. data/ext/nginx/CacheLocationConfig.c +29 -1
  125. data/ext/nginx/Configuration.c +0 -12
  126. data/ext/nginx/Configuration.h +0 -1
  127. data/ext/nginx/ConfigurationCommands.c +10 -0
  128. data/ext/nginx/ConfigurationFields.h +2 -0
  129. data/ext/nginx/CreateLocationConfig.c +4 -0
  130. data/ext/nginx/MergeLocationConfig.c +6 -0
  131. data/ext/oxt/system_calls.cpp +7 -1
  132. data/ext/oxt/system_calls.hpp +7 -7
  133. data/helper-scripts/node-loader.js +6 -2
  134. data/helper-scripts/rack-loader.rb +5 -2
  135. data/helper-scripts/rack-preloader.rb +5 -2
  136. data/lib/phusion_passenger.rb +1 -1
  137. data/lib/phusion_passenger/apache2/config_options.rb +8 -0
  138. data/lib/phusion_passenger/constants.rb +0 -1
  139. data/lib/phusion_passenger/nginx/config_options.rb +9 -2
  140. data/lib/phusion_passenger/platform_info/apache.rb +2 -1
  141. data/lib/phusion_passenger/platform_info/compiler.rb +15 -1
  142. data/lib/phusion_passenger/platform_info/cxx_portability.rb +2 -0
  143. data/node_lib/phusion_passenger/httplib_emulation.js +85 -17
  144. data/node_lib/phusion_passenger/request_handler.js +10 -2
  145. data/rpm/Vagrantfile +32 -0
  146. data/rpm/get_distro_id.py +4 -0
  147. data/test/cxx/ApplicationPool2/DirectSpawnerTest.cpp +2 -2
  148. data/test/cxx/ApplicationPool2/PoolTest.cpp +60 -9
  149. data/test/cxx/ApplicationPool2/SmartSpawnerTest.cpp +2 -6
  150. data/test/cxx/CachedFileStatTest.cpp +5 -5
  151. data/test/cxx/RequestHandlerTest.cpp +3 -6
  152. data/test/cxx/UtilsTest.cpp +30 -0
  153. data/test/node/httplib_emulation_spec.js +491 -0
  154. data/test/node/spec_helper.js +25 -0
  155. metadata +78 -2
  156. metadata.gz.asc +7 -7
@@ -0,0 +1,2874 @@
1
+ /*
2
+ *
3
+ * Copyright (c) 2004
4
+ * John Maddock
5
+ *
6
+ * Use, modification and distribution are subject to the
7
+ * Boost Software License, Version 1.0. (See accompanying file
8
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
+ *
10
+ */
11
+
12
+ /*
13
+ * LOCATION: see http://www.boost.org for most recent version.
14
+ * FILE basic_regex_parser.hpp
15
+ * VERSION see <boost/version.hpp>
16
+ * DESCRIPTION: Declares template class basic_regex_parser.
17
+ */
18
+
19
+ #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20
+ #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
21
+
22
+ #ifdef BOOST_MSVC
23
+ #pragma warning(push)
24
+ #pragma warning(disable: 4103)
25
+ #endif
26
+ #ifdef BOOST_HAS_ABI_HEADERS
27
+ # include BOOST_ABI_PREFIX
28
+ #endif
29
+ #ifdef BOOST_MSVC
30
+ #pragma warning(pop)
31
+ #endif
32
+
33
+ namespace boost{
34
+ namespace re_detail{
35
+
36
+ #ifdef BOOST_MSVC
37
+ #pragma warning(push)
38
+ #pragma warning(disable:4244 4800)
39
+ #endif
40
+
41
+ template <class charT, class traits>
42
+ class basic_regex_parser : public basic_regex_creator<charT, traits> // regular-expression parser layered on basic_regex_creator; parse() picks one parse_* member and drives it via m_parser_proc
43
+ {
44
+ public:
45
+ basic_regex_parser(regex_data<charT, traits>* data); // forwards data to the basic_regex_creator base and zero-initialises the bookkeeping members
46
+ void parse(const charT* p1, const charT* p2, unsigned flags); // entry point: parses the character range [p1, p2) under the given syntax flags
47
+ void fail(regex_constants::error_type error_code, std::ptrdiff_t position); // reports an error using the traits-supplied message for error_code
48
+ void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos); // reports an error with a caller-supplied message and context start
49
+ void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message) // convenience overload: no explicit context start
50
+ {
51
+ fail(error_code, position, message, position); // start_pos == position makes the four-argument overload choose its own context window
52
+ }
53
+
54
+ bool parse_all(); // loops the selected parser until it returns false or the input is exhausted
55
+ bool parse_basic(); // per-character dispatcher selected for regbase::basic_syntax_group
56
+ bool parse_extended(); // per-character dispatcher selected for regbase::perl_syntax_group
57
+ bool parse_literal(); // also used directly as the parser for regbase::literal
58
+ bool parse_open_paren();
59
+ bool parse_basic_escape();
60
+ bool parse_extended_escape();
61
+ bool parse_match_any();
62
+ bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)()); // callers in this file pass (), (1) and (0, 1) for '*', '+' and '?'
63
+ bool parse_repeat_range(bool isbasic);
64
+ bool parse_alt();
65
+ bool parse_set();
66
+ bool parse_backref();
67
+ void parse_set_literal(basic_char_set<charT, traits>& char_set);
68
+ bool parse_inner_set(basic_char_set<charT, traits>& char_set);
69
+ bool parse_QE();
70
+ bool parse_perl_extension();
71
+ bool add_emacs_code(bool negate);
72
+ bool unwind_alts(std::ptrdiff_t last_paren_start);
73
+ digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
74
+ charT unescape_character();
75
+ regex_constants::syntax_option_type parse_options();
76
+
77
+ private:
78
+ typedef bool (basic_regex_parser::*parser_proc_type)(); // pointer to one of the no-argument, bool-returning parse_* members
79
+ typedef typename traits::string_type string_type;
80
+ typedef typename traits::char_class_type char_class_type;
81
+ parser_proc_type m_parser_proc; // the main parser to use (assigned in parse(), not in the constructor)
82
+ const charT* m_base; // the start of the string being parsed
83
+ const charT* m_end; // the end of the string being parsed
84
+ const charT* m_position; // our current parser position
85
+ unsigned m_mark_count; // how many sub-expressions we have
86
+ int m_mark_reset; // used to indicate that we're inside a (?|...) block; -1 otherwise.
87
+ unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
88
+ std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
89
+ std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
90
+ bool m_has_case_change; // true if somewhere in the current block the case has changed
91
+ #if defined(BOOST_MSVC) && defined(_M_IX86)
92
+ // This is an ugly warning suppression workaround (for warnings *inside* std::vector
93
+ // that can not otherwise be suppressed)...
94
+ BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
95
+ std::vector<long> m_alt_jumps; // list of alternatives in the current scope.
96
+ #else
97
+ std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternatives in the current scope.
98
+ #endif
99
+
100
+ basic_regex_parser& operator=(const basic_regex_parser&); // private; no definition is visible in this part of the file - presumably declared only to forbid copying (confirm)
101
+ basic_regex_parser(const basic_regex_parser&); // private; same remark as for operator= above
102
+ };
103
+
104
+ template <class charT, class traits>
105
+ basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data) // constructs the parser around the regex_data block handed to the base class
106
+ : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false) // m_parser_proc, m_base, m_end and m_position are deliberately absent here; parse() assigns them
107
+ {
108
+ }
109
+
110
+ template <class charT, class traits>
111
+ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags) // entry point: parses [p1, p2) with the syntax selected by l_flags; errors are reported through fail()
112
+ {
113
+ // pass l_flags on to base class:
114
+ this->init(l_flags);
115
+ // set up pointers:
116
+ m_position = m_base = p1;
117
+ m_end = p2;
118
+ // an empty expression is an error unless Perl syntax is selected and no_empty_expressions is not set:
119
+ if((p1 == p2) &&
120
+ (
121
+ ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
122
+ || (l_flags & regbase::no_empty_expressions)
123
+ )
124
+ )
125
+ {
126
+ fail(regex_constants::error_empty, 0);
127
+ return;
128
+ }
129
+ // select which parser to use:
130
+ switch(l_flags & regbase::main_option_type)
131
+ {
132
+ case regbase::perl_syntax_group:
133
+ {
134
+ m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
135
+ //
136
+ // Add a leading paren with index zero to give recursions a target:
137
+ //
138
+ re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
139
+ br->index = 0;
140
+ br->icase = this->flags() & regbase::icase;
141
+ break;
142
+ }
143
+ case regbase::basic_syntax_group:
144
+ m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
145
+ break;
146
+ case regbase::literal:
147
+ m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
148
+ break;
149
+ default:
150
+ // Ooops, someone has managed to set more than one of the main option flags,
151
+ // so this must be an error:
152
+ fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
153
+ return;
154
+ }
155
+
156
+ // parse all our characters:
157
+ bool result = parse_all();
158
+ //
159
+ // Unwind our alternatives:
160
+ //
161
+ unwind_alts(-1);
162
+ // reset l_flags as a global scope (?imsx) may have altered them:
163
+ this->flags(l_flags);
164
+ // a false result here means the selected parser stopped early; at this
165
+ // (outermost) level that is treated as an unexpected ')' :
166
+ if(!result)
167
+ {
168
+ fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
169
+ return;
170
+ }
171
+ // if an error has been set then give up now:
172
+ if(this->m_pdata->m_status)
173
+ return;
174
+ // fill in our sub-expression count (the extra 1 is added on top of the groups counted in m_mark_count):
175
+ this->m_pdata->m_mark_count = 1 + m_mark_count;
176
+ this->finalize(p1, p2);
177
+ }
178
+
179
+ template <class charT, class traits>
180
+ void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position) // reports error_code at the given offset using the default message text
181
+ {
182
+ // get the error message from the traits object:
183
+ std::string message = this->m_pdata->m_ptraits->error_string(error_code);
184
+ fail(error_code, position, message); // resolves to the in-class three-argument overload, which forwards with start_pos == position
185
+ }
186
+
187
+ template <class charT, class traits>
188
+ void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos) // records the error, stops parsing, decorates the message and (unless disabled) raises regex_error
189
+ {
190
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
191
+ this->m_pdata->m_status = error_code;
192
+ m_position = m_end; // don't bother parsing anything else
193
+
194
+ #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
195
+ //
196
+ // Augment error message with the regular expression text:
197
+ //
198
+ if(start_pos == position) // no explicit context start given: show up to 10 characters before the error
199
+ start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
200
+ std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base)); // and up to 10 characters after it, clamped to the end of the expression
201
+ if(error_code != regex_constants::error_empty)
202
+ {
203
+ if((start_pos != 0) || (end_pos != (m_end - m_base))) // the window does not cover the whole expression
204
+ message += " The error occurred while parsing the regular expression fragment: '";
205
+ else
206
+ message += " The error occurred while parsing the regular expression: '";
207
+ if(start_pos != end_pos)
208
+ {
209
+ message += std::string(m_base + start_pos, m_base + position); // text before the error position
210
+ message += ">>>HERE>>>";
211
+ message += std::string(m_base + position, m_base + end_pos); // text from the error position onwards
212
+ }
213
+ message += "'.";
214
+ }
215
+ #endif
216
+
217
+ #ifndef BOOST_NO_EXCEPTIONS
218
+ if(0 == (this->flags() & regex_constants::no_except)) // with no_except set the error is only recorded in m_status
219
+ {
220
+ boost::regex_error e(message, error_code, position);
221
+ e.raise();
222
+ }
223
+ #else
224
+ (void)position; // suppress warnings.
225
+ #endif
226
+ }
227
+
228
+ template <class charT, class traits>
229
+ bool basic_regex_parser<charT, traits>::parse_all() // drives the parser selected in parse() over the remaining input
230
+ {
231
+ bool result = true; // stays true if there is nothing left to parse
232
+ while(result && (m_position != m_end)) // stop at end of input or as soon as one step returns false
233
+ {
234
+ result = (this->*m_parser_proc)(); // one step of parse_extended, parse_basic or parse_literal
235
+ }
236
+ return result; // false means a step asked to stop before the end of input
237
+ }
238
+
239
+ #ifdef BOOST_MSVC
240
+ #pragma warning(push)
241
+ #pragma warning(disable:4702)
242
+ #endif
243
+ template <class charT, class traits>
244
+ bool basic_regex_parser<charT, traits>::parse_basic() // one parsing step for the basic syntax group: dispatches on the syntax type of the current character
245
+ {
246
+ switch(this->m_traits.syntax_type(*m_position))
247
+ {
248
+ case regex_constants::syntax_escape:
249
+ return parse_basic_escape();
250
+ case regex_constants::syntax_dot:
251
+ return parse_match_any();
252
+ case regex_constants::syntax_caret:
253
+ ++m_position;
254
+ this->append_state(syntax_element_start_line);
255
+ break;
256
+ case regex_constants::syntax_dollar:
257
+ ++m_position;
258
+ this->append_state(syntax_element_end_line);
259
+ break;
260
+ case regex_constants::syntax_star:
261
+ if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line)) // no previous state, or directly after a start-of-line state: taken literally
262
+ return parse_literal();
263
+ else
264
+ {
265
+ ++m_position;
266
+ return parse_repeat();
267
+ }
268
+ case regex_constants::syntax_plus:
269
+ if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) // as for the star case, and additionally only a repeat when emacs_ex is set
270
+ return parse_literal();
271
+ else
272
+ {
273
+ ++m_position;
274
+ return parse_repeat(1);
275
+ }
276
+ case regex_constants::syntax_question:
277
+ if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) // same rule as for the plus case
278
+ return parse_literal();
279
+ else
280
+ {
281
+ ++m_position;
282
+ return parse_repeat(0, 1);
283
+ }
284
+ case regex_constants::syntax_open_set:
285
+ return parse_set();
286
+ case regex_constants::syntax_newline:
287
+ if(this->flags() & regbase::newline_alt) // newline acts as an alternation operator only when newline_alt is set
288
+ return parse_alt();
289
+ else
290
+ return parse_literal();
291
+ default:
292
+ return parse_literal();
293
+ }
294
+ return true; // reached only from the caret and dollar cases, which break out of the switch
295
+ }
296
+
297
+ template <class charT, class traits>
298
+ bool basic_regex_parser<charT, traits>::parse_extended() // one parsing step for the Perl syntax group: dispatches on the syntax type of the current character
299
+ {
300
+ bool result = true;
301
+ switch(this->m_traits.syntax_type(*m_position))
302
+ {
303
+ case regex_constants::syntax_open_mark:
304
+ return parse_open_paren();
305
+ case regex_constants::syntax_close_mark:
306
+ return false; // stops parse_all() with m_position left on the ')'; the caller decides whether it was expected
307
+ case regex_constants::syntax_escape:
308
+ return parse_extended_escape();
309
+ case regex_constants::syntax_dot:
310
+ return parse_match_any();
311
+ case regex_constants::syntax_caret:
312
+ ++m_position;
313
+ this->append_state(
314
+ (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line)); // with no_mod_m set the caret becomes a buffer-start state instead of a line-start state
315
+ break;
316
+ case regex_constants::syntax_dollar:
317
+ ++m_position;
318
+ this->append_state(
319
+ (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line)); // with no_mod_m set the dollar becomes a buffer-end state instead of a line-end state
320
+ break;
321
+ case regex_constants::syntax_star:
322
+ if(m_position == this->m_base) // a repeat operator as the very first character has nothing to repeat
323
+ {
324
+ fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
325
+ return false;
326
+ }
327
+ ++m_position;
328
+ return parse_repeat();
329
+ case regex_constants::syntax_question:
330
+ if(m_position == this->m_base)
331
+ {
332
+ fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
333
+ return false;
334
+ }
335
+ ++m_position;
336
+ return parse_repeat(0,1);
337
+ case regex_constants::syntax_plus:
338
+ if(m_position == this->m_base)
339
+ {
340
+ fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
341
+ return false;
342
+ }
343
+ ++m_position;
344
+ return parse_repeat(1);
345
+ case regex_constants::syntax_open_brace:
346
+ ++m_position;
347
+ return parse_repeat_range(false);
348
+ case regex_constants::syntax_close_brace:
349
+ fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
350
+ return false;
351
+ case regex_constants::syntax_or:
352
+ return parse_alt();
353
+ case regex_constants::syntax_open_set:
354
+ return parse_set();
355
+ case regex_constants::syntax_newline:
356
+ if(this->flags() & regbase::newline_alt)
357
+ return parse_alt();
358
+ else
359
+ return parse_literal();
360
+ case regex_constants::syntax_hash:
361
+ //
362
+ // If we have a mod_x flag set (and no_perl_ex clear), then skip until
363
+ // we get to a newline character:
364
+ //
365
+ if((this->flags()
366
+ & (regbase::no_perl_ex|regbase::mod_x))
367
+ == regbase::mod_x)
368
+ {
369
+ while((m_position != m_end) && !is_separator(*m_position++)){} // the post-increment also consumes the separator that ends the skipped text
370
+ return true;
371
+ }
372
+ BOOST_FALLTHROUGH;
373
+ default:
374
+ result = parse_literal();
375
+ break;
376
+ }
377
+ return result;
378
+ }
379
+ #ifdef BOOST_MSVC
380
+ #pragma warning(pop)
381
+ #endif
382
+
383
+ template <class charT, class traits>
384
+ bool basic_regex_parser<charT, traits>::parse_literal() // consumes one character, normally appending it as a literal; always returns true
385
+ {
386
+ // append this as a literal provided it's not a space character
387
+ // or the perl option regbase::mod_x is not set.
388
+ if(
389
+ ((this->flags()
390
+ & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
391
+ != regbase::mod_x) // i.e. the character is skipped only when, of these bits, exactly mod_x is set ...
392
+ || !this->m_traits.isctype(*m_position, this->m_mask_space)) // ... and the character is classified as space
393
+ this->append_literal(*m_position);
394
+ ++m_position; // the character is consumed whether or not it was appended
395
+ return true;
396
+ }
397
+
398
+ template <class charT, class traits>
399
+ bool basic_regex_parser<charT, traits>::parse_open_paren() // parses a parenthesised group; on entry m_position is on the opening parenthesis, on success it is just past the closing one
400
+ {
401
+ //
402
+ // skip the '(' and error check:
403
+ //
404
+ if(++m_position == m_end)
405
+ {
406
+ fail(regex_constants::error_paren, m_position - m_base);
407
+ return false;
408
+ }
409
+ //
410
+ // begin by checking for a perl-style (?...) extension:
411
+ //
412
+ if(
413
+ ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
414
+ || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
415
+ )
416
+ {
417
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
418
+ return parse_perl_extension();
419
+ }
420
+ //
421
+ // update our mark count, and append the required state:
422
+ //
423
+ unsigned markid = 0; // stays 0 when regbase::nosubs is set
424
+ if(0 == (this->flags() & regbase::nosubs))
425
+ {
426
+ markid = ++m_mark_count;
427
+ #ifndef BOOST_NO_STD_DISTANCE
428
+ if(this->flags() & regbase::save_subexpression_location)
429
+ this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0)); // the - 1 gives the offset of the '(' itself; the second member is filled in at the ')'
430
+ #else
431
+ if(this->flags() & regbase::save_subexpression_location)
432
+ this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
433
+ #endif
434
+ }
435
+ re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
436
+ pb->index = markid;
437
+ pb->icase = this->flags() & regbase::icase;
438
+ std::ptrdiff_t last_paren_start = this->getoffset(pb); // remembered as an offset; pb itself is reassigned further down
439
+ // back up insertion point for alternations, and set new point:
440
+ std::ptrdiff_t last_alt_point = m_alt_insert_point;
441
+ this->m_pdata->m_data.align();
442
+ m_alt_insert_point = this->m_pdata->m_data.size();
443
+ //
444
+ // back up the current flags in case we have a nested (?imsx) group:
445
+ //
446
+ regex_constants::syntax_option_type opts = this->flags();
447
+ bool old_case_change = m_has_case_change;
448
+ m_has_case_change = false; // no changes to this scope as yet...
449
+ //
450
+ // Back up branch reset data in case we have a nested (?|...)
451
+ //
452
+ int mark_reset = m_mark_reset;
453
+ m_mark_reset = -1;
454
+ //
455
+ // now recursively add more states, this will terminate when we get to a
456
+ // matching ')' :
457
+ //
458
+ parse_all(); // result deliberately unused: the checks below look at m_position instead
459
+ //
460
+ // Unwind pushed alternatives:
461
+ //
462
+ if(0 == unwind_alts(last_paren_start))
463
+ return false;
464
+ //
465
+ // restore flags:
466
+ //
467
+ if(m_has_case_change)
468
+ {
469
+ // the case has changed in one or more of the alternatives
470
+ // within the scoped (...) block: we have to add a state
471
+ // to reset the case sensitivity:
472
+ static_cast<re_case*>(
473
+ this->append_state(syntax_element_toggle_case, sizeof(re_case))
474
+ )->icase = opts & regbase::icase;
475
+ }
476
+ this->flags(opts);
477
+ m_has_case_change = old_case_change;
478
+ //
479
+ // restore branch reset:
480
+ //
481
+ m_mark_reset = mark_reset;
482
+ //
483
+ // we either have a ')' or we have run out of characters prematurely:
484
+ //
485
+ if(m_position == m_end)
486
+ {
487
+ this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
488
+ return false;
489
+ }
490
+ BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
491
+ #ifndef BOOST_NO_STD_DISTANCE
492
+ if(markid && (this->flags() & regbase::save_subexpression_location))
493
+ this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position); // offset of the ')' completes the pair pushed above
494
+ #else
495
+ if(markid && (this->flags() & regbase::save_subexpression_location))
496
+ this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
497
+ #endif
498
+ ++m_position; // step over the ')'
499
+ //
500
+ // append closing parenthesis state:
501
+ //
502
+ pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
503
+ pb->index = markid;
504
+ pb->icase = this->flags() & regbase::icase;
505
+ this->m_paren_start = last_paren_start;
506
+ //
507
+ // restore the alternate insertion point:
508
+ //
509
+ this->m_alt_insert_point = last_alt_point;
510
+ //
511
+ // allow backrefs to this mark:
512
+ //
513
+ if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT)) // only marks whose bit fits in an unsigned are recorded in the m_backrefs bitmask
514
+ this->m_backrefs |= 1u << (markid - 1);
515
+
516
+ return true;
517
+ }
518
+
519
+ template <class charT, class traits>
520
+ bool basic_regex_parser<charT, traits>::parse_basic_escape() // handles the character following an escape in the basic syntax group; on entry m_position is on the escape character
521
+ {
522
+ if(++m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found."); return false; } // an escape that ends the expression has nothing to escape: report it (same check and message as parse_extended_escape) instead of dereferencing m_end below
523
+ bool result = true;
524
+ switch(this->m_traits.escape_syntax_type(*m_position))
525
+ {
526
+ case regex_constants::syntax_open_mark:
527
+ return parse_open_paren();
528
+ case regex_constants::syntax_close_mark:
529
+ return false; // stops parse_all(); the caller decides whether the closing mark was expected
530
+ case regex_constants::syntax_plus:
531
+ if(this->flags() & regex_constants::bk_plus_qm) // escaped plus is a repeat operator only when bk_plus_qm is set
532
+ {
533
+ ++m_position;
534
+ return parse_repeat(1);
535
+ }
536
+ else
537
+ return parse_literal();
538
+ case regex_constants::syntax_question:
539
+ if(this->flags() & regex_constants::bk_plus_qm) // escaped question mark likewise
540
+ {
541
+ ++m_position;
542
+ return parse_repeat(0, 1);
543
+ }
544
+ else
545
+ return parse_literal();
546
+ case regex_constants::syntax_open_brace:
547
+ if(this->flags() & regbase::no_intervals) // interval syntax disabled: the brace is an ordinary character
548
+ return parse_literal();
549
+ ++m_position;
550
+ return parse_repeat_range(true);
551
+ case regex_constants::syntax_close_brace:
552
+ if(this->flags() & regbase::no_intervals)
553
+ return parse_literal();
554
+ fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
555
+ return false;
556
+ case regex_constants::syntax_or:
557
+ if(this->flags() & regbase::bk_vbar) // escaped vertical bar is alternation only when bk_vbar is set
558
+ return parse_alt();
559
+ else
560
+ result = parse_literal();
561
+ break;
562
+ case regex_constants::syntax_digit:
563
+ return parse_backref();
564
+ case regex_constants::escape_type_start_buffer: // this and the five cases below are honoured only with emacs_ex; otherwise the character is a literal
565
+ if(this->flags() & regbase::emacs_ex)
566
+ {
567
+ ++m_position;
568
+ this->append_state(syntax_element_buffer_start);
569
+ }
570
+ else
571
+ result = parse_literal();
572
+ break;
573
+ case regex_constants::escape_type_end_buffer:
574
+ if(this->flags() & regbase::emacs_ex)
575
+ {
576
+ ++m_position;
577
+ this->append_state(syntax_element_buffer_end);
578
+ }
579
+ else
580
+ result = parse_literal();
581
+ break;
582
+ case regex_constants::escape_type_word_assert:
583
+ if(this->flags() & regbase::emacs_ex)
584
+ {
585
+ ++m_position;
586
+ this->append_state(syntax_element_word_boundary);
587
+ }
588
+ else
589
+ result = parse_literal();
590
+ break;
591
+ case regex_constants::escape_type_not_word_assert:
592
+ if(this->flags() & regbase::emacs_ex)
593
+ {
594
+ ++m_position;
595
+ this->append_state(syntax_element_within_word);
596
+ }
597
+ else
598
+ result = parse_literal();
599
+ break;
600
+ case regex_constants::escape_type_left_word:
601
+ if(this->flags() & regbase::emacs_ex)
602
+ {
603
+ ++m_position;
604
+ this->append_state(syntax_element_word_start);
605
+ }
606
+ else
607
+ result = parse_literal();
608
+ break;
609
+ case regex_constants::escape_type_right_word:
610
+ if(this->flags() & regbase::emacs_ex)
611
+ {
612
+ ++m_position;
613
+ this->append_state(syntax_element_word_end);
614
+ }
615
+ else
616
+ result = parse_literal();
617
+ break;
618
+ default:
619
+ if(this->flags() & regbase::emacs_ex)
620
+ {
621
+ bool negate = true; // the upper-case forms negate; the lower-case cases clear this before falling through
622
+ switch(*m_position)
623
+ {
624
+ case 'w':
625
+ negate = false;
626
+ BOOST_FALLTHROUGH;
627
+ case 'W':
628
+ {
629
+ basic_char_set<charT, traits> char_set;
630
+ if(negate)
631
+ char_set.negate();
632
+ char_set.add_class(this->m_word_mask);
633
+ if(0 == this->append_set(char_set))
634
+ {
635
+ fail(regex_constants::error_ctype, m_position - m_base);
636
+ return false;
637
+ }
638
+ ++m_position;
639
+ return true;
640
+ }
641
+ case 's':
642
+ negate = false;
643
+ BOOST_FALLTHROUGH;
644
+ case 'S':
645
+ return add_emacs_code(negate);
646
+ case 'c':
647
+ case 'C':
648
+ // not supported yet:
649
+ fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
650
+ return false;
651
+ default:
652
+ break;
653
+ }
654
+ }
655
+ result = parse_literal(); // any other escaped character is taken as itself
656
+ break;
657
+ }
658
+ return result;
659
+ }
660
+
661
+ template <class charT, class traits>
662
+ bool basic_regex_parser<charT, traits>::parse_extended_escape()
663
+ {
664
+ ++m_position;
665
+ if(m_position == m_end)
666
+ {
667
+ fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
668
+ return false;
669
+ }
670
+ bool negate = false; // in case this is a character class escape: \w \d etc
671
+ switch(this->m_traits.escape_syntax_type(*m_position))
672
+ {
673
+ case regex_constants::escape_type_not_class:
674
+ negate = true;
675
+ BOOST_FALLTHROUGH;
676
+ case regex_constants::escape_type_class:
677
+ {
678
+ escape_type_class_jump:
679
+ typedef typename traits::char_class_type m_type;
680
+ m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
681
+ if(m != 0)
682
+ {
683
+ basic_char_set<charT, traits> char_set;
684
+ if(negate)
685
+ char_set.negate();
686
+ char_set.add_class(m);
687
+ if(0 == this->append_set(char_set))
688
+ {
689
+ fail(regex_constants::error_ctype, m_position - m_base);
690
+ return false;
691
+ }
692
+ ++m_position;
693
+ return true;
694
+ }
695
+ //
696
+ // not a class, just a regular unknown escape:
697
+ //
698
+ this->append_literal(unescape_character());
699
+ break;
700
+ }
701
+ case regex_constants::syntax_digit:
702
+ return parse_backref();
703
+ case regex_constants::escape_type_left_word:
704
+ ++m_position;
705
+ this->append_state(syntax_element_word_start);
706
+ break;
707
+ case regex_constants::escape_type_right_word:
708
+ ++m_position;
709
+ this->append_state(syntax_element_word_end);
710
+ break;
711
+ case regex_constants::escape_type_start_buffer:
712
+ ++m_position;
713
+ this->append_state(syntax_element_buffer_start);
714
+ break;
715
+ case regex_constants::escape_type_end_buffer:
716
+ ++m_position;
717
+ this->append_state(syntax_element_buffer_end);
718
+ break;
719
+ case regex_constants::escape_type_word_assert:
720
+ ++m_position;
721
+ this->append_state(syntax_element_word_boundary);
722
+ break;
723
+ case regex_constants::escape_type_not_word_assert:
724
+ ++m_position;
725
+ this->append_state(syntax_element_within_word);
726
+ break;
727
+ case regex_constants::escape_type_Z:
728
+ ++m_position;
729
+ this->append_state(syntax_element_soft_buffer_end);
730
+ break;
731
+ case regex_constants::escape_type_Q:
732
+ return parse_QE();
733
+ case regex_constants::escape_type_C:
734
+ return parse_match_any();
735
+ case regex_constants::escape_type_X:
736
+ ++m_position;
737
+ this->append_state(syntax_element_combining);
738
+ break;
739
+ case regex_constants::escape_type_G:
740
+ ++m_position;
741
+ this->append_state(syntax_element_restart_continue);
742
+ break;
743
+ case regex_constants::escape_type_not_property:
744
+ negate = true;
745
+ BOOST_FALLTHROUGH;
746
+ case regex_constants::escape_type_property:
747
+ {
748
+ ++m_position;
749
+ char_class_type m;
750
+ if(m_position == m_end)
751
+ {
752
+ fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
753
+ return false;
754
+ }
755
+ // maybe have \p{ddd}
756
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
757
+ {
758
+ const charT* base = m_position;
759
+ // skip forward until we find enclosing brace:
760
+ while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
761
+ ++m_position;
762
+ if(m_position == m_end)
763
+ {
764
+ fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
765
+ return false;
766
+ }
767
+ m = this->m_traits.lookup_classname(++base, m_position++);
768
+ }
769
+ else
770
+ {
771
+ m = this->m_traits.lookup_classname(m_position, m_position+1);
772
+ ++m_position;
773
+ }
774
+ if(m != 0)
775
+ {
776
+ basic_char_set<charT, traits> char_set;
777
+ if(negate)
778
+ char_set.negate();
779
+ char_set.add_class(m);
780
+ if(0 == this->append_set(char_set))
781
+ {
782
+ fail(regex_constants::error_ctype, m_position - m_base);
783
+ return false;
784
+ }
785
+ return true;
786
+ }
787
+ fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
788
+ return false;
789
+ }
790
+ case regex_constants::escape_type_reset_start_mark:
791
+ if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
792
+ {
793
+ re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
794
+ pb->index = -5;
795
+ pb->icase = this->flags() & regbase::icase;
796
+ this->m_pdata->m_data.align();
797
+ ++m_position;
798
+ return true;
799
+ }
800
+ goto escape_type_class_jump;
801
+ case regex_constants::escape_type_line_ending:
802
+ if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
803
+ {
804
+ const charT* e = get_escape_R_string<charT>();
805
+ const charT* old_position = m_position;
806
+ const charT* old_end = m_end;
807
+ const charT* old_base = m_base;
808
+ m_position = e;
809
+ m_base = e;
810
+ m_end = e + traits::length(e);
811
+ bool r = parse_all();
812
+ m_position = ++old_position;
813
+ m_end = old_end;
814
+ m_base = old_base;
815
+ return r;
816
+ }
817
+ goto escape_type_class_jump;
818
+ case regex_constants::escape_type_extended_backref:
819
+ if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
820
+ {
821
+ bool have_brace = false;
822
+ bool negative = false;
823
+ static const char* incomplete_message = "Incomplete \\g escape found.";
824
+ if(++m_position == m_end)
825
+ {
826
+ fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
827
+ return false;
828
+ }
829
+ // maybe have \g{ddd}
830
+ regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
831
+ regex_constants::syntax_type syn_end = 0;
832
+ if((syn == regex_constants::syntax_open_brace)
833
+ || (syn == regex_constants::escape_type_left_word)
834
+ || (syn == regex_constants::escape_type_end_buffer))
835
+ {
836
+ if(++m_position == m_end)
837
+ {
838
+ fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
839
+ return false;
840
+ }
841
+ have_brace = true;
842
+ switch(syn)
843
+ {
844
+ case regex_constants::syntax_open_brace:
845
+ syn_end = regex_constants::syntax_close_brace;
846
+ break;
847
+ case regex_constants::escape_type_left_word:
848
+ syn_end = regex_constants::escape_type_right_word;
849
+ break;
850
+ default:
851
+ syn_end = regex_constants::escape_type_end_buffer;
852
+ break;
853
+ }
854
+ }
855
+ negative = (*m_position == static_cast<charT>('-'));
856
+ if((negative) && (++m_position == m_end))
857
+ {
858
+ fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
859
+ return false;
860
+ }
861
+ const charT* pc = m_position;
862
+ int i = this->m_traits.toi(pc, m_end, 10);
863
+ if((i < 0) && syn_end)
864
+ {
865
+ // Check for a named capture, get the leftmost one if there is more than one:
866
+ const charT* base = m_position;
867
+ while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
868
+ {
869
+ ++m_position;
870
+ }
871
+ i = hash_value_from_capture_name(base, m_position);
872
+ pc = m_position;
873
+ }
874
+ if(negative)
875
+ i = 1 + m_mark_count - i;
876
+ if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
877
+ {
878
+ m_position = pc;
879
+ re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
880
+ pb->index = i;
881
+ pb->icase = this->flags() & regbase::icase;
882
+ }
883
+ else
884
+ {
885
+ fail(regex_constants::error_backref, m_position - m_base);
886
+ return false;
887
+ }
888
+ m_position = pc;
889
+ if(have_brace)
890
+ {
891
+ if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
892
+ {
893
+ fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
894
+ return false;
895
+ }
896
+ ++m_position;
897
+ }
898
+ return true;
899
+ }
900
+ goto escape_type_class_jump;
901
+ case regex_constants::escape_type_control_v:
902
+ if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
903
+ goto escape_type_class_jump;
904
+ BOOST_FALLTHROUGH;
905
+ default:
906
+ this->append_literal(unescape_character());
907
+ break;
908
+ }
909
+ return true;
910
+ }
911
+
912
+ template <class charT, class traits>
913
+ bool basic_regex_parser<charT, traits>::parse_match_any()
914
+ {
915
+ //
916
+ // we have a '.' that can match any character:
917
+ //
918
+ ++m_position;
919
+ static_cast<re_dot*>(
920
+ this->append_state(syntax_element_wild, sizeof(re_dot))
921
+ )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
922
+ ? re_detail::force_not_newline
923
+ : this->flags() & regbase::mod_s ?
924
+ re_detail::force_newline : re_detail::dont_care);
925
+ return true;
926
+ }
927
+
928
+ template <class charT, class traits>
929
+ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
930
+ {
931
+ bool greedy = true;
932
+ bool pocessive = false;
933
+ std::size_t insert_point;
934
+ //
935
+ // when we get to here we may have a non-greedy ? mark still to come:
936
+ //
937
+ if((m_position != m_end)
938
+ && (
939
+ (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
940
+ || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
941
+ )
942
+ )
943
+ {
944
+ // OK we have a perl or emacs regex, check for a '?':
945
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
946
+ {
947
+ greedy = false;
948
+ ++m_position;
949
+ }
950
+ // for perl regexes only check for pocessive ++ repeats.
951
+ if((m_position != m_end)
952
+ && (0 == (this->flags() & regbase::main_option_type))
953
+ && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
954
+ {
955
+ pocessive = true;
956
+ ++m_position;
957
+ }
958
+ }
959
+ if(0 == this->m_last_state)
960
+ {
961
+ fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position), "Nothing to repeat.");
962
+ return false;
963
+ }
964
+ if(this->m_last_state->type == syntax_element_endmark)
965
+ {
966
+ // insert a repeat before the '(' matching the last ')':
967
+ insert_point = this->m_paren_start;
968
+ }
969
+ else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
970
+ {
971
+ // the last state was a literal with more than one character, split it in two:
972
+ re_literal* lit = static_cast<re_literal*>(this->m_last_state);
973
+ charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
974
+ --(lit->length);
975
+ // now append new state:
976
+ lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
977
+ lit->length = 1;
978
+ (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
979
+ insert_point = this->getoffset(this->m_last_state);
980
+ }
981
+ else
982
+ {
983
+ // repeat the last state whatever it was, need to add some error checking here:
984
+ switch(this->m_last_state->type)
985
+ {
986
+ case syntax_element_start_line:
987
+ case syntax_element_end_line:
988
+ case syntax_element_word_boundary:
989
+ case syntax_element_within_word:
990
+ case syntax_element_word_start:
991
+ case syntax_element_word_end:
992
+ case syntax_element_buffer_start:
993
+ case syntax_element_buffer_end:
994
+ case syntax_element_alt:
995
+ case syntax_element_soft_buffer_end:
996
+ case syntax_element_restart_continue:
997
+ case syntax_element_jump:
998
+ case syntax_element_startmark:
999
+ case syntax_element_backstep:
1000
+ // can't legally repeat any of the above:
1001
+ fail(regex_constants::error_badrepeat, m_position - m_base);
1002
+ return false;
1003
+ default:
1004
+ // do nothing...
1005
+ break;
1006
+ }
1007
+ insert_point = this->getoffset(this->m_last_state);
1008
+ }
1009
+ //
1010
+ // OK we now know what to repeat, so insert the repeat around it:
1011
+ //
1012
+ re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1013
+ rep->min = low;
1014
+ rep->max = high;
1015
+ rep->greedy = greedy;
1016
+ rep->leading = false;
1017
+ // store our repeater position for later:
1018
+ std::ptrdiff_t rep_off = this->getoffset(rep);
1019
+ // and append a back jump to the repeat:
1020
+ re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1021
+ jmp->alt.i = rep_off - this->getoffset(jmp);
1022
+ this->m_pdata->m_data.align();
1023
+ // now fill in the alt jump for the repeat:
1024
+ rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1025
+ rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1026
+ //
1027
+ // If the repeat is pocessive then bracket the repeat with a (?>...)
1028
+ // independent sub-expression construct:
1029
+ //
1030
+ if(pocessive)
1031
+ {
1032
+ if(m_position != m_end)
1033
+ {
1034
+ //
1035
+ // Check for illegal following quantifier, we have to do this here, because
1036
+ // the extra states we insert below circumvents our usual error checking :-(
1037
+ //
1038
+ switch(this->m_traits.syntax_type(*m_position))
1039
+ {
1040
+ case regex_constants::syntax_star:
1041
+ case regex_constants::syntax_plus:
1042
+ case regex_constants::syntax_question:
1043
+ case regex_constants::syntax_open_brace:
1044
+ fail(regex_constants::error_badrepeat, m_position - m_base);
1045
+ return false;
1046
+ }
1047
+ }
1048
+ re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1049
+ pb->index = -3;
1050
+ pb->icase = this->flags() & regbase::icase;
1051
+ jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1052
+ this->m_pdata->m_data.align();
1053
+ jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1054
+ pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1055
+ pb->index = -3;
1056
+ pb->icase = this->flags() & regbase::icase;
1057
+ }
1058
+ return true;
1059
+ }
1060
+
1061
+ template <class charT, class traits>
1062
+ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1063
+ {
1064
+ static const char* incomplete_message = "Missing } in quantified repetition.";
1065
+ //
1066
+ // parse a repeat-range:
1067
+ //
1068
+ std::size_t min, max;
1069
+ int v;
1070
+ // skip whitespace:
1071
+ while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1072
+ ++m_position;
1073
+ if(this->m_position == this->m_end)
1074
+ {
1075
+ if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1076
+ {
1077
+ fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1078
+ return false;
1079
+ }
1080
+ // Treat the opening '{' as a literal character, rewind to start of error:
1081
+ --m_position;
1082
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1083
+ return parse_literal();
1084
+ }
1085
+ // get min:
1086
+ v = this->m_traits.toi(m_position, m_end, 10);
1087
+ // skip whitespace:
1088
+ if(v < 0)
1089
+ {
1090
+ if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1091
+ {
1092
+ fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1093
+ return false;
1094
+ }
1095
+ // Treat the opening '{' as a literal character, rewind to start of error:
1096
+ --m_position;
1097
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1098
+ return parse_literal();
1099
+ }
1100
+ while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1101
+ ++m_position;
1102
+ if(this->m_position == this->m_end)
1103
+ {
1104
+ if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1105
+ {
1106
+ fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1107
+ return false;
1108
+ }
1109
+ // Treat the opening '{' as a literal character, rewind to start of error:
1110
+ --m_position;
1111
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1112
+ return parse_literal();
1113
+ }
1114
+ min = v;
1115
+ // see if we have a comma:
1116
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1117
+ {
1118
+ // move on and error check:
1119
+ ++m_position;
1120
+ // skip whitespace:
1121
+ while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1122
+ ++m_position;
1123
+ if(this->m_position == this->m_end)
1124
+ {
1125
+ if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1126
+ {
1127
+ fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1128
+ return false;
1129
+ }
1130
+ // Treat the opening '{' as a literal character, rewind to start of error:
1131
+ --m_position;
1132
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1133
+ return parse_literal();
1134
+ }
1135
+ // get the value if any:
1136
+ v = this->m_traits.toi(m_position, m_end, 10);
1137
+ max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1138
+ }
1139
+ else
1140
+ {
1141
+ // no comma, max = min:
1142
+ max = min;
1143
+ }
1144
+ // skip whitespace:
1145
+ while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1146
+ ++m_position;
1147
+ // OK now check trailing }:
1148
+ if(this->m_position == this->m_end)
1149
+ {
1150
+ if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1151
+ {
1152
+ fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1153
+ return false;
1154
+ }
1155
+ // Treat the opening '{' as a literal character, rewind to start of error:
1156
+ --m_position;
1157
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1158
+ return parse_literal();
1159
+ }
1160
+ if(isbasic)
1161
+ {
1162
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1163
+ {
1164
+ ++m_position;
1165
+ if(this->m_position == this->m_end)
1166
+ {
1167
+ fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1168
+ return false;
1169
+ }
1170
+ }
1171
+ else
1172
+ {
1173
+ fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1174
+ return false;
1175
+ }
1176
+ }
1177
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1178
+ ++m_position;
1179
+ else
1180
+ {
1181
+ // Treat the opening '{' as a literal character, rewind to start of error:
1182
+ --m_position;
1183
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1184
+ return parse_literal();
1185
+ }
1186
+ //
1187
+ // finally go and add the repeat, unless error:
1188
+ //
1189
+ if(min > max)
1190
+ {
1191
+ // Backtrack to error location:
1192
+ m_position -= 2;
1193
+ while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1194
+ ++m_position;
1195
+ fail(regex_constants::error_badbrace, m_position - m_base);
1196
+ return false;
1197
+ }
1198
+ return parse_repeat(min, max);
1199
+ }
1200
+
1201
+ template <class charT, class traits>
1202
+ bool basic_regex_parser<charT, traits>::parse_alt()
1203
+ {
1204
+ //
1205
+ // error check: if there have been no previous states,
1206
+ // or if the last state was a '(' then error:
1207
+ //
1208
+ if(
1209
+ ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1210
+ &&
1211
+ !(
1212
+ ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1213
+ &&
1214
+ ((this->flags() & regbase::no_empty_expressions) == 0)
1215
+ )
1216
+ )
1217
+ {
1218
+ fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression can start with the alternation operator |.");
1219
+ return false;
1220
+ }
1221
+ //
1222
+ // Reset mark count if required:
1223
+ //
1224
+ if(m_max_mark < m_mark_count)
1225
+ m_max_mark = m_mark_count;
1226
+ if(m_mark_reset >= 0)
1227
+ m_mark_count = m_mark_reset;
1228
+
1229
+ ++m_position;
1230
+ //
1231
+ // we need to append a trailing jump:
1232
+ //
1233
+ re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
1234
+ std::ptrdiff_t jump_offset = this->getoffset(pj);
1235
+ //
1236
+ // now insert the alternative:
1237
+ //
1238
+ re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1239
+ jump_offset += re_alt_size;
1240
+ this->m_pdata->m_data.align();
1241
+ palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1242
+ //
1243
+ // update m_alt_insert_point so that the next alternate gets
1244
+ // inserted at the start of the second of the two we've just created:
1245
+ //
1246
+ this->m_alt_insert_point = this->m_pdata->m_data.size();
1247
+ //
1248
+ // the start of this alternative must have a case changes state
1249
+ // if the current block has messed around with case changes:
1250
+ //
1251
+ if(m_has_case_change)
1252
+ {
1253
+ static_cast<re_case*>(
1254
+ this->append_state(syntax_element_toggle_case, sizeof(re_case))
1255
+ )->icase = this->m_icase;
1256
+ }
1257
+ //
1258
+ // push the alternative onto our stack, a recursive
1259
+ // implementation here is easier to understand (and faster
1260
+ // as it happens), but causes all kinds of stack overflow problems
1261
+ // on programs with small stacks (COM+).
1262
+ //
1263
+ m_alt_jumps.push_back(jump_offset);
1264
+ return true;
1265
+ }
1266
+
1267
+ template <class charT, class traits>
1268
+ bool basic_regex_parser<charT, traits>::parse_set()
1269
+ {
1270
+ static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1271
+ ++m_position;
1272
+ if(m_position == m_end)
1273
+ {
1274
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1275
+ return false;
1276
+ }
1277
+ basic_char_set<charT, traits> char_set;
1278
+
1279
+ const charT* base = m_position; // where the '[' was
1280
+ const charT* item_base = m_position; // where the '[' or '^' was
1281
+
1282
+ while(m_position != m_end)
1283
+ {
1284
+ switch(this->m_traits.syntax_type(*m_position))
1285
+ {
1286
+ case regex_constants::syntax_caret:
1287
+ if(m_position == base)
1288
+ {
1289
+ char_set.negate();
1290
+ ++m_position;
1291
+ item_base = m_position;
1292
+ }
1293
+ else
1294
+ parse_set_literal(char_set);
1295
+ break;
1296
+ case regex_constants::syntax_close_set:
1297
+ if(m_position == item_base)
1298
+ {
1299
+ parse_set_literal(char_set);
1300
+ break;
1301
+ }
1302
+ else
1303
+ {
1304
+ ++m_position;
1305
+ if(0 == this->append_set(char_set))
1306
+ {
1307
+ fail(regex_constants::error_ctype, m_position - m_base);
1308
+ return false;
1309
+ }
1310
+ }
1311
+ return true;
1312
+ case regex_constants::syntax_open_set:
1313
+ if(parse_inner_set(char_set))
1314
+ break;
1315
+ return true;
1316
+ case regex_constants::syntax_escape:
1317
+ {
1318
+ //
1319
+ // look ahead and see if this is a character class shortcut
1320
+ // \d \w \s etc...
1321
+ //
1322
+ ++m_position;
1323
+ if(this->m_traits.escape_syntax_type(*m_position)
1324
+ == regex_constants::escape_type_class)
1325
+ {
1326
+ char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1327
+ if(m != 0)
1328
+ {
1329
+ char_set.add_class(m);
1330
+ ++m_position;
1331
+ break;
1332
+ }
1333
+ }
1334
+ else if(this->m_traits.escape_syntax_type(*m_position)
1335
+ == regex_constants::escape_type_not_class)
1336
+ {
1337
+ // negated character class:
1338
+ char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1339
+ if(m != 0)
1340
+ {
1341
+ char_set.add_negated_class(m);
1342
+ ++m_position;
1343
+ break;
1344
+ }
1345
+ }
1346
+ // not a character class, just a regular escape:
1347
+ --m_position;
1348
+ parse_set_literal(char_set);
1349
+ break;
1350
+ }
1351
+ default:
1352
+ parse_set_literal(char_set);
1353
+ break;
1354
+ }
1355
+ }
1356
+ return m_position != m_end;
1357
+ }
1358
+
1359
+ template <class charT, class traits>
1360
+ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1361
+ {
1362
+ static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1363
+ //
1364
+ // we have either a character class [:name:]
1365
+ // a collating element [.name.]
1366
+ // or an equivalence class [=name=]
1367
+ //
1368
+ if(m_end == ++m_position)
1369
+ {
1370
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1371
+ return false;
1372
+ }
1373
+ switch(this->m_traits.syntax_type(*m_position))
1374
+ {
1375
+ case regex_constants::syntax_dot:
1376
+ //
1377
+ // a collating element is treated as a literal:
1378
+ //
1379
+ --m_position;
1380
+ parse_set_literal(char_set);
1381
+ return true;
1382
+ case regex_constants::syntax_colon:
1383
+ {
1384
+ // check that character classes are actually enabled:
1385
+ if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1386
+ == (regbase::basic_syntax_group | regbase::no_char_classes))
1387
+ {
1388
+ --m_position;
1389
+ parse_set_literal(char_set);
1390
+ return true;
1391
+ }
1392
+ // skip the ':'
1393
+ if(m_end == ++m_position)
1394
+ {
1395
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1396
+ return false;
1397
+ }
1398
+ const charT* name_first = m_position;
1399
+ // skip at least one character, then find the matching ':]'
1400
+ if(m_end == ++m_position)
1401
+ {
1402
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1403
+ return false;
1404
+ }
1405
+ while((m_position != m_end)
1406
+ && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1407
+ ++m_position;
1408
+ const charT* name_last = m_position;
1409
+ if(m_end == m_position)
1410
+ {
1411
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1412
+ return false;
1413
+ }
1414
+ if((m_end == ++m_position)
1415
+ || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1416
+ {
1417
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1418
+ return false;
1419
+ }
1420
+ //
1421
+ // check for negated class:
1422
+ //
1423
+ bool negated = false;
1424
+ if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1425
+ {
1426
+ ++name_first;
1427
+ negated = true;
1428
+ }
1429
+ typedef typename traits::char_class_type m_type;
1430
+ m_type m = this->m_traits.lookup_classname(name_first, name_last);
1431
+ if(m == 0)
1432
+ {
1433
+ if(char_set.empty() && (name_last - name_first == 1))
1434
+ {
1435
+ // maybe a special case:
1436
+ ++m_position;
1437
+ if( (m_position != m_end)
1438
+ && (this->m_traits.syntax_type(*m_position)
1439
+ == regex_constants::syntax_close_set))
1440
+ {
1441
+ if(this->m_traits.escape_syntax_type(*name_first)
1442
+ == regex_constants::escape_type_left_word)
1443
+ {
1444
+ ++m_position;
1445
+ this->append_state(syntax_element_word_start);
1446
+ return false;
1447
+ }
1448
+ if(this->m_traits.escape_syntax_type(*name_first)
1449
+ == regex_constants::escape_type_right_word)
1450
+ {
1451
+ ++m_position;
1452
+ this->append_state(syntax_element_word_end);
1453
+ return false;
1454
+ }
1455
+ }
1456
+ }
1457
+ fail(regex_constants::error_ctype, name_first - m_base);
1458
+ return false;
1459
+ }
1460
+ if(negated == false)
1461
+ char_set.add_class(m);
1462
+ else
1463
+ char_set.add_negated_class(m);
1464
+ ++m_position;
1465
+ break;
1466
+ }
1467
+ case regex_constants::syntax_equal:
1468
+ {
1469
+ // skip the '='
1470
+ if(m_end == ++m_position)
1471
+ {
1472
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1473
+ return false;
1474
+ }
1475
+ const charT* name_first = m_position;
1476
+ // skip at least one character, then find the matching '=]'
1477
+ if(m_end == ++m_position)
1478
+ {
1479
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1480
+ return false;
1481
+ }
1482
+ while((m_position != m_end)
1483
+ && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1484
+ ++m_position;
1485
+ const charT* name_last = m_position;
1486
+ if(m_end == m_position)
1487
+ {
1488
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1489
+ return false;
1490
+ }
1491
+ if((m_end == ++m_position)
1492
+ || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1493
+ {
1494
+ fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1495
+ return false;
1496
+ }
1497
+ string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1498
+ if((0 == m.size()) || (m.size() > 2))
1499
+ {
1500
+ fail(regex_constants::error_collate, name_first - m_base);
1501
+ return false;
1502
+ }
1503
+ digraph<charT> d;
1504
+ d.first = m[0];
1505
+ if(m.size() > 1)
1506
+ d.second = m[1];
1507
+ else
1508
+ d.second = 0;
1509
+ char_set.add_equivalent(d);
1510
+ ++m_position;
1511
+ break;
1512
+ }
1513
+ default:
1514
+ --m_position;
1515
+ parse_set_literal(char_set);
1516
+ break;
1517
+ }
1518
+ return true;
1519
+ }
1520
+
1521
+ template <class charT, class traits>
1522
+ void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1523
+ {
1524
+ digraph<charT> start_range(get_next_set_literal(char_set));
1525
+ if(m_end == m_position)
1526
+ {
1527
+ fail(regex_constants::error_brack, m_position - m_base);
1528
+ return;
1529
+ }
1530
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1531
+ {
1532
+ // we have a range:
1533
+ if(m_end == ++m_position)
1534
+ {
1535
+ fail(regex_constants::error_brack, m_position - m_base);
1536
+ return;
1537
+ }
1538
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1539
+ {
1540
+ digraph<charT> end_range = get_next_set_literal(char_set);
1541
+ char_set.add_range(start_range, end_range);
1542
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1543
+ {
1544
+ if(m_end == ++m_position)
1545
+ {
1546
+ fail(regex_constants::error_brack, m_position - m_base);
1547
+ return;
1548
+ }
1549
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1550
+ {
1551
+ // trailing - :
1552
+ --m_position;
1553
+ return;
1554
+ }
1555
+ fail(regex_constants::error_range, m_position - m_base);
1556
+ return;
1557
+ }
1558
+ return;
1559
+ }
1560
+ --m_position;
1561
+ }
1562
+ char_set.add_single(start_range);
1563
+ }
1564
+
1565
+ template <class charT, class traits>
1566
+ digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1567
+ {
1568
+ digraph<charT> result;
1569
+ switch(this->m_traits.syntax_type(*m_position))
1570
+ {
1571
+ case regex_constants::syntax_dash:
1572
+ if(!char_set.empty())
1573
+ {
1574
+ // see if we are at the end of the set:
1575
+ if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1576
+ {
1577
+ fail(regex_constants::error_range, m_position - m_base);
1578
+ return result;
1579
+ }
1580
+ --m_position;
1581
+ }
1582
+ result.first = *m_position++;
1583
+ return result;
1584
+ case regex_constants::syntax_escape:
1585
+ // check to see if escapes are supported first:
1586
+ if(this->flags() & regex_constants::no_escape_in_lists)
1587
+ {
1588
+ result = *m_position++;
1589
+ break;
1590
+ }
1591
+ ++m_position;
1592
+ result = unescape_character();
1593
+ break;
1594
+ case regex_constants::syntax_open_set:
1595
+ {
1596
+ if(m_end == ++m_position)
1597
+ {
1598
+ fail(regex_constants::error_collate, m_position - m_base);
1599
+ return result;
1600
+ }
1601
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1602
+ {
1603
+ --m_position;
1604
+ result.first = *m_position;
1605
+ ++m_position;
1606
+ return result;
1607
+ }
1608
+ if(m_end == ++m_position)
1609
+ {
1610
+ fail(regex_constants::error_collate, m_position - m_base);
1611
+ return result;
1612
+ }
1613
+ const charT* name_first = m_position;
1614
+ // skip at least one character, then find the matching ':]'
1615
+ if(m_end == ++m_position)
1616
+ {
1617
+ fail(regex_constants::error_collate, name_first - m_base);
1618
+ return result;
1619
+ }
1620
+ while((m_position != m_end)
1621
+ && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1622
+ ++m_position;
1623
+ const charT* name_last = m_position;
1624
+ if(m_end == m_position)
1625
+ {
1626
+ fail(regex_constants::error_collate, name_first - m_base);
1627
+ return result;
1628
+ }
1629
+ if((m_end == ++m_position)
1630
+ || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1631
+ {
1632
+ fail(regex_constants::error_collate, name_first - m_base);
1633
+ return result;
1634
+ }
1635
+ ++m_position;
1636
+ string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1637
+ if(s.empty() || (s.size() > 2))
1638
+ {
1639
+ fail(regex_constants::error_collate, name_first - m_base);
1640
+ return result;
1641
+ }
1642
+ result.first = s[0];
1643
+ if(s.size() > 1)
1644
+ result.second = s[1];
1645
+ else
1646
+ result.second = 0;
1647
+ return result;
1648
+ }
1649
+ default:
1650
+ result = *m_position++;
1651
+ }
1652
+ return result;
1653
+ }
1654
+
1655
+ //
1656
+ // does a value fit in the specified charT type?
1657
+ //
1658
+ template <class charT>
1659
+ bool valid_value(charT, int v, const mpl::true_&)
1660
+ {
1661
+ return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
1662
+ }
1663
+ template <class charT>
1664
+ bool valid_value(charT, int, const mpl::false_&)
1665
+ {
1666
+ return true; // v will alsways fit in a charT
1667
+ }
1668
+ template <class charT>
1669
+ bool valid_value(charT c, int v)
1670
+ {
1671
+ return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
1672
+ }
1673
+
1674
+ template <class charT, class traits>
1675
+ charT basic_regex_parser<charT, traits>::unescape_character()
1676
+ {
1677
+ #ifdef BOOST_MSVC
1678
+ #pragma warning(push)
1679
+ #pragma warning(disable:4127)
1680
+ #endif
1681
+ charT result(0);
1682
+ if(m_position == m_end)
1683
+ {
1684
+ fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1685
+ return false;
1686
+ }
1687
+ switch(this->m_traits.escape_syntax_type(*m_position))
1688
+ {
1689
+ case regex_constants::escape_type_control_a:
1690
+ result = charT('\a');
1691
+ break;
1692
+ case regex_constants::escape_type_e:
1693
+ result = charT(27);
1694
+ break;
1695
+ case regex_constants::escape_type_control_f:
1696
+ result = charT('\f');
1697
+ break;
1698
+ case regex_constants::escape_type_control_n:
1699
+ result = charT('\n');
1700
+ break;
1701
+ case regex_constants::escape_type_control_r:
1702
+ result = charT('\r');
1703
+ break;
1704
+ case regex_constants::escape_type_control_t:
1705
+ result = charT('\t');
1706
+ break;
1707
+ case regex_constants::escape_type_control_v:
1708
+ result = charT('\v');
1709
+ break;
1710
+ case regex_constants::escape_type_word_assert:
1711
+ result = charT('\b');
1712
+ break;
1713
+ case regex_constants::escape_type_ascii_control:
1714
+ ++m_position;
1715
+ if(m_position == m_end)
1716
+ {
1717
+ // Rewind to start of escape:
1718
+ --m_position;
1719
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1720
+ fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1721
+ return result;
1722
+ }
1723
+ result = static_cast<charT>(*m_position % 32);
1724
+ break;
1725
+ case regex_constants::escape_type_hex:
1726
+ ++m_position;
1727
+ if(m_position == m_end)
1728
+ {
1729
+ // Rewind to start of escape:
1730
+ --m_position;
1731
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1732
+ fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1733
+ return result;
1734
+ }
1735
+ // maybe have \x{ddd}
1736
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1737
+ {
1738
+ ++m_position;
1739
+ if(m_position == m_end)
1740
+ {
1741
+ // Rewind to start of escape:
1742
+ --m_position;
1743
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1744
+ fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1745
+ return result;
1746
+ }
1747
+ int i = this->m_traits.toi(m_position, m_end, 16);
1748
+ if((m_position == m_end)
1749
+ || (i < 0)
1750
+ || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
1751
+ || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1752
+ {
1753
+ // Rewind to start of escape:
1754
+ --m_position;
1755
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1756
+ fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1757
+ return result;
1758
+ }
1759
+ ++m_position;
1760
+ result = charT(i);
1761
+ }
1762
+ else
1763
+ {
1764
+ std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1765
+ int i = this->m_traits.toi(m_position, m_position + len, 16);
1766
+ if((i < 0)
1767
+ || !valid_value(charT(0), i))
1768
+ {
1769
+ // Rewind to start of escape:
1770
+ --m_position;
1771
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1772
+ fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1773
+ return result;
1774
+ }
1775
+ result = charT(i);
1776
+ }
1777
+ return result;
1778
+ case regex_constants::syntax_digit:
1779
+ {
1780
+ // an octal escape sequence, the first character must be a zero
1781
+ // followed by up to 3 octal digits:
1782
+ std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1783
+ const charT* bp = m_position;
1784
+ int val = this->m_traits.toi(bp, bp + 1, 8);
1785
+ if(val != 0)
1786
+ {
1787
+ // Rewind to start of escape:
1788
+ --m_position;
1789
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1790
+ // Oops not an octal escape after all:
1791
+ fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1792
+ return result;
1793
+ }
1794
+ val = this->m_traits.toi(m_position, m_position + len, 8);
1795
+ if(val < 0)
1796
+ {
1797
+ // Rewind to start of escape:
1798
+ --m_position;
1799
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1800
+ fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1801
+ return result;
1802
+ }
1803
+ return static_cast<charT>(val);
1804
+ }
1805
+ case regex_constants::escape_type_named_char:
1806
+ {
1807
+ ++m_position;
1808
+ if(m_position == m_end)
1809
+ {
1810
+ // Rewind to start of escape:
1811
+ --m_position;
1812
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1813
+ fail(regex_constants::error_escape, m_position - m_base);
1814
+ return false;
1815
+ }
1816
+ // maybe have \N{name}
1817
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1818
+ {
1819
+ const charT* base = m_position;
1820
+ // skip forward until we find enclosing brace:
1821
+ while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1822
+ ++m_position;
1823
+ if(m_position == m_end)
1824
+ {
1825
+ // Rewind to start of escape:
1826
+ --m_position;
1827
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1828
+ fail(regex_constants::error_escape, m_position - m_base);
1829
+ return false;
1830
+ }
1831
+ string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1832
+ if(s.empty())
1833
+ {
1834
+ // Rewind to start of escape:
1835
+ --m_position;
1836
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1837
+ fail(regex_constants::error_collate, m_position - m_base);
1838
+ return false;
1839
+ }
1840
+ if(s.size() == 1)
1841
+ {
1842
+ return s[0];
1843
+ }
1844
+ }
1845
+ // fall through is a failure:
1846
+ // Rewind to start of escape:
1847
+ --m_position;
1848
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1849
+ fail(regex_constants::error_escape, m_position - m_base);
1850
+ return false;
1851
+ }
1852
+ default:
1853
+ result = *m_position;
1854
+ break;
1855
+ }
1856
+ ++m_position;
1857
+ return result;
1858
+ #ifdef BOOST_MSVC
1859
+ #pragma warning(pop)
1860
+ #endif
1861
+ }
1862
+
1863
+ template <class charT, class traits>
1864
+ bool basic_regex_parser<charT, traits>::parse_backref()
1865
+ {
1866
+ BOOST_ASSERT(m_position != m_end);
1867
+ const charT* pc = m_position;
1868
+ int i = this->m_traits.toi(pc, pc + 1, 10);
1869
+ if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1870
+ {
1871
+ // not a backref at all but an octal escape sequence:
1872
+ charT c = unescape_character();
1873
+ this->append_literal(c);
1874
+ }
1875
+ else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1876
+ {
1877
+ m_position = pc;
1878
+ re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1879
+ pb->index = i;
1880
+ pb->icase = this->flags() & regbase::icase;
1881
+ }
1882
+ else
1883
+ {
1884
+ // Rewind to start of escape:
1885
+ --m_position;
1886
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1887
+ fail(regex_constants::error_backref, m_position - m_base);
1888
+ return false;
1889
+ }
1890
+ return true;
1891
+ }
1892
+
1893
+ template <class charT, class traits>
1894
+ bool basic_regex_parser<charT, traits>::parse_QE()
1895
+ {
1896
+ #ifdef BOOST_MSVC
1897
+ #pragma warning(push)
1898
+ #pragma warning(disable:4127)
1899
+ #endif
1900
+ //
1901
+ // parse a \Q...\E sequence:
1902
+ //
1903
+ ++m_position; // skip the Q
1904
+ const charT* start = m_position;
1905
+ const charT* end;
1906
+ do
1907
+ {
1908
+ while((m_position != m_end)
1909
+ && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1910
+ ++m_position;
1911
+ if(m_position == m_end)
1912
+ {
1913
+ // a \Q...\E sequence may terminate with the end of the expression:
1914
+ end = m_position;
1915
+ break;
1916
+ }
1917
+ if(++m_position == m_end) // skip the escape
1918
+ {
1919
+ fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1920
+ return false;
1921
+ }
1922
+ // check to see if it's a \E:
1923
+ if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
1924
+ {
1925
+ ++m_position;
1926
+ end = m_position - 2;
1927
+ break;
1928
+ }
1929
+ // otherwise go round again:
1930
+ }while(true);
1931
+ //
1932
+ // now add all the character between the two escapes as literals:
1933
+ //
1934
+ while(start != end)
1935
+ {
1936
+ this->append_literal(*start);
1937
+ ++start;
1938
+ }
1939
+ return true;
1940
+ #ifdef BOOST_MSVC
1941
+ #pragma warning(pop)
1942
+ #endif
1943
+ }
1944
+
1945
+ template <class charT, class traits>
1946
+ bool basic_regex_parser<charT, traits>::parse_perl_extension()
1947
+ {
1948
+ if(++m_position == m_end)
1949
+ {
1950
+ // Rewind to start of (? sequence:
1951
+ --m_position;
1952
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
1953
+ fail(regex_constants::error_perl_extension, m_position - m_base);
1954
+ return false;
1955
+ }
1956
+ //
1957
+ // treat comments as a special case, as these
1958
+ // are the only ones that don't start with a leading
1959
+ // startmark state:
1960
+ //
1961
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
1962
+ {
1963
+ while((m_position != m_end)
1964
+ && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
1965
+ {}
1966
+ return true;
1967
+ }
1968
+ //
1969
+ // backup some state, and prepare the way:
1970
+ //
1971
+ int markid = 0;
1972
+ std::ptrdiff_t jump_offset = 0;
1973
+ re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
1974
+ pb->icase = this->flags() & regbase::icase;
1975
+ std::ptrdiff_t last_paren_start = this->getoffset(pb);
1976
+ // back up insertion point for alternations, and set new point:
1977
+ std::ptrdiff_t last_alt_point = m_alt_insert_point;
1978
+ this->m_pdata->m_data.align();
1979
+ m_alt_insert_point = this->m_pdata->m_data.size();
1980
+ std::ptrdiff_t expected_alt_point = m_alt_insert_point;
1981
+ bool restore_flags = true;
1982
+ regex_constants::syntax_option_type old_flags = this->flags();
1983
+ bool old_case_change = m_has_case_change;
1984
+ m_has_case_change = false;
1985
+ charT name_delim;
1986
+ int mark_reset = m_mark_reset;
1987
+ int max_mark = m_max_mark;
1988
+ m_mark_reset = -1;
1989
+ m_max_mark = m_mark_count;
1990
+ int v;
1991
+ //
1992
+ // select the actual extension used:
1993
+ //
1994
+ switch(this->m_traits.syntax_type(*m_position))
1995
+ {
1996
+ case regex_constants::syntax_or:
1997
+ m_mark_reset = m_mark_count;
1998
+ BOOST_FALLTHROUGH;
1999
+ case regex_constants::syntax_colon:
2000
+ //
2001
+ // a non-capturing mark:
2002
+ //
2003
+ pb->index = markid = 0;
2004
+ ++m_position;
2005
+ break;
2006
+ case regex_constants::syntax_digit:
2007
+ {
2008
+ //
2009
+ // a recursive subexpression:
2010
+ //
2011
+ v = this->m_traits.toi(m_position, m_end, 10);
2012
+ if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2013
+ {
2014
+ // Rewind to start of (? sequence:
2015
+ --m_position;
2016
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2017
+ fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2018
+ return false;
2019
+ }
2020
+ insert_recursion:
2021
+ pb->index = markid = 0;
2022
+ re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2023
+ pr->alt.i = v;
2024
+ pr->state_id = 0;
2025
+ static_cast<re_case*>(
2026
+ this->append_state(syntax_element_toggle_case, sizeof(re_case))
2027
+ )->icase = this->flags() & regbase::icase;
2028
+ break;
2029
+ }
2030
+ case regex_constants::syntax_plus:
2031
+ //
2032
+ // A forward-relative recursive subexpression:
2033
+ //
2034
+ ++m_position;
2035
+ v = this->m_traits.toi(m_position, m_end, 10);
2036
+ if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2037
+ {
2038
+ // Rewind to start of (? sequence:
2039
+ --m_position;
2040
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2041
+ fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2042
+ return false;
2043
+ }
2044
+ v += m_mark_count;
2045
+ goto insert_recursion;
2046
+ case regex_constants::syntax_dash:
2047
+ //
2048
+ // Possibly a backward-relative recursive subexpression:
2049
+ //
2050
+ ++m_position;
2051
+ v = this->m_traits.toi(m_position, m_end, 10);
2052
+ if(v <= 0)
2053
+ {
2054
+ --m_position;
2055
+ // Oops not a relative recursion at all, but a (?-imsx) group:
2056
+ goto option_group_jump;
2057
+ }
2058
+ v = m_mark_count + 1 - v;
2059
+ if(v <= 0)
2060
+ {
2061
+ // Rewind to start of (? sequence:
2062
+ --m_position;
2063
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2064
+ fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2065
+ return false;
2066
+ }
2067
+ goto insert_recursion;
2068
+ case regex_constants::syntax_equal:
2069
+ pb->index = markid = -1;
2070
+ ++m_position;
2071
+ jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2072
+ this->m_pdata->m_data.align();
2073
+ m_alt_insert_point = this->m_pdata->m_data.size();
2074
+ break;
2075
+ case regex_constants::syntax_not:
2076
+ pb->index = markid = -2;
2077
+ ++m_position;
2078
+ jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2079
+ this->m_pdata->m_data.align();
2080
+ m_alt_insert_point = this->m_pdata->m_data.size();
2081
+ break;
2082
+ case regex_constants::escape_type_left_word:
2083
+ {
2084
+ // a lookbehind assertion:
2085
+ if(++m_position == m_end)
2086
+ {
2087
+ // Rewind to start of (? sequence:
2088
+ --m_position;
2089
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2090
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2091
+ return false;
2092
+ }
2093
+ regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2094
+ if(t == regex_constants::syntax_not)
2095
+ pb->index = markid = -2;
2096
+ else if(t == regex_constants::syntax_equal)
2097
+ pb->index = markid = -1;
2098
+ else
2099
+ {
2100
+ // Probably a named capture which also starts (?< :
2101
+ name_delim = '>';
2102
+ --m_position;
2103
+ goto named_capture_jump;
2104
+ }
2105
+ ++m_position;
2106
+ jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2107
+ this->append_state(syntax_element_backstep, sizeof(re_brace));
2108
+ this->m_pdata->m_data.align();
2109
+ m_alt_insert_point = this->m_pdata->m_data.size();
2110
+ break;
2111
+ }
2112
+ case regex_constants::escape_type_right_word:
2113
+ //
2114
+ // an independent sub-expression:
2115
+ //
2116
+ pb->index = markid = -3;
2117
+ ++m_position;
2118
+ jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2119
+ this->m_pdata->m_data.align();
2120
+ m_alt_insert_point = this->m_pdata->m_data.size();
2121
+ break;
2122
+ case regex_constants::syntax_open_mark:
2123
+ {
2124
+ // a conditional expression:
2125
+ pb->index = markid = -4;
2126
+ if(++m_position == m_end)
2127
+ {
2128
+ // Rewind to start of (? sequence:
2129
+ --m_position;
2130
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2131
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2132
+ return false;
2133
+ }
2134
+ v = this->m_traits.toi(m_position, m_end, 10);
2135
+ if(m_position == m_end)
2136
+ {
2137
+ // Rewind to start of (? sequence:
2138
+ --m_position;
2139
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2140
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2141
+ return false;
2142
+ }
2143
+ if(*m_position == charT('R'))
2144
+ {
2145
+ if(++m_position == m_end)
2146
+ {
2147
+ // Rewind to start of (? sequence:
2148
+ --m_position;
2149
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2150
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2151
+ return false;
2152
+ }
2153
+ if(*m_position == charT('&'))
2154
+ {
2155
+ const charT* base = ++m_position;
2156
+ while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2157
+ ++m_position;
2158
+ if(m_position == m_end)
2159
+ {
2160
+ // Rewind to start of (? sequence:
2161
+ --m_position;
2162
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2163
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2164
+ return false;
2165
+ }
2166
+ v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2167
+ }
2168
+ else
2169
+ {
2170
+ v = -this->m_traits.toi(m_position, m_end, 10);
2171
+ }
2172
+ re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2173
+ br->index = v < 0 ? (v - 1) : 0;
2174
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2175
+ {
2176
+ // Rewind to start of (? sequence:
2177
+ --m_position;
2178
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2179
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2180
+ return false;
2181
+ }
2182
+ if(++m_position == m_end)
2183
+ {
2184
+ // Rewind to start of (? sequence:
2185
+ --m_position;
2186
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2187
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2188
+ return false;
2189
+ }
2190
+ }
2191
+ else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2192
+ {
2193
+ const charT* base = ++m_position;
2194
+ while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2195
+ ++m_position;
2196
+ if(m_position == m_end)
2197
+ {
2198
+ // Rewind to start of (? sequence:
2199
+ --m_position;
2200
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2201
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2202
+ return false;
2203
+ }
2204
+ v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2205
+ re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2206
+ br->index = v;
2207
+ if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2208
+ {
2209
+ // Rewind to start of (? sequence:
2210
+ --m_position;
2211
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2212
+ fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2213
+ return false;
2214
+ }
2215
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2216
+ {
2217
+ // Rewind to start of (? sequence:
2218
+ --m_position;
2219
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2220
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2221
+ return false;
2222
+ }
2223
+ if(++m_position == m_end)
2224
+ {
2225
+ // Rewind to start of (? sequence:
2226
+ --m_position;
2227
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2228
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2229
+ return false;
2230
+ }
2231
+ }
2232
+ else if(*m_position == charT('D'))
2233
+ {
2234
+ const char* def = "DEFINE";
2235
+ while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2236
+ ++m_position, ++def;
2237
+ if((m_position == m_end) || *def)
2238
+ {
2239
+ // Rewind to start of (? sequence:
2240
+ --m_position;
2241
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2242
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2243
+ return false;
2244
+ }
2245
+ re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2246
+ br->index = 9999; // special magic value!
2247
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2248
+ {
2249
+ // Rewind to start of (? sequence:
2250
+ --m_position;
2251
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2252
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2253
+ return false;
2254
+ }
2255
+ if(++m_position == m_end)
2256
+ {
2257
+ // Rewind to start of (? sequence:
2258
+ --m_position;
2259
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2260
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2261
+ return false;
2262
+ }
2263
+ }
2264
+ else if(v > 0)
2265
+ {
2266
+ re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2267
+ br->index = v;
2268
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2269
+ {
2270
+ // Rewind to start of (? sequence:
2271
+ --m_position;
2272
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2273
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2274
+ return false;
2275
+ }
2276
+ if(++m_position == m_end)
2277
+ {
2278
+ // Rewind to start of (? sequence:
2279
+ --m_position;
2280
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2281
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2282
+ return false;
2283
+ }
2284
+ }
2285
+ else
2286
+ {
2287
+ // verify that we have a lookahead or lookbehind assert:
2288
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2289
+ {
2290
+ // Rewind to start of (? sequence:
2291
+ --m_position;
2292
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2293
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2294
+ return false;
2295
+ }
2296
+ if(++m_position == m_end)
2297
+ {
2298
+ // Rewind to start of (? sequence:
2299
+ --m_position;
2300
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2301
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2302
+ return false;
2303
+ }
2304
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2305
+ {
2306
+ if(++m_position == m_end)
2307
+ {
2308
+ // Rewind to start of (? sequence:
2309
+ --m_position;
2310
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2311
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2312
+ return false;
2313
+ }
2314
+ if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2315
+ && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2316
+ {
2317
+ // Rewind to start of (? sequence:
2318
+ --m_position;
2319
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2320
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2321
+ return false;
2322
+ }
2323
+ m_position -= 3;
2324
+ }
2325
+ else
2326
+ {
2327
+ if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2328
+ && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2329
+ {
2330
+ // Rewind to start of (? sequence:
2331
+ --m_position;
2332
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2333
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2334
+ return false;
2335
+ }
2336
+ m_position -= 2;
2337
+ }
2338
+ }
2339
+ break;
2340
+ }
2341
+ case regex_constants::syntax_close_mark:
2342
+ // Rewind to start of (? sequence:
2343
+ --m_position;
2344
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2345
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2346
+ return false;
2347
+ case regex_constants::escape_type_end_buffer:
2348
+ {
2349
+ name_delim = *m_position;
2350
+ named_capture_jump:
2351
+ markid = 0;
2352
+ if(0 == (this->flags() & regbase::nosubs))
2353
+ {
2354
+ markid = ++m_mark_count;
2355
+ #ifndef BOOST_NO_STD_DISTANCE
2356
+ if(this->flags() & regbase::save_subexpression_location)
2357
+ this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2358
+ #else
2359
+ if(this->flags() & regbase::save_subexpression_location)
2360
+ this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2361
+ #endif
2362
+ }
2363
+ pb->index = markid;
2364
+ const charT* base = ++m_position;
2365
+ if(m_position == m_end)
2366
+ {
2367
+ // Rewind to start of (? sequence:
2368
+ --m_position;
2369
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2370
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2371
+ return false;
2372
+ }
2373
+ while((m_position != m_end) && (*m_position != name_delim))
2374
+ ++m_position;
2375
+ if(m_position == m_end)
2376
+ {
2377
+ // Rewind to start of (? sequence:
2378
+ --m_position;
2379
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2380
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2381
+ return false;
2382
+ }
2383
+ this->m_pdata->set_name(base, m_position, markid);
2384
+ ++m_position;
2385
+ break;
2386
+ }
2387
+ default:
2388
+ if(*m_position == charT('R'))
2389
+ {
2390
+ ++m_position;
2391
+ v = 0;
2392
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2393
+ {
2394
+ // Rewind to start of (? sequence:
2395
+ --m_position;
2396
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2397
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2398
+ return false;
2399
+ }
2400
+ goto insert_recursion;
2401
+ }
2402
+ if(*m_position == charT('&'))
2403
+ {
2404
+ ++m_position;
2405
+ const charT* base = m_position;
2406
+ while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2407
+ ++m_position;
2408
+ if(m_position == m_end)
2409
+ {
2410
+ // Rewind to start of (? sequence:
2411
+ --m_position;
2412
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2413
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2414
+ return false;
2415
+ }
2416
+ v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2417
+ goto insert_recursion;
2418
+ }
2419
+ if(*m_position == charT('P'))
2420
+ {
2421
+ ++m_position;
2422
+ if(m_position == m_end)
2423
+ {
2424
+ // Rewind to start of (? sequence:
2425
+ --m_position;
2426
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2427
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2428
+ return false;
2429
+ }
2430
+ if(*m_position == charT('>'))
2431
+ {
2432
+ ++m_position;
2433
+ const charT* base = m_position;
2434
+ while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2435
+ ++m_position;
2436
+ if(m_position == m_end)
2437
+ {
2438
+ // Rewind to start of (? sequence:
2439
+ --m_position;
2440
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2441
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2442
+ return false;
2443
+ }
2444
+ v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2445
+ goto insert_recursion;
2446
+ }
2447
+ }
2448
+ //
2449
+ // lets assume that we have a (?imsx) group and try and parse it:
2450
+ //
2451
+ option_group_jump:
2452
+ regex_constants::syntax_option_type opts = parse_options();
2453
+ if(m_position == m_end)
2454
+ {
2455
+ // Rewind to start of (? sequence:
2456
+ --m_position;
2457
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2458
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2459
+ return false;
2460
+ }
2461
+ // make a note of whether we have a case change:
2462
+ m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2463
+ pb->index = markid = 0;
2464
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2465
+ {
2466
+ // update flags and carry on as normal:
2467
+ this->flags(opts);
2468
+ restore_flags = false;
2469
+ old_case_change |= m_has_case_change; // defer end of scope by one ')'
2470
+ }
2471
+ else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2472
+ {
2473
+ // update flags and carry on until the matching ')' is found:
2474
+ this->flags(opts);
2475
+ ++m_position;
2476
+ }
2477
+ else
2478
+ {
2479
+ // Rewind to start of (? sequence:
2480
+ --m_position;
2481
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2482
+ fail(regex_constants::error_perl_extension, m_position - m_base);
2483
+ return false;
2484
+ }
2485
+
2486
+ // finally append a case change state if we need it:
2487
+ if(m_has_case_change)
2488
+ {
2489
+ static_cast<re_case*>(
2490
+ this->append_state(syntax_element_toggle_case, sizeof(re_case))
2491
+ )->icase = opts & regbase::icase;
2492
+ }
2493
+
2494
+ }
2495
+ //
2496
+ // now recursively add more states, this will terminate when we get to a
2497
+ // matching ')' :
2498
+ //
2499
+ parse_all();
2500
+ //
2501
+ // Unwind alternatives:
2502
+ //
2503
+ if(0 == unwind_alts(last_paren_start))
2504
+ {
2505
+ // Rewind to start of (? sequence:
2506
+ --m_position;
2507
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2508
+ fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2509
+ return false;
2510
+ }
2511
+ //
2512
+ // we either have a ')' or we have run out of characters prematurely:
2513
+ //
2514
+ if(m_position == m_end)
2515
+ {
2516
+ // Rewind to start of (? sequence:
2517
+ --m_position;
2518
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2519
+ this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
2520
+ return false;
2521
+ }
2522
+ BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2523
+ ++m_position;
2524
+ //
2525
+ // restore the flags:
2526
+ //
2527
+ if(restore_flags)
2528
+ {
2529
+ // append a case change state if we need it:
2530
+ if(m_has_case_change)
2531
+ {
2532
+ static_cast<re_case*>(
2533
+ this->append_state(syntax_element_toggle_case, sizeof(re_case))
2534
+ )->icase = old_flags & regbase::icase;
2535
+ }
2536
+ this->flags(old_flags);
2537
+ }
2538
+ //
2539
+ // set up the jump pointer if we have one:
2540
+ //
2541
+ if(jump_offset)
2542
+ {
2543
+ this->m_pdata->m_data.align();
2544
+ re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2545
+ jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2546
+ if((this->m_last_state == jmp) && (markid != -2))
2547
+ {
2548
+ // Oops... we didn't have anything inside the assertion.
2549
+ // Note we don't get here for negated forward lookahead as (?!)
2550
+ // does have some uses.
2551
+ // Rewind to start of (? sequence:
2552
+ --m_position;
2553
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2554
+ fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2555
+ return false;
2556
+ }
2557
+ }
2558
+ //
2559
+ // verify that if this is conditional expression, that we do have
2560
+ // an alternative, if not add one:
2561
+ //
2562
+ if(markid == -4)
2563
+ {
2564
+ re_syntax_base* b = this->getaddress(expected_alt_point);
2565
+ // Make sure we have exactly one alternative following this state:
2566
+ if(b->type != syntax_element_alt)
2567
+ {
2568
+ re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2569
+ alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2570
+ }
2571
+ else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2572
+ {
2573
+ // Can't have seen more than one alternative:
2574
+ // Rewind to start of (? sequence:
2575
+ --m_position;
2576
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2577
+ fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2578
+ return false;
2579
+ }
2580
+ else
2581
+ {
2582
+ // We must *not* have seen an alternative inside a (DEFINE) block:
2583
+ b = this->getaddress(b->next.i, b);
2584
+ if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2585
+ {
2586
+ // Rewind to start of (? sequence:
2587
+ --m_position;
2588
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2589
+ fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2590
+ return false;
2591
+ }
2592
+ }
2593
+ // check for invalid repetition of next state:
2594
+ b = this->getaddress(expected_alt_point);
2595
+ b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2596
+ if((b->type != syntax_element_assert_backref)
2597
+ && (b->type != syntax_element_startmark))
2598
+ {
2599
+ // Rewind to start of (? sequence:
2600
+ --m_position;
2601
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2602
+ fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2603
+ return false;
2604
+ }
2605
+ }
2606
+ //
2607
+ // append closing parenthesis state:
2608
+ //
2609
+ pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2610
+ pb->index = markid;
2611
+ pb->icase = this->flags() & regbase::icase;
2612
+ this->m_paren_start = last_paren_start;
2613
+ //
2614
+ // restore the alternate insertion point:
2615
+ //
2616
+ this->m_alt_insert_point = last_alt_point;
2617
+ //
2618
+ // and the case change data:
2619
+ //
2620
+ m_has_case_change = old_case_change;
2621
+ //
2622
+ // And the mark_reset data:
2623
+ //
2624
+ if(m_max_mark > m_mark_count)
2625
+ {
2626
+ m_mark_count = m_max_mark;
2627
+ }
2628
+ m_mark_reset = mark_reset;
2629
+ m_max_mark = max_mark;
2630
+
2631
+
2632
+ if(markid > 0)
2633
+ {
2634
+ #ifndef BOOST_NO_STD_DISTANCE
2635
+ if(this->flags() & regbase::save_subexpression_location)
2636
+ this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2637
+ #else
2638
+ if(this->flags() & regbase::save_subexpression_location)
2639
+ this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2640
+ #endif
2641
+ //
2642
+ // allow backrefs to this mark:
2643
+ //
2644
+ if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
2645
+ this->m_backrefs |= 1u << (markid - 1);
2646
+ }
2647
+ return true;
2648
+ }
2649
+
2650
+ template <class charT, class traits>
2651
+ bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2652
+ {
2653
+ //
2654
+ // parses an emacs style \sx or \Sx construct.
2655
+ //
2656
+ if(++m_position == m_end)
2657
+ {
2658
+ // Rewind to start of sequence:
2659
+ --m_position;
2660
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2661
+ fail(regex_constants::error_escape, m_position - m_base);
2662
+ return false;
2663
+ }
2664
+ basic_char_set<charT, traits> char_set;
2665
+ if(negate)
2666
+ char_set.negate();
2667
+
2668
+ static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
2669
+
2670
+ switch(*m_position)
2671
+ {
2672
+ case 's':
2673
+ case ' ':
2674
+ char_set.add_class(this->m_mask_space);
2675
+ break;
2676
+ case 'w':
2677
+ char_set.add_class(this->m_word_mask);
2678
+ break;
2679
+ case '_':
2680
+ char_set.add_single(digraph<charT>(charT('$')));
2681
+ char_set.add_single(digraph<charT>(charT('&')));
2682
+ char_set.add_single(digraph<charT>(charT('*')));
2683
+ char_set.add_single(digraph<charT>(charT('+')));
2684
+ char_set.add_single(digraph<charT>(charT('-')));
2685
+ char_set.add_single(digraph<charT>(charT('_')));
2686
+ char_set.add_single(digraph<charT>(charT('<')));
2687
+ char_set.add_single(digraph<charT>(charT('>')));
2688
+ break;
2689
+ case '.':
2690
+ char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
2691
+ break;
2692
+ case '(':
2693
+ char_set.add_single(digraph<charT>(charT('(')));
2694
+ char_set.add_single(digraph<charT>(charT('[')));
2695
+ char_set.add_single(digraph<charT>(charT('{')));
2696
+ break;
2697
+ case ')':
2698
+ char_set.add_single(digraph<charT>(charT(')')));
2699
+ char_set.add_single(digraph<charT>(charT(']')));
2700
+ char_set.add_single(digraph<charT>(charT('}')));
2701
+ break;
2702
+ case '"':
2703
+ char_set.add_single(digraph<charT>(charT('"')));
2704
+ char_set.add_single(digraph<charT>(charT('\'')));
2705
+ char_set.add_single(digraph<charT>(charT('`')));
2706
+ break;
2707
+ case '\'':
2708
+ char_set.add_single(digraph<charT>(charT('\'')));
2709
+ char_set.add_single(digraph<charT>(charT(',')));
2710
+ char_set.add_single(digraph<charT>(charT('#')));
2711
+ break;
2712
+ case '<':
2713
+ char_set.add_single(digraph<charT>(charT(';')));
2714
+ break;
2715
+ case '>':
2716
+ char_set.add_single(digraph<charT>(charT('\n')));
2717
+ char_set.add_single(digraph<charT>(charT('\f')));
2718
+ break;
2719
+ default:
2720
+ fail(regex_constants::error_ctype, m_position - m_base);
2721
+ return false;
2722
+ }
2723
+ if(0 == this->append_set(char_set))
2724
+ {
2725
+ fail(regex_constants::error_ctype, m_position - m_base);
2726
+ return false;
2727
+ }
2728
+ ++m_position;
2729
+ return true;
2730
+ }
2731
+
2732
+ template <class charT, class traits>
2733
+ regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
2734
+ {
2735
+ // we have a (?imsx-imsx) group, convert it into a set of flags:
2736
+ regex_constants::syntax_option_type f = this->flags();
2737
+ bool breakout = false;
2738
+ do
2739
+ {
2740
+ switch(*m_position)
2741
+ {
2742
+ case 's':
2743
+ f |= regex_constants::mod_s;
2744
+ f &= ~regex_constants::no_mod_s;
2745
+ break;
2746
+ case 'm':
2747
+ f &= ~regex_constants::no_mod_m;
2748
+ break;
2749
+ case 'i':
2750
+ f |= regex_constants::icase;
2751
+ break;
2752
+ case 'x':
2753
+ f |= regex_constants::mod_x;
2754
+ break;
2755
+ default:
2756
+ breakout = true;
2757
+ continue;
2758
+ }
2759
+ if(++m_position == m_end)
2760
+ {
2761
+ // Rewind to start of (? sequence:
2762
+ --m_position;
2763
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2764
+ fail(regex_constants::error_paren, m_position - m_base);
2765
+ return false;
2766
+ }
2767
+ }
2768
+ while(!breakout);
2769
+
2770
+ breakout = false;
2771
+
2772
+ if(*m_position == static_cast<charT>('-'))
2773
+ {
2774
+ if(++m_position == m_end)
2775
+ {
2776
+ // Rewind to start of (? sequence:
2777
+ --m_position;
2778
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2779
+ fail(regex_constants::error_paren, m_position - m_base);
2780
+ return false;
2781
+ }
2782
+ do
2783
+ {
2784
+ switch(*m_position)
2785
+ {
2786
+ case 's':
2787
+ f &= ~regex_constants::mod_s;
2788
+ f |= regex_constants::no_mod_s;
2789
+ break;
2790
+ case 'm':
2791
+ f |= regex_constants::no_mod_m;
2792
+ break;
2793
+ case 'i':
2794
+ f &= ~regex_constants::icase;
2795
+ break;
2796
+ case 'x':
2797
+ f &= ~regex_constants::mod_x;
2798
+ break;
2799
+ default:
2800
+ breakout = true;
2801
+ continue;
2802
+ }
2803
+ if(++m_position == m_end)
2804
+ {
2805
+ // Rewind to start of (? sequence:
2806
+ --m_position;
2807
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2808
+ fail(regex_constants::error_paren, m_position - m_base);
2809
+ return false;
2810
+ }
2811
+ }
2812
+ while(!breakout);
2813
+ }
2814
+ return f;
2815
+ }
2816
+
2817
+ template <class charT, class traits>
2818
+ bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
2819
+ {
2820
+ //
2821
+ // If we didn't actually add any states after the last
2822
+ // alternative then that's an error:
2823
+ //
2824
+ if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
2825
+ && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
2826
+ &&
2827
+ !(
2828
+ ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
2829
+ &&
2830
+ ((this->flags() & regbase::no_empty_expressions) == 0)
2831
+ )
2832
+ )
2833
+ {
2834
+ fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
2835
+ return false;
2836
+ }
2837
+ //
2838
+ // Fix up our alternatives:
2839
+ //
2840
+ while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
2841
+ {
2842
+ //
2843
+ // fix up the jump to point to the end of the states
2844
+ // that we've just added:
2845
+ //
2846
+ std::ptrdiff_t jump_offset = m_alt_jumps.back();
2847
+ m_alt_jumps.pop_back();
2848
+ this->m_pdata->m_data.align();
2849
+ re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2850
+ BOOST_ASSERT(jmp->type == syntax_element_jump);
2851
+ jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
2852
+ }
2853
+ return true;
2854
+ }
2855
+
2856
+ #ifdef BOOST_MSVC
2857
+ #pragma warning(pop)
2858
+ #endif
2859
+
2860
+ } // namespace re_detail
2861
+ } // namespace boost
2862
+
2863
+ #ifdef BOOST_MSVC
2864
+ #pragma warning(push)
2865
+ #pragma warning(disable: 4103)
2866
+ #endif
2867
+ #ifdef BOOST_HAS_ABI_HEADERS
2868
+ # include BOOST_ABI_SUFFIX
2869
+ #endif
2870
+ #ifdef BOOST_MSVC
2871
+ #pragma warning(pop)
2872
+ #endif
2873
+
2874
+ #endif