passenger 4.0.27 → 4.0.28

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of passenger might be problematic. Click here for more details.

Files changed (156) hide show
  1. data.tar.gz.asc +7 -7
  2. data/.gitignore +1 -0
  3. data/NEWS +22 -0
  4. data/build/preprocessor.rb +10 -0
  5. data/build/rpm.rb +74 -65
  6. data/debian.template/rules.template +8 -0
  7. data/dev/copy_boost_headers.rb +11 -2
  8. data/doc/Users guide Apache.idmap.txt +161 -145
  9. data/doc/Users guide Apache.txt +12 -1
  10. data/doc/Users guide Nginx.idmap.txt +142 -126
  11. data/doc/Users guide Nginx.txt +14 -1
  12. data/doc/Users guide Standalone.txt +1 -0
  13. data/doc/users_guide_snippets/environment_variables.txt +1 -1
  14. data/doc/users_guide_snippets/installation.txt +2 -0
  15. data/doc/users_guide_snippets/tips.txt +118 -0
  16. data/ext/apache2/Configuration.cpp +0 -6
  17. data/ext/apache2/Configuration.hpp +0 -5
  18. data/ext/apache2/ConfigurationCommands.cpp +7 -0
  19. data/ext/apache2/ConfigurationFields.hpp +2 -0
  20. data/ext/apache2/ConfigurationSetters.cpp +24 -0
  21. data/ext/apache2/CreateDirConfig.cpp +1 -0
  22. data/ext/apache2/Hooks.cpp +0 -1
  23. data/ext/apache2/MergeDirConfig.cpp +7 -0
  24. data/ext/apache2/SetHeaders.cpp +5 -1
  25. data/ext/boost/cregex.hpp +39 -0
  26. data/ext/boost/libs/regex/src/c_regex_traits.cpp +193 -0
  27. data/ext/boost/libs/regex/src/cpp_regex_traits.cpp +117 -0
  28. data/ext/boost/libs/regex/src/cregex.cpp +660 -0
  29. data/ext/boost/libs/regex/src/instances.cpp +32 -0
  30. data/ext/boost/libs/regex/src/internals.hpp +35 -0
  31. data/ext/boost/libs/regex/src/posix_api.cpp +296 -0
  32. data/ext/boost/libs/regex/src/regex.cpp +227 -0
  33. data/ext/boost/libs/regex/src/regex_debug.cpp +59 -0
  34. data/ext/boost/libs/regex/src/regex_raw_buffer.cpp +72 -0
  35. data/ext/boost/libs/regex/src/regex_traits_defaults.cpp +692 -0
  36. data/ext/boost/libs/regex/src/static_mutex.cpp +179 -0
  37. data/ext/boost/libs/regex/src/wc_regex_traits.cpp +301 -0
  38. data/ext/boost/libs/regex/src/wide_posix_api.cpp +315 -0
  39. data/ext/boost/libs/regex/src/winstances.cpp +35 -0
  40. data/ext/boost/regex.h +100 -0
  41. data/ext/boost/regex.hpp +37 -0
  42. data/ext/boost/regex/concepts.hpp +1128 -0
  43. data/ext/boost/regex/config.hpp +435 -0
  44. data/ext/boost/regex/config/borland.hpp +72 -0
  45. data/ext/boost/regex/config/cwchar.hpp +207 -0
  46. data/ext/boost/regex/mfc.hpp +190 -0
  47. data/ext/boost/regex/pattern_except.hpp +100 -0
  48. data/ext/boost/regex/pending/object_cache.hpp +165 -0
  49. data/ext/boost/regex/pending/static_mutex.hpp +179 -0
  50. data/ext/boost/regex/pending/unicode_iterator.hpp +776 -0
  51. data/ext/boost/regex/regex_traits.hpp +35 -0
  52. data/ext/boost/regex/user.hpp +93 -0
  53. data/ext/boost/regex/v4/basic_regex.hpp +782 -0
  54. data/ext/boost/regex/v4/basic_regex_creator.hpp +1571 -0
  55. data/ext/boost/regex/v4/basic_regex_parser.hpp +2874 -0
  56. data/ext/boost/regex/v4/c_regex_traits.hpp +211 -0
  57. data/ext/boost/regex/v4/char_regex_traits.hpp +81 -0
  58. data/ext/boost/regex/v4/cpp_regex_traits.hpp +1099 -0
  59. data/ext/boost/regex/v4/cregex.hpp +330 -0
  60. data/ext/boost/regex/v4/error_type.hpp +59 -0
  61. data/ext/boost/regex/v4/fileiter.hpp +455 -0
  62. data/ext/boost/regex/v4/instances.hpp +222 -0
  63. data/ext/boost/regex/v4/iterator_category.hpp +91 -0
  64. data/ext/boost/regex/v4/iterator_traits.hpp +135 -0
  65. data/ext/boost/regex/v4/match_flags.hpp +138 -0
  66. data/ext/boost/regex/v4/match_results.hpp +702 -0
  67. data/ext/boost/regex/v4/mem_block_cache.hpp +99 -0
  68. data/ext/boost/regex/v4/perl_matcher.hpp +587 -0
  69. data/ext/boost/regex/v4/perl_matcher_common.hpp +996 -0
  70. data/ext/boost/regex/v4/perl_matcher_non_recursive.hpp +1642 -0
  71. data/ext/boost/regex/v4/perl_matcher_recursive.hpp +991 -0
  72. data/ext/boost/regex/v4/primary_transform.hpp +146 -0
  73. data/ext/boost/regex/v4/protected_call.hpp +81 -0
  74. data/ext/boost/regex/v4/regbase.hpp +180 -0
  75. data/ext/boost/regex/v4/regex.hpp +202 -0
  76. data/ext/boost/regex/v4/regex_format.hpp +1156 -0
  77. data/ext/boost/regex/v4/regex_fwd.hpp +73 -0
  78. data/ext/boost/regex/v4/regex_grep.hpp +155 -0
  79. data/ext/boost/regex/v4/regex_iterator.hpp +201 -0
  80. data/ext/boost/regex/v4/regex_match.hpp +382 -0
  81. data/ext/boost/regex/v4/regex_merge.hpp +93 -0
  82. data/ext/boost/regex/v4/regex_raw_buffer.hpp +210 -0
  83. data/ext/boost/regex/v4/regex_replace.hpp +99 -0
  84. data/ext/boost/regex/v4/regex_search.hpp +217 -0
  85. data/ext/boost/regex/v4/regex_split.hpp +172 -0
  86. data/ext/boost/regex/v4/regex_token_iterator.hpp +342 -0
  87. data/ext/boost/regex/v4/regex_traits.hpp +189 -0
  88. data/ext/boost/regex/v4/regex_traits_defaults.hpp +371 -0
  89. data/ext/boost/regex/v4/regex_workaround.hpp +232 -0
  90. data/ext/boost/regex/v4/states.hpp +301 -0
  91. data/ext/boost/regex/v4/sub_match.hpp +512 -0
  92. data/ext/boost/regex/v4/syntax_type.hpp +105 -0
  93. data/ext/boost/regex/v4/u32regex_iterator.hpp +193 -0
  94. data/ext/boost/regex/v4/u32regex_token_iterator.hpp +377 -0
  95. data/ext/boost/regex/v4/w32_regex_traits.hpp +741 -0
  96. data/ext/boost/regex_fwd.hpp +33 -0
  97. data/ext/common/AgentsStarter.h +0 -11
  98. data/ext/common/ApplicationPool2/Common.h +1 -7
  99. data/ext/common/ApplicationPool2/DirectSpawner.h +3 -3
  100. data/ext/common/ApplicationPool2/Group.h +166 -69
  101. data/ext/common/ApplicationPool2/Implementation.cpp +55 -10
  102. data/ext/common/ApplicationPool2/Options.h +45 -10
  103. data/ext/common/ApplicationPool2/PipeWatcher.h +1 -2
  104. data/ext/common/ApplicationPool2/Pool.h +29 -7
  105. data/ext/common/ApplicationPool2/Process.h +22 -3
  106. data/ext/common/ApplicationPool2/Session.h +1 -0
  107. data/ext/common/ApplicationPool2/SmartSpawner.h +5 -10
  108. data/ext/common/ApplicationPool2/Spawner.h +10 -15
  109. data/ext/common/ApplicationPool2/SuperGroup.h +10 -9
  110. data/ext/common/Constants.h +1 -3
  111. data/ext/common/Hooks.h +193 -0
  112. data/ext/common/Logging.cpp +67 -2
  113. data/ext/common/Logging.h +23 -1
  114. data/ext/common/Utils.cpp +0 -21
  115. data/ext/common/Utils.h +0 -42
  116. data/ext/common/Utils/CachedFileStat.hpp +1 -1
  117. data/ext/common/Utils/StrIntUtils.h +61 -14
  118. data/ext/common/Utils/StringMap.h +4 -0
  119. data/ext/common/agents/HelperAgent/AgentOptions.h +4 -4
  120. data/ext/common/agents/HelperAgent/Main.cpp +2 -3
  121. data/ext/common/agents/HelperAgent/RequestHandler.h +65 -2
  122. data/ext/common/agents/LoggingAgent/FilterSupport.h +3 -1
  123. data/ext/common/agents/Watchdog/Main.cpp +8 -72
  124. data/ext/nginx/CacheLocationConfig.c +29 -1
  125. data/ext/nginx/Configuration.c +0 -12
  126. data/ext/nginx/Configuration.h +0 -1
  127. data/ext/nginx/ConfigurationCommands.c +10 -0
  128. data/ext/nginx/ConfigurationFields.h +2 -0
  129. data/ext/nginx/CreateLocationConfig.c +4 -0
  130. data/ext/nginx/MergeLocationConfig.c +6 -0
  131. data/ext/oxt/system_calls.cpp +7 -1
  132. data/ext/oxt/system_calls.hpp +7 -7
  133. data/helper-scripts/node-loader.js +6 -2
  134. data/helper-scripts/rack-loader.rb +5 -2
  135. data/helper-scripts/rack-preloader.rb +5 -2
  136. data/lib/phusion_passenger.rb +1 -1
  137. data/lib/phusion_passenger/apache2/config_options.rb +8 -0
  138. data/lib/phusion_passenger/constants.rb +0 -1
  139. data/lib/phusion_passenger/nginx/config_options.rb +9 -2
  140. data/lib/phusion_passenger/platform_info/apache.rb +2 -1
  141. data/lib/phusion_passenger/platform_info/compiler.rb +15 -1
  142. data/lib/phusion_passenger/platform_info/cxx_portability.rb +2 -0
  143. data/node_lib/phusion_passenger/httplib_emulation.js +85 -17
  144. data/node_lib/phusion_passenger/request_handler.js +10 -2
  145. data/rpm/Vagrantfile +32 -0
  146. data/rpm/get_distro_id.py +4 -0
  147. data/test/cxx/ApplicationPool2/DirectSpawnerTest.cpp +2 -2
  148. data/test/cxx/ApplicationPool2/PoolTest.cpp +60 -9
  149. data/test/cxx/ApplicationPool2/SmartSpawnerTest.cpp +2 -6
  150. data/test/cxx/CachedFileStatTest.cpp +5 -5
  151. data/test/cxx/RequestHandlerTest.cpp +3 -6
  152. data/test/cxx/UtilsTest.cpp +30 -0
  153. data/test/node/httplib_emulation_spec.js +491 -0
  154. data/test/node/spec_helper.js +25 -0
  155. metadata +78 -2
  156. metadata.gz.asc +7 -7
@@ -0,0 +1,1571 @@
1
+ /*
2
+ *
3
+ * Copyright (c) 2004
4
+ * John Maddock
5
+ *
6
+ * Use, modification and distribution are subject to the
7
+ * Boost Software License, Version 1.0. (See accompanying file
8
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
+ *
10
+ */
11
+
12
+ /*
13
+ * LOCATION: see http://www.boost.org for most recent version.
14
+ * FILE basic_regex_creator.hpp
15
+ * VERSION see <boost/version.hpp>
16
+ * DESCRIPTION: Declares template class basic_regex_creator which fills in
17
+ * the data members of a regex_data object.
18
+ */
19
+
20
+ #ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
21
+ #define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
22
+
23
+ #ifdef BOOST_MSVC
24
+ #pragma warning(push)
25
+ #pragma warning(disable: 4103)
26
+ #endif
27
+ #ifdef BOOST_HAS_ABI_HEADERS
28
+ # include BOOST_ABI_PREFIX
29
+ #endif
30
+ #ifdef BOOST_MSVC
31
+ #pragma warning(pop)
32
+ #endif
33
+
34
+ #ifdef BOOST_MSVC
35
+ # pragma warning(push)
36
+ # pragma warning(disable: 4800)
37
+ #endif
38
+
39
+ namespace boost{
40
+
41
+ namespace re_detail{
42
+
43
+ template <class charT>
44
+ struct digraph : public std::pair<charT, charT>
45
+ {
46
+ digraph() : std::pair<charT, charT>(0, 0){}
47
+ digraph(charT c1) : std::pair<charT, charT>(c1, 0){}
48
+ digraph(charT c1, charT c2) : std::pair<charT, charT>(c1, c2)
49
+ {}
50
+ #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
51
+ digraph(const digraph<charT>& d) : std::pair<charT, charT>(d.first, d.second){}
52
+ #endif
53
+ template <class Seq>
54
+ digraph(const Seq& s) : std::pair<charT, charT>()
55
+ {
56
+ BOOST_ASSERT(s.size() <= 2);
57
+ BOOST_ASSERT(s.size());
58
+ this->first = s[0];
59
+ this->second = (s.size() > 1) ? s[1] : 0;
60
+ }
61
+ };
62
+
63
+ template <class charT, class traits>
64
+ class basic_char_set
65
+ {
66
+ public:
67
+ typedef digraph<charT> digraph_type;
68
+ typedef typename traits::string_type string_type;
69
+ typedef typename traits::char_class_type m_type;
70
+
71
+ basic_char_set()
72
+ {
73
+ m_negate = false;
74
+ m_has_digraphs = false;
75
+ m_classes = 0;
76
+ m_negated_classes = 0;
77
+ m_empty = true;
78
+ }
79
+
80
+ void add_single(const digraph_type& s)
81
+ {
82
+ m_singles.insert(m_singles.end(), s);
83
+ if(s.second)
84
+ m_has_digraphs = true;
85
+ m_empty = false;
86
+ }
87
+ void add_range(const digraph_type& first, const digraph_type& end)
88
+ {
89
+ m_ranges.insert(m_ranges.end(), first);
90
+ m_ranges.insert(m_ranges.end(), end);
91
+ if(first.second)
92
+ {
93
+ m_has_digraphs = true;
94
+ add_single(first);
95
+ }
96
+ if(end.second)
97
+ {
98
+ m_has_digraphs = true;
99
+ add_single(end);
100
+ }
101
+ m_empty = false;
102
+ }
103
+ void add_class(m_type m)
104
+ {
105
+ m_classes |= m;
106
+ m_empty = false;
107
+ }
108
+ void add_negated_class(m_type m)
109
+ {
110
+ m_negated_classes |= m;
111
+ m_empty = false;
112
+ }
113
+ void add_equivalent(const digraph_type& s)
114
+ {
115
+ m_equivalents.insert(m_equivalents.end(), s);
116
+ if(s.second)
117
+ {
118
+ m_has_digraphs = true;
119
+ add_single(s);
120
+ }
121
+ m_empty = false;
122
+ }
123
+ void negate()
124
+ {
125
+ m_negate = true;
126
+ //m_empty = false;
127
+ }
128
+
129
+ //
130
+ // accessor functions:
131
+ //
132
+ bool has_digraphs()const
133
+ {
134
+ return m_has_digraphs;
135
+ }
136
+ bool is_negated()const
137
+ {
138
+ return m_negate;
139
+ }
140
+ typedef typename std::vector<digraph_type>::const_iterator list_iterator;
141
+ list_iterator singles_begin()const
142
+ {
143
+ return m_singles.begin();
144
+ }
145
+ list_iterator singles_end()const
146
+ {
147
+ return m_singles.end();
148
+ }
149
+ list_iterator ranges_begin()const
150
+ {
151
+ return m_ranges.begin();
152
+ }
153
+ list_iterator ranges_end()const
154
+ {
155
+ return m_ranges.end();
156
+ }
157
+ list_iterator equivalents_begin()const
158
+ {
159
+ return m_equivalents.begin();
160
+ }
161
+ list_iterator equivalents_end()const
162
+ {
163
+ return m_equivalents.end();
164
+ }
165
+ m_type classes()const
166
+ {
167
+ return m_classes;
168
+ }
169
+ m_type negated_classes()const
170
+ {
171
+ return m_negated_classes;
172
+ }
173
+ bool empty()const
174
+ {
175
+ return m_empty;
176
+ }
177
+ private:
178
+ std::vector<digraph_type> m_singles; // a list of single characters to match
179
+ std::vector<digraph_type> m_ranges; // a list of end points of our ranges
180
+ bool m_negate; // true if the set is to be negated
181
+ bool m_has_digraphs; // true if we have digraphs present
182
+ m_type m_classes; // character classes to match
183
+ m_type m_negated_classes; // negated character classes to match
184
+ bool m_empty; // whether we've added anything yet
185
+ std::vector<digraph_type> m_equivalents; // a list of equivalence classes
186
+ };
187
+
188
+ template <class charT, class traits>
189
+ class basic_regex_creator
190
+ {
191
+ public:
192
+ basic_regex_creator(regex_data<charT, traits>* data);
193
+ std::ptrdiff_t getoffset(void* addr)
194
+ {
195
+ return getoffset(addr, m_pdata->m_data.data());
196
+ }
197
+ std::ptrdiff_t getoffset(const void* addr, const void* base)
198
+ {
199
+ return static_cast<const char*>(addr) - static_cast<const char*>(base);
200
+ }
201
+ re_syntax_base* getaddress(std::ptrdiff_t off)
202
+ {
203
+ return getaddress(off, m_pdata->m_data.data());
204
+ }
205
+ re_syntax_base* getaddress(std::ptrdiff_t off, void* base)
206
+ {
207
+ return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off));
208
+ }
209
+ void init(unsigned l_flags)
210
+ {
211
+ m_pdata->m_flags = l_flags;
212
+ m_icase = l_flags & regex_constants::icase;
213
+ }
214
+ regbase::flag_type flags()
215
+ {
216
+ return m_pdata->m_flags;
217
+ }
218
+ void flags(regbase::flag_type f)
219
+ {
220
+ m_pdata->m_flags = f;
221
+ if(m_icase != static_cast<bool>(f & regbase::icase))
222
+ {
223
+ m_icase = static_cast<bool>(f & regbase::icase);
224
+ }
225
+ }
226
+ re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
227
+ re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
228
+ re_literal* append_literal(charT c);
229
+ re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
230
+ re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
231
+ re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
232
+ void finalize(const charT* p1, const charT* p2);
233
+ protected:
234
+ regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
235
+ const ::boost::regex_traits_wrapper<traits>&
236
+ m_traits; // convenience reference to traits class
237
+ re_syntax_base* m_last_state; // the last state we added
238
+ bool m_icase; // true for case insensitive matches
239
+ unsigned m_repeater_id; // the state_id of the next repeater
240
+ bool m_has_backrefs; // true if there are actually any backrefs
241
+ unsigned m_backrefs; // bitmask of permitted backrefs
242
+ boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
243
+ bool m_has_recursions; // set when we have recursive expresisons to fixup
244
+ std::vector<bool> m_recursion_checks; // notes which recursions we've followed while analysing this expression
245
+ typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
246
+ typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
247
+ typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
248
+ typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character
249
+ typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character
250
+ private:
251
+ basic_regex_creator& operator=(const basic_regex_creator&);
252
+ basic_regex_creator(const basic_regex_creator&);
253
+
254
+ void fixup_pointers(re_syntax_base* state);
255
+ void fixup_recursions(re_syntax_base* state);
256
+ void create_startmaps(re_syntax_base* state);
257
+ int calculate_backstep(re_syntax_base* state);
258
+ void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
259
+ unsigned get_restart_type(re_syntax_base* state);
260
+ void set_all_masks(unsigned char* bits, unsigned char);
261
+ bool is_bad_repeat(re_syntax_base* pt);
262
+ void set_bad_repeat(re_syntax_base* pt);
263
+ syntax_element_type get_repeat_type(re_syntax_base* state);
264
+ void probe_leading_repeat(re_syntax_base* state);
265
+ };
266
+
267
+ template <class charT, class traits>
268
+ basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
269
+ : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
270
+ {
271
+ m_pdata->m_data.clear();
272
+ m_pdata->m_status = ::boost::regex_constants::error_ok;
273
+ static const charT w = 'w';
274
+ static const charT s = 's';
275
+ static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', };
276
+ static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', };
277
+ static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', };
278
+ m_word_mask = m_traits.lookup_classname(&w, &w +1);
279
+ m_mask_space = m_traits.lookup_classname(&s, &s +1);
280
+ m_lower_mask = m_traits.lookup_classname(l, l + 5);
281
+ m_upper_mask = m_traits.lookup_classname(u, u + 5);
282
+ m_alpha_mask = m_traits.lookup_classname(a, a + 5);
283
+ m_pdata->m_word_mask = m_word_mask;
284
+ BOOST_ASSERT(m_word_mask != 0);
285
+ BOOST_ASSERT(m_mask_space != 0);
286
+ BOOST_ASSERT(m_lower_mask != 0);
287
+ BOOST_ASSERT(m_upper_mask != 0);
288
+ BOOST_ASSERT(m_alpha_mask != 0);
289
+ }
290
+
291
+ template <class charT, class traits>
292
+ re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
293
+ {
294
+ // if the state is a backref then make a note of it:
295
+ if(t == syntax_element_backref)
296
+ this->m_has_backrefs = true;
297
+ // append a new state, start by aligning our last one:
298
+ m_pdata->m_data.align();
299
+ // set the offset to the next state in our last one:
300
+ if(m_last_state)
301
+ m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
302
+ // now actually extent our data:
303
+ m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s));
304
+ // fill in boilerplate options in the new state:
305
+ m_last_state->next.i = 0;
306
+ m_last_state->type = t;
307
+ return m_last_state;
308
+ }
309
+
310
+ template <class charT, class traits>
311
+ re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s)
312
+ {
313
+ // append a new state, start by aligning our last one:
314
+ m_pdata->m_data.align();
315
+ // set the offset to the next state in our last one:
316
+ if(m_last_state)
317
+ m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
318
+ // remember the last state position:
319
+ std::ptrdiff_t off = getoffset(m_last_state) + s;
320
+ // now actually insert our data:
321
+ re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s));
322
+ // fill in boilerplate options in the new state:
323
+ new_state->next.i = s;
324
+ new_state->type = t;
325
+ m_last_state = getaddress(off);
326
+ return new_state;
327
+ }
328
+
329
+ template <class charT, class traits>
330
+ re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
331
+ {
332
+ re_literal* result;
333
+ // start by seeing if we have an existing re_literal we can extend:
334
+ if((0 == m_last_state) || (m_last_state->type != syntax_element_literal))
335
+ {
336
+ // no existing re_literal, create a new one:
337
+ result = static_cast<re_literal*>(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
338
+ result->length = 1;
339
+ *static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase);
340
+ }
341
+ else
342
+ {
343
+ // we have an existing re_literal, extend it:
344
+ std::ptrdiff_t off = getoffset(m_last_state);
345
+ m_pdata->m_data.extend(sizeof(charT));
346
+ m_last_state = result = static_cast<re_literal*>(getaddress(off));
347
+ charT* characters = static_cast<charT*>(static_cast<void*>(result+1));
348
+ characters[result->length] = m_traits.translate(c, m_icase);
349
+ ++(result->length);
350
+ }
351
+ return result;
352
+ }
353
+
354
+ template <class charT, class traits>
355
+ inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
356
+ const basic_char_set<charT, traits>& char_set)
357
+ {
358
+ typedef mpl::bool_< (sizeof(charT) == 1) > truth_type;
359
+ return char_set.has_digraphs()
360
+ ? append_set(char_set, static_cast<mpl::false_*>(0))
361
+ : append_set(char_set, static_cast<truth_type*>(0));
362
+ }
363
+
364
+ template <class charT, class traits>
365
+ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
366
+ const basic_char_set<charT, traits>& char_set, mpl::false_*)
367
+ {
368
+ typedef typename traits::string_type string_type;
369
+ typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
370
+ typedef typename traits::char_class_type m_type;
371
+
372
+ re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>)));
373
+ //
374
+ // fill in the basics:
375
+ //
376
+ result->csingles = static_cast<unsigned int>(::boost::re_detail::distance(char_set.singles_begin(), char_set.singles_end()));
377
+ result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
378
+ result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
379
+ result->cclasses = char_set.classes();
380
+ result->cnclasses = char_set.negated_classes();
381
+ if(flags() & regbase::icase)
382
+ {
383
+ // adjust classes as needed:
384
+ if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
385
+ result->cclasses |= m_alpha_mask;
386
+ if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
387
+ result->cnclasses |= m_alpha_mask;
388
+ }
389
+
390
+ result->isnot = char_set.is_negated();
391
+ result->singleton = !char_set.has_digraphs();
392
+ //
393
+ // remember where the state is for later:
394
+ //
395
+ std::ptrdiff_t offset = getoffset(result);
396
+ //
397
+ // now extend with all the singles:
398
+ //
399
+ item_iterator first, last;
400
+ first = char_set.singles_begin();
401
+ last = char_set.singles_end();
402
+ while(first != last)
403
+ {
404
+ charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
405
+ p[0] = m_traits.translate(first->first, m_icase);
406
+ if(first->second)
407
+ {
408
+ p[1] = m_traits.translate(first->second, m_icase);
409
+ p[2] = 0;
410
+ }
411
+ else
412
+ p[1] = 0;
413
+ ++first;
414
+ }
415
+ //
416
+ // now extend with all the ranges:
417
+ //
418
+ first = char_set.ranges_begin();
419
+ last = char_set.ranges_end();
420
+ while(first != last)
421
+ {
422
+ // first grab the endpoints of the range:
423
+ digraph<charT> c1 = *first;
424
+ c1.first = this->m_traits.translate(c1.first, this->m_icase);
425
+ c1.second = this->m_traits.translate(c1.second, this->m_icase);
426
+ ++first;
427
+ digraph<charT> c2 = *first;
428
+ c2.first = this->m_traits.translate(c2.first, this->m_icase);
429
+ c2.second = this->m_traits.translate(c2.second, this->m_icase);
430
+ ++first;
431
+ string_type s1, s2;
432
+ // different actions now depending upon whether collation is turned on:
433
+ if(flags() & regex_constants::collate)
434
+ {
435
+ // we need to transform our range into sort keys:
436
+ #if BOOST_WORKAROUND(__GNUC__, < 3)
437
+ string_type in(3, charT(0));
438
+ in[0] = c1.first;
439
+ in[1] = c1.second;
440
+ s1 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1));
441
+ in[0] = c2.first;
442
+ in[1] = c2.second;
443
+ s2 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1));
444
+ #else
445
+ charT a1[3] = { c1.first, c1.second, charT(0), };
446
+ charT a2[3] = { c2.first, c2.second, charT(0), };
447
+ s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
448
+ s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
449
+ #endif
450
+ if(s1.size() == 0)
451
+ s1 = string_type(1, charT(0));
452
+ if(s2.size() == 0)
453
+ s2 = string_type(1, charT(0));
454
+ }
455
+ else
456
+ {
457
+ if(c1.second)
458
+ {
459
+ s1.insert(s1.end(), c1.first);
460
+ s1.insert(s1.end(), c1.second);
461
+ }
462
+ else
463
+ s1 = string_type(1, c1.first);
464
+ if(c2.second)
465
+ {
466
+ s2.insert(s2.end(), c2.first);
467
+ s2.insert(s2.end(), c2.second);
468
+ }
469
+ else
470
+ s2.insert(s2.end(), c2.first);
471
+ }
472
+ if(s1 > s2)
473
+ {
474
+ // Oops error:
475
+ return 0;
476
+ }
477
+ charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
478
+ re_detail::copy(s1.begin(), s1.end(), p);
479
+ p[s1.size()] = charT(0);
480
+ p += s1.size() + 1;
481
+ re_detail::copy(s2.begin(), s2.end(), p);
482
+ p[s2.size()] = charT(0);
483
+ }
484
+ //
485
+ // now process the equivalence classes:
486
+ //
487
+ first = char_set.equivalents_begin();
488
+ last = char_set.equivalents_end();
489
+ while(first != last)
490
+ {
491
+ string_type s;
492
+ if(first->second)
493
+ {
494
+ #if BOOST_WORKAROUND(__GNUC__, < 3)
495
+ string_type in(3, charT(0));
496
+ in[0] = first->first;
497
+ in[1] = first->second;
498
+ s = m_traits.transform_primary(in.c_str(), in.c_str()+2);
499
+ #else
500
+ charT cs[3] = { first->first, first->second, charT(0), };
501
+ s = m_traits.transform_primary(cs, cs+2);
502
+ #endif
503
+ }
504
+ else
505
+ s = m_traits.transform_primary(&first->first, &first->first+1);
506
+ if(s.empty())
507
+ return 0; // invalid or unsupported equivalence class
508
+ charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
509
+ re_detail::copy(s.begin(), s.end(), p);
510
+ p[s.size()] = charT(0);
511
+ ++first;
512
+ }
513
+ //
514
+ // finally reset the address of our last state:
515
+ //
516
+ m_last_state = result = static_cast<re_set_long<m_type>*>(getaddress(offset));
517
+ return result;
518
+ }
519
+
520
//
// char_less: ordering used when checking that a range's end points are
// the right way round.  The generic form is plain operator<; the char and
// signed char overloads compare the values as unsigned char.
//
template<class T>
inline bool char_less(T t1, T t2)
{
   return t1 < t2;
}
inline bool char_less(char t1, char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}
inline bool char_less(signed char t1, signed char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}
533
+
534
+ template <class charT, class traits>
535
+ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
536
+ const basic_char_set<charT, traits>& char_set, mpl::true_*)
537
+ {
538
+ typedef typename traits::string_type string_type;
539
+ typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
540
+
541
+ re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
542
+ bool negate = char_set.is_negated();
543
+ std::memset(result->_map, 0, sizeof(result->_map));
544
+ //
545
+ // handle singles first:
546
+ //
547
+ item_iterator first, last;
548
+ first = char_set.singles_begin();
549
+ last = char_set.singles_end();
550
+ while(first != last)
551
+ {
552
+ for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
553
+ {
554
+ if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
555
+ == this->m_traits.translate(first->first, this->m_icase))
556
+ result->_map[i] = true;
557
+ }
558
+ ++first;
559
+ }
560
+ //
561
+ // OK now handle ranges:
562
+ //
563
+ first = char_set.ranges_begin();
564
+ last = char_set.ranges_end();
565
+ while(first != last)
566
+ {
567
+ // first grab the endpoints of the range:
568
+ charT c1 = this->m_traits.translate(first->first, this->m_icase);
569
+ ++first;
570
+ charT c2 = this->m_traits.translate(first->first, this->m_icase);
571
+ ++first;
572
+ // different actions now depending upon whether collation is turned on:
573
+ if(flags() & regex_constants::collate)
574
+ {
575
+ // we need to transform our range into sort keys:
576
+ charT c3[2] = { c1, charT(0), };
577
+ string_type s1 = this->m_traits.transform(c3, c3+1);
578
+ c3[0] = c2;
579
+ string_type s2 = this->m_traits.transform(c3, c3+1);
580
+ if(s1 > s2)
581
+ {
582
+ // Oops error:
583
+ return 0;
584
+ }
585
+ BOOST_ASSERT(c3[1] == charT(0));
586
+ for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
587
+ {
588
+ c3[0] = static_cast<charT>(i);
589
+ string_type s3 = this->m_traits.transform(c3, c3 +1);
590
+ if((s1 <= s3) && (s3 <= s2))
591
+ result->_map[i] = true;
592
+ }
593
+ }
594
+ else
595
+ {
596
+ if(char_less(c2, c1))
597
+ {
598
+ // Oops error:
599
+ return 0;
600
+ }
601
+ // everything in range matches:
602
+ std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
603
+ }
604
+ }
605
+ //
606
+ // and now the classes:
607
+ //
608
+ typedef typename traits::char_class_type m_type;
609
+ m_type m = char_set.classes();
610
+ if(flags() & regbase::icase)
611
+ {
612
+ // adjust m as needed:
613
+ if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
614
+ m |= m_alpha_mask;
615
+ }
616
+ if(m != 0)
617
+ {
618
+ for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
619
+ {
620
+ if(this->m_traits.isctype(static_cast<charT>(i), m))
621
+ result->_map[i] = true;
622
+ }
623
+ }
624
+ //
625
+ // and now the negated classes:
626
+ //
627
+ m = char_set.negated_classes();
628
+ if(flags() & regbase::icase)
629
+ {
630
+ // adjust m as needed:
631
+ if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
632
+ m |= m_alpha_mask;
633
+ }
634
+ if(m != 0)
635
+ {
636
+ for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
637
+ {
638
+ if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
639
+ result->_map[i] = true;
640
+ }
641
+ }
642
+ //
643
+ // now process the equivalence classes:
644
+ //
645
+ first = char_set.equivalents_begin();
646
+ last = char_set.equivalents_end();
647
+ while(first != last)
648
+ {
649
+ string_type s;
650
+ BOOST_ASSERT(static_cast<charT>(0) == first->second);
651
+ s = m_traits.transform_primary(&first->first, &first->first+1);
652
+ if(s.empty())
653
+ return 0; // invalid or unsupported equivalence class
654
+ for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
655
+ {
656
+ charT c[2] = { (static_cast<charT>(i)), charT(0), };
657
+ string_type s2 = this->m_traits.transform_primary(c, c+1);
658
+ if(s == s2)
659
+ result->_map[i] = true;
660
+ }
661
+ ++first;
662
+ }
663
+ if(negate)
664
+ {
665
+ for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
666
+ {
667
+ result->_map[i] = !(result->_map[i]);
668
+ }
669
+ }
670
+ return result;
671
+ }
672
+
673
+ template <class charT, class traits>
674
+ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
675
+ {
676
+ if(this->m_pdata->m_status)
677
+ return;
678
+ // we've added all the states we need, now finish things off.
679
+ // start by adding a terminating state:
680
+ append_state(syntax_element_match);
681
+ // extend storage to store original expression:
682
+ std::ptrdiff_t len = p2 - p1;
683
+ m_pdata->m_expression_len = len;
684
+ charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
685
+ m_pdata->m_expression = ps;
686
+ re_detail::copy(p1, p2, ps);
687
+ ps[p2 - p1] = 0;
688
+ // fill in our other data...
689
+ // successful parsing implies a zero status:
690
+ m_pdata->m_status = 0;
691
+ // get the first state of the machine:
692
+ m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
693
+ // fixup pointers in the machine:
694
+ fixup_pointers(m_pdata->m_first_state);
695
+ if(m_has_recursions)
696
+ {
697
+ m_pdata->m_has_recursions = true;
698
+ fixup_recursions(m_pdata->m_first_state);
699
+ if(this->m_pdata->m_status)
700
+ return;
701
+ }
702
+ else
703
+ m_pdata->m_has_recursions = false;
704
+ // create nested startmaps:
705
+ create_startmaps(m_pdata->m_first_state);
706
+ // create main startmap:
707
+ std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
708
+ m_pdata->m_can_be_null = 0;
709
+
710
+ m_bad_repeats = 0;
711
+ if(m_has_recursions)
712
+ m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
713
+ create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
714
+ // get the restart type:
715
+ m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
716
+ // optimise a leading repeat if there is one:
717
+ probe_leading_repeat(m_pdata->m_first_state);
718
+ }
719
+
720
+ template <class charT, class traits>
721
+ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
722
+ {
723
+ while(state)
724
+ {
725
+ switch(state->type)
726
+ {
727
+ case syntax_element_recurse:
728
+ m_has_recursions = true;
729
+ if(state->next.i)
730
+ state->next.p = getaddress(state->next.i, state);
731
+ else
732
+ state->next.p = 0;
733
+ break;
734
+ case syntax_element_rep:
735
+ case syntax_element_dot_rep:
736
+ case syntax_element_char_rep:
737
+ case syntax_element_short_set_rep:
738
+ case syntax_element_long_set_rep:
739
+ // set the state_id of this repeat:
740
+ static_cast<re_repeat*>(state)->state_id = m_repeater_id++;
741
+ BOOST_FALLTHROUGH;
742
+ case syntax_element_alt:
743
+ std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
744
+ static_cast<re_alt*>(state)->can_be_null = 0;
745
+ BOOST_FALLTHROUGH;
746
+ case syntax_element_jump:
747
+ static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state);
748
+ BOOST_FALLTHROUGH;
749
+ default:
750
+ if(state->next.i)
751
+ state->next.p = getaddress(state->next.i, state);
752
+ else
753
+ state->next.p = 0;
754
+ }
755
+ state = state->next.p;
756
+ }
757
+ }
758
+
759
+ template <class charT, class traits>
760
+ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
761
+ {
762
+ re_syntax_base* base = state;
763
+ while(state)
764
+ {
765
+ switch(state->type)
766
+ {
767
+ case syntax_element_assert_backref:
768
+ {
769
+ // just check that the index is valid:
770
+ int idx = static_cast<const re_brace*>(state)->index;
771
+ if(idx < 0)
772
+ {
773
+ idx = -idx-1;
774
+ if(idx >= 10000)
775
+ {
776
+ idx = m_pdata->get_id(idx);
777
+ if(idx <= 0)
778
+ {
779
+ // check of sub-expression that doesn't exist:
780
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
781
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
782
+ //
783
+ // clear the expression, we should be empty:
784
+ //
785
+ this->m_pdata->m_expression = 0;
786
+ this->m_pdata->m_expression_len = 0;
787
+ //
788
+ // and throw if required:
789
+ //
790
+ if(0 == (this->flags() & regex_constants::no_except))
791
+ {
792
+ std::string message = "Encountered a forward reference to a marked sub-expression that does not exist.";
793
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
794
+ e.raise();
795
+ }
796
+ }
797
+ }
798
+ }
799
+ }
800
+ break;
801
+ case syntax_element_recurse:
802
+ {
803
+ bool ok = false;
804
+ re_syntax_base* p = base;
805
+ std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
806
+ if(idx > 10000)
807
+ {
808
+ //
809
+ // There may be more than one capture group with this hash, just do what Perl
810
+ // does and recurse to the leftmost:
811
+ //
812
+ idx = m_pdata->get_id(static_cast<int>(idx));
813
+ }
814
+ while(p)
815
+ {
816
+ if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
817
+ {
818
+ //
819
+ // We've found the target of the recursion, set the jump target:
820
+ //
821
+ static_cast<re_jump*>(state)->alt.p = p;
822
+ ok = true;
823
+ //
824
+ // Now scan the target for nested repeats:
825
+ //
826
+ p = p->next.p;
827
+ int next_rep_id = 0;
828
+ while(p)
829
+ {
830
+ switch(p->type)
831
+ {
832
+ case syntax_element_rep:
833
+ case syntax_element_dot_rep:
834
+ case syntax_element_char_rep:
835
+ case syntax_element_short_set_rep:
836
+ case syntax_element_long_set_rep:
837
+ next_rep_id = static_cast<re_repeat*>(p)->state_id;
838
+ break;
839
+ case syntax_element_endmark:
840
+ if(static_cast<const re_brace*>(p)->index == idx)
841
+ next_rep_id = -1;
842
+ break;
843
+ default:
844
+ break;
845
+ }
846
+ if(next_rep_id)
847
+ break;
848
+ p = p->next.p;
849
+ }
850
+ if(next_rep_id > 0)
851
+ {
852
+ static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
853
+ }
854
+
855
+ break;
856
+ }
857
+ p = p->next.p;
858
+ }
859
+ if(!ok)
860
+ {
861
+ // recursion to sub-expression that doesn't exist:
862
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
863
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
864
+ //
865
+ // clear the expression, we should be empty:
866
+ //
867
+ this->m_pdata->m_expression = 0;
868
+ this->m_pdata->m_expression_len = 0;
869
+ //
870
+ // and throw if required:
871
+ //
872
+ if(0 == (this->flags() & regex_constants::no_except))
873
+ {
874
+ std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist.";
875
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
876
+ e.raise();
877
+ }
878
+ }
879
+ }
880
+ break;
881
+ default:
882
+ break;
883
+ }
884
+ state = state->next.p;
885
+ }
886
+ }
887
+
888
+ template <class charT, class traits>
889
+ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
890
+ {
891
+ // non-recursive implementation:
892
+ // create the last map in the machine first, so that earlier maps
893
+ // can make use of the result...
894
+ //
895
+ // This was originally a recursive implementation, but that caused stack
896
+ // overflows with complex expressions on small stacks (think COM+).
897
+
898
+ // start by saving the case setting:
899
+ bool l_icase = m_icase;
900
+ std::vector<std::pair<bool, re_syntax_base*> > v;
901
+
902
+ while(state)
903
+ {
904
+ switch(state->type)
905
+ {
906
+ case syntax_element_toggle_case:
907
+ // we need to track case changes here:
908
+ m_icase = static_cast<re_case*>(state)->icase;
909
+ state = state->next.p;
910
+ continue;
911
+ case syntax_element_alt:
912
+ case syntax_element_rep:
913
+ case syntax_element_dot_rep:
914
+ case syntax_element_char_rep:
915
+ case syntax_element_short_set_rep:
916
+ case syntax_element_long_set_rep:
917
+ // just push the state onto our stack for now:
918
+ v.push_back(std::pair<bool, re_syntax_base*>(m_icase, state));
919
+ state = state->next.p;
920
+ break;
921
+ case syntax_element_backstep:
922
+ // we need to calculate how big the backstep is:
923
+ static_cast<re_brace*>(state)->index
924
+ = this->calculate_backstep(state->next.p);
925
+ if(static_cast<re_brace*>(state)->index < 0)
926
+ {
927
+ // Oops error:
928
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
929
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
930
+ //
931
+ // clear the expression, we should be empty:
932
+ //
933
+ this->m_pdata->m_expression = 0;
934
+ this->m_pdata->m_expression_len = 0;
935
+ //
936
+ // and throw if required:
937
+ //
938
+ if(0 == (this->flags() & regex_constants::no_except))
939
+ {
940
+ std::string message = "Invalid lookbehind assertion encountered in the regular expression.";
941
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
942
+ e.raise();
943
+ }
944
+ }
945
+ BOOST_FALLTHROUGH;
946
+ default:
947
+ state = state->next.p;
948
+ }
949
+ }
950
+
951
+ // now work through our list, building all the maps as we go:
952
+ while(v.size())
953
+ {
954
+ // Initialize m_recursion_checks if we need it:
955
+ if(m_has_recursions)
956
+ m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
957
+
958
+ const std::pair<bool, re_syntax_base*>& p = v.back();
959
+ m_icase = p.first;
960
+ state = p.second;
961
+ v.pop_back();
962
+
963
+ // Build maps:
964
+ m_bad_repeats = 0;
965
+ create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
966
+ m_bad_repeats = 0;
967
+
968
+ if(m_has_recursions)
969
+ m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
970
+ create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
971
+ // adjust the type of the state to allow for faster matching:
972
+ state->type = this->get_repeat_type(state);
973
+ }
974
+ // restore case sensitivity:
975
+ m_icase = l_icase;
976
+ }
977
+
978
+ template <class charT, class traits>
979
+ int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
980
+ {
981
+ typedef typename traits::char_class_type m_type;
982
+ int result = 0;
983
+ while(state)
984
+ {
985
+ switch(state->type)
986
+ {
987
+ case syntax_element_startmark:
988
+ if((static_cast<re_brace*>(state)->index == -1)
989
+ || (static_cast<re_brace*>(state)->index == -2))
990
+ {
991
+ state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
992
+ continue;
993
+ }
994
+ else if(static_cast<re_brace*>(state)->index == -3)
995
+ {
996
+ state = state->next.p->next.p;
997
+ continue;
998
+ }
999
+ break;
1000
+ case syntax_element_endmark:
1001
+ if((static_cast<re_brace*>(state)->index == -1)
1002
+ || (static_cast<re_brace*>(state)->index == -2))
1003
+ return result;
1004
+ break;
1005
+ case syntax_element_literal:
1006
+ result += static_cast<re_literal*>(state)->length;
1007
+ break;
1008
+ case syntax_element_wild:
1009
+ case syntax_element_set:
1010
+ result += 1;
1011
+ break;
1012
+ case syntax_element_dot_rep:
1013
+ case syntax_element_char_rep:
1014
+ case syntax_element_short_set_rep:
1015
+ case syntax_element_backref:
1016
+ case syntax_element_rep:
1017
+ case syntax_element_combining:
1018
+ case syntax_element_long_set_rep:
1019
+ case syntax_element_backstep:
1020
+ {
1021
+ re_repeat* rep = static_cast<re_repeat *>(state);
1022
+ // adjust the type of the state to allow for faster matching:
1023
+ state->type = this->get_repeat_type(state);
1024
+ if((state->type == syntax_element_dot_rep)
1025
+ || (state->type == syntax_element_char_rep)
1026
+ || (state->type == syntax_element_short_set_rep))
1027
+ {
1028
+ if(rep->max != rep->min)
1029
+ return -1;
1030
+ result += static_cast<int>(rep->min);
1031
+ state = rep->alt.p;
1032
+ continue;
1033
+ }
1034
+ else if(state->type == syntax_element_long_set_rep)
1035
+ {
1036
+ BOOST_ASSERT(rep->next.p->type == syntax_element_long_set);
1037
+ if(static_cast<re_set_long<m_type>*>(rep->next.p)->singleton == 0)
1038
+ return -1;
1039
+ if(rep->max != rep->min)
1040
+ return -1;
1041
+ result += static_cast<int>(rep->min);
1042
+ state = rep->alt.p;
1043
+ continue;
1044
+ }
1045
+ }
1046
+ return -1;
1047
+ case syntax_element_long_set:
1048
+ if(static_cast<re_set_long<m_type>*>(state)->singleton == 0)
1049
+ return -1;
1050
+ result += 1;
1051
+ break;
1052
+ case syntax_element_jump:
1053
+ state = static_cast<re_jump*>(state)->alt.p;
1054
+ continue;
1055
+ case syntax_element_alt:
1056
+ {
1057
+ int r1 = calculate_backstep(state->next.p);
1058
+ int r2 = calculate_backstep(static_cast<re_alt*>(state)->alt.p);
1059
+ if((r1 < 0) || (r1 != r2))
1060
+ return -1;
1061
+ return result + r1;
1062
+ }
1063
+ default:
1064
+ break;
1065
+ }
1066
+ state = state->next.p;
1067
+ }
1068
+ return -1;
1069
+ }
1070
+
1071
+ template <class charT, class traits>
1072
+ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
1073
+ {
1074
+ int not_last_jump = 1;
1075
+ re_syntax_base* recursion_start = 0;
1076
+ int recursion_sub = 0;
1077
+ re_syntax_base* recursion_restart = 0;
1078
+
1079
+ // track case sensitivity:
1080
+ bool l_icase = m_icase;
1081
+
1082
+ while(state)
1083
+ {
1084
+ switch(state->type)
1085
+ {
1086
+ case syntax_element_toggle_case:
1087
+ l_icase = static_cast<re_case*>(state)->icase;
1088
+ state = state->next.p;
1089
+ break;
1090
+ case syntax_element_literal:
1091
+ {
1092
+ // don't set anything in *pnull, set each element in l_map
1093
+ // that could match the first character in the literal:
1094
+ if(l_map)
1095
+ {
1096
+ l_map[0] |= mask_init;
1097
+ charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
1098
+ for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1099
+ {
1100
+ if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
1101
+ l_map[i] |= mask;
1102
+ }
1103
+ }
1104
+ return;
1105
+ }
1106
+ case syntax_element_end_line:
1107
+ {
1108
+ // next character must be a line separator (if there is one):
1109
+ if(l_map)
1110
+ {
1111
+ l_map[0] |= mask_init;
1112
+ l_map[static_cast<unsigned>('\n')] |= mask;
1113
+ l_map[static_cast<unsigned>('\r')] |= mask;
1114
+ l_map[static_cast<unsigned>('\f')] |= mask;
1115
+ l_map[0x85] |= mask;
1116
+ }
1117
+ // now figure out if we can match a NULL string at this point:
1118
+ if(pnull)
1119
+ create_startmap(state->next.p, 0, pnull, mask);
1120
+ return;
1121
+ }
1122
+ case syntax_element_recurse:
1123
+ {
1124
+ if(state->type == syntax_element_startmark)
1125
+ recursion_sub = static_cast<re_brace*>(state)->index;
1126
+ else
1127
+ recursion_sub = 0;
1128
+ if(m_recursion_checks[recursion_sub])
1129
+ {
1130
+ // Infinite recursion!!
1131
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
1132
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
1133
+ //
1134
+ // clear the expression, we should be empty:
1135
+ //
1136
+ this->m_pdata->m_expression = 0;
1137
+ this->m_pdata->m_expression_len = 0;
1138
+ //
1139
+ // and throw if required:
1140
+ //
1141
+ if(0 == (this->flags() & regex_constants::no_except))
1142
+ {
1143
+ std::string message = "Encountered an infinite recursion.";
1144
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
1145
+ e.raise();
1146
+ }
1147
+ }
1148
+ else if(recursion_start == 0)
1149
+ {
1150
+ recursion_start = state;
1151
+ recursion_restart = state->next.p;
1152
+ state = static_cast<re_jump*>(state)->alt.p;
1153
+ m_recursion_checks[recursion_sub] = true;
1154
+ break;
1155
+ }
1156
+ m_recursion_checks[recursion_sub] = true;
1157
+ // can't handle nested recursion here...
1158
+ BOOST_FALLTHROUGH;
1159
+ }
1160
+ case syntax_element_backref:
1161
+ // can be null, and any character can match:
1162
+ if(pnull)
1163
+ *pnull |= mask;
1164
+ BOOST_FALLTHROUGH;
1165
+ case syntax_element_wild:
1166
+ {
1167
+ // can't be null, any character can match:
1168
+ set_all_masks(l_map, mask);
1169
+ return;
1170
+ }
1171
+ case syntax_element_match:
1172
+ {
1173
+ // must be null, any character can match:
1174
+ set_all_masks(l_map, mask);
1175
+ if(pnull)
1176
+ *pnull |= mask;
1177
+ return;
1178
+ }
1179
+ case syntax_element_word_start:
1180
+ {
1181
+ // recurse, then AND with all the word characters:
1182
+ create_startmap(state->next.p, l_map, pnull, mask);
1183
+ if(l_map)
1184
+ {
1185
+ l_map[0] |= mask_init;
1186
+ for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1187
+ {
1188
+ if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
1189
+ l_map[i] &= static_cast<unsigned char>(~mask);
1190
+ }
1191
+ }
1192
+ return;
1193
+ }
1194
+ case syntax_element_word_end:
1195
+ {
1196
+ // recurse, then AND with all the word characters:
1197
+ create_startmap(state->next.p, l_map, pnull, mask);
1198
+ if(l_map)
1199
+ {
1200
+ l_map[0] |= mask_init;
1201
+ for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1202
+ {
1203
+ if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
1204
+ l_map[i] &= static_cast<unsigned char>(~mask);
1205
+ }
1206
+ }
1207
+ return;
1208
+ }
1209
+ case syntax_element_buffer_end:
1210
+ {
1211
+ // we *must be null* :
1212
+ if(pnull)
1213
+ *pnull |= mask;
1214
+ return;
1215
+ }
1216
+ case syntax_element_long_set:
1217
+ if(l_map)
1218
+ {
1219
+ typedef typename traits::char_class_type m_type;
1220
+ if(static_cast<re_set_long<m_type>*>(state)->singleton)
1221
+ {
1222
+ l_map[0] |= mask_init;
1223
+ for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1224
+ {
1225
+ charT c = static_cast<charT>(i);
1226
+ if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<m_type>*>(state), *m_pdata, l_icase))
1227
+ l_map[i] |= mask;
1228
+ }
1229
+ }
1230
+ else
1231
+ set_all_masks(l_map, mask);
1232
+ }
1233
+ return;
1234
+ case syntax_element_set:
1235
+ if(l_map)
1236
+ {
1237
+ l_map[0] |= mask_init;
1238
+ for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1239
+ {
1240
+ if(static_cast<re_set*>(state)->_map[
1241
+ static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
1242
+ l_map[i] |= mask;
1243
+ }
1244
+ }
1245
+ return;
1246
+ case syntax_element_jump:
1247
+ // take the jump:
1248
+ state = static_cast<re_alt*>(state)->alt.p;
1249
+ not_last_jump = -1;
1250
+ break;
1251
+ case syntax_element_alt:
1252
+ case syntax_element_rep:
1253
+ case syntax_element_dot_rep:
1254
+ case syntax_element_char_rep:
1255
+ case syntax_element_short_set_rep:
1256
+ case syntax_element_long_set_rep:
1257
+ {
1258
+ re_alt* rep = static_cast<re_alt*>(state);
1259
+ if(rep->_map[0] & mask_init)
1260
+ {
1261
+ if(l_map)
1262
+ {
1263
+ // copy previous results:
1264
+ l_map[0] |= mask_init;
1265
+ for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
1266
+ {
1267
+ if(rep->_map[i] & mask_any)
1268
+ l_map[i] |= mask;
1269
+ }
1270
+ }
1271
+ if(pnull)
1272
+ {
1273
+ if(rep->can_be_null & mask_any)
1274
+ *pnull |= mask;
1275
+ }
1276
+ }
1277
+ else
1278
+ {
1279
+ // we haven't created a startmap for this alternative yet
1280
+ // so take the union of the two options:
1281
+ if(is_bad_repeat(state))
1282
+ {
1283
+ set_all_masks(l_map, mask);
1284
+ if(pnull)
1285
+ *pnull |= mask;
1286
+ return;
1287
+ }
1288
+ set_bad_repeat(state);
1289
+ create_startmap(state->next.p, l_map, pnull, mask);
1290
+ if((state->type == syntax_element_alt)
1291
+ || (static_cast<re_repeat*>(state)->min == 0)
1292
+ || (not_last_jump == 0))
1293
+ create_startmap(rep->alt.p, l_map, pnull, mask);
1294
+ }
1295
+ }
1296
+ return;
1297
+ case syntax_element_soft_buffer_end:
1298
+ // match newline or null:
1299
+ if(l_map)
1300
+ {
1301
+ l_map[0] |= mask_init;
1302
+ l_map[static_cast<unsigned>('\n')] |= mask;
1303
+ l_map[static_cast<unsigned>('\r')] |= mask;
1304
+ }
1305
+ if(pnull)
1306
+ *pnull |= mask;
1307
+ return;
1308
+ case syntax_element_endmark:
1309
+ // need to handle independent subs as a special case:
1310
+ if(static_cast<re_brace*>(state)->index < 0)
1311
+ {
1312
+ // can be null, any character can match:
1313
+ set_all_masks(l_map, mask);
1314
+ if(pnull)
1315
+ *pnull |= mask;
1316
+ return;
1317
+ }
1318
+ else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
1319
+ {
1320
+ // recursion termination:
1321
+ recursion_start = 0;
1322
+ state = recursion_restart;
1323
+ break;
1324
+ }
1325
+
1326
+ //
1327
+ // Normally we just go to the next state... but if this sub-expression is
1328
+ // the target of a recursion, then we might be ending a recursion, in which
1329
+ // case we should check whatever follows that recursion, as well as whatever
1330
+ // follows this state:
1331
+ //
1332
+ if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
1333
+ {
1334
+ bool ok = false;
1335
+ re_syntax_base* p = m_pdata->m_first_state;
1336
+ while(p)
1337
+ {
1338
+ if(p->type == syntax_element_recurse)
1339
+ {
1340
+ re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
1341
+ if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
1342
+ {
1343
+ ok = true;
1344
+ break;
1345
+ }
1346
+ }
1347
+ p = p->next.p;
1348
+ }
1349
+ if(ok)
1350
+ {
1351
+ create_startmap(p->next.p, l_map, pnull, mask);
1352
+ }
1353
+ }
1354
+ state = state->next.p;
1355
+ break;
1356
+
1357
+ case syntax_element_startmark:
1358
+ // need to handle independent subs as a special case:
1359
+ if(static_cast<re_brace*>(state)->index == -3)
1360
+ {
1361
+ state = state->next.p->next.p;
1362
+ break;
1363
+ }
1364
+ BOOST_FALLTHROUGH;
1365
+ default:
1366
+ state = state->next.p;
1367
+ }
1368
+ ++not_last_jump;
1369
+ }
1370
+ }
1371
+
1372
+ template <class charT, class traits>
1373
+ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
1374
+ {
1375
+ //
1376
+ // find out how the machine starts, so we can optimise the search:
1377
+ //
1378
+ while(state)
1379
+ {
1380
+ switch(state->type)
1381
+ {
1382
+ case syntax_element_startmark:
1383
+ case syntax_element_endmark:
1384
+ state = state->next.p;
1385
+ continue;
1386
+ case syntax_element_start_line:
1387
+ return regbase::restart_line;
1388
+ case syntax_element_word_start:
1389
+ return regbase::restart_word;
1390
+ case syntax_element_buffer_start:
1391
+ return regbase::restart_buf;
1392
+ case syntax_element_restart_continue:
1393
+ return regbase::restart_continue;
1394
+ default:
1395
+ state = 0;
1396
+ continue;
1397
+ }
1398
+ }
1399
+ return regbase::restart_any;
1400
+ }
1401
+
1402
+ template <class charT, class traits>
1403
+ void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
1404
+ {
1405
+ //
1406
+ // set mask in all of bits elements,
1407
+ // if bits[0] has mask_init not set then we can
1408
+ // optimise this to a call to memset:
1409
+ //
1410
+ if(bits)
1411
+ {
1412
+ if(bits[0] == 0)
1413
+ (std::memset)(bits, mask, 1u << CHAR_BIT);
1414
+ else
1415
+ {
1416
+ for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
1417
+ bits[i] |= mask;
1418
+ }
1419
+ bits[0] |= mask_init;
1420
+ }
1421
+ }
1422
+
1423
+ template <class charT, class traits>
1424
+ bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
1425
+ {
1426
+ switch(pt->type)
1427
+ {
1428
+ case syntax_element_rep:
1429
+ case syntax_element_dot_rep:
1430
+ case syntax_element_char_rep:
1431
+ case syntax_element_short_set_rep:
1432
+ case syntax_element_long_set_rep:
1433
+ {
1434
+ unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1435
+ if(state_id > sizeof(m_bad_repeats) * CHAR_BIT)
1436
+ return true; // run out of bits, assume we can't traverse this one.
1437
+ static const boost::uintmax_t one = 1uL;
1438
+ return m_bad_repeats & (one << state_id);
1439
+ }
1440
+ default:
1441
+ return false;
1442
+ }
1443
+ }
1444
+
1445
+ template <class charT, class traits>
1446
+ void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
1447
+ {
1448
+ switch(pt->type)
1449
+ {
1450
+ case syntax_element_rep:
1451
+ case syntax_element_dot_rep:
1452
+ case syntax_element_char_rep:
1453
+ case syntax_element_short_set_rep:
1454
+ case syntax_element_long_set_rep:
1455
+ {
1456
+ unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1457
+ static const boost::uintmax_t one = 1uL;
1458
+ if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)
1459
+ m_bad_repeats |= (one << state_id);
1460
+ }
1461
+ break;
1462
+ default:
1463
+ break;
1464
+ }
1465
+ }
1466
+
1467
+ template <class charT, class traits>
1468
+ syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
1469
+ {
1470
+ typedef typename traits::char_class_type m_type;
1471
+ if(state->type == syntax_element_rep)
1472
+ {
1473
+ // check to see if we are repeating a single state:
1474
+ if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
1475
+ {
1476
+ switch(state->next.p->type)
1477
+ {
1478
+ case re_detail::syntax_element_wild:
1479
+ return re_detail::syntax_element_dot_rep;
1480
+ case re_detail::syntax_element_literal:
1481
+ return re_detail::syntax_element_char_rep;
1482
+ case re_detail::syntax_element_set:
1483
+ return re_detail::syntax_element_short_set_rep;
1484
+ case re_detail::syntax_element_long_set:
1485
+ if(static_cast<re_detail::re_set_long<m_type>*>(state->next.p)->singleton)
1486
+ return re_detail::syntax_element_long_set_rep;
1487
+ break;
1488
+ default:
1489
+ break;
1490
+ }
1491
+ }
1492
+ }
1493
+ return state->type;
1494
+ }
1495
+
1496
+ template <class charT, class traits>
1497
+ void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
1498
+ {
1499
+ // enumerate our states, and see if we have a leading repeat
1500
+ // for which failed search restarts can be optimised;
1501
+ do
1502
+ {
1503
+ switch(state->type)
1504
+ {
1505
+ case syntax_element_startmark:
1506
+ if(static_cast<re_brace*>(state)->index >= 0)
1507
+ {
1508
+ state = state->next.p;
1509
+ continue;
1510
+ }
1511
+ if((static_cast<re_brace*>(state)->index == -1)
1512
+ || (static_cast<re_brace*>(state)->index == -2))
1513
+ {
1514
+ // skip past the zero width assertion:
1515
+ state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;
1516
+ continue;
1517
+ }
1518
+ if(static_cast<re_brace*>(state)->index == -3)
1519
+ {
1520
+ // Have to skip the leading jump state:
1521
+ state = state->next.p->next.p;
1522
+ continue;
1523
+ }
1524
+ return;
1525
+ case syntax_element_endmark:
1526
+ case syntax_element_start_line:
1527
+ case syntax_element_end_line:
1528
+ case syntax_element_word_boundary:
1529
+ case syntax_element_within_word:
1530
+ case syntax_element_word_start:
1531
+ case syntax_element_word_end:
1532
+ case syntax_element_buffer_start:
1533
+ case syntax_element_buffer_end:
1534
+ case syntax_element_restart_continue:
1535
+ state = state->next.p;
1536
+ break;
1537
+ case syntax_element_dot_rep:
1538
+ case syntax_element_char_rep:
1539
+ case syntax_element_short_set_rep:
1540
+ case syntax_element_long_set_rep:
1541
+ if(this->m_has_backrefs == 0)
1542
+ static_cast<re_repeat*>(state)->leading = true;
1543
+ BOOST_FALLTHROUGH;
1544
+ default:
1545
+ return;
1546
+ }
1547
+ }while(state);
1548
+ }
1549
+
1550
+
1551
+ } // namespace re_detail
1552
+
1553
+ } // namespace boost
1554
+
1555
+ #ifdef BOOST_MSVC
1556
+ # pragma warning(pop)
1557
+ #endif
1558
+
1559
+ #ifdef BOOST_MSVC
1560
+ #pragma warning(push)
1561
+ #pragma warning(disable: 4103)
1562
+ #endif
1563
+ #ifdef BOOST_HAS_ABI_HEADERS
1564
+ # include BOOST_ABI_SUFFIX
1565
+ #endif
1566
+ #ifdef BOOST_MSVC
1567
+ #pragma warning(pop)
1568
+ #endif
1569
+
1570
+ #endif
1571
+