passenger 4.0.27 → 4.0.28
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of passenger might be problematic. Click here for more details.
- data.tar.gz.asc +7 -7
- data/.gitignore +1 -0
- data/NEWS +22 -0
- data/build/preprocessor.rb +10 -0
- data/build/rpm.rb +74 -65
- data/debian.template/rules.template +8 -0
- data/dev/copy_boost_headers.rb +11 -2
- data/doc/Users guide Apache.idmap.txt +161 -145
- data/doc/Users guide Apache.txt +12 -1
- data/doc/Users guide Nginx.idmap.txt +142 -126
- data/doc/Users guide Nginx.txt +14 -1
- data/doc/Users guide Standalone.txt +1 -0
- data/doc/users_guide_snippets/environment_variables.txt +1 -1
- data/doc/users_guide_snippets/installation.txt +2 -0
- data/doc/users_guide_snippets/tips.txt +118 -0
- data/ext/apache2/Configuration.cpp +0 -6
- data/ext/apache2/Configuration.hpp +0 -5
- data/ext/apache2/ConfigurationCommands.cpp +7 -0
- data/ext/apache2/ConfigurationFields.hpp +2 -0
- data/ext/apache2/ConfigurationSetters.cpp +24 -0
- data/ext/apache2/CreateDirConfig.cpp +1 -0
- data/ext/apache2/Hooks.cpp +0 -1
- data/ext/apache2/MergeDirConfig.cpp +7 -0
- data/ext/apache2/SetHeaders.cpp +5 -1
- data/ext/boost/cregex.hpp +39 -0
- data/ext/boost/libs/regex/src/c_regex_traits.cpp +193 -0
- data/ext/boost/libs/regex/src/cpp_regex_traits.cpp +117 -0
- data/ext/boost/libs/regex/src/cregex.cpp +660 -0
- data/ext/boost/libs/regex/src/instances.cpp +32 -0
- data/ext/boost/libs/regex/src/internals.hpp +35 -0
- data/ext/boost/libs/regex/src/posix_api.cpp +296 -0
- data/ext/boost/libs/regex/src/regex.cpp +227 -0
- data/ext/boost/libs/regex/src/regex_debug.cpp +59 -0
- data/ext/boost/libs/regex/src/regex_raw_buffer.cpp +72 -0
- data/ext/boost/libs/regex/src/regex_traits_defaults.cpp +692 -0
- data/ext/boost/libs/regex/src/static_mutex.cpp +179 -0
- data/ext/boost/libs/regex/src/wc_regex_traits.cpp +301 -0
- data/ext/boost/libs/regex/src/wide_posix_api.cpp +315 -0
- data/ext/boost/libs/regex/src/winstances.cpp +35 -0
- data/ext/boost/regex.h +100 -0
- data/ext/boost/regex.hpp +37 -0
- data/ext/boost/regex/concepts.hpp +1128 -0
- data/ext/boost/regex/config.hpp +435 -0
- data/ext/boost/regex/config/borland.hpp +72 -0
- data/ext/boost/regex/config/cwchar.hpp +207 -0
- data/ext/boost/regex/mfc.hpp +190 -0
- data/ext/boost/regex/pattern_except.hpp +100 -0
- data/ext/boost/regex/pending/object_cache.hpp +165 -0
- data/ext/boost/regex/pending/static_mutex.hpp +179 -0
- data/ext/boost/regex/pending/unicode_iterator.hpp +776 -0
- data/ext/boost/regex/regex_traits.hpp +35 -0
- data/ext/boost/regex/user.hpp +93 -0
- data/ext/boost/regex/v4/basic_regex.hpp +782 -0
- data/ext/boost/regex/v4/basic_regex_creator.hpp +1571 -0
- data/ext/boost/regex/v4/basic_regex_parser.hpp +2874 -0
- data/ext/boost/regex/v4/c_regex_traits.hpp +211 -0
- data/ext/boost/regex/v4/char_regex_traits.hpp +81 -0
- data/ext/boost/regex/v4/cpp_regex_traits.hpp +1099 -0
- data/ext/boost/regex/v4/cregex.hpp +330 -0
- data/ext/boost/regex/v4/error_type.hpp +59 -0
- data/ext/boost/regex/v4/fileiter.hpp +455 -0
- data/ext/boost/regex/v4/instances.hpp +222 -0
- data/ext/boost/regex/v4/iterator_category.hpp +91 -0
- data/ext/boost/regex/v4/iterator_traits.hpp +135 -0
- data/ext/boost/regex/v4/match_flags.hpp +138 -0
- data/ext/boost/regex/v4/match_results.hpp +702 -0
- data/ext/boost/regex/v4/mem_block_cache.hpp +99 -0
- data/ext/boost/regex/v4/perl_matcher.hpp +587 -0
- data/ext/boost/regex/v4/perl_matcher_common.hpp +996 -0
- data/ext/boost/regex/v4/perl_matcher_non_recursive.hpp +1642 -0
- data/ext/boost/regex/v4/perl_matcher_recursive.hpp +991 -0
- data/ext/boost/regex/v4/primary_transform.hpp +146 -0
- data/ext/boost/regex/v4/protected_call.hpp +81 -0
- data/ext/boost/regex/v4/regbase.hpp +180 -0
- data/ext/boost/regex/v4/regex.hpp +202 -0
- data/ext/boost/regex/v4/regex_format.hpp +1156 -0
- data/ext/boost/regex/v4/regex_fwd.hpp +73 -0
- data/ext/boost/regex/v4/regex_grep.hpp +155 -0
- data/ext/boost/regex/v4/regex_iterator.hpp +201 -0
- data/ext/boost/regex/v4/regex_match.hpp +382 -0
- data/ext/boost/regex/v4/regex_merge.hpp +93 -0
- data/ext/boost/regex/v4/regex_raw_buffer.hpp +210 -0
- data/ext/boost/regex/v4/regex_replace.hpp +99 -0
- data/ext/boost/regex/v4/regex_search.hpp +217 -0
- data/ext/boost/regex/v4/regex_split.hpp +172 -0
- data/ext/boost/regex/v4/regex_token_iterator.hpp +342 -0
- data/ext/boost/regex/v4/regex_traits.hpp +189 -0
- data/ext/boost/regex/v4/regex_traits_defaults.hpp +371 -0
- data/ext/boost/regex/v4/regex_workaround.hpp +232 -0
- data/ext/boost/regex/v4/states.hpp +301 -0
- data/ext/boost/regex/v4/sub_match.hpp +512 -0
- data/ext/boost/regex/v4/syntax_type.hpp +105 -0
- data/ext/boost/regex/v4/u32regex_iterator.hpp +193 -0
- data/ext/boost/regex/v4/u32regex_token_iterator.hpp +377 -0
- data/ext/boost/regex/v4/w32_regex_traits.hpp +741 -0
- data/ext/boost/regex_fwd.hpp +33 -0
- data/ext/common/AgentsStarter.h +0 -11
- data/ext/common/ApplicationPool2/Common.h +1 -7
- data/ext/common/ApplicationPool2/DirectSpawner.h +3 -3
- data/ext/common/ApplicationPool2/Group.h +166 -69
- data/ext/common/ApplicationPool2/Implementation.cpp +55 -10
- data/ext/common/ApplicationPool2/Options.h +45 -10
- data/ext/common/ApplicationPool2/PipeWatcher.h +1 -2
- data/ext/common/ApplicationPool2/Pool.h +29 -7
- data/ext/common/ApplicationPool2/Process.h +22 -3
- data/ext/common/ApplicationPool2/Session.h +1 -0
- data/ext/common/ApplicationPool2/SmartSpawner.h +5 -10
- data/ext/common/ApplicationPool2/Spawner.h +10 -15
- data/ext/common/ApplicationPool2/SuperGroup.h +10 -9
- data/ext/common/Constants.h +1 -3
- data/ext/common/Hooks.h +193 -0
- data/ext/common/Logging.cpp +67 -2
- data/ext/common/Logging.h +23 -1
- data/ext/common/Utils.cpp +0 -21
- data/ext/common/Utils.h +0 -42
- data/ext/common/Utils/CachedFileStat.hpp +1 -1
- data/ext/common/Utils/StrIntUtils.h +61 -14
- data/ext/common/Utils/StringMap.h +4 -0
- data/ext/common/agents/HelperAgent/AgentOptions.h +4 -4
- data/ext/common/agents/HelperAgent/Main.cpp +2 -3
- data/ext/common/agents/HelperAgent/RequestHandler.h +65 -2
- data/ext/common/agents/LoggingAgent/FilterSupport.h +3 -1
- data/ext/common/agents/Watchdog/Main.cpp +8 -72
- data/ext/nginx/CacheLocationConfig.c +29 -1
- data/ext/nginx/Configuration.c +0 -12
- data/ext/nginx/Configuration.h +0 -1
- data/ext/nginx/ConfigurationCommands.c +10 -0
- data/ext/nginx/ConfigurationFields.h +2 -0
- data/ext/nginx/CreateLocationConfig.c +4 -0
- data/ext/nginx/MergeLocationConfig.c +6 -0
- data/ext/oxt/system_calls.cpp +7 -1
- data/ext/oxt/system_calls.hpp +7 -7
- data/helper-scripts/node-loader.js +6 -2
- data/helper-scripts/rack-loader.rb +5 -2
- data/helper-scripts/rack-preloader.rb +5 -2
- data/lib/phusion_passenger.rb +1 -1
- data/lib/phusion_passenger/apache2/config_options.rb +8 -0
- data/lib/phusion_passenger/constants.rb +0 -1
- data/lib/phusion_passenger/nginx/config_options.rb +9 -2
- data/lib/phusion_passenger/platform_info/apache.rb +2 -1
- data/lib/phusion_passenger/platform_info/compiler.rb +15 -1
- data/lib/phusion_passenger/platform_info/cxx_portability.rb +2 -0
- data/node_lib/phusion_passenger/httplib_emulation.js +85 -17
- data/node_lib/phusion_passenger/request_handler.js +10 -2
- data/rpm/Vagrantfile +32 -0
- data/rpm/get_distro_id.py +4 -0
- data/test/cxx/ApplicationPool2/DirectSpawnerTest.cpp +2 -2
- data/test/cxx/ApplicationPool2/PoolTest.cpp +60 -9
- data/test/cxx/ApplicationPool2/SmartSpawnerTest.cpp +2 -6
- data/test/cxx/CachedFileStatTest.cpp +5 -5
- data/test/cxx/RequestHandlerTest.cpp +3 -6
- data/test/cxx/UtilsTest.cpp +30 -0
- data/test/node/httplib_emulation_spec.js +491 -0
- data/test/node/spec_helper.js +25 -0
- metadata +78 -2
- metadata.gz.asc +7 -7
@@ -0,0 +1,1571 @@
|
|
1
|
+
/*
|
2
|
+
*
|
3
|
+
* Copyright (c) 2004
|
4
|
+
* John Maddock
|
5
|
+
*
|
6
|
+
* Use, modification and distribution are subject to the
|
7
|
+
* Boost Software License, Version 1.0. (See accompanying file
|
8
|
+
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
9
|
+
*
|
10
|
+
*/
|
11
|
+
|
12
|
+
/*
|
13
|
+
* LOCATION: see http://www.boost.org for most recent version.
|
14
|
+
* FILE basic_regex_creator.hpp
|
15
|
+
* VERSION see <boost/version.hpp>
|
16
|
+
* DESCRIPTION: Declares template class basic_regex_creator which fills in
|
17
|
+
* the data members of a regex_data object.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
|
21
|
+
#define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
|
22
|
+
|
23
|
+
#ifdef BOOST_MSVC
|
24
|
+
#pragma warning(push)
|
25
|
+
#pragma warning(disable: 4103)
|
26
|
+
#endif
|
27
|
+
#ifdef BOOST_HAS_ABI_HEADERS
|
28
|
+
# include BOOST_ABI_PREFIX
|
29
|
+
#endif
|
30
|
+
#ifdef BOOST_MSVC
|
31
|
+
#pragma warning(pop)
|
32
|
+
#endif
|
33
|
+
|
34
|
+
#ifdef BOOST_MSVC
|
35
|
+
# pragma warning(push)
|
36
|
+
# pragma warning(disable: 4800)
|
37
|
+
#endif
|
38
|
+
|
39
|
+
namespace boost{
|
40
|
+
|
41
|
+
namespace re_detail{
|
42
|
+
|
43
|
+
// A pair of characters describing a one- or two-character element.
// The second character is zero when only one character was supplied.
template <class charT>
struct digraph : public std::pair<charT, charT>
{
   // Both characters zero.
   digraph()
      : std::pair<charT, charT>(0, 0)
   {
   }

   // Single character: the second slot is set to zero.
   digraph(charT c1)
      : std::pair<charT, charT>(c1, 0)
   {
   }

   // Explicit two-character element.
   digraph(charT c1, charT c2)
      : std::pair<charT, charT>(c1, c2)
   {
   }

#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
   // Member-wise copy; compiled out for the old MSVC versions named above.
   digraph(const digraph<charT>& d)
      : std::pair<charT, charT>(d.first, d.second)
   {
   }
#endif

   // Builds the element from any sequence offering size() and operator[].
   // The sequence is asserted to hold one or two characters.
   template <class Seq>
   digraph(const Seq& s)
      : std::pair<charT, charT>()
   {
      BOOST_ASSERT(s.size() <= 2);
      BOOST_ASSERT(s.size());
      this->first = s[0];
      if(s.size() > 1)
         this->second = s[1];
      else
         this->second = 0;
   }
};
|
62
|
+
|
63
|
+
template <class charT, class traits>
|
64
|
+
class basic_char_set
|
65
|
+
{
|
66
|
+
public:
|
67
|
+
typedef digraph<charT> digraph_type;
|
68
|
+
typedef typename traits::string_type string_type;
|
69
|
+
typedef typename traits::char_class_type m_type;
|
70
|
+
|
71
|
+
basic_char_set()
|
72
|
+
{
|
73
|
+
m_negate = false;
|
74
|
+
m_has_digraphs = false;
|
75
|
+
m_classes = 0;
|
76
|
+
m_negated_classes = 0;
|
77
|
+
m_empty = true;
|
78
|
+
}
|
79
|
+
|
80
|
+
void add_single(const digraph_type& s)
|
81
|
+
{
|
82
|
+
m_singles.insert(m_singles.end(), s);
|
83
|
+
if(s.second)
|
84
|
+
m_has_digraphs = true;
|
85
|
+
m_empty = false;
|
86
|
+
}
|
87
|
+
void add_range(const digraph_type& first, const digraph_type& end)
|
88
|
+
{
|
89
|
+
m_ranges.insert(m_ranges.end(), first);
|
90
|
+
m_ranges.insert(m_ranges.end(), end);
|
91
|
+
if(first.second)
|
92
|
+
{
|
93
|
+
m_has_digraphs = true;
|
94
|
+
add_single(first);
|
95
|
+
}
|
96
|
+
if(end.second)
|
97
|
+
{
|
98
|
+
m_has_digraphs = true;
|
99
|
+
add_single(end);
|
100
|
+
}
|
101
|
+
m_empty = false;
|
102
|
+
}
|
103
|
+
void add_class(m_type m)
|
104
|
+
{
|
105
|
+
m_classes |= m;
|
106
|
+
m_empty = false;
|
107
|
+
}
|
108
|
+
void add_negated_class(m_type m)
|
109
|
+
{
|
110
|
+
m_negated_classes |= m;
|
111
|
+
m_empty = false;
|
112
|
+
}
|
113
|
+
void add_equivalent(const digraph_type& s)
|
114
|
+
{
|
115
|
+
m_equivalents.insert(m_equivalents.end(), s);
|
116
|
+
if(s.second)
|
117
|
+
{
|
118
|
+
m_has_digraphs = true;
|
119
|
+
add_single(s);
|
120
|
+
}
|
121
|
+
m_empty = false;
|
122
|
+
}
|
123
|
+
void negate()
|
124
|
+
{
|
125
|
+
m_negate = true;
|
126
|
+
//m_empty = false;
|
127
|
+
}
|
128
|
+
|
129
|
+
//
|
130
|
+
// accessor functions:
|
131
|
+
//
|
132
|
+
bool has_digraphs()const
|
133
|
+
{
|
134
|
+
return m_has_digraphs;
|
135
|
+
}
|
136
|
+
bool is_negated()const
|
137
|
+
{
|
138
|
+
return m_negate;
|
139
|
+
}
|
140
|
+
typedef typename std::vector<digraph_type>::const_iterator list_iterator;
|
141
|
+
list_iterator singles_begin()const
|
142
|
+
{
|
143
|
+
return m_singles.begin();
|
144
|
+
}
|
145
|
+
list_iterator singles_end()const
|
146
|
+
{
|
147
|
+
return m_singles.end();
|
148
|
+
}
|
149
|
+
list_iterator ranges_begin()const
|
150
|
+
{
|
151
|
+
return m_ranges.begin();
|
152
|
+
}
|
153
|
+
list_iterator ranges_end()const
|
154
|
+
{
|
155
|
+
return m_ranges.end();
|
156
|
+
}
|
157
|
+
list_iterator equivalents_begin()const
|
158
|
+
{
|
159
|
+
return m_equivalents.begin();
|
160
|
+
}
|
161
|
+
list_iterator equivalents_end()const
|
162
|
+
{
|
163
|
+
return m_equivalents.end();
|
164
|
+
}
|
165
|
+
m_type classes()const
|
166
|
+
{
|
167
|
+
return m_classes;
|
168
|
+
}
|
169
|
+
m_type negated_classes()const
|
170
|
+
{
|
171
|
+
return m_negated_classes;
|
172
|
+
}
|
173
|
+
bool empty()const
|
174
|
+
{
|
175
|
+
return m_empty;
|
176
|
+
}
|
177
|
+
private:
|
178
|
+
std::vector<digraph_type> m_singles; // a list of single characters to match
|
179
|
+
std::vector<digraph_type> m_ranges; // a list of end points of our ranges
|
180
|
+
bool m_negate; // true if the set is to be negated
|
181
|
+
bool m_has_digraphs; // true if we have digraphs present
|
182
|
+
m_type m_classes; // character classes to match
|
183
|
+
m_type m_negated_classes; // negated character classes to match
|
184
|
+
bool m_empty; // whether we've added anything yet
|
185
|
+
std::vector<digraph_type> m_equivalents; // a list of equivalence classes
|
186
|
+
};
|
187
|
+
|
188
|
+
template <class charT, class traits>
|
189
|
+
class basic_regex_creator
|
190
|
+
{
|
191
|
+
public:
|
192
|
+
basic_regex_creator(regex_data<charT, traits>* data);
|
193
|
+
std::ptrdiff_t getoffset(void* addr)
|
194
|
+
{
|
195
|
+
return getoffset(addr, m_pdata->m_data.data());
|
196
|
+
}
|
197
|
+
std::ptrdiff_t getoffset(const void* addr, const void* base)
|
198
|
+
{
|
199
|
+
return static_cast<const char*>(addr) - static_cast<const char*>(base);
|
200
|
+
}
|
201
|
+
re_syntax_base* getaddress(std::ptrdiff_t off)
|
202
|
+
{
|
203
|
+
return getaddress(off, m_pdata->m_data.data());
|
204
|
+
}
|
205
|
+
re_syntax_base* getaddress(std::ptrdiff_t off, void* base)
|
206
|
+
{
|
207
|
+
return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off));
|
208
|
+
}
|
209
|
+
void init(unsigned l_flags)
|
210
|
+
{
|
211
|
+
m_pdata->m_flags = l_flags;
|
212
|
+
m_icase = l_flags & regex_constants::icase;
|
213
|
+
}
|
214
|
+
regbase::flag_type flags()
|
215
|
+
{
|
216
|
+
return m_pdata->m_flags;
|
217
|
+
}
|
218
|
+
void flags(regbase::flag_type f)
|
219
|
+
{
|
220
|
+
m_pdata->m_flags = f;
|
221
|
+
if(m_icase != static_cast<bool>(f & regbase::icase))
|
222
|
+
{
|
223
|
+
m_icase = static_cast<bool>(f & regbase::icase);
|
224
|
+
}
|
225
|
+
}
|
226
|
+
re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
|
227
|
+
re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
|
228
|
+
re_literal* append_literal(charT c);
|
229
|
+
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
|
230
|
+
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
|
231
|
+
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
|
232
|
+
void finalize(const charT* p1, const charT* p2);
|
233
|
+
protected:
|
234
|
+
regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
|
235
|
+
const ::boost::regex_traits_wrapper<traits>&
|
236
|
+
m_traits; // convenience reference to traits class
|
237
|
+
re_syntax_base* m_last_state; // the last state we added
|
238
|
+
bool m_icase; // true for case insensitive matches
|
239
|
+
unsigned m_repeater_id; // the state_id of the next repeater
|
240
|
+
bool m_has_backrefs; // true if there are actually any backrefs
|
241
|
+
unsigned m_backrefs; // bitmask of permitted backrefs
|
242
|
+
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
|
243
|
+
bool m_has_recursions; // set when we have recursive expresisons to fixup
|
244
|
+
std::vector<bool> m_recursion_checks; // notes which recursions we've followed while analysing this expression
|
245
|
+
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
|
246
|
+
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
|
247
|
+
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
|
248
|
+
typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character
|
249
|
+
typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character
|
250
|
+
private:
|
251
|
+
basic_regex_creator& operator=(const basic_regex_creator&);
|
252
|
+
basic_regex_creator(const basic_regex_creator&);
|
253
|
+
|
254
|
+
void fixup_pointers(re_syntax_base* state);
|
255
|
+
void fixup_recursions(re_syntax_base* state);
|
256
|
+
void create_startmaps(re_syntax_base* state);
|
257
|
+
int calculate_backstep(re_syntax_base* state);
|
258
|
+
void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
|
259
|
+
unsigned get_restart_type(re_syntax_base* state);
|
260
|
+
void set_all_masks(unsigned char* bits, unsigned char);
|
261
|
+
bool is_bad_repeat(re_syntax_base* pt);
|
262
|
+
void set_bad_repeat(re_syntax_base* pt);
|
263
|
+
syntax_element_type get_repeat_type(re_syntax_base* state);
|
264
|
+
void probe_leading_repeat(re_syntax_base* state);
|
265
|
+
};
|
266
|
+
|
267
|
+
template <class charT, class traits>
|
268
|
+
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
|
269
|
+
: m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
|
270
|
+
{
|
271
|
+
m_pdata->m_data.clear();
|
272
|
+
m_pdata->m_status = ::boost::regex_constants::error_ok;
|
273
|
+
static const charT w = 'w';
|
274
|
+
static const charT s = 's';
|
275
|
+
static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', };
|
276
|
+
static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', };
|
277
|
+
static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', };
|
278
|
+
m_word_mask = m_traits.lookup_classname(&w, &w +1);
|
279
|
+
m_mask_space = m_traits.lookup_classname(&s, &s +1);
|
280
|
+
m_lower_mask = m_traits.lookup_classname(l, l + 5);
|
281
|
+
m_upper_mask = m_traits.lookup_classname(u, u + 5);
|
282
|
+
m_alpha_mask = m_traits.lookup_classname(a, a + 5);
|
283
|
+
m_pdata->m_word_mask = m_word_mask;
|
284
|
+
BOOST_ASSERT(m_word_mask != 0);
|
285
|
+
BOOST_ASSERT(m_mask_space != 0);
|
286
|
+
BOOST_ASSERT(m_lower_mask != 0);
|
287
|
+
BOOST_ASSERT(m_upper_mask != 0);
|
288
|
+
BOOST_ASSERT(m_alpha_mask != 0);
|
289
|
+
}
|
290
|
+
|
291
|
+
template <class charT, class traits>
|
292
|
+
re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
|
293
|
+
{
|
294
|
+
// if the state is a backref then make a note of it:
|
295
|
+
if(t == syntax_element_backref)
|
296
|
+
this->m_has_backrefs = true;
|
297
|
+
// append a new state, start by aligning our last one:
|
298
|
+
m_pdata->m_data.align();
|
299
|
+
// set the offset to the next state in our last one:
|
300
|
+
if(m_last_state)
|
301
|
+
m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
|
302
|
+
// now actually extent our data:
|
303
|
+
m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s));
|
304
|
+
// fill in boilerplate options in the new state:
|
305
|
+
m_last_state->next.i = 0;
|
306
|
+
m_last_state->type = t;
|
307
|
+
return m_last_state;
|
308
|
+
}
|
309
|
+
|
310
|
+
template <class charT, class traits>
|
311
|
+
re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s)
|
312
|
+
{
|
313
|
+
// append a new state, start by aligning our last one:
|
314
|
+
m_pdata->m_data.align();
|
315
|
+
// set the offset to the next state in our last one:
|
316
|
+
if(m_last_state)
|
317
|
+
m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
|
318
|
+
// remember the last state position:
|
319
|
+
std::ptrdiff_t off = getoffset(m_last_state) + s;
|
320
|
+
// now actually insert our data:
|
321
|
+
re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s));
|
322
|
+
// fill in boilerplate options in the new state:
|
323
|
+
new_state->next.i = s;
|
324
|
+
new_state->type = t;
|
325
|
+
m_last_state = getaddress(off);
|
326
|
+
return new_state;
|
327
|
+
}
|
328
|
+
|
329
|
+
template <class charT, class traits>
|
330
|
+
re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
|
331
|
+
{
|
332
|
+
re_literal* result;
|
333
|
+
// start by seeing if we have an existing re_literal we can extend:
|
334
|
+
if((0 == m_last_state) || (m_last_state->type != syntax_element_literal))
|
335
|
+
{
|
336
|
+
// no existing re_literal, create a new one:
|
337
|
+
result = static_cast<re_literal*>(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
|
338
|
+
result->length = 1;
|
339
|
+
*static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase);
|
340
|
+
}
|
341
|
+
else
|
342
|
+
{
|
343
|
+
// we have an existing re_literal, extend it:
|
344
|
+
std::ptrdiff_t off = getoffset(m_last_state);
|
345
|
+
m_pdata->m_data.extend(sizeof(charT));
|
346
|
+
m_last_state = result = static_cast<re_literal*>(getaddress(off));
|
347
|
+
charT* characters = static_cast<charT*>(static_cast<void*>(result+1));
|
348
|
+
characters[result->length] = m_traits.translate(c, m_icase);
|
349
|
+
++(result->length);
|
350
|
+
}
|
351
|
+
return result;
|
352
|
+
}
|
353
|
+
|
354
|
+
template <class charT, class traits>
|
355
|
+
inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
356
|
+
const basic_char_set<charT, traits>& char_set)
|
357
|
+
{
|
358
|
+
typedef mpl::bool_< (sizeof(charT) == 1) > truth_type;
|
359
|
+
return char_set.has_digraphs()
|
360
|
+
? append_set(char_set, static_cast<mpl::false_*>(0))
|
361
|
+
: append_set(char_set, static_cast<truth_type*>(0));
|
362
|
+
}
|
363
|
+
|
364
|
+
template <class charT, class traits>
|
365
|
+
re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
366
|
+
const basic_char_set<charT, traits>& char_set, mpl::false_*)
|
367
|
+
{
|
368
|
+
typedef typename traits::string_type string_type;
|
369
|
+
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
|
370
|
+
typedef typename traits::char_class_type m_type;
|
371
|
+
|
372
|
+
re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>)));
|
373
|
+
//
|
374
|
+
// fill in the basics:
|
375
|
+
//
|
376
|
+
result->csingles = static_cast<unsigned int>(::boost::re_detail::distance(char_set.singles_begin(), char_set.singles_end()));
|
377
|
+
result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
|
378
|
+
result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
|
379
|
+
result->cclasses = char_set.classes();
|
380
|
+
result->cnclasses = char_set.negated_classes();
|
381
|
+
if(flags() & regbase::icase)
|
382
|
+
{
|
383
|
+
// adjust classes as needed:
|
384
|
+
if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
|
385
|
+
result->cclasses |= m_alpha_mask;
|
386
|
+
if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
|
387
|
+
result->cnclasses |= m_alpha_mask;
|
388
|
+
}
|
389
|
+
|
390
|
+
result->isnot = char_set.is_negated();
|
391
|
+
result->singleton = !char_set.has_digraphs();
|
392
|
+
//
|
393
|
+
// remember where the state is for later:
|
394
|
+
//
|
395
|
+
std::ptrdiff_t offset = getoffset(result);
|
396
|
+
//
|
397
|
+
// now extend with all the singles:
|
398
|
+
//
|
399
|
+
item_iterator first, last;
|
400
|
+
first = char_set.singles_begin();
|
401
|
+
last = char_set.singles_end();
|
402
|
+
while(first != last)
|
403
|
+
{
|
404
|
+
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
|
405
|
+
p[0] = m_traits.translate(first->first, m_icase);
|
406
|
+
if(first->second)
|
407
|
+
{
|
408
|
+
p[1] = m_traits.translate(first->second, m_icase);
|
409
|
+
p[2] = 0;
|
410
|
+
}
|
411
|
+
else
|
412
|
+
p[1] = 0;
|
413
|
+
++first;
|
414
|
+
}
|
415
|
+
//
|
416
|
+
// now extend with all the ranges:
|
417
|
+
//
|
418
|
+
first = char_set.ranges_begin();
|
419
|
+
last = char_set.ranges_end();
|
420
|
+
while(first != last)
|
421
|
+
{
|
422
|
+
// first grab the endpoints of the range:
|
423
|
+
digraph<charT> c1 = *first;
|
424
|
+
c1.first = this->m_traits.translate(c1.first, this->m_icase);
|
425
|
+
c1.second = this->m_traits.translate(c1.second, this->m_icase);
|
426
|
+
++first;
|
427
|
+
digraph<charT> c2 = *first;
|
428
|
+
c2.first = this->m_traits.translate(c2.first, this->m_icase);
|
429
|
+
c2.second = this->m_traits.translate(c2.second, this->m_icase);
|
430
|
+
++first;
|
431
|
+
string_type s1, s2;
|
432
|
+
// different actions now depending upon whether collation is turned on:
|
433
|
+
if(flags() & regex_constants::collate)
|
434
|
+
{
|
435
|
+
// we need to transform our range into sort keys:
|
436
|
+
#if BOOST_WORKAROUND(__GNUC__, < 3)
|
437
|
+
string_type in(3, charT(0));
|
438
|
+
in[0] = c1.first;
|
439
|
+
in[1] = c1.second;
|
440
|
+
s1 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1));
|
441
|
+
in[0] = c2.first;
|
442
|
+
in[1] = c2.second;
|
443
|
+
s2 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1));
|
444
|
+
#else
|
445
|
+
charT a1[3] = { c1.first, c1.second, charT(0), };
|
446
|
+
charT a2[3] = { c2.first, c2.second, charT(0), };
|
447
|
+
s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
|
448
|
+
s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
|
449
|
+
#endif
|
450
|
+
if(s1.size() == 0)
|
451
|
+
s1 = string_type(1, charT(0));
|
452
|
+
if(s2.size() == 0)
|
453
|
+
s2 = string_type(1, charT(0));
|
454
|
+
}
|
455
|
+
else
|
456
|
+
{
|
457
|
+
if(c1.second)
|
458
|
+
{
|
459
|
+
s1.insert(s1.end(), c1.first);
|
460
|
+
s1.insert(s1.end(), c1.second);
|
461
|
+
}
|
462
|
+
else
|
463
|
+
s1 = string_type(1, c1.first);
|
464
|
+
if(c2.second)
|
465
|
+
{
|
466
|
+
s2.insert(s2.end(), c2.first);
|
467
|
+
s2.insert(s2.end(), c2.second);
|
468
|
+
}
|
469
|
+
else
|
470
|
+
s2.insert(s2.end(), c2.first);
|
471
|
+
}
|
472
|
+
if(s1 > s2)
|
473
|
+
{
|
474
|
+
// Oops error:
|
475
|
+
return 0;
|
476
|
+
}
|
477
|
+
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
|
478
|
+
re_detail::copy(s1.begin(), s1.end(), p);
|
479
|
+
p[s1.size()] = charT(0);
|
480
|
+
p += s1.size() + 1;
|
481
|
+
re_detail::copy(s2.begin(), s2.end(), p);
|
482
|
+
p[s2.size()] = charT(0);
|
483
|
+
}
|
484
|
+
//
|
485
|
+
// now process the equivalence classes:
|
486
|
+
//
|
487
|
+
first = char_set.equivalents_begin();
|
488
|
+
last = char_set.equivalents_end();
|
489
|
+
while(first != last)
|
490
|
+
{
|
491
|
+
string_type s;
|
492
|
+
if(first->second)
|
493
|
+
{
|
494
|
+
#if BOOST_WORKAROUND(__GNUC__, < 3)
|
495
|
+
string_type in(3, charT(0));
|
496
|
+
in[0] = first->first;
|
497
|
+
in[1] = first->second;
|
498
|
+
s = m_traits.transform_primary(in.c_str(), in.c_str()+2);
|
499
|
+
#else
|
500
|
+
charT cs[3] = { first->first, first->second, charT(0), };
|
501
|
+
s = m_traits.transform_primary(cs, cs+2);
|
502
|
+
#endif
|
503
|
+
}
|
504
|
+
else
|
505
|
+
s = m_traits.transform_primary(&first->first, &first->first+1);
|
506
|
+
if(s.empty())
|
507
|
+
return 0; // invalid or unsupported equivalence class
|
508
|
+
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
|
509
|
+
re_detail::copy(s.begin(), s.end(), p);
|
510
|
+
p[s.size()] = charT(0);
|
511
|
+
++first;
|
512
|
+
}
|
513
|
+
//
|
514
|
+
// finally reset the address of our last state:
|
515
|
+
//
|
516
|
+
m_last_state = result = static_cast<re_set_long<m_type>*>(getaddress(offset));
|
517
|
+
return result;
|
518
|
+
}
|
519
|
+
|
520
|
+
// Generic ordering: plain operator< on the two values.
template<class T>
inline bool char_less(T t1, T t2)
{
   const bool before = t1 < t2;
   return before;
}

// char is ordered by its unsigned char value, whatever the signedness of
// plain char on the platform.
inline bool char_less(char t1, char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}

// signed char is likewise ordered by its unsigned char value.
inline bool char_less(signed char t1, signed char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}
|
533
|
+
|
534
|
+
template <class charT, class traits>
|
535
|
+
re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
536
|
+
const basic_char_set<charT, traits>& char_set, mpl::true_*)
|
537
|
+
{
|
538
|
+
typedef typename traits::string_type string_type;
|
539
|
+
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
|
540
|
+
|
541
|
+
re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
|
542
|
+
bool negate = char_set.is_negated();
|
543
|
+
std::memset(result->_map, 0, sizeof(result->_map));
|
544
|
+
//
|
545
|
+
// handle singles first:
|
546
|
+
//
|
547
|
+
item_iterator first, last;
|
548
|
+
first = char_set.singles_begin();
|
549
|
+
last = char_set.singles_end();
|
550
|
+
while(first != last)
|
551
|
+
{
|
552
|
+
for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
|
553
|
+
{
|
554
|
+
if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
|
555
|
+
== this->m_traits.translate(first->first, this->m_icase))
|
556
|
+
result->_map[i] = true;
|
557
|
+
}
|
558
|
+
++first;
|
559
|
+
}
|
560
|
+
//
|
561
|
+
// OK now handle ranges:
|
562
|
+
//
|
563
|
+
first = char_set.ranges_begin();
|
564
|
+
last = char_set.ranges_end();
|
565
|
+
while(first != last)
|
566
|
+
{
|
567
|
+
// first grab the endpoints of the range:
|
568
|
+
charT c1 = this->m_traits.translate(first->first, this->m_icase);
|
569
|
+
++first;
|
570
|
+
charT c2 = this->m_traits.translate(first->first, this->m_icase);
|
571
|
+
++first;
|
572
|
+
// different actions now depending upon whether collation is turned on:
|
573
|
+
if(flags() & regex_constants::collate)
|
574
|
+
{
|
575
|
+
// we need to transform our range into sort keys:
|
576
|
+
charT c3[2] = { c1, charT(0), };
|
577
|
+
string_type s1 = this->m_traits.transform(c3, c3+1);
|
578
|
+
c3[0] = c2;
|
579
|
+
string_type s2 = this->m_traits.transform(c3, c3+1);
|
580
|
+
if(s1 > s2)
|
581
|
+
{
|
582
|
+
// Oops error:
|
583
|
+
return 0;
|
584
|
+
}
|
585
|
+
BOOST_ASSERT(c3[1] == charT(0));
|
586
|
+
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
587
|
+
{
|
588
|
+
c3[0] = static_cast<charT>(i);
|
589
|
+
string_type s3 = this->m_traits.transform(c3, c3 +1);
|
590
|
+
if((s1 <= s3) && (s3 <= s2))
|
591
|
+
result->_map[i] = true;
|
592
|
+
}
|
593
|
+
}
|
594
|
+
else
|
595
|
+
{
|
596
|
+
if(char_less(c2, c1))
|
597
|
+
{
|
598
|
+
// Oops error:
|
599
|
+
return 0;
|
600
|
+
}
|
601
|
+
// everything in range matches:
|
602
|
+
std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
|
603
|
+
}
|
604
|
+
}
|
605
|
+
//
|
606
|
+
// and now the classes:
|
607
|
+
//
|
608
|
+
typedef typename traits::char_class_type m_type;
|
609
|
+
m_type m = char_set.classes();
|
610
|
+
if(flags() & regbase::icase)
|
611
|
+
{
|
612
|
+
// adjust m as needed:
|
613
|
+
if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
|
614
|
+
m |= m_alpha_mask;
|
615
|
+
}
|
616
|
+
if(m != 0)
|
617
|
+
{
|
618
|
+
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
619
|
+
{
|
620
|
+
if(this->m_traits.isctype(static_cast<charT>(i), m))
|
621
|
+
result->_map[i] = true;
|
622
|
+
}
|
623
|
+
}
|
624
|
+
//
|
625
|
+
// and now the negated classes:
|
626
|
+
//
|
627
|
+
m = char_set.negated_classes();
|
628
|
+
if(flags() & regbase::icase)
|
629
|
+
{
|
630
|
+
// adjust m as needed:
|
631
|
+
if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
|
632
|
+
m |= m_alpha_mask;
|
633
|
+
}
|
634
|
+
if(m != 0)
|
635
|
+
{
|
636
|
+
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
637
|
+
{
|
638
|
+
if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
|
639
|
+
result->_map[i] = true;
|
640
|
+
}
|
641
|
+
}
|
642
|
+
//
|
643
|
+
// now process the equivalence classes:
|
644
|
+
//
|
645
|
+
first = char_set.equivalents_begin();
|
646
|
+
last = char_set.equivalents_end();
|
647
|
+
while(first != last)
|
648
|
+
{
|
649
|
+
string_type s;
|
650
|
+
BOOST_ASSERT(static_cast<charT>(0) == first->second);
|
651
|
+
s = m_traits.transform_primary(&first->first, &first->first+1);
|
652
|
+
if(s.empty())
|
653
|
+
return 0; // invalid or unsupported equivalence class
|
654
|
+
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
655
|
+
{
|
656
|
+
charT c[2] = { (static_cast<charT>(i)), charT(0), };
|
657
|
+
string_type s2 = this->m_traits.transform_primary(c, c+1);
|
658
|
+
if(s == s2)
|
659
|
+
result->_map[i] = true;
|
660
|
+
}
|
661
|
+
++first;
|
662
|
+
}
|
663
|
+
if(negate)
|
664
|
+
{
|
665
|
+
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
666
|
+
{
|
667
|
+
result->_map[i] = !(result->_map[i]);
|
668
|
+
}
|
669
|
+
}
|
670
|
+
return result;
|
671
|
+
}
|
672
|
+
|
673
|
+
template <class charT, class traits>
|
674
|
+
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
|
675
|
+
{
|
676
|
+
if(this->m_pdata->m_status)
|
677
|
+
return;
|
678
|
+
// we've added all the states we need, now finish things off.
|
679
|
+
// start by adding a terminating state:
|
680
|
+
append_state(syntax_element_match);
|
681
|
+
// extend storage to store original expression:
|
682
|
+
std::ptrdiff_t len = p2 - p1;
|
683
|
+
m_pdata->m_expression_len = len;
|
684
|
+
charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
|
685
|
+
m_pdata->m_expression = ps;
|
686
|
+
re_detail::copy(p1, p2, ps);
|
687
|
+
ps[p2 - p1] = 0;
|
688
|
+
// fill in our other data...
|
689
|
+
// successful parsing implies a zero status:
|
690
|
+
m_pdata->m_status = 0;
|
691
|
+
// get the first state of the machine:
|
692
|
+
m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
|
693
|
+
// fixup pointers in the machine:
|
694
|
+
fixup_pointers(m_pdata->m_first_state);
|
695
|
+
if(m_has_recursions)
|
696
|
+
{
|
697
|
+
m_pdata->m_has_recursions = true;
|
698
|
+
fixup_recursions(m_pdata->m_first_state);
|
699
|
+
if(this->m_pdata->m_status)
|
700
|
+
return;
|
701
|
+
}
|
702
|
+
else
|
703
|
+
m_pdata->m_has_recursions = false;
|
704
|
+
// create nested startmaps:
|
705
|
+
create_startmaps(m_pdata->m_first_state);
|
706
|
+
// create main startmap:
|
707
|
+
std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
|
708
|
+
m_pdata->m_can_be_null = 0;
|
709
|
+
|
710
|
+
m_bad_repeats = 0;
|
711
|
+
if(m_has_recursions)
|
712
|
+
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
|
713
|
+
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
|
714
|
+
// get the restart type:
|
715
|
+
m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
|
716
|
+
// optimise a leading repeat if there is one:
|
717
|
+
probe_leading_repeat(m_pdata->m_first_state);
|
718
|
+
}
|
719
|
+
|
720
|
+
template <class charT, class traits>
|
721
|
+
void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
|
722
|
+
{
|
723
|
+
while(state)
|
724
|
+
{
|
725
|
+
switch(state->type)
|
726
|
+
{
|
727
|
+
case syntax_element_recurse:
|
728
|
+
m_has_recursions = true;
|
729
|
+
if(state->next.i)
|
730
|
+
state->next.p = getaddress(state->next.i, state);
|
731
|
+
else
|
732
|
+
state->next.p = 0;
|
733
|
+
break;
|
734
|
+
case syntax_element_rep:
|
735
|
+
case syntax_element_dot_rep:
|
736
|
+
case syntax_element_char_rep:
|
737
|
+
case syntax_element_short_set_rep:
|
738
|
+
case syntax_element_long_set_rep:
|
739
|
+
// set the state_id of this repeat:
|
740
|
+
static_cast<re_repeat*>(state)->state_id = m_repeater_id++;
|
741
|
+
BOOST_FALLTHROUGH;
|
742
|
+
case syntax_element_alt:
|
743
|
+
std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
|
744
|
+
static_cast<re_alt*>(state)->can_be_null = 0;
|
745
|
+
BOOST_FALLTHROUGH;
|
746
|
+
case syntax_element_jump:
|
747
|
+
static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state);
|
748
|
+
BOOST_FALLTHROUGH;
|
749
|
+
default:
|
750
|
+
if(state->next.i)
|
751
|
+
state->next.p = getaddress(state->next.i, state);
|
752
|
+
else
|
753
|
+
state->next.p = 0;
|
754
|
+
}
|
755
|
+
state = state->next.p;
|
756
|
+
}
|
757
|
+
}
|
758
|
+
|
759
|
+
template <class charT, class traits>
|
760
|
+
void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
|
761
|
+
{
|
762
|
+
re_syntax_base* base = state;
|
763
|
+
while(state)
|
764
|
+
{
|
765
|
+
switch(state->type)
|
766
|
+
{
|
767
|
+
case syntax_element_assert_backref:
|
768
|
+
{
|
769
|
+
// just check that the index is valid:
|
770
|
+
int idx = static_cast<const re_brace*>(state)->index;
|
771
|
+
if(idx < 0)
|
772
|
+
{
|
773
|
+
idx = -idx-1;
|
774
|
+
if(idx >= 10000)
|
775
|
+
{
|
776
|
+
idx = m_pdata->get_id(idx);
|
777
|
+
if(idx <= 0)
|
778
|
+
{
|
779
|
+
// check of sub-expression that doesn't exist:
|
780
|
+
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
781
|
+
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
782
|
+
//
|
783
|
+
// clear the expression, we should be empty:
|
784
|
+
//
|
785
|
+
this->m_pdata->m_expression = 0;
|
786
|
+
this->m_pdata->m_expression_len = 0;
|
787
|
+
//
|
788
|
+
// and throw if required:
|
789
|
+
//
|
790
|
+
if(0 == (this->flags() & regex_constants::no_except))
|
791
|
+
{
|
792
|
+
std::string message = "Encountered a forward reference to a marked sub-expression that does not exist.";
|
793
|
+
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
794
|
+
e.raise();
|
795
|
+
}
|
796
|
+
}
|
797
|
+
}
|
798
|
+
}
|
799
|
+
}
|
800
|
+
break;
|
801
|
+
case syntax_element_recurse:
|
802
|
+
{
|
803
|
+
bool ok = false;
|
804
|
+
re_syntax_base* p = base;
|
805
|
+
std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
|
806
|
+
if(idx > 10000)
|
807
|
+
{
|
808
|
+
//
|
809
|
+
// There may be more than one capture group with this hash, just do what Perl
|
810
|
+
// does and recurse to the leftmost:
|
811
|
+
//
|
812
|
+
idx = m_pdata->get_id(static_cast<int>(idx));
|
813
|
+
}
|
814
|
+
while(p)
|
815
|
+
{
|
816
|
+
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
|
817
|
+
{
|
818
|
+
//
|
819
|
+
// We've found the target of the recursion, set the jump target:
|
820
|
+
//
|
821
|
+
static_cast<re_jump*>(state)->alt.p = p;
|
822
|
+
ok = true;
|
823
|
+
//
|
824
|
+
// Now scan the target for nested repeats:
|
825
|
+
//
|
826
|
+
p = p->next.p;
|
827
|
+
int next_rep_id = 0;
|
828
|
+
while(p)
|
829
|
+
{
|
830
|
+
switch(p->type)
|
831
|
+
{
|
832
|
+
case syntax_element_rep:
|
833
|
+
case syntax_element_dot_rep:
|
834
|
+
case syntax_element_char_rep:
|
835
|
+
case syntax_element_short_set_rep:
|
836
|
+
case syntax_element_long_set_rep:
|
837
|
+
next_rep_id = static_cast<re_repeat*>(p)->state_id;
|
838
|
+
break;
|
839
|
+
case syntax_element_endmark:
|
840
|
+
if(static_cast<const re_brace*>(p)->index == idx)
|
841
|
+
next_rep_id = -1;
|
842
|
+
break;
|
843
|
+
default:
|
844
|
+
break;
|
845
|
+
}
|
846
|
+
if(next_rep_id)
|
847
|
+
break;
|
848
|
+
p = p->next.p;
|
849
|
+
}
|
850
|
+
if(next_rep_id > 0)
|
851
|
+
{
|
852
|
+
static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
|
853
|
+
}
|
854
|
+
|
855
|
+
break;
|
856
|
+
}
|
857
|
+
p = p->next.p;
|
858
|
+
}
|
859
|
+
if(!ok)
|
860
|
+
{
|
861
|
+
// recursion to sub-expression that doesn't exist:
|
862
|
+
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
863
|
+
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
864
|
+
//
|
865
|
+
// clear the expression, we should be empty:
|
866
|
+
//
|
867
|
+
this->m_pdata->m_expression = 0;
|
868
|
+
this->m_pdata->m_expression_len = 0;
|
869
|
+
//
|
870
|
+
// and throw if required:
|
871
|
+
//
|
872
|
+
if(0 == (this->flags() & regex_constants::no_except))
|
873
|
+
{
|
874
|
+
std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist.";
|
875
|
+
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
876
|
+
e.raise();
|
877
|
+
}
|
878
|
+
}
|
879
|
+
}
|
880
|
+
break;
|
881
|
+
default:
|
882
|
+
break;
|
883
|
+
}
|
884
|
+
state = state->next.p;
|
885
|
+
}
|
886
|
+
}
|
887
|
+
|
888
|
+
template <class charT, class traits>
|
889
|
+
void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
890
|
+
{
|
891
|
+
// non-recursive implementation:
|
892
|
+
// create the last map in the machine first, so that earlier maps
|
893
|
+
// can make use of the result...
|
894
|
+
//
|
895
|
+
// This was originally a recursive implementation, but that caused stack
|
896
|
+
// overflows with complex expressions on small stacks (think COM+).
|
897
|
+
|
898
|
+
// start by saving the case setting:
|
899
|
+
bool l_icase = m_icase;
|
900
|
+
std::vector<std::pair<bool, re_syntax_base*> > v;
|
901
|
+
|
902
|
+
while(state)
|
903
|
+
{
|
904
|
+
switch(state->type)
|
905
|
+
{
|
906
|
+
case syntax_element_toggle_case:
|
907
|
+
// we need to track case changes here:
|
908
|
+
m_icase = static_cast<re_case*>(state)->icase;
|
909
|
+
state = state->next.p;
|
910
|
+
continue;
|
911
|
+
case syntax_element_alt:
|
912
|
+
case syntax_element_rep:
|
913
|
+
case syntax_element_dot_rep:
|
914
|
+
case syntax_element_char_rep:
|
915
|
+
case syntax_element_short_set_rep:
|
916
|
+
case syntax_element_long_set_rep:
|
917
|
+
// just push the state onto our stack for now:
|
918
|
+
v.push_back(std::pair<bool, re_syntax_base*>(m_icase, state));
|
919
|
+
state = state->next.p;
|
920
|
+
break;
|
921
|
+
case syntax_element_backstep:
|
922
|
+
// we need to calculate how big the backstep is:
|
923
|
+
static_cast<re_brace*>(state)->index
|
924
|
+
= this->calculate_backstep(state->next.p);
|
925
|
+
if(static_cast<re_brace*>(state)->index < 0)
|
926
|
+
{
|
927
|
+
// Oops error:
|
928
|
+
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
929
|
+
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
930
|
+
//
|
931
|
+
// clear the expression, we should be empty:
|
932
|
+
//
|
933
|
+
this->m_pdata->m_expression = 0;
|
934
|
+
this->m_pdata->m_expression_len = 0;
|
935
|
+
//
|
936
|
+
// and throw if required:
|
937
|
+
//
|
938
|
+
if(0 == (this->flags() & regex_constants::no_except))
|
939
|
+
{
|
940
|
+
std::string message = "Invalid lookbehind assertion encountered in the regular expression.";
|
941
|
+
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
942
|
+
e.raise();
|
943
|
+
}
|
944
|
+
}
|
945
|
+
BOOST_FALLTHROUGH;
|
946
|
+
default:
|
947
|
+
state = state->next.p;
|
948
|
+
}
|
949
|
+
}
|
950
|
+
|
951
|
+
// now work through our list, building all the maps as we go:
|
952
|
+
while(v.size())
|
953
|
+
{
|
954
|
+
// Initialize m_recursion_checks if we need it:
|
955
|
+
if(m_has_recursions)
|
956
|
+
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
|
957
|
+
|
958
|
+
const std::pair<bool, re_syntax_base*>& p = v.back();
|
959
|
+
m_icase = p.first;
|
960
|
+
state = p.second;
|
961
|
+
v.pop_back();
|
962
|
+
|
963
|
+
// Build maps:
|
964
|
+
m_bad_repeats = 0;
|
965
|
+
create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
|
966
|
+
m_bad_repeats = 0;
|
967
|
+
|
968
|
+
if(m_has_recursions)
|
969
|
+
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
|
970
|
+
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
|
971
|
+
// adjust the type of the state to allow for faster matching:
|
972
|
+
state->type = this->get_repeat_type(state);
|
973
|
+
}
|
974
|
+
// restore case sensitivity:
|
975
|
+
m_icase = l_icase;
|
976
|
+
}
|
977
|
+
|
978
|
+
template <class charT, class traits>
|
979
|
+
int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
|
980
|
+
{
|
981
|
+
typedef typename traits::char_class_type m_type;
|
982
|
+
int result = 0;
|
983
|
+
while(state)
|
984
|
+
{
|
985
|
+
switch(state->type)
|
986
|
+
{
|
987
|
+
case syntax_element_startmark:
|
988
|
+
if((static_cast<re_brace*>(state)->index == -1)
|
989
|
+
|| (static_cast<re_brace*>(state)->index == -2))
|
990
|
+
{
|
991
|
+
state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
|
992
|
+
continue;
|
993
|
+
}
|
994
|
+
else if(static_cast<re_brace*>(state)->index == -3)
|
995
|
+
{
|
996
|
+
state = state->next.p->next.p;
|
997
|
+
continue;
|
998
|
+
}
|
999
|
+
break;
|
1000
|
+
case syntax_element_endmark:
|
1001
|
+
if((static_cast<re_brace*>(state)->index == -1)
|
1002
|
+
|| (static_cast<re_brace*>(state)->index == -2))
|
1003
|
+
return result;
|
1004
|
+
break;
|
1005
|
+
case syntax_element_literal:
|
1006
|
+
result += static_cast<re_literal*>(state)->length;
|
1007
|
+
break;
|
1008
|
+
case syntax_element_wild:
|
1009
|
+
case syntax_element_set:
|
1010
|
+
result += 1;
|
1011
|
+
break;
|
1012
|
+
case syntax_element_dot_rep:
|
1013
|
+
case syntax_element_char_rep:
|
1014
|
+
case syntax_element_short_set_rep:
|
1015
|
+
case syntax_element_backref:
|
1016
|
+
case syntax_element_rep:
|
1017
|
+
case syntax_element_combining:
|
1018
|
+
case syntax_element_long_set_rep:
|
1019
|
+
case syntax_element_backstep:
|
1020
|
+
{
|
1021
|
+
re_repeat* rep = static_cast<re_repeat *>(state);
|
1022
|
+
// adjust the type of the state to allow for faster matching:
|
1023
|
+
state->type = this->get_repeat_type(state);
|
1024
|
+
if((state->type == syntax_element_dot_rep)
|
1025
|
+
|| (state->type == syntax_element_char_rep)
|
1026
|
+
|| (state->type == syntax_element_short_set_rep))
|
1027
|
+
{
|
1028
|
+
if(rep->max != rep->min)
|
1029
|
+
return -1;
|
1030
|
+
result += static_cast<int>(rep->min);
|
1031
|
+
state = rep->alt.p;
|
1032
|
+
continue;
|
1033
|
+
}
|
1034
|
+
else if(state->type == syntax_element_long_set_rep)
|
1035
|
+
{
|
1036
|
+
BOOST_ASSERT(rep->next.p->type == syntax_element_long_set);
|
1037
|
+
if(static_cast<re_set_long<m_type>*>(rep->next.p)->singleton == 0)
|
1038
|
+
return -1;
|
1039
|
+
if(rep->max != rep->min)
|
1040
|
+
return -1;
|
1041
|
+
result += static_cast<int>(rep->min);
|
1042
|
+
state = rep->alt.p;
|
1043
|
+
continue;
|
1044
|
+
}
|
1045
|
+
}
|
1046
|
+
return -1;
|
1047
|
+
case syntax_element_long_set:
|
1048
|
+
if(static_cast<re_set_long<m_type>*>(state)->singleton == 0)
|
1049
|
+
return -1;
|
1050
|
+
result += 1;
|
1051
|
+
break;
|
1052
|
+
case syntax_element_jump:
|
1053
|
+
state = static_cast<re_jump*>(state)->alt.p;
|
1054
|
+
continue;
|
1055
|
+
case syntax_element_alt:
|
1056
|
+
{
|
1057
|
+
int r1 = calculate_backstep(state->next.p);
|
1058
|
+
int r2 = calculate_backstep(static_cast<re_alt*>(state)->alt.p);
|
1059
|
+
if((r1 < 0) || (r1 != r2))
|
1060
|
+
return -1;
|
1061
|
+
return result + r1;
|
1062
|
+
}
|
1063
|
+
default:
|
1064
|
+
break;
|
1065
|
+
}
|
1066
|
+
state = state->next.p;
|
1067
|
+
}
|
1068
|
+
return -1;
|
1069
|
+
}
|
1070
|
+
|
1071
|
+
template <class charT, class traits>
|
1072
|
+
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
|
1073
|
+
{
|
1074
|
+
int not_last_jump = 1;
|
1075
|
+
re_syntax_base* recursion_start = 0;
|
1076
|
+
int recursion_sub = 0;
|
1077
|
+
re_syntax_base* recursion_restart = 0;
|
1078
|
+
|
1079
|
+
// track case sensitivity:
|
1080
|
+
bool l_icase = m_icase;
|
1081
|
+
|
1082
|
+
while(state)
|
1083
|
+
{
|
1084
|
+
switch(state->type)
|
1085
|
+
{
|
1086
|
+
case syntax_element_toggle_case:
|
1087
|
+
l_icase = static_cast<re_case*>(state)->icase;
|
1088
|
+
state = state->next.p;
|
1089
|
+
break;
|
1090
|
+
case syntax_element_literal:
|
1091
|
+
{
|
1092
|
+
// don't set anything in *pnull, set each element in l_map
|
1093
|
+
// that could match the first character in the literal:
|
1094
|
+
if(l_map)
|
1095
|
+
{
|
1096
|
+
l_map[0] |= mask_init;
|
1097
|
+
charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
|
1098
|
+
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
1099
|
+
{
|
1100
|
+
if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
|
1101
|
+
l_map[i] |= mask;
|
1102
|
+
}
|
1103
|
+
}
|
1104
|
+
return;
|
1105
|
+
}
|
1106
|
+
case syntax_element_end_line:
|
1107
|
+
{
|
1108
|
+
// next character must be a line separator (if there is one):
|
1109
|
+
if(l_map)
|
1110
|
+
{
|
1111
|
+
l_map[0] |= mask_init;
|
1112
|
+
l_map[static_cast<unsigned>('\n')] |= mask;
|
1113
|
+
l_map[static_cast<unsigned>('\r')] |= mask;
|
1114
|
+
l_map[static_cast<unsigned>('\f')] |= mask;
|
1115
|
+
l_map[0x85] |= mask;
|
1116
|
+
}
|
1117
|
+
// now figure out if we can match a NULL string at this point:
|
1118
|
+
if(pnull)
|
1119
|
+
create_startmap(state->next.p, 0, pnull, mask);
|
1120
|
+
return;
|
1121
|
+
}
|
1122
|
+
case syntax_element_recurse:
|
1123
|
+
{
|
1124
|
+
if(state->type == syntax_element_startmark)
|
1125
|
+
recursion_sub = static_cast<re_brace*>(state)->index;
|
1126
|
+
else
|
1127
|
+
recursion_sub = 0;
|
1128
|
+
if(m_recursion_checks[recursion_sub])
|
1129
|
+
{
|
1130
|
+
// Infinite recursion!!
|
1131
|
+
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
1132
|
+
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
1133
|
+
//
|
1134
|
+
// clear the expression, we should be empty:
|
1135
|
+
//
|
1136
|
+
this->m_pdata->m_expression = 0;
|
1137
|
+
this->m_pdata->m_expression_len = 0;
|
1138
|
+
//
|
1139
|
+
// and throw if required:
|
1140
|
+
//
|
1141
|
+
if(0 == (this->flags() & regex_constants::no_except))
|
1142
|
+
{
|
1143
|
+
std::string message = "Encountered an infinite recursion.";
|
1144
|
+
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
1145
|
+
e.raise();
|
1146
|
+
}
|
1147
|
+
}
|
1148
|
+
else if(recursion_start == 0)
|
1149
|
+
{
|
1150
|
+
recursion_start = state;
|
1151
|
+
recursion_restart = state->next.p;
|
1152
|
+
state = static_cast<re_jump*>(state)->alt.p;
|
1153
|
+
m_recursion_checks[recursion_sub] = true;
|
1154
|
+
break;
|
1155
|
+
}
|
1156
|
+
m_recursion_checks[recursion_sub] = true;
|
1157
|
+
// can't handle nested recursion here...
|
1158
|
+
BOOST_FALLTHROUGH;
|
1159
|
+
}
|
1160
|
+
case syntax_element_backref:
|
1161
|
+
// can be null, and any character can match:
|
1162
|
+
if(pnull)
|
1163
|
+
*pnull |= mask;
|
1164
|
+
BOOST_FALLTHROUGH;
|
1165
|
+
case syntax_element_wild:
|
1166
|
+
{
|
1167
|
+
// can't be null, any character can match:
|
1168
|
+
set_all_masks(l_map, mask);
|
1169
|
+
return;
|
1170
|
+
}
|
1171
|
+
case syntax_element_match:
|
1172
|
+
{
|
1173
|
+
// must be null, any character can match:
|
1174
|
+
set_all_masks(l_map, mask);
|
1175
|
+
if(pnull)
|
1176
|
+
*pnull |= mask;
|
1177
|
+
return;
|
1178
|
+
}
|
1179
|
+
case syntax_element_word_start:
|
1180
|
+
{
|
1181
|
+
// recurse, then AND with all the word characters:
|
1182
|
+
create_startmap(state->next.p, l_map, pnull, mask);
|
1183
|
+
if(l_map)
|
1184
|
+
{
|
1185
|
+
l_map[0] |= mask_init;
|
1186
|
+
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
1187
|
+
{
|
1188
|
+
if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
|
1189
|
+
l_map[i] &= static_cast<unsigned char>(~mask);
|
1190
|
+
}
|
1191
|
+
}
|
1192
|
+
return;
|
1193
|
+
}
|
1194
|
+
case syntax_element_word_end:
|
1195
|
+
{
|
1196
|
+
// recurse, then AND with all the word characters:
|
1197
|
+
create_startmap(state->next.p, l_map, pnull, mask);
|
1198
|
+
if(l_map)
|
1199
|
+
{
|
1200
|
+
l_map[0] |= mask_init;
|
1201
|
+
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
1202
|
+
{
|
1203
|
+
if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
|
1204
|
+
l_map[i] &= static_cast<unsigned char>(~mask);
|
1205
|
+
}
|
1206
|
+
}
|
1207
|
+
return;
|
1208
|
+
}
|
1209
|
+
case syntax_element_buffer_end:
|
1210
|
+
{
|
1211
|
+
// we *must be null* :
|
1212
|
+
if(pnull)
|
1213
|
+
*pnull |= mask;
|
1214
|
+
return;
|
1215
|
+
}
|
1216
|
+
case syntax_element_long_set:
|
1217
|
+
if(l_map)
|
1218
|
+
{
|
1219
|
+
typedef typename traits::char_class_type m_type;
|
1220
|
+
if(static_cast<re_set_long<m_type>*>(state)->singleton)
|
1221
|
+
{
|
1222
|
+
l_map[0] |= mask_init;
|
1223
|
+
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
1224
|
+
{
|
1225
|
+
charT c = static_cast<charT>(i);
|
1226
|
+
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<m_type>*>(state), *m_pdata, l_icase))
|
1227
|
+
l_map[i] |= mask;
|
1228
|
+
}
|
1229
|
+
}
|
1230
|
+
else
|
1231
|
+
set_all_masks(l_map, mask);
|
1232
|
+
}
|
1233
|
+
return;
|
1234
|
+
case syntax_element_set:
|
1235
|
+
if(l_map)
|
1236
|
+
{
|
1237
|
+
l_map[0] |= mask_init;
|
1238
|
+
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
1239
|
+
{
|
1240
|
+
if(static_cast<re_set*>(state)->_map[
|
1241
|
+
static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
|
1242
|
+
l_map[i] |= mask;
|
1243
|
+
}
|
1244
|
+
}
|
1245
|
+
return;
|
1246
|
+
case syntax_element_jump:
|
1247
|
+
// take the jump:
|
1248
|
+
state = static_cast<re_alt*>(state)->alt.p;
|
1249
|
+
not_last_jump = -1;
|
1250
|
+
break;
|
1251
|
+
case syntax_element_alt:
|
1252
|
+
case syntax_element_rep:
|
1253
|
+
case syntax_element_dot_rep:
|
1254
|
+
case syntax_element_char_rep:
|
1255
|
+
case syntax_element_short_set_rep:
|
1256
|
+
case syntax_element_long_set_rep:
|
1257
|
+
{
|
1258
|
+
re_alt* rep = static_cast<re_alt*>(state);
|
1259
|
+
if(rep->_map[0] & mask_init)
|
1260
|
+
{
|
1261
|
+
if(l_map)
|
1262
|
+
{
|
1263
|
+
// copy previous results:
|
1264
|
+
l_map[0] |= mask_init;
|
1265
|
+
for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
|
1266
|
+
{
|
1267
|
+
if(rep->_map[i] & mask_any)
|
1268
|
+
l_map[i] |= mask;
|
1269
|
+
}
|
1270
|
+
}
|
1271
|
+
if(pnull)
|
1272
|
+
{
|
1273
|
+
if(rep->can_be_null & mask_any)
|
1274
|
+
*pnull |= mask;
|
1275
|
+
}
|
1276
|
+
}
|
1277
|
+
else
|
1278
|
+
{
|
1279
|
+
// we haven't created a startmap for this alternative yet
|
1280
|
+
// so take the union of the two options:
|
1281
|
+
if(is_bad_repeat(state))
|
1282
|
+
{
|
1283
|
+
set_all_masks(l_map, mask);
|
1284
|
+
if(pnull)
|
1285
|
+
*pnull |= mask;
|
1286
|
+
return;
|
1287
|
+
}
|
1288
|
+
set_bad_repeat(state);
|
1289
|
+
create_startmap(state->next.p, l_map, pnull, mask);
|
1290
|
+
if((state->type == syntax_element_alt)
|
1291
|
+
|| (static_cast<re_repeat*>(state)->min == 0)
|
1292
|
+
|| (not_last_jump == 0))
|
1293
|
+
create_startmap(rep->alt.p, l_map, pnull, mask);
|
1294
|
+
}
|
1295
|
+
}
|
1296
|
+
return;
|
1297
|
+
case syntax_element_soft_buffer_end:
|
1298
|
+
// match newline or null:
|
1299
|
+
if(l_map)
|
1300
|
+
{
|
1301
|
+
l_map[0] |= mask_init;
|
1302
|
+
l_map[static_cast<unsigned>('\n')] |= mask;
|
1303
|
+
l_map[static_cast<unsigned>('\r')] |= mask;
|
1304
|
+
}
|
1305
|
+
if(pnull)
|
1306
|
+
*pnull |= mask;
|
1307
|
+
return;
|
1308
|
+
case syntax_element_endmark:
|
1309
|
+
// need to handle independent subs as a special case:
|
1310
|
+
if(static_cast<re_brace*>(state)->index < 0)
|
1311
|
+
{
|
1312
|
+
// can be null, any character can match:
|
1313
|
+
set_all_masks(l_map, mask);
|
1314
|
+
if(pnull)
|
1315
|
+
*pnull |= mask;
|
1316
|
+
return;
|
1317
|
+
}
|
1318
|
+
else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
|
1319
|
+
{
|
1320
|
+
// recursion termination:
|
1321
|
+
recursion_start = 0;
|
1322
|
+
state = recursion_restart;
|
1323
|
+
break;
|
1324
|
+
}
|
1325
|
+
|
1326
|
+
//
|
1327
|
+
// Normally we just go to the next state... but if this sub-expression is
|
1328
|
+
// the target of a recursion, then we might be ending a recursion, in which
|
1329
|
+
// case we should check whatever follows that recursion, as well as whatever
|
1330
|
+
// follows this state:
|
1331
|
+
//
|
1332
|
+
if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
|
1333
|
+
{
|
1334
|
+
bool ok = false;
|
1335
|
+
re_syntax_base* p = m_pdata->m_first_state;
|
1336
|
+
while(p)
|
1337
|
+
{
|
1338
|
+
if(p->type == syntax_element_recurse)
|
1339
|
+
{
|
1340
|
+
re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
|
1341
|
+
if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
|
1342
|
+
{
|
1343
|
+
ok = true;
|
1344
|
+
break;
|
1345
|
+
}
|
1346
|
+
}
|
1347
|
+
p = p->next.p;
|
1348
|
+
}
|
1349
|
+
if(ok)
|
1350
|
+
{
|
1351
|
+
create_startmap(p->next.p, l_map, pnull, mask);
|
1352
|
+
}
|
1353
|
+
}
|
1354
|
+
state = state->next.p;
|
1355
|
+
break;
|
1356
|
+
|
1357
|
+
case syntax_element_startmark:
|
1358
|
+
// need to handle independent subs as a special case:
|
1359
|
+
if(static_cast<re_brace*>(state)->index == -3)
|
1360
|
+
{
|
1361
|
+
state = state->next.p->next.p;
|
1362
|
+
break;
|
1363
|
+
}
|
1364
|
+
BOOST_FALLTHROUGH;
|
1365
|
+
default:
|
1366
|
+
state = state->next.p;
|
1367
|
+
}
|
1368
|
+
++not_last_jump;
|
1369
|
+
}
|
1370
|
+
}
|
1371
|
+
|
1372
|
+
template <class charT, class traits>
|
1373
|
+
unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
|
1374
|
+
{
|
1375
|
+
//
|
1376
|
+
// find out how the machine starts, so we can optimise the search:
|
1377
|
+
//
|
1378
|
+
while(state)
|
1379
|
+
{
|
1380
|
+
switch(state->type)
|
1381
|
+
{
|
1382
|
+
case syntax_element_startmark:
|
1383
|
+
case syntax_element_endmark:
|
1384
|
+
state = state->next.p;
|
1385
|
+
continue;
|
1386
|
+
case syntax_element_start_line:
|
1387
|
+
return regbase::restart_line;
|
1388
|
+
case syntax_element_word_start:
|
1389
|
+
return regbase::restart_word;
|
1390
|
+
case syntax_element_buffer_start:
|
1391
|
+
return regbase::restart_buf;
|
1392
|
+
case syntax_element_restart_continue:
|
1393
|
+
return regbase::restart_continue;
|
1394
|
+
default:
|
1395
|
+
state = 0;
|
1396
|
+
continue;
|
1397
|
+
}
|
1398
|
+
}
|
1399
|
+
return regbase::restart_any;
|
1400
|
+
}
|
1401
|
+
|
1402
|
+
template <class charT, class traits>
|
1403
|
+
void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
|
1404
|
+
{
|
1405
|
+
//
|
1406
|
+
// set mask in all of bits elements,
|
1407
|
+
// if bits[0] has mask_init not set then we can
|
1408
|
+
// optimise this to a call to memset:
|
1409
|
+
//
|
1410
|
+
if(bits)
|
1411
|
+
{
|
1412
|
+
if(bits[0] == 0)
|
1413
|
+
(std::memset)(bits, mask, 1u << CHAR_BIT);
|
1414
|
+
else
|
1415
|
+
{
|
1416
|
+
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
1417
|
+
bits[i] |= mask;
|
1418
|
+
}
|
1419
|
+
bits[0] |= mask_init;
|
1420
|
+
}
|
1421
|
+
}
|
1422
|
+
|
1423
|
+
template <class charT, class traits>
|
1424
|
+
bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
|
1425
|
+
{
|
1426
|
+
switch(pt->type)
|
1427
|
+
{
|
1428
|
+
case syntax_element_rep:
|
1429
|
+
case syntax_element_dot_rep:
|
1430
|
+
case syntax_element_char_rep:
|
1431
|
+
case syntax_element_short_set_rep:
|
1432
|
+
case syntax_element_long_set_rep:
|
1433
|
+
{
|
1434
|
+
unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
|
1435
|
+
if(state_id > sizeof(m_bad_repeats) * CHAR_BIT)
|
1436
|
+
return true; // run out of bits, assume we can't traverse this one.
|
1437
|
+
static const boost::uintmax_t one = 1uL;
|
1438
|
+
return m_bad_repeats & (one << state_id);
|
1439
|
+
}
|
1440
|
+
default:
|
1441
|
+
return false;
|
1442
|
+
}
|
1443
|
+
}
|
1444
|
+
|
1445
|
+
template <class charT, class traits>
|
1446
|
+
void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
|
1447
|
+
{
|
1448
|
+
switch(pt->type)
|
1449
|
+
{
|
1450
|
+
case syntax_element_rep:
|
1451
|
+
case syntax_element_dot_rep:
|
1452
|
+
case syntax_element_char_rep:
|
1453
|
+
case syntax_element_short_set_rep:
|
1454
|
+
case syntax_element_long_set_rep:
|
1455
|
+
{
|
1456
|
+
unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
|
1457
|
+
static const boost::uintmax_t one = 1uL;
|
1458
|
+
if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)
|
1459
|
+
m_bad_repeats |= (one << state_id);
|
1460
|
+
}
|
1461
|
+
break;
|
1462
|
+
default:
|
1463
|
+
break;
|
1464
|
+
}
|
1465
|
+
}
|
1466
|
+
|
1467
|
+
template <class charT, class traits>
|
1468
|
+
syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
|
1469
|
+
{
|
1470
|
+
typedef typename traits::char_class_type m_type;
|
1471
|
+
if(state->type == syntax_element_rep)
|
1472
|
+
{
|
1473
|
+
// check to see if we are repeating a single state:
|
1474
|
+
if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
|
1475
|
+
{
|
1476
|
+
switch(state->next.p->type)
|
1477
|
+
{
|
1478
|
+
case re_detail::syntax_element_wild:
|
1479
|
+
return re_detail::syntax_element_dot_rep;
|
1480
|
+
case re_detail::syntax_element_literal:
|
1481
|
+
return re_detail::syntax_element_char_rep;
|
1482
|
+
case re_detail::syntax_element_set:
|
1483
|
+
return re_detail::syntax_element_short_set_rep;
|
1484
|
+
case re_detail::syntax_element_long_set:
|
1485
|
+
if(static_cast<re_detail::re_set_long<m_type>*>(state->next.p)->singleton)
|
1486
|
+
return re_detail::syntax_element_long_set_rep;
|
1487
|
+
break;
|
1488
|
+
default:
|
1489
|
+
break;
|
1490
|
+
}
|
1491
|
+
}
|
1492
|
+
}
|
1493
|
+
return state->type;
|
1494
|
+
}
|
1495
|
+
|
1496
|
+
template <class charT, class traits>
|
1497
|
+
void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
|
1498
|
+
{
|
1499
|
+
// enumerate our states, and see if we have a leading repeat
|
1500
|
+
// for which failed search restarts can be optimised;
|
1501
|
+
do
|
1502
|
+
{
|
1503
|
+
switch(state->type)
|
1504
|
+
{
|
1505
|
+
case syntax_element_startmark:
|
1506
|
+
if(static_cast<re_brace*>(state)->index >= 0)
|
1507
|
+
{
|
1508
|
+
state = state->next.p;
|
1509
|
+
continue;
|
1510
|
+
}
|
1511
|
+
if((static_cast<re_brace*>(state)->index == -1)
|
1512
|
+
|| (static_cast<re_brace*>(state)->index == -2))
|
1513
|
+
{
|
1514
|
+
// skip past the zero width assertion:
|
1515
|
+
state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;
|
1516
|
+
continue;
|
1517
|
+
}
|
1518
|
+
if(static_cast<re_brace*>(state)->index == -3)
|
1519
|
+
{
|
1520
|
+
// Have to skip the leading jump state:
|
1521
|
+
state = state->next.p->next.p;
|
1522
|
+
continue;
|
1523
|
+
}
|
1524
|
+
return;
|
1525
|
+
case syntax_element_endmark:
|
1526
|
+
case syntax_element_start_line:
|
1527
|
+
case syntax_element_end_line:
|
1528
|
+
case syntax_element_word_boundary:
|
1529
|
+
case syntax_element_within_word:
|
1530
|
+
case syntax_element_word_start:
|
1531
|
+
case syntax_element_word_end:
|
1532
|
+
case syntax_element_buffer_start:
|
1533
|
+
case syntax_element_buffer_end:
|
1534
|
+
case syntax_element_restart_continue:
|
1535
|
+
state = state->next.p;
|
1536
|
+
break;
|
1537
|
+
case syntax_element_dot_rep:
|
1538
|
+
case syntax_element_char_rep:
|
1539
|
+
case syntax_element_short_set_rep:
|
1540
|
+
case syntax_element_long_set_rep:
|
1541
|
+
if(this->m_has_backrefs == 0)
|
1542
|
+
static_cast<re_repeat*>(state)->leading = true;
|
1543
|
+
BOOST_FALLTHROUGH;
|
1544
|
+
default:
|
1545
|
+
return;
|
1546
|
+
}
|
1547
|
+
}while(state);
|
1548
|
+
}
|
1549
|
+
|
1550
|
+
|
1551
|
+
} // namespace re_detail
|
1552
|
+
|
1553
|
+
} // namespace boost
|
1554
|
+
|
1555
|
+
#ifdef BOOST_MSVC
|
1556
|
+
# pragma warning(pop)
|
1557
|
+
#endif
|
1558
|
+
|
1559
|
+
#ifdef BOOST_MSVC
|
1560
|
+
#pragma warning(push)
|
1561
|
+
#pragma warning(disable: 4103)
|
1562
|
+
#endif
|
1563
|
+
#ifdef BOOST_HAS_ABI_HEADERS
|
1564
|
+
# include BOOST_ABI_SUFFIX
|
1565
|
+
#endif
|
1566
|
+
#ifdef BOOST_MSVC
|
1567
|
+
#pragma warning(pop)
|
1568
|
+
#endif
|
1569
|
+
|
1570
|
+
#endif
|
1571
|
+
|