passenger 4.0.27 → 4.0.28
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of passenger might be problematic. Click here for more details.
- data.tar.gz.asc +7 -7
- data/.gitignore +1 -0
- data/NEWS +22 -0
- data/build/preprocessor.rb +10 -0
- data/build/rpm.rb +74 -65
- data/debian.template/rules.template +8 -0
- data/dev/copy_boost_headers.rb +11 -2
- data/doc/Users guide Apache.idmap.txt +161 -145
- data/doc/Users guide Apache.txt +12 -1
- data/doc/Users guide Nginx.idmap.txt +142 -126
- data/doc/Users guide Nginx.txt +14 -1
- data/doc/Users guide Standalone.txt +1 -0
- data/doc/users_guide_snippets/environment_variables.txt +1 -1
- data/doc/users_guide_snippets/installation.txt +2 -0
- data/doc/users_guide_snippets/tips.txt +118 -0
- data/ext/apache2/Configuration.cpp +0 -6
- data/ext/apache2/Configuration.hpp +0 -5
- data/ext/apache2/ConfigurationCommands.cpp +7 -0
- data/ext/apache2/ConfigurationFields.hpp +2 -0
- data/ext/apache2/ConfigurationSetters.cpp +24 -0
- data/ext/apache2/CreateDirConfig.cpp +1 -0
- data/ext/apache2/Hooks.cpp +0 -1
- data/ext/apache2/MergeDirConfig.cpp +7 -0
- data/ext/apache2/SetHeaders.cpp +5 -1
- data/ext/boost/cregex.hpp +39 -0
- data/ext/boost/libs/regex/src/c_regex_traits.cpp +193 -0
- data/ext/boost/libs/regex/src/cpp_regex_traits.cpp +117 -0
- data/ext/boost/libs/regex/src/cregex.cpp +660 -0
- data/ext/boost/libs/regex/src/instances.cpp +32 -0
- data/ext/boost/libs/regex/src/internals.hpp +35 -0
- data/ext/boost/libs/regex/src/posix_api.cpp +296 -0
- data/ext/boost/libs/regex/src/regex.cpp +227 -0
- data/ext/boost/libs/regex/src/regex_debug.cpp +59 -0
- data/ext/boost/libs/regex/src/regex_raw_buffer.cpp +72 -0
- data/ext/boost/libs/regex/src/regex_traits_defaults.cpp +692 -0
- data/ext/boost/libs/regex/src/static_mutex.cpp +179 -0
- data/ext/boost/libs/regex/src/wc_regex_traits.cpp +301 -0
- data/ext/boost/libs/regex/src/wide_posix_api.cpp +315 -0
- data/ext/boost/libs/regex/src/winstances.cpp +35 -0
- data/ext/boost/regex.h +100 -0
- data/ext/boost/regex.hpp +37 -0
- data/ext/boost/regex/concepts.hpp +1128 -0
- data/ext/boost/regex/config.hpp +435 -0
- data/ext/boost/regex/config/borland.hpp +72 -0
- data/ext/boost/regex/config/cwchar.hpp +207 -0
- data/ext/boost/regex/mfc.hpp +190 -0
- data/ext/boost/regex/pattern_except.hpp +100 -0
- data/ext/boost/regex/pending/object_cache.hpp +165 -0
- data/ext/boost/regex/pending/static_mutex.hpp +179 -0
- data/ext/boost/regex/pending/unicode_iterator.hpp +776 -0
- data/ext/boost/regex/regex_traits.hpp +35 -0
- data/ext/boost/regex/user.hpp +93 -0
- data/ext/boost/regex/v4/basic_regex.hpp +782 -0
- data/ext/boost/regex/v4/basic_regex_creator.hpp +1571 -0
- data/ext/boost/regex/v4/basic_regex_parser.hpp +2874 -0
- data/ext/boost/regex/v4/c_regex_traits.hpp +211 -0
- data/ext/boost/regex/v4/char_regex_traits.hpp +81 -0
- data/ext/boost/regex/v4/cpp_regex_traits.hpp +1099 -0
- data/ext/boost/regex/v4/cregex.hpp +330 -0
- data/ext/boost/regex/v4/error_type.hpp +59 -0
- data/ext/boost/regex/v4/fileiter.hpp +455 -0
- data/ext/boost/regex/v4/instances.hpp +222 -0
- data/ext/boost/regex/v4/iterator_category.hpp +91 -0
- data/ext/boost/regex/v4/iterator_traits.hpp +135 -0
- data/ext/boost/regex/v4/match_flags.hpp +138 -0
- data/ext/boost/regex/v4/match_results.hpp +702 -0
- data/ext/boost/regex/v4/mem_block_cache.hpp +99 -0
- data/ext/boost/regex/v4/perl_matcher.hpp +587 -0
- data/ext/boost/regex/v4/perl_matcher_common.hpp +996 -0
- data/ext/boost/regex/v4/perl_matcher_non_recursive.hpp +1642 -0
- data/ext/boost/regex/v4/perl_matcher_recursive.hpp +991 -0
- data/ext/boost/regex/v4/primary_transform.hpp +146 -0
- data/ext/boost/regex/v4/protected_call.hpp +81 -0
- data/ext/boost/regex/v4/regbase.hpp +180 -0
- data/ext/boost/regex/v4/regex.hpp +202 -0
- data/ext/boost/regex/v4/regex_format.hpp +1156 -0
- data/ext/boost/regex/v4/regex_fwd.hpp +73 -0
- data/ext/boost/regex/v4/regex_grep.hpp +155 -0
- data/ext/boost/regex/v4/regex_iterator.hpp +201 -0
- data/ext/boost/regex/v4/regex_match.hpp +382 -0
- data/ext/boost/regex/v4/regex_merge.hpp +93 -0
- data/ext/boost/regex/v4/regex_raw_buffer.hpp +210 -0
- data/ext/boost/regex/v4/regex_replace.hpp +99 -0
- data/ext/boost/regex/v4/regex_search.hpp +217 -0
- data/ext/boost/regex/v4/regex_split.hpp +172 -0
- data/ext/boost/regex/v4/regex_token_iterator.hpp +342 -0
- data/ext/boost/regex/v4/regex_traits.hpp +189 -0
- data/ext/boost/regex/v4/regex_traits_defaults.hpp +371 -0
- data/ext/boost/regex/v4/regex_workaround.hpp +232 -0
- data/ext/boost/regex/v4/states.hpp +301 -0
- data/ext/boost/regex/v4/sub_match.hpp +512 -0
- data/ext/boost/regex/v4/syntax_type.hpp +105 -0
- data/ext/boost/regex/v4/u32regex_iterator.hpp +193 -0
- data/ext/boost/regex/v4/u32regex_token_iterator.hpp +377 -0
- data/ext/boost/regex/v4/w32_regex_traits.hpp +741 -0
- data/ext/boost/regex_fwd.hpp +33 -0
- data/ext/common/AgentsStarter.h +0 -11
- data/ext/common/ApplicationPool2/Common.h +1 -7
- data/ext/common/ApplicationPool2/DirectSpawner.h +3 -3
- data/ext/common/ApplicationPool2/Group.h +166 -69
- data/ext/common/ApplicationPool2/Implementation.cpp +55 -10
- data/ext/common/ApplicationPool2/Options.h +45 -10
- data/ext/common/ApplicationPool2/PipeWatcher.h +1 -2
- data/ext/common/ApplicationPool2/Pool.h +29 -7
- data/ext/common/ApplicationPool2/Process.h +22 -3
- data/ext/common/ApplicationPool2/Session.h +1 -0
- data/ext/common/ApplicationPool2/SmartSpawner.h +5 -10
- data/ext/common/ApplicationPool2/Spawner.h +10 -15
- data/ext/common/ApplicationPool2/SuperGroup.h +10 -9
- data/ext/common/Constants.h +1 -3
- data/ext/common/Hooks.h +193 -0
- data/ext/common/Logging.cpp +67 -2
- data/ext/common/Logging.h +23 -1
- data/ext/common/Utils.cpp +0 -21
- data/ext/common/Utils.h +0 -42
- data/ext/common/Utils/CachedFileStat.hpp +1 -1
- data/ext/common/Utils/StrIntUtils.h +61 -14
- data/ext/common/Utils/StringMap.h +4 -0
- data/ext/common/agents/HelperAgent/AgentOptions.h +4 -4
- data/ext/common/agents/HelperAgent/Main.cpp +2 -3
- data/ext/common/agents/HelperAgent/RequestHandler.h +65 -2
- data/ext/common/agents/LoggingAgent/FilterSupport.h +3 -1
- data/ext/common/agents/Watchdog/Main.cpp +8 -72
- data/ext/nginx/CacheLocationConfig.c +29 -1
- data/ext/nginx/Configuration.c +0 -12
- data/ext/nginx/Configuration.h +0 -1
- data/ext/nginx/ConfigurationCommands.c +10 -0
- data/ext/nginx/ConfigurationFields.h +2 -0
- data/ext/nginx/CreateLocationConfig.c +4 -0
- data/ext/nginx/MergeLocationConfig.c +6 -0
- data/ext/oxt/system_calls.cpp +7 -1
- data/ext/oxt/system_calls.hpp +7 -7
- data/helper-scripts/node-loader.js +6 -2
- data/helper-scripts/rack-loader.rb +5 -2
- data/helper-scripts/rack-preloader.rb +5 -2
- data/lib/phusion_passenger.rb +1 -1
- data/lib/phusion_passenger/apache2/config_options.rb +8 -0
- data/lib/phusion_passenger/constants.rb +0 -1
- data/lib/phusion_passenger/nginx/config_options.rb +9 -2
- data/lib/phusion_passenger/platform_info/apache.rb +2 -1
- data/lib/phusion_passenger/platform_info/compiler.rb +15 -1
- data/lib/phusion_passenger/platform_info/cxx_portability.rb +2 -0
- data/node_lib/phusion_passenger/httplib_emulation.js +85 -17
- data/node_lib/phusion_passenger/request_handler.js +10 -2
- data/rpm/Vagrantfile +32 -0
- data/rpm/get_distro_id.py +4 -0
- data/test/cxx/ApplicationPool2/DirectSpawnerTest.cpp +2 -2
- data/test/cxx/ApplicationPool2/PoolTest.cpp +60 -9
- data/test/cxx/ApplicationPool2/SmartSpawnerTest.cpp +2 -6
- data/test/cxx/CachedFileStatTest.cpp +5 -5
- data/test/cxx/RequestHandlerTest.cpp +3 -6
- data/test/cxx/UtilsTest.cpp +30 -0
- data/test/node/httplib_emulation_spec.js +491 -0
- data/test/node/spec_helper.js +25 -0
- metadata +78 -2
- metadata.gz.asc +7 -7
@@ -0,0 +1,165 @@
|
|
1
|
+
/*
|
2
|
+
*
|
3
|
+
* Copyright (c) 2004
|
4
|
+
* John Maddock
|
5
|
+
*
|
6
|
+
* Use, modification and distribution are subject to the
|
7
|
+
* Boost Software License, Version 1.0. (See accompanying file
|
8
|
+
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
9
|
+
*
|
10
|
+
*/
|
11
|
+
|
12
|
+
/*
|
13
|
+
* LOCATION: see http://www.boost.org for most recent version.
|
14
|
+
* FILE object_cache.hpp
|
15
|
+
* VERSION see <boost/version.hpp>
|
16
|
+
* DESCRIPTION: Implements a generic object cache.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#ifndef BOOST_REGEX_OBJECT_CACHE_HPP
|
20
|
+
#define BOOST_REGEX_OBJECT_CACHE_HPP
|
21
|
+
|
22
|
+
#include <map>
|
23
|
+
#include <list>
|
24
|
+
#include <stdexcept>
|
25
|
+
#include <string>
|
26
|
+
#include <boost/config.hpp>
|
27
|
+
#include <boost/shared_ptr.hpp>
|
28
|
+
#ifdef BOOST_HAS_THREADS
|
29
|
+
#include <boost/regex/pending/static_mutex.hpp>
|
30
|
+
#endif
|
31
|
+
|
32
|
+
namespace boost{
|
33
|
+
|
34
|
+
template <class Key, class Object>
|
35
|
+
class object_cache
|
36
|
+
{
|
37
|
+
public:
|
38
|
+
typedef std::pair< ::boost::shared_ptr<Object const>, Key const*> value_type;
|
39
|
+
typedef std::list<value_type> list_type;
|
40
|
+
typedef typename list_type::iterator list_iterator;
|
41
|
+
typedef std::map<Key, list_iterator> map_type;
|
42
|
+
typedef typename map_type::iterator map_iterator;
|
43
|
+
typedef typename list_type::size_type size_type;
|
44
|
+
static boost::shared_ptr<Object const> get(const Key& k, size_type l_max_cache_size);
|
45
|
+
|
46
|
+
private:
|
47
|
+
static boost::shared_ptr<Object const> do_get(const Key& k, size_type l_max_cache_size);
|
48
|
+
|
49
|
+
struct data
|
50
|
+
{
|
51
|
+
list_type cont;
|
52
|
+
map_type index;
|
53
|
+
};
|
54
|
+
|
55
|
+
// Needed by compilers not implementing the resolution to DR45. For reference,
|
56
|
+
// see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45.
|
57
|
+
friend struct data;
|
58
|
+
};
|
59
|
+
|
60
|
+
template <class Key, class Object>
|
61
|
+
boost::shared_ptr<Object const> object_cache<Key, Object>::get(const Key& k, size_type l_max_cache_size)
|
62
|
+
{
|
63
|
+
#ifdef BOOST_HAS_THREADS
|
64
|
+
static boost::static_mutex mut = BOOST_STATIC_MUTEX_INIT;
|
65
|
+
|
66
|
+
boost::static_mutex::scoped_lock l(mut);
|
67
|
+
if(l)
|
68
|
+
{
|
69
|
+
return do_get(k, l_max_cache_size);
|
70
|
+
}
|
71
|
+
//
|
72
|
+
// what do we do if the lock fails?
|
73
|
+
// for now just throw, but we should never really get here...
|
74
|
+
//
|
75
|
+
::boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock"));
|
76
|
+
#if defined(BOOST_NO_UNREACHABLE_RETURN_DETECTION) || defined(BOOST_NO_EXCEPTIONS)
|
77
|
+
return boost::shared_ptr<Object>();
|
78
|
+
#endif
|
79
|
+
#else
|
80
|
+
return do_get(k, l_max_cache_size);
|
81
|
+
#endif
|
82
|
+
}
|
83
|
+
|
84
|
+
template <class Key, class Object>
|
85
|
+
boost::shared_ptr<Object const> object_cache<Key, Object>::do_get(const Key& k, size_type l_max_cache_size)
|
86
|
+
{
|
87
|
+
typedef typename object_cache<Key, Object>::data object_data;
|
88
|
+
typedef typename map_type::size_type map_size_type;
|
89
|
+
static object_data s_data;
|
90
|
+
|
91
|
+
//
|
92
|
+
// see if the object is already in the cache:
|
93
|
+
//
|
94
|
+
map_iterator mpos = s_data.index.find(k);
|
95
|
+
if(mpos != s_data.index.end())
|
96
|
+
{
|
97
|
+
//
|
98
|
+
// Eureka!
|
99
|
+
// We have a cached item, bump it up the list and return it:
|
100
|
+
//
|
101
|
+
if(--(s_data.cont.end()) != mpos->second)
|
102
|
+
{
|
103
|
+
// splice out the item we want to move:
|
104
|
+
list_type temp;
|
105
|
+
temp.splice(temp.end(), s_data.cont, mpos->second);
|
106
|
+
// and now place it at the end of the list:
|
107
|
+
s_data.cont.splice(s_data.cont.end(), temp, temp.begin());
|
108
|
+
BOOST_ASSERT(*(s_data.cont.back().second) == k);
|
109
|
+
// update index with new position:
|
110
|
+
mpos->second = --(s_data.cont.end());
|
111
|
+
BOOST_ASSERT(&(mpos->first) == mpos->second->second);
|
112
|
+
BOOST_ASSERT(&(mpos->first) == s_data.cont.back().second);
|
113
|
+
}
|
114
|
+
return s_data.cont.back().first;
|
115
|
+
}
|
116
|
+
//
|
117
|
+
// if we get here then the item is not in the cache,
|
118
|
+
// so create it:
|
119
|
+
//
|
120
|
+
boost::shared_ptr<Object const> result(new Object(k));
|
121
|
+
//
|
122
|
+
// Add it to the list, and index it:
|
123
|
+
//
|
124
|
+
s_data.cont.push_back(value_type(result, static_cast<Key const*>(0)));
|
125
|
+
s_data.index.insert(std::make_pair(k, --(s_data.cont.end())));
|
126
|
+
s_data.cont.back().second = &(s_data.index.find(k)->first);
|
127
|
+
map_size_type s = s_data.index.size();
|
128
|
+
BOOST_ASSERT(s_data.index[k]->first.get() == result.get());
|
129
|
+
BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second);
|
130
|
+
BOOST_ASSERT(s_data.index.find(k)->first == k);
|
131
|
+
if(s > l_max_cache_size)
|
132
|
+
{
|
133
|
+
//
|
134
|
+
// We have too many items in the list, so we need to start
|
135
|
+
// popping them off the back of the list, but only if they're
|
136
|
+
// being held uniquely by us:
|
137
|
+
//
|
138
|
+
list_iterator pos = s_data.cont.begin();
|
139
|
+
list_iterator last = s_data.cont.end();
|
140
|
+
while((pos != last) && (s > l_max_cache_size))
|
141
|
+
{
|
142
|
+
if(pos->first.unique())
|
143
|
+
{
|
144
|
+
list_iterator condemmed(pos);
|
145
|
+
++pos;
|
146
|
+
// now remove the items from our containers,
|
147
|
+
// then order has to be as follows:
|
148
|
+
BOOST_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end());
|
149
|
+
s_data.index.erase(*(condemmed->second));
|
150
|
+
s_data.cont.erase(condemmed);
|
151
|
+
--s;
|
152
|
+
}
|
153
|
+
else
|
154
|
+
++pos;
|
155
|
+
}
|
156
|
+
BOOST_ASSERT(s_data.index[k]->first.get() == result.get());
|
157
|
+
BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second);
|
158
|
+
BOOST_ASSERT(s_data.index.find(k)->first == k);
|
159
|
+
}
|
160
|
+
return result;
|
161
|
+
}
|
162
|
+
|
163
|
+
}
|
164
|
+
|
165
|
+
#endif
|
@@ -0,0 +1,179 @@
|
|
1
|
+
/*
|
2
|
+
*
|
3
|
+
* Copyright (c) 2004
|
4
|
+
* John Maddock
|
5
|
+
*
|
6
|
+
* Use, modification and distribution are subject to the
|
7
|
+
* Boost Software License, Version 1.0. (See accompanying file
|
8
|
+
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
9
|
+
*
|
10
|
+
*/
|
11
|
+
|
12
|
+
/*
|
13
|
+
* LOCATION: see http://www.boost.org for most recent version.
|
14
|
+
* FILE static_mutex.hpp
|
15
|
+
* VERSION see <boost/version.hpp>
|
16
|
+
* DESCRIPTION: Declares static_mutex lock type, there are three different
|
17
|
+
* implementations: POSIX pthreads, WIN32 threads, and portable,
|
18
|
+
* these are described in more detail below.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#ifndef BOOST_REGEX_STATIC_MUTEX_HPP
|
22
|
+
#define BOOST_REGEX_STATIC_MUTEX_HPP
|
23
|
+
|
24
|
+
#include <boost/config.hpp>
|
25
|
+
#include <boost/regex/config.hpp> // dll import/export options.
|
26
|
+
|
27
|
+
#ifdef BOOST_HAS_PTHREADS
|
28
|
+
#include <pthread.h>
|
29
|
+
#endif
|
30
|
+
|
31
|
+
#if defined(BOOST_HAS_PTHREADS) && defined(PTHREAD_MUTEX_INITIALIZER)
|
32
|
+
//
|
33
|
+
// pthreads version:
|
34
|
+
// simple wrap around a pthread_mutex_t initialized with
|
35
|
+
// PTHREAD_MUTEX_INITIALIZER.
|
36
|
+
//
|
37
|
+
namespace boost{
|
38
|
+
|
39
|
+
class static_mutex;
|
40
|
+
|
41
|
+
#define BOOST_STATIC_MUTEX_INIT { PTHREAD_MUTEX_INITIALIZER, }
|
42
|
+
|
43
|
+
class BOOST_REGEX_DECL scoped_static_mutex_lock
|
44
|
+
{
|
45
|
+
public:
|
46
|
+
scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
|
47
|
+
~scoped_static_mutex_lock();
|
48
|
+
inline bool locked()const
|
49
|
+
{
|
50
|
+
return m_have_lock;
|
51
|
+
}
|
52
|
+
inline operator void const*()const
|
53
|
+
{
|
54
|
+
return locked() ? this : 0;
|
55
|
+
}
|
56
|
+
void lock();
|
57
|
+
void unlock();
|
58
|
+
private:
|
59
|
+
static_mutex& m_mutex;
|
60
|
+
bool m_have_lock;
|
61
|
+
};
|
62
|
+
|
63
|
+
class static_mutex
|
64
|
+
{
|
65
|
+
public:
|
66
|
+
typedef scoped_static_mutex_lock scoped_lock;
|
67
|
+
pthread_mutex_t m_mutex;
|
68
|
+
};
|
69
|
+
|
70
|
+
} // namespace boost
|
71
|
+
#elif defined(BOOST_HAS_WINTHREADS)
|
72
|
+
//
|
73
|
+
// Win32 version:
|
74
|
+
// Use a 32-bit int as a lock, along with a test-and-set
|
75
|
+
// implementation using InterlockedCompareExchange.
|
76
|
+
//
|
77
|
+
|
78
|
+
#include <boost/cstdint.hpp>
|
79
|
+
|
80
|
+
namespace boost{
|
81
|
+
|
82
|
+
class BOOST_REGEX_DECL scoped_static_mutex_lock;
|
83
|
+
|
84
|
+
class static_mutex
|
85
|
+
{
|
86
|
+
public:
|
87
|
+
typedef scoped_static_mutex_lock scoped_lock;
|
88
|
+
boost::int32_t m_mutex;
|
89
|
+
};
|
90
|
+
|
91
|
+
#define BOOST_STATIC_MUTEX_INIT { 0, }
|
92
|
+
|
93
|
+
class BOOST_REGEX_DECL scoped_static_mutex_lock
|
94
|
+
{
|
95
|
+
public:
|
96
|
+
scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
|
97
|
+
~scoped_static_mutex_lock();
|
98
|
+
operator void const*()const
|
99
|
+
{
|
100
|
+
return locked() ? this : 0;
|
101
|
+
}
|
102
|
+
bool locked()const
|
103
|
+
{
|
104
|
+
return m_have_lock;
|
105
|
+
}
|
106
|
+
void lock();
|
107
|
+
void unlock();
|
108
|
+
private:
|
109
|
+
static_mutex& m_mutex;
|
110
|
+
bool m_have_lock;
|
111
|
+
scoped_static_mutex_lock(const scoped_static_mutex_lock&);
|
112
|
+
scoped_static_mutex_lock& operator=(const scoped_static_mutex_lock&);
|
113
|
+
};
|
114
|
+
|
115
|
+
} // namespace
|
116
|
+
|
117
|
+
#else
|
118
|
+
//
|
119
|
+
// Portable version of a static mutex based on Boost.Thread library:
|
120
|
+
// This has to use a single mutex shared by all instances of static_mutex
|
121
|
+
// because boost::call_once doesn't allow us to pass instance information
|
122
|
+
// down to the initialisation procedure.  In fact the initialisation routine
|
123
|
+
// may need to be called more than once - but only once per instance.
|
124
|
+
//
|
125
|
+
// Since this preprocessor path is almost never taken, we hide these header
|
126
|
+
// dependencies so that build tools don't find them.
|
127
|
+
//
|
128
|
+
#define B1 <boost/thread/once.hpp>
|
129
|
+
#define B2 <boost/thread/recursive_mutex.hpp>
|
130
|
+
#include B1
|
131
|
+
#include B2
|
132
|
+
#undef B1
|
133
|
+
#undef B2
|
134
|
+
|
135
|
+
namespace boost{
|
136
|
+
|
137
|
+
class BOOST_REGEX_DECL scoped_static_mutex_lock;
|
138
|
+
extern "C" BOOST_REGEX_DECL void boost_regex_free_static_mutex();
|
139
|
+
|
140
|
+
class BOOST_REGEX_DECL static_mutex
|
141
|
+
{
|
142
|
+
public:
|
143
|
+
typedef scoped_static_mutex_lock scoped_lock;
|
144
|
+
static void init();
|
145
|
+
static boost::recursive_mutex* m_pmutex;
|
146
|
+
static boost::once_flag m_once;
|
147
|
+
};
|
148
|
+
|
149
|
+
#define BOOST_STATIC_MUTEX_INIT { }
|
150
|
+
|
151
|
+
class BOOST_REGEX_DECL scoped_static_mutex_lock
|
152
|
+
{
|
153
|
+
public:
|
154
|
+
scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
|
155
|
+
~scoped_static_mutex_lock();
|
156
|
+
operator void const*()const;
|
157
|
+
bool locked()const;
|
158
|
+
void lock();
|
159
|
+
void unlock();
|
160
|
+
private:
|
161
|
+
boost::recursive_mutex::scoped_lock* m_plock;
|
162
|
+
bool m_have_lock;
|
163
|
+
};
|
164
|
+
|
165
|
+
inline scoped_static_mutex_lock::operator void const*()const
|
166
|
+
{
|
167
|
+
return locked() ? this : 0;
|
168
|
+
}
|
169
|
+
|
170
|
+
inline bool scoped_static_mutex_lock::locked()const
|
171
|
+
{
|
172
|
+
return m_have_lock;
|
173
|
+
}
|
174
|
+
|
175
|
+
} // namespace
|
176
|
+
|
177
|
+
#endif
|
178
|
+
|
179
|
+
#endif
|
@@ -0,0 +1,776 @@
|
|
1
|
+
/*
|
2
|
+
*
|
3
|
+
* Copyright (c) 2004
|
4
|
+
* John Maddock
|
5
|
+
*
|
6
|
+
* Use, modification and distribution are subject to the
|
7
|
+
* Boost Software License, Version 1.0. (See accompanying file
|
8
|
+
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
9
|
+
*
|
10
|
+
*/
|
11
|
+
|
12
|
+
/*
|
13
|
+
* LOCATION: see http://www.boost.org for most recent version.
|
14
|
+
* FILE unicode_iterator.hpp
|
15
|
+
* VERSION see <boost/version.hpp>
|
16
|
+
* DESCRIPTION: Iterator adapters for converting between different Unicode encodings.
|
17
|
+
*/
|
18
|
+
|
19
|
+
/****************************************************************************
|
20
|
+
|
21
|
+
Contents:
|
22
|
+
~~~~~~~~~
|
23
|
+
|
24
|
+
1) Read Only, Input Adapters:
|
25
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
26
|
+
|
27
|
+
template <class BaseIterator, class U8Type = ::boost::uint8_t>
|
28
|
+
class u32_to_u8_iterator;
|
29
|
+
|
30
|
+
Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8.
|
31
|
+
|
32
|
+
template <class BaseIterator, class U32Type = ::boost::uint32_t>
|
33
|
+
class u8_to_u32_iterator;
|
34
|
+
|
35
|
+
Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32.
|
36
|
+
|
37
|
+
template <class BaseIterator, class U16Type = ::boost::uint16_t>
|
38
|
+
class u32_to_u16_iterator;
|
39
|
+
|
40
|
+
Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16.
|
41
|
+
|
42
|
+
template <class BaseIterator, class U32Type = ::boost::uint32_t>
|
43
|
+
class u16_to_u32_iterator;
|
44
|
+
|
45
|
+
Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32.
|
46
|
+
|
47
|
+
2) Single pass output iterator adapters:
|
48
|
+
|
49
|
+
template <class BaseIterator>
|
50
|
+
class utf8_output_iterator;
|
51
|
+
|
52
|
+
Accepts UTF-32 code points and forwards them on as UTF-8 code points.
|
53
|
+
|
54
|
+
template <class BaseIterator>
|
55
|
+
class utf16_output_iterator;
|
56
|
+
|
57
|
+
Accepts UTF-32 code points and forwards them on as UTF-16 code points.
|
58
|
+
|
59
|
+
****************************************************************************/
|
60
|
+
|
61
|
+
#ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP
|
62
|
+
#define BOOST_REGEX_UNICODE_ITERATOR_HPP
|
63
|
+
#include <boost/cstdint.hpp>
|
64
|
+
#include <boost/assert.hpp>
|
65
|
+
#include <boost/iterator/iterator_facade.hpp>
|
66
|
+
#include <boost/static_assert.hpp>
|
67
|
+
#include <boost/throw_exception.hpp>
|
68
|
+
#include <stdexcept>
|
69
|
+
#ifndef BOOST_NO_STD_LOCALE
|
70
|
+
#include <sstream>
|
71
|
+
#include <ios>
|
72
|
+
#endif
|
73
|
+
#include <limits.h> // CHAR_BIT
|
74
|
+
|
75
|
+
namespace boost{
|
76
|
+
|
77
|
+
namespace detail{
|
78
|
+
|
79
|
+
static const ::boost::uint16_t high_surrogate_base = 0xD7C0u;
|
80
|
+
static const ::boost::uint16_t low_surrogate_base = 0xDC00u;
|
81
|
+
static const ::boost::uint32_t ten_bit_mask = 0x3FFu;
|
82
|
+
|
83
|
+
inline bool is_high_surrogate(::boost::uint16_t v)
|
84
|
+
{
|
85
|
+
return (v & 0xFFFFFC00u) == 0xd800u;
|
86
|
+
}
|
87
|
+
inline bool is_low_surrogate(::boost::uint16_t v)
|
88
|
+
{
|
89
|
+
return (v & 0xFFFFFC00u) == 0xdc00u;
|
90
|
+
}
|
91
|
+
template <class T>
|
92
|
+
inline bool is_surrogate(T v)
|
93
|
+
{
|
94
|
+
return (v & 0xFFFFF800u) == 0xd800;
|
95
|
+
}
|
96
|
+
|
97
|
+
inline unsigned utf8_byte_count(boost::uint8_t c)
|
98
|
+
{
|
99
|
+
// if the most significant bit with a zero in it is in position
|
100
|
+
// 8-N then there are N bytes in this UTF-8 sequence:
|
101
|
+
boost::uint8_t mask = 0x80u;
|
102
|
+
unsigned result = 0;
|
103
|
+
while(c & mask)
|
104
|
+
{
|
105
|
+
++result;
|
106
|
+
mask >>= 1;
|
107
|
+
}
|
108
|
+
return (result == 0) ? 1 : ((result > 4) ? 4 : result);
|
109
|
+
}
|
110
|
+
|
111
|
+
inline unsigned utf8_trailing_byte_count(boost::uint8_t c)
|
112
|
+
{
|
113
|
+
return utf8_byte_count(c) - 1;
|
114
|
+
}
|
115
|
+
|
116
|
+
#ifdef BOOST_MSVC
|
117
|
+
#pragma warning(push)
|
118
|
+
#pragma warning(disable:4100)
|
119
|
+
#endif
|
120
|
+
inline void invalid_utf32_code_point(::boost::uint32_t val)
|
121
|
+
{
|
122
|
+
#ifndef BOOST_NO_STD_LOCALE
|
123
|
+
std::stringstream ss;
|
124
|
+
ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence";
|
125
|
+
std::out_of_range e(ss.str());
|
126
|
+
#else
|
127
|
+
std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence");
|
128
|
+
#endif
|
129
|
+
boost::throw_exception(e);
|
130
|
+
}
|
131
|
+
#ifdef BOOST_MSVC
|
132
|
+
#pragma warning(pop)
|
133
|
+
#endif
|
134
|
+
|
135
|
+
|
136
|
+
} // namespace detail
|
137
|
+
|
138
|
+
template <class BaseIterator, class U16Type = ::boost::uint16_t>
|
139
|
+
class u32_to_u16_iterator
|
140
|
+
: public boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type>
|
141
|
+
{
|
142
|
+
typedef boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type;
|
143
|
+
|
144
|
+
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
|
145
|
+
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
|
146
|
+
|
147
|
+
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
|
148
|
+
BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16);
|
149
|
+
#endif
|
150
|
+
|
151
|
+
public:
|
152
|
+
typename base_type::reference
|
153
|
+
dereference()const
|
154
|
+
{
|
155
|
+
if(m_current == 2)
|
156
|
+
extract_current();
|
157
|
+
return m_values[m_current];
|
158
|
+
}
|
159
|
+
bool equal(const u32_to_u16_iterator& that)const
|
160
|
+
{
|
161
|
+
if(m_position == that.m_position)
|
162
|
+
{
|
163
|
+
// Both m_currents must be equal, or both even
|
164
|
+
// this is the same as saying their sum must be even:
|
165
|
+
return (m_current + that.m_current) & 1u ? false : true;
|
166
|
+
}
|
167
|
+
return false;
|
168
|
+
}
|
169
|
+
void increment()
|
170
|
+
{
|
171
|
+
// if we have a pending read then read now, so that we know whether
|
172
|
+
// to skip a position, or move to a low-surrogate:
|
173
|
+
if(m_current == 2)
|
174
|
+
{
|
175
|
+
// pending read:
|
176
|
+
extract_current();
|
177
|
+
}
|
178
|
+
// move to the next surrogate position:
|
179
|
+
++m_current;
|
180
|
+
// if we've reached the end skip a position:
|
181
|
+
if(m_values[m_current] == 0)
|
182
|
+
{
|
183
|
+
m_current = 2;
|
184
|
+
++m_position;
|
185
|
+
}
|
186
|
+
}
|
187
|
+
void decrement()
|
188
|
+
{
|
189
|
+
if(m_current != 1)
|
190
|
+
{
|
191
|
+
// decrementing an iterator always leads to a valid position:
|
192
|
+
--m_position;
|
193
|
+
extract_current();
|
194
|
+
m_current = m_values[1] ? 1 : 0;
|
195
|
+
}
|
196
|
+
else
|
197
|
+
{
|
198
|
+
m_current = 0;
|
199
|
+
}
|
200
|
+
}
|
201
|
+
BaseIterator base()const
|
202
|
+
{
|
203
|
+
return m_position;
|
204
|
+
}
|
205
|
+
// construct:
|
206
|
+
u32_to_u16_iterator() : m_position(), m_current(0)
|
207
|
+
{
|
208
|
+
m_values[0] = 0;
|
209
|
+
m_values[1] = 0;
|
210
|
+
m_values[2] = 0;
|
211
|
+
}
|
212
|
+
u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2)
|
213
|
+
{
|
214
|
+
m_values[0] = 0;
|
215
|
+
m_values[1] = 0;
|
216
|
+
m_values[2] = 0;
|
217
|
+
}
|
218
|
+
private:
|
219
|
+
|
220
|
+
void extract_current()const
|
221
|
+
{
|
222
|
+
// begin by checking for a code point out of range:
|
223
|
+
::boost::uint32_t v = *m_position;
|
224
|
+
if(v >= 0x10000u)
|
225
|
+
{
|
226
|
+
if(v > 0x10FFFFu)
|
227
|
+
detail::invalid_utf32_code_point(*m_position);
|
228
|
+
// split into two surrogates:
|
229
|
+
m_values[0] = static_cast<U16Type>(v >> 10) + detail::high_surrogate_base;
|
230
|
+
m_values[1] = static_cast<U16Type>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
|
231
|
+
m_current = 0;
|
232
|
+
BOOST_ASSERT(detail::is_high_surrogate(m_values[0]));
|
233
|
+
BOOST_ASSERT(detail::is_low_surrogate(m_values[1]));
|
234
|
+
}
|
235
|
+
else
|
236
|
+
{
|
237
|
+
// 16-bit code point:
|
238
|
+
m_values[0] = static_cast<U16Type>(*m_position);
|
239
|
+
m_values[1] = 0;
|
240
|
+
m_current = 0;
|
241
|
+
// value must not be a surrogate:
|
242
|
+
if(detail::is_surrogate(m_values[0]))
|
243
|
+
detail::invalid_utf32_code_point(*m_position);
|
244
|
+
}
|
245
|
+
}
|
246
|
+
BaseIterator m_position;
|
247
|
+
mutable U16Type m_values[3];
|
248
|
+
mutable unsigned m_current;
|
249
|
+
};
|
250
|
+
|
251
|
+
template <class BaseIterator, class U32Type = ::boost::uint32_t>
|
252
|
+
class u16_to_u32_iterator
|
253
|
+
: public boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
|
254
|
+
{
|
255
|
+
typedef boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
|
256
|
+
// special values for pending iterator reads:
|
257
|
+
BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
|
258
|
+
|
259
|
+
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
|
260
|
+
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
|
261
|
+
|
262
|
+
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16);
|
263
|
+
BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
|
264
|
+
#endif
|
265
|
+
|
266
|
+
public:
|
267
|
+
typename base_type::reference
|
268
|
+
dereference()const
|
269
|
+
{
|
270
|
+
if(m_value == pending_read)
|
271
|
+
extract_current();
|
272
|
+
return m_value;
|
273
|
+
}
|
274
|
+
bool equal(const u16_to_u32_iterator& that)const
|
275
|
+
{
|
276
|
+
return m_position == that.m_position;
|
277
|
+
}
|
278
|
+
void increment()
|
279
|
+
{
|
280
|
+
// skip high surrogate first if there is one:
|
281
|
+
if(detail::is_high_surrogate(*m_position)) ++m_position;
|
282
|
+
++m_position;
|
283
|
+
m_value = pending_read;
|
284
|
+
}
|
285
|
+
void decrement()
|
286
|
+
{
|
287
|
+
--m_position;
|
288
|
+
// if we have a low surrogate then go back one more:
|
289
|
+
if(detail::is_low_surrogate(*m_position))
|
290
|
+
--m_position;
|
291
|
+
m_value = pending_read;
|
292
|
+
}
|
293
|
+
BaseIterator base()const
|
294
|
+
{
|
295
|
+
return m_position;
|
296
|
+
}
|
297
|
+
// construct:
|
298
|
+
u16_to_u32_iterator() : m_position()
|
299
|
+
{
|
300
|
+
m_value = pending_read;
|
301
|
+
}
|
302
|
+
u16_to_u32_iterator(BaseIterator b) : m_position(b)
|
303
|
+
{
|
304
|
+
m_value = pending_read;
|
305
|
+
}
|
306
|
+
//
|
307
|
+
// Range checked version:
|
308
|
+
//
|
309
|
+
u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b)
|
310
|
+
{
|
311
|
+
m_value = pending_read;
|
312
|
+
//
|
313
|
+
// The range must not start with a low surrogate, or end in a high surrogate,
|
314
|
+
// otherwise we run the risk of running outside the underlying input range.
|
315
|
+
// Likewise b must not be located at a low surrogate.
|
316
|
+
//
|
317
|
+
boost::uint16_t val;
|
318
|
+
if(start != end)
|
319
|
+
{
|
320
|
+
if((b != start) && (b != end))
|
321
|
+
{
|
322
|
+
val = *b;
|
323
|
+
if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u))
|
324
|
+
invalid_code_point(val);
|
325
|
+
}
|
326
|
+
val = *start;
|
327
|
+
if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u))
|
328
|
+
invalid_code_point(val);
|
329
|
+
val = *--end;
|
330
|
+
if(detail::is_high_surrogate(val))
|
331
|
+
invalid_code_point(val);
|
332
|
+
}
|
333
|
+
}
|
334
|
+
private:
|
335
|
+
static void invalid_code_point(::boost::uint16_t val)
|
336
|
+
{
|
337
|
+
#ifndef BOOST_NO_STD_LOCALE
|
338
|
+
std::stringstream ss;
|
339
|
+
ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence";
|
340
|
+
std::out_of_range e(ss.str());
|
341
|
+
#else
|
342
|
+
std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence");
|
343
|
+
#endif
|
344
|
+
boost::throw_exception(e);
|
345
|
+
}
|
346
|
+
void extract_current()const
|
347
|
+
{
|
348
|
+
m_value = static_cast<U32Type>(static_cast< ::boost::uint16_t>(*m_position));
|
349
|
+
// if the last value is a high surrogate then adjust m_position and m_value as needed:
|
350
|
+
if(detail::is_high_surrogate(*m_position))
|
351
|
+
{
|
352
|
+
// precondition; next value must have be a low-surrogate:
|
353
|
+
BaseIterator next(m_position);
|
354
|
+
::boost::uint16_t t = *++next;
|
355
|
+
if((t & 0xFC00u) != 0xDC00u)
|
356
|
+
invalid_code_point(t);
|
357
|
+
m_value = (m_value - detail::high_surrogate_base) << 10;
|
358
|
+
m_value |= (static_cast<U32Type>(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask);
|
359
|
+
}
|
360
|
+
// postcondition; result must not be a surrogate:
|
361
|
+
if(detail::is_surrogate(m_value))
|
362
|
+
invalid_code_point(static_cast< ::boost::uint16_t>(m_value));
|
363
|
+
}
|
364
|
+
BaseIterator m_position;
|
365
|
+
mutable U32Type m_value;
|
366
|
+
};
|
367
|
+
|
368
|
+
template <class BaseIterator, class U8Type = ::boost::uint8_t>
|
369
|
+
class u32_to_u8_iterator
|
370
|
+
: public boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type>
|
371
|
+
{
|
372
|
+
typedef boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type;
|
373
|
+
|
374
|
+
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
|
375
|
+
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
|
376
|
+
|
377
|
+
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
|
378
|
+
BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8);
|
379
|
+
#endif
|
380
|
+
|
381
|
+
public:
|
382
|
+
typename base_type::reference
|
383
|
+
dereference()const
|
384
|
+
{
|
385
|
+
if(m_current == 4)
|
386
|
+
extract_current();
|
387
|
+
return m_values[m_current];
|
388
|
+
}
|
389
|
+
bool equal(const u32_to_u8_iterator& that)const
|
390
|
+
{
|
391
|
+
if(m_position == that.m_position)
|
392
|
+
{
|
393
|
+
// either the m_current's must be equal, or one must be 0 and
|
394
|
+
// the other 4: which means neither must have bits 1 or 2 set:
|
395
|
+
return (m_current == that.m_current)
|
396
|
+
|| (((m_current | that.m_current) & 3) == 0);
|
397
|
+
}
|
398
|
+
return false;
|
399
|
+
}
|
400
|
+
void increment()
|
401
|
+
{
|
402
|
+
// if we have a pending read then read now, so that we know whether
|
403
|
+
// to skip a position, or move to a low-surrogate:
|
404
|
+
if(m_current == 4)
|
405
|
+
{
|
406
|
+
// pending read:
|
407
|
+
extract_current();
|
408
|
+
}
|
409
|
+
// move to the next surrogate position:
|
410
|
+
++m_current;
|
411
|
+
// if we've reached the end skip a position:
|
412
|
+
if(m_values[m_current] == 0)
|
413
|
+
{
|
414
|
+
m_current = 4;
|
415
|
+
++m_position;
|
416
|
+
}
|
417
|
+
}
|
418
|
+
void decrement()
|
419
|
+
{
|
420
|
+
if((m_current & 3) == 0)
|
421
|
+
{
|
422
|
+
--m_position;
|
423
|
+
extract_current();
|
424
|
+
m_current = 3;
|
425
|
+
while(m_current && (m_values[m_current] == 0))
|
426
|
+
--m_current;
|
427
|
+
}
|
428
|
+
else
|
429
|
+
--m_current;
|
430
|
+
}
|
431
|
+
BaseIterator base()const
|
432
|
+
{
|
433
|
+
return m_position;
|
434
|
+
}
|
435
|
+
// construct:
|
436
|
+
u32_to_u8_iterator() : m_position(), m_current(0)
|
437
|
+
{
|
438
|
+
m_values[0] = 0;
|
439
|
+
m_values[1] = 0;
|
440
|
+
m_values[2] = 0;
|
441
|
+
m_values[3] = 0;
|
442
|
+
m_values[4] = 0;
|
443
|
+
}
|
444
|
+
u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4)
|
445
|
+
{
|
446
|
+
m_values[0] = 0;
|
447
|
+
m_values[1] = 0;
|
448
|
+
m_values[2] = 0;
|
449
|
+
m_values[3] = 0;
|
450
|
+
m_values[4] = 0;
|
451
|
+
}
|
452
|
+
private:
|
453
|
+
|
454
|
+
void extract_current()const
|
455
|
+
{
|
456
|
+
boost::uint32_t c = *m_position;
|
457
|
+
if(c > 0x10FFFFu)
|
458
|
+
detail::invalid_utf32_code_point(c);
|
459
|
+
if(c < 0x80u)
|
460
|
+
{
|
461
|
+
m_values[0] = static_cast<unsigned char>(c);
|
462
|
+
m_values[1] = static_cast<unsigned char>(0u);
|
463
|
+
m_values[2] = static_cast<unsigned char>(0u);
|
464
|
+
m_values[3] = static_cast<unsigned char>(0u);
|
465
|
+
}
|
466
|
+
else if(c < 0x800u)
|
467
|
+
{
|
468
|
+
m_values[0] = static_cast<unsigned char>(0xC0u + (c >> 6));
|
469
|
+
m_values[1] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
|
470
|
+
m_values[2] = static_cast<unsigned char>(0u);
|
471
|
+
m_values[3] = static_cast<unsigned char>(0u);
|
472
|
+
}
|
473
|
+
else if(c < 0x10000u)
|
474
|
+
{
|
475
|
+
m_values[0] = static_cast<unsigned char>(0xE0u + (c >> 12));
|
476
|
+
m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
|
477
|
+
m_values[2] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
|
478
|
+
m_values[3] = static_cast<unsigned char>(0u);
|
479
|
+
}
|
480
|
+
else
|
481
|
+
{
|
482
|
+
m_values[0] = static_cast<unsigned char>(0xF0u + (c >> 18));
|
483
|
+
m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
|
484
|
+
m_values[2] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
|
485
|
+
m_values[3] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
|
486
|
+
}
|
487
|
+
m_current= 0;
|
488
|
+
}
|
489
|
+
BaseIterator m_position;
|
490
|
+
mutable U8Type m_values[5];
|
491
|
+
mutable unsigned m_current;
|
492
|
+
};
|
493
|
+
|
494
|
+
template <class BaseIterator, class U32Type = ::boost::uint32_t>
|
495
|
+
class u8_to_u32_iterator
|
496
|
+
: public boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
|
497
|
+
{
|
498
|
+
typedef boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
|
499
|
+
// special values for pending iterator reads:
|
500
|
+
BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
|
501
|
+
|
502
|
+
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
|
503
|
+
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
|
504
|
+
|
505
|
+
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8);
|
506
|
+
BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
|
507
|
+
#endif
|
508
|
+
|
509
|
+
public:
|
510
|
+
typename base_type::reference
|
511
|
+
dereference()const
|
512
|
+
{
|
513
|
+
if(m_value == pending_read)
|
514
|
+
extract_current();
|
515
|
+
return m_value;
|
516
|
+
}
|
517
|
+
bool equal(const u8_to_u32_iterator& that)const
|
518
|
+
{
|
519
|
+
return m_position == that.m_position;
|
520
|
+
}
|
521
|
+
void increment()
|
522
|
+
{
|
523
|
+
// We must not start with a continuation character:
|
524
|
+
if((static_cast<boost::uint8_t>(*m_position) & 0xC0) == 0x80)
|
525
|
+
invalid_sequence();
|
526
|
+
// skip high surrogate first if there is one:
|
527
|
+
unsigned c = detail::utf8_byte_count(*m_position);
|
528
|
+
if(m_value == pending_read)
|
529
|
+
{
|
530
|
+
// Since we haven't read in a value, we need to validate the code points:
|
531
|
+
for(unsigned i = 0; i < c; ++i)
|
532
|
+
{
|
533
|
+
++m_position;
|
534
|
+
// We must have a continuation byte:
|
535
|
+
if((i != c - 1) && ((static_cast<boost::uint8_t>(*m_position) & 0xC0) != 0x80))
|
536
|
+
invalid_sequence();
|
537
|
+
}
|
538
|
+
}
|
539
|
+
else
|
540
|
+
{
|
541
|
+
std::advance(m_position, c);
|
542
|
+
}
|
543
|
+
m_value = pending_read;
|
544
|
+
}
|
545
|
+
void decrement()
|
546
|
+
{
|
547
|
+
// Keep backtracking until we don't have a trailing character:
|
548
|
+
unsigned count = 0;
|
549
|
+
while((*--m_position & 0xC0u) == 0x80u) ++count;
|
550
|
+
// now check that the sequence was valid:
|
551
|
+
if(count != detail::utf8_trailing_byte_count(*m_position))
|
552
|
+
invalid_sequence();
|
553
|
+
m_value = pending_read;
|
554
|
+
}
|
555
|
+
BaseIterator base()const
|
556
|
+
{
|
557
|
+
return m_position;
|
558
|
+
}
|
559
|
+
// construct:
|
560
|
+
u8_to_u32_iterator() : m_position()
|
561
|
+
{
|
562
|
+
m_value = pending_read;
|
563
|
+
}
|
564
|
+
u8_to_u32_iterator(BaseIterator b) : m_position(b)
|
565
|
+
{
|
566
|
+
m_value = pending_read;
|
567
|
+
}
|
568
|
+
//
|
569
|
+
// Checked constructor:
|
570
|
+
//
|
571
|
+
u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b)
|
572
|
+
{
|
573
|
+
m_value = pending_read;
|
574
|
+
//
|
575
|
+
// We must not start with a continuation character, or end with a
|
576
|
+
// truncated UTF-8 sequence otherwise we run the risk of going past
|
577
|
+
// the start/end of the underlying sequence:
|
578
|
+
//
|
579
|
+
if(start != end)
|
580
|
+
{
|
581
|
+
unsigned char v = *start;
|
582
|
+
if((v & 0xC0u) == 0x80u)
|
583
|
+
invalid_sequence();
|
584
|
+
if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u))
|
585
|
+
invalid_sequence();
|
586
|
+
BaseIterator pos = end;
|
587
|
+
do
|
588
|
+
{
|
589
|
+
v = *--pos;
|
590
|
+
}
|
591
|
+
while((start != pos) && ((v & 0xC0u) == 0x80u));
|
592
|
+
std::ptrdiff_t extra = detail::utf8_byte_count(v);
|
593
|
+
if(std::distance(pos, end) < extra)
|
594
|
+
invalid_sequence();
|
595
|
+
}
|
596
|
+
}
|
597
|
+
private:
|
598
|
+
static void invalid_sequence()
|
599
|
+
{
|
600
|
+
std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character");
|
601
|
+
boost::throw_exception(e);
|
602
|
+
}
|
603
|
+
void extract_current()const
|
604
|
+
{
|
605
|
+
m_value = static_cast<U32Type>(static_cast< ::boost::uint8_t>(*m_position));
|
606
|
+
// we must not have a continuation character:
|
607
|
+
if((m_value & 0xC0u) == 0x80u)
|
608
|
+
invalid_sequence();
|
609
|
+
// see how many extra bytes we have:
|
610
|
+
unsigned extra = detail::utf8_trailing_byte_count(*m_position);
|
611
|
+
// extract the extra bits, 6 from each extra byte:
|
612
|
+
BaseIterator next(m_position);
|
613
|
+
for(unsigned c = 0; c < extra; ++c)
|
614
|
+
{
|
615
|
+
++next;
|
616
|
+
m_value <<= 6;
|
617
|
+
// We must have a continuation byte:
|
618
|
+
if((static_cast<boost::uint8_t>(*next) & 0xC0) != 0x80)
|
619
|
+
invalid_sequence();
|
620
|
+
m_value += static_cast<boost::uint8_t>(*next) & 0x3Fu;
|
621
|
+
}
|
622
|
+
// we now need to remove a few of the leftmost bits, but how many depends
|
623
|
+
// upon how many extra bytes we've extracted:
|
624
|
+
static const boost::uint32_t masks[4] =
|
625
|
+
{
|
626
|
+
0x7Fu,
|
627
|
+
0x7FFu,
|
628
|
+
0xFFFFu,
|
629
|
+
0x1FFFFFu,
|
630
|
+
};
|
631
|
+
m_value &= masks[extra];
|
632
|
+
// check the result:
|
633
|
+
if(m_value > static_cast<U32Type>(0x10FFFFu))
|
634
|
+
invalid_sequence();
|
635
|
+
}
|
636
|
+
BaseIterator m_position;
|
637
|
+
mutable U32Type m_value;
|
638
|
+
};
|
639
|
+
|
640
|
+
template <class BaseIterator>
|
641
|
+
class utf16_output_iterator
|
642
|
+
{
|
643
|
+
public:
|
644
|
+
typedef void difference_type;
|
645
|
+
typedef void value_type;
|
646
|
+
typedef boost::uint32_t* pointer;
|
647
|
+
typedef boost::uint32_t& reference;
|
648
|
+
typedef std::output_iterator_tag iterator_category;
|
649
|
+
|
650
|
+
utf16_output_iterator(const BaseIterator& b)
|
651
|
+
: m_position(b){}
|
652
|
+
utf16_output_iterator(const utf16_output_iterator& that)
|
653
|
+
: m_position(that.m_position){}
|
654
|
+
utf16_output_iterator& operator=(const utf16_output_iterator& that)
|
655
|
+
{
|
656
|
+
m_position = that.m_position;
|
657
|
+
return *this;
|
658
|
+
}
|
659
|
+
const utf16_output_iterator& operator*()const
|
660
|
+
{
|
661
|
+
return *this;
|
662
|
+
}
|
663
|
+
void operator=(boost::uint32_t val)const
|
664
|
+
{
|
665
|
+
push(val);
|
666
|
+
}
|
667
|
+
utf16_output_iterator& operator++()
|
668
|
+
{
|
669
|
+
return *this;
|
670
|
+
}
|
671
|
+
utf16_output_iterator& operator++(int)
|
672
|
+
{
|
673
|
+
return *this;
|
674
|
+
}
|
675
|
+
BaseIterator base()const
|
676
|
+
{
|
677
|
+
return m_position;
|
678
|
+
}
|
679
|
+
private:
|
680
|
+
void push(boost::uint32_t v)const
|
681
|
+
{
|
682
|
+
if(v >= 0x10000u)
|
683
|
+
{
|
684
|
+
// begin by checking for a code point out of range:
|
685
|
+
if(v > 0x10FFFFu)
|
686
|
+
detail::invalid_utf32_code_point(v);
|
687
|
+
// split into two surrogates:
|
688
|
+
*m_position++ = static_cast<boost::uint16_t>(v >> 10) + detail::high_surrogate_base;
|
689
|
+
*m_position++ = static_cast<boost::uint16_t>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
|
690
|
+
}
|
691
|
+
else
|
692
|
+
{
|
693
|
+
// 16-bit code point:
|
694
|
+
// value must not be a surrogate:
|
695
|
+
if(detail::is_surrogate(v))
|
696
|
+
detail::invalid_utf32_code_point(v);
|
697
|
+
*m_position++ = static_cast<boost::uint16_t>(v);
|
698
|
+
}
|
699
|
+
}
|
700
|
+
mutable BaseIterator m_position;
|
701
|
+
};
|
702
|
+
|
703
|
+
template <class BaseIterator>
|
704
|
+
class utf8_output_iterator
|
705
|
+
{
|
706
|
+
public:
|
707
|
+
typedef void difference_type;
|
708
|
+
typedef void value_type;
|
709
|
+
typedef boost::uint32_t* pointer;
|
710
|
+
typedef boost::uint32_t& reference;
|
711
|
+
typedef std::output_iterator_tag iterator_category;
|
712
|
+
|
713
|
+
utf8_output_iterator(const BaseIterator& b)
|
714
|
+
: m_position(b){}
|
715
|
+
utf8_output_iterator(const utf8_output_iterator& that)
|
716
|
+
: m_position(that.m_position){}
|
717
|
+
utf8_output_iterator& operator=(const utf8_output_iterator& that)
|
718
|
+
{
|
719
|
+
m_position = that.m_position;
|
720
|
+
return *this;
|
721
|
+
}
|
722
|
+
const utf8_output_iterator& operator*()const
|
723
|
+
{
|
724
|
+
return *this;
|
725
|
+
}
|
726
|
+
void operator=(boost::uint32_t val)const
|
727
|
+
{
|
728
|
+
push(val);
|
729
|
+
}
|
730
|
+
utf8_output_iterator& operator++()
|
731
|
+
{
|
732
|
+
return *this;
|
733
|
+
}
|
734
|
+
utf8_output_iterator& operator++(int)
|
735
|
+
{
|
736
|
+
return *this;
|
737
|
+
}
|
738
|
+
BaseIterator base()const
|
739
|
+
{
|
740
|
+
return m_position;
|
741
|
+
}
|
742
|
+
private:
|
743
|
+
void push(boost::uint32_t c)const
|
744
|
+
{
|
745
|
+
if(c > 0x10FFFFu)
|
746
|
+
detail::invalid_utf32_code_point(c);
|
747
|
+
if(c < 0x80u)
|
748
|
+
{
|
749
|
+
*m_position++ = static_cast<unsigned char>(c);
|
750
|
+
}
|
751
|
+
else if(c < 0x800u)
|
752
|
+
{
|
753
|
+
*m_position++ = static_cast<unsigned char>(0xC0u + (c >> 6));
|
754
|
+
*m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
|
755
|
+
}
|
756
|
+
else if(c < 0x10000u)
|
757
|
+
{
|
758
|
+
*m_position++ = static_cast<unsigned char>(0xE0u + (c >> 12));
|
759
|
+
*m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
|
760
|
+
*m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
|
761
|
+
}
|
762
|
+
else
|
763
|
+
{
|
764
|
+
*m_position++ = static_cast<unsigned char>(0xF0u + (c >> 18));
|
765
|
+
*m_position++ = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
|
766
|
+
*m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
|
767
|
+
*m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
|
768
|
+
}
|
769
|
+
}
|
770
|
+
mutable BaseIterator m_position;
|
771
|
+
};
|
772
|
+
|
773
|
+
} // namespace boost
|
774
|
+
|
775
|
+
#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP
|
776
|
+
|