passenger 4.0.27 → 4.0.28

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of passenger might be problematic. Click here for more details.

Files changed (156) hide show
  1. data.tar.gz.asc +7 -7
  2. data/.gitignore +1 -0
  3. data/NEWS +22 -0
  4. data/build/preprocessor.rb +10 -0
  5. data/build/rpm.rb +74 -65
  6. data/debian.template/rules.template +8 -0
  7. data/dev/copy_boost_headers.rb +11 -2
  8. data/doc/Users guide Apache.idmap.txt +161 -145
  9. data/doc/Users guide Apache.txt +12 -1
  10. data/doc/Users guide Nginx.idmap.txt +142 -126
  11. data/doc/Users guide Nginx.txt +14 -1
  12. data/doc/Users guide Standalone.txt +1 -0
  13. data/doc/users_guide_snippets/environment_variables.txt +1 -1
  14. data/doc/users_guide_snippets/installation.txt +2 -0
  15. data/doc/users_guide_snippets/tips.txt +118 -0
  16. data/ext/apache2/Configuration.cpp +0 -6
  17. data/ext/apache2/Configuration.hpp +0 -5
  18. data/ext/apache2/ConfigurationCommands.cpp +7 -0
  19. data/ext/apache2/ConfigurationFields.hpp +2 -0
  20. data/ext/apache2/ConfigurationSetters.cpp +24 -0
  21. data/ext/apache2/CreateDirConfig.cpp +1 -0
  22. data/ext/apache2/Hooks.cpp +0 -1
  23. data/ext/apache2/MergeDirConfig.cpp +7 -0
  24. data/ext/apache2/SetHeaders.cpp +5 -1
  25. data/ext/boost/cregex.hpp +39 -0
  26. data/ext/boost/libs/regex/src/c_regex_traits.cpp +193 -0
  27. data/ext/boost/libs/regex/src/cpp_regex_traits.cpp +117 -0
  28. data/ext/boost/libs/regex/src/cregex.cpp +660 -0
  29. data/ext/boost/libs/regex/src/instances.cpp +32 -0
  30. data/ext/boost/libs/regex/src/internals.hpp +35 -0
  31. data/ext/boost/libs/regex/src/posix_api.cpp +296 -0
  32. data/ext/boost/libs/regex/src/regex.cpp +227 -0
  33. data/ext/boost/libs/regex/src/regex_debug.cpp +59 -0
  34. data/ext/boost/libs/regex/src/regex_raw_buffer.cpp +72 -0
  35. data/ext/boost/libs/regex/src/regex_traits_defaults.cpp +692 -0
  36. data/ext/boost/libs/regex/src/static_mutex.cpp +179 -0
  37. data/ext/boost/libs/regex/src/wc_regex_traits.cpp +301 -0
  38. data/ext/boost/libs/regex/src/wide_posix_api.cpp +315 -0
  39. data/ext/boost/libs/regex/src/winstances.cpp +35 -0
  40. data/ext/boost/regex.h +100 -0
  41. data/ext/boost/regex.hpp +37 -0
  42. data/ext/boost/regex/concepts.hpp +1128 -0
  43. data/ext/boost/regex/config.hpp +435 -0
  44. data/ext/boost/regex/config/borland.hpp +72 -0
  45. data/ext/boost/regex/config/cwchar.hpp +207 -0
  46. data/ext/boost/regex/mfc.hpp +190 -0
  47. data/ext/boost/regex/pattern_except.hpp +100 -0
  48. data/ext/boost/regex/pending/object_cache.hpp +165 -0
  49. data/ext/boost/regex/pending/static_mutex.hpp +179 -0
  50. data/ext/boost/regex/pending/unicode_iterator.hpp +776 -0
  51. data/ext/boost/regex/regex_traits.hpp +35 -0
  52. data/ext/boost/regex/user.hpp +93 -0
  53. data/ext/boost/regex/v4/basic_regex.hpp +782 -0
  54. data/ext/boost/regex/v4/basic_regex_creator.hpp +1571 -0
  55. data/ext/boost/regex/v4/basic_regex_parser.hpp +2874 -0
  56. data/ext/boost/regex/v4/c_regex_traits.hpp +211 -0
  57. data/ext/boost/regex/v4/char_regex_traits.hpp +81 -0
  58. data/ext/boost/regex/v4/cpp_regex_traits.hpp +1099 -0
  59. data/ext/boost/regex/v4/cregex.hpp +330 -0
  60. data/ext/boost/regex/v4/error_type.hpp +59 -0
  61. data/ext/boost/regex/v4/fileiter.hpp +455 -0
  62. data/ext/boost/regex/v4/instances.hpp +222 -0
  63. data/ext/boost/regex/v4/iterator_category.hpp +91 -0
  64. data/ext/boost/regex/v4/iterator_traits.hpp +135 -0
  65. data/ext/boost/regex/v4/match_flags.hpp +138 -0
  66. data/ext/boost/regex/v4/match_results.hpp +702 -0
  67. data/ext/boost/regex/v4/mem_block_cache.hpp +99 -0
  68. data/ext/boost/regex/v4/perl_matcher.hpp +587 -0
  69. data/ext/boost/regex/v4/perl_matcher_common.hpp +996 -0
  70. data/ext/boost/regex/v4/perl_matcher_non_recursive.hpp +1642 -0
  71. data/ext/boost/regex/v4/perl_matcher_recursive.hpp +991 -0
  72. data/ext/boost/regex/v4/primary_transform.hpp +146 -0
  73. data/ext/boost/regex/v4/protected_call.hpp +81 -0
  74. data/ext/boost/regex/v4/regbase.hpp +180 -0
  75. data/ext/boost/regex/v4/regex.hpp +202 -0
  76. data/ext/boost/regex/v4/regex_format.hpp +1156 -0
  77. data/ext/boost/regex/v4/regex_fwd.hpp +73 -0
  78. data/ext/boost/regex/v4/regex_grep.hpp +155 -0
  79. data/ext/boost/regex/v4/regex_iterator.hpp +201 -0
  80. data/ext/boost/regex/v4/regex_match.hpp +382 -0
  81. data/ext/boost/regex/v4/regex_merge.hpp +93 -0
  82. data/ext/boost/regex/v4/regex_raw_buffer.hpp +210 -0
  83. data/ext/boost/regex/v4/regex_replace.hpp +99 -0
  84. data/ext/boost/regex/v4/regex_search.hpp +217 -0
  85. data/ext/boost/regex/v4/regex_split.hpp +172 -0
  86. data/ext/boost/regex/v4/regex_token_iterator.hpp +342 -0
  87. data/ext/boost/regex/v4/regex_traits.hpp +189 -0
  88. data/ext/boost/regex/v4/regex_traits_defaults.hpp +371 -0
  89. data/ext/boost/regex/v4/regex_workaround.hpp +232 -0
  90. data/ext/boost/regex/v4/states.hpp +301 -0
  91. data/ext/boost/regex/v4/sub_match.hpp +512 -0
  92. data/ext/boost/regex/v4/syntax_type.hpp +105 -0
  93. data/ext/boost/regex/v4/u32regex_iterator.hpp +193 -0
  94. data/ext/boost/regex/v4/u32regex_token_iterator.hpp +377 -0
  95. data/ext/boost/regex/v4/w32_regex_traits.hpp +741 -0
  96. data/ext/boost/regex_fwd.hpp +33 -0
  97. data/ext/common/AgentsStarter.h +0 -11
  98. data/ext/common/ApplicationPool2/Common.h +1 -7
  99. data/ext/common/ApplicationPool2/DirectSpawner.h +3 -3
  100. data/ext/common/ApplicationPool2/Group.h +166 -69
  101. data/ext/common/ApplicationPool2/Implementation.cpp +55 -10
  102. data/ext/common/ApplicationPool2/Options.h +45 -10
  103. data/ext/common/ApplicationPool2/PipeWatcher.h +1 -2
  104. data/ext/common/ApplicationPool2/Pool.h +29 -7
  105. data/ext/common/ApplicationPool2/Process.h +22 -3
  106. data/ext/common/ApplicationPool2/Session.h +1 -0
  107. data/ext/common/ApplicationPool2/SmartSpawner.h +5 -10
  108. data/ext/common/ApplicationPool2/Spawner.h +10 -15
  109. data/ext/common/ApplicationPool2/SuperGroup.h +10 -9
  110. data/ext/common/Constants.h +1 -3
  111. data/ext/common/Hooks.h +193 -0
  112. data/ext/common/Logging.cpp +67 -2
  113. data/ext/common/Logging.h +23 -1
  114. data/ext/common/Utils.cpp +0 -21
  115. data/ext/common/Utils.h +0 -42
  116. data/ext/common/Utils/CachedFileStat.hpp +1 -1
  117. data/ext/common/Utils/StrIntUtils.h +61 -14
  118. data/ext/common/Utils/StringMap.h +4 -0
  119. data/ext/common/agents/HelperAgent/AgentOptions.h +4 -4
  120. data/ext/common/agents/HelperAgent/Main.cpp +2 -3
  121. data/ext/common/agents/HelperAgent/RequestHandler.h +65 -2
  122. data/ext/common/agents/LoggingAgent/FilterSupport.h +3 -1
  123. data/ext/common/agents/Watchdog/Main.cpp +8 -72
  124. data/ext/nginx/CacheLocationConfig.c +29 -1
  125. data/ext/nginx/Configuration.c +0 -12
  126. data/ext/nginx/Configuration.h +0 -1
  127. data/ext/nginx/ConfigurationCommands.c +10 -0
  128. data/ext/nginx/ConfigurationFields.h +2 -0
  129. data/ext/nginx/CreateLocationConfig.c +4 -0
  130. data/ext/nginx/MergeLocationConfig.c +6 -0
  131. data/ext/oxt/system_calls.cpp +7 -1
  132. data/ext/oxt/system_calls.hpp +7 -7
  133. data/helper-scripts/node-loader.js +6 -2
  134. data/helper-scripts/rack-loader.rb +5 -2
  135. data/helper-scripts/rack-preloader.rb +5 -2
  136. data/lib/phusion_passenger.rb +1 -1
  137. data/lib/phusion_passenger/apache2/config_options.rb +8 -0
  138. data/lib/phusion_passenger/constants.rb +0 -1
  139. data/lib/phusion_passenger/nginx/config_options.rb +9 -2
  140. data/lib/phusion_passenger/platform_info/apache.rb +2 -1
  141. data/lib/phusion_passenger/platform_info/compiler.rb +15 -1
  142. data/lib/phusion_passenger/platform_info/cxx_portability.rb +2 -0
  143. data/node_lib/phusion_passenger/httplib_emulation.js +85 -17
  144. data/node_lib/phusion_passenger/request_handler.js +10 -2
  145. data/rpm/Vagrantfile +32 -0
  146. data/rpm/get_distro_id.py +4 -0
  147. data/test/cxx/ApplicationPool2/DirectSpawnerTest.cpp +2 -2
  148. data/test/cxx/ApplicationPool2/PoolTest.cpp +60 -9
  149. data/test/cxx/ApplicationPool2/SmartSpawnerTest.cpp +2 -6
  150. data/test/cxx/CachedFileStatTest.cpp +5 -5
  151. data/test/cxx/RequestHandlerTest.cpp +3 -6
  152. data/test/cxx/UtilsTest.cpp +30 -0
  153. data/test/node/httplib_emulation_spec.js +491 -0
  154. data/test/node/spec_helper.js +25 -0
  155. metadata +78 -2
  156. metadata.gz.asc +7 -7
@@ -0,0 +1,165 @@
1
+ /*
2
+ *
3
+ * Copyright (c) 2004
4
+ * John Maddock
5
+ *
6
+ * Use, modification and distribution are subject to the
7
+ * Boost Software License, Version 1.0. (See accompanying file
8
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
+ *
10
+ */
11
+
12
+ /*
13
+ * LOCATION: see http://www.boost.org for most recent version.
14
+ * FILE object_cache.hpp
15
+ * VERSION see <boost/version.hpp>
16
+ * DESCRIPTION: Implements a generic object cache.
17
+ */
18
+
19
+ #ifndef BOOST_REGEX_OBJECT_CACHE_HPP
20
+ #define BOOST_REGEX_OBJECT_CACHE_HPP
21
+
22
+ #include <map>
23
+ #include <list>
24
+ #include <stdexcept>
25
+ #include <string>
26
+ #include <boost/config.hpp>
27
+ #include <boost/shared_ptr.hpp>
28
+ #ifdef BOOST_HAS_THREADS
29
+ #include <boost/regex/pending/static_mutex.hpp>
30
+ #endif
31
+
32
+ namespace boost{
33
+
34
+ template <class Key, class Object> // Cache of shared, immutable Object instances looked up by Key; do_get builds a missing entry with Object(k).
35
+ class object_cache
36
+ {
37
+ public:
38
+ typedef std::pair< ::boost::shared_ptr<Object const>, Key const*> value_type; // cached object plus a pointer to its key (do_get points it at the key stored in the index map)
39
+ typedef std::list<value_type> list_type; // do_get appends new entries and moves hits to the end, and evicts starting from the front
40
+ typedef typename list_type::iterator list_iterator;
41
+ typedef std::map<Key, list_iterator> map_type; // key -> position of that key's entry in the list
42
+ typedef typename map_type::iterator map_iterator;
43
+ typedef typename list_type::size_type size_type;
44
+ static boost::shared_ptr<Object const> get(const Key& k, size_type l_max_cache_size); // public entry point: takes a static mutex when BOOST_HAS_THREADS is defined, then calls do_get
45
+
46
+ private:
47
+ static boost::shared_ptr<Object const> do_get(const Key& k, size_type l_max_cache_size); // lookup/insert/evict logic; performs no locking itself
48
+
49
+ struct data // the two containers that make up the cache state (do_get keeps one as a function-local static)
50
+ {
51
+ list_type cont; // the cached entries, in usage order
52
+ map_type index; // lookup from key to entry in cont
53
+ };
54
+
55
+ // Needed by compilers not implementing the resolution to DR45. For reference,
56
+ // see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45.
57
+ friend struct data;
58
+ };
59
+
60
+ template <class Key, class Object>
61
+ boost::shared_ptr<Object const> object_cache<Key, Object>::get(const Key& k, size_type l_max_cache_size)
62
+ {
63
+ #ifdef BOOST_HAS_THREADS
64
+ static boost::static_mutex mut = BOOST_STATIC_MUTEX_INIT;
65
+
66
+ boost::static_mutex::scoped_lock l(mut);
67
+ if(l)
68
+ {
69
+ return do_get(k, l_max_cache_size);
70
+ }
71
+ //
72
+ // what do we do if the lock fails?
73
+ // for now just throw, but we should never really get here...
74
+ //
75
+ ::boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock"));
76
+ #if defined(BOOST_NO_UNREACHABLE_RETURN_DETECTION) || defined(BOOST_NO_EXCEPTIONS)
77
+ return boost::shared_ptr<Object>();
78
+ #endif
79
+ #else
80
+ return do_get(k, l_max_cache_size);
81
+ #endif
82
+ }
83
+
84
+ template <class Key, class Object>
85
+ boost::shared_ptr<Object const> object_cache<Key, Object>::do_get(const Key& k, size_type l_max_cache_size)
86
+ {
87
+ typedef typename object_cache<Key, Object>::data object_data;
88
+ typedef typename map_type::size_type map_size_type;
89
+ static object_data s_data;
90
+
91
+ //
92
+ // see if the object is already in the cache:
93
+ //
94
+ map_iterator mpos = s_data.index.find(k);
95
+ if(mpos != s_data.index.end())
96
+ {
97
+ //
98
+ // Eureka!
99
+ // We have a cached item, bump it up the list and return it:
100
+ //
101
+ if(--(s_data.cont.end()) != mpos->second)
102
+ {
103
+ // splice out the item we want to move:
104
+ list_type temp;
105
+ temp.splice(temp.end(), s_data.cont, mpos->second);
106
+ // and now place it at the end of the list:
107
+ s_data.cont.splice(s_data.cont.end(), temp, temp.begin());
108
+ BOOST_ASSERT(*(s_data.cont.back().second) == k);
109
+ // update index with new position:
110
+ mpos->second = --(s_data.cont.end());
111
+ BOOST_ASSERT(&(mpos->first) == mpos->second->second);
112
+ BOOST_ASSERT(&(mpos->first) == s_data.cont.back().second);
113
+ }
114
+ return s_data.cont.back().first;
115
+ }
116
+ //
117
+ // if we get here then the item is not in the cache,
118
+ // so create it:
119
+ //
120
+ boost::shared_ptr<Object const> result(new Object(k));
121
+ //
122
+ // Add it to the list, and index it:
123
+ //
124
+ s_data.cont.push_back(value_type(result, static_cast<Key const*>(0)));
125
+ s_data.index.insert(std::make_pair(k, --(s_data.cont.end())));
126
+ s_data.cont.back().second = &(s_data.index.find(k)->first);
127
+ map_size_type s = s_data.index.size();
128
+ BOOST_ASSERT(s_data.index[k]->first.get() == result.get());
129
+ BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second);
130
+ BOOST_ASSERT(s_data.index.find(k)->first == k);
131
+ if(s > l_max_cache_size)
132
+ {
133
+ //
134
+ // We have too many items in the list, so we need to start
135
+ // popping them off the back of the list, but only if they're
136
+ // being held uniquely by us:
137
+ //
138
+ list_iterator pos = s_data.cont.begin();
139
+ list_iterator last = s_data.cont.end();
140
+ while((pos != last) && (s > l_max_cache_size))
141
+ {
142
+ if(pos->first.unique())
143
+ {
144
+ list_iterator condemmed(pos);
145
+ ++pos;
146
+ // now remove the items from our containers,
147
+ // then order has to be as follows:
148
+ BOOST_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end());
149
+ s_data.index.erase(*(condemmed->second));
150
+ s_data.cont.erase(condemmed);
151
+ --s;
152
+ }
153
+ else
154
+ ++pos;
155
+ }
156
+ BOOST_ASSERT(s_data.index[k]->first.get() == result.get());
157
+ BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second);
158
+ BOOST_ASSERT(s_data.index.find(k)->first == k);
159
+ }
160
+ return result;
161
+ }
162
+
163
+ }
164
+
165
+ #endif
@@ -0,0 +1,179 @@
1
+ /*
2
+ *
3
+ * Copyright (c) 2004
4
+ * John Maddock
5
+ *
6
+ * Use, modification and distribution are subject to the
7
+ * Boost Software License, Version 1.0. (See accompanying file
8
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
+ *
10
+ */
11
+
12
+ /*
13
+ * LOCATION: see http://www.boost.org for most recent version.
14
+ * FILE static_mutex.hpp
15
+ * VERSION see <boost/version.hpp>
16
+ * DESCRIPTION: Declares static_mutex lock type, there are three different
17
+ * implementations: POSIX pthreads, WIN32 threads, and portable,
18
+ * these are described in more detail below.
19
+ */
20
+
21
+ #ifndef BOOST_REGEX_STATIC_MUTEX_HPP
22
+ #define BOOST_REGEX_STATIC_MUTEX_HPP
23
+
24
+ #include <boost/config.hpp>
25
+ #include <boost/regex/config.hpp> // dll import/export options.
26
+
27
+ #ifdef BOOST_HAS_PTHREADS
28
+ #include <pthread.h>
29
+ #endif
30
+
31
+ #if defined(BOOST_HAS_PTHREADS) && defined(PTHREAD_MUTEX_INITIALIZER)
32
+ //
33
+ // pthreads version:
34
+ // simple wrap around a pthread_mutex_t initialized with
35
+ // PTHREAD_MUTEX_INITIALIZER.
36
+ //
37
namespace boost{

class static_mutex;

// Brace initialiser for a static_mutex: sets its only member, the
// pthread_mutex_t, to PTHREAD_MUTEX_INITIALIZER.
#define BOOST_STATIC_MUTEX_INIT { PTHREAD_MUTEX_INITIALIZER, }

//
// Scoped lock for a static_mutex.
//
// The member functions other than locked() and the conversion operator are
// only declared here; they are defined outside this header.
// NOTE(review): presumably the constructor locks when lk is true and the
// destructor releases a held lock - confirm against the implementation file.
//
class BOOST_REGEX_DECL scoped_static_mutex_lock
{
public:
   scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
   ~scoped_static_mutex_lock();
   // true while this object records that it holds the lock:
   inline bool locked()const
   {
      return m_have_lock;
   }
   // lets the lock be tested in a condition: non-null iff locked():
   inline operator void const*()const
   {
      return locked() ? this : 0;
   }
   void lock();
   void unlock();
private:
   static_mutex& m_mutex;
   bool m_have_lock;
   // Not copyable, matching the Win32 variant of this class: a copy would
   // carry the same m_have_lock flag for the same mutex as the original.
   // Declared but intentionally never defined.
   scoped_static_mutex_lock(const scoped_static_mutex_lock&);
   scoped_static_mutex_lock& operator=(const scoped_static_mutex_lock&);
};

//
// The mutex itself: deliberately a plain aggregate with a public member so
// that it can be initialised with BOOST_STATIC_MUTEX_INIT.
//
class static_mutex
{
public:
   typedef scoped_static_mutex_lock scoped_lock;
   pthread_mutex_t m_mutex;
};

} // namespace boost
71
+ #elif defined(BOOST_HAS_WINTHREADS)
72
+ //
73
+ // Win32 version:
74
+ // Use a 32-bit int as a lock, along with a test-and-set
75
+ // implementation using InterlockedCompareExchange.
76
+ //
77
+
78
+ #include <boost/cstdint.hpp>
79
+
80
+ namespace boost{
81
+
82
+ class BOOST_REGEX_DECL scoped_static_mutex_lock;
83
+
84
+ class static_mutex // plain aggregate so that it can be brace-initialised with BOOST_STATIC_MUTEX_INIT
85
+ {
86
+ public:
87
+ typedef scoped_static_mutex_lock scoped_lock;
88
+ boost::int32_t m_mutex; // the 32-bit lock word described in the comment for this variant; BOOST_STATIC_MUTEX_INIT sets it to 0
89
+ };
90
+
91
+ #define BOOST_STATIC_MUTEX_INIT { 0, }
92
+
93
+ class BOOST_REGEX_DECL scoped_static_mutex_lock // scoped lock for static_mutex; ctor, dtor, lock() and unlock() are defined outside this header
94
+ {
95
+ public:
96
+ scoped_static_mutex_lock(static_mutex& mut, bool lk = true); // NOTE(review): presumably locks immediately when lk is true - confirm in the implementation file
97
+ ~scoped_static_mutex_lock();
98
+ operator void const*()const // allows "if(lock)": non-null exactly when locked() is true
99
+ {
100
+ return locked() ? this : 0;
101
+ }
102
+ bool locked()const // reports the recorded ownership flag
103
+ {
104
+ return m_have_lock;
105
+ }
106
+ void lock();
107
+ void unlock();
108
+ private:
109
+ static_mutex& m_mutex; // the mutex this lock operates on
110
+ bool m_have_lock; // value returned by locked()
111
+ scoped_static_mutex_lock(const scoped_static_mutex_lock&); // private copy operations: the lock is not copyable
112
+ scoped_static_mutex_lock& operator=(const scoped_static_mutex_lock&);
113
+ };
114
+
115
+ } // namespace
116
+
117
+ #else
118
+ //
119
+ // Portable version of a static mutex based on Boost.Thread library:
120
+ // This has to use a single mutex shared by all instances of static_mutex
121
+ // because boost::call_once doesn't allow us to pass instance information
122
+ // down to the initialisation procedure. In fact the initialisation routine
123
+ // may need to be called more than once - but only once per instance.
124
+ //
125
+ // Since this preprocessor path is almost never taken, we hide these header
126
+ // dependencies so that build tools don't find them.
127
+ //
128
+ #define B1 <boost/thread/once.hpp>
129
+ #define B2 <boost/thread/recursive_mutex.hpp>
130
+ #include B1
131
+ #include B2
132
+ #undef B1
133
+ #undef B2
134
+
135
+ namespace boost{
136
+
137
+ class BOOST_REGEX_DECL scoped_static_mutex_lock;
138
+ extern "C" BOOST_REGEX_DECL void boost_regex_free_static_mutex();
139
+
140
+ class BOOST_REGEX_DECL static_mutex
141
+ {
142
+ public:
143
+ typedef scoped_static_mutex_lock scoped_lock;
144
+ static void init();
145
+ static boost::recursive_mutex* m_pmutex;
146
+ static boost::once_flag m_once;
147
+ };
148
+
149
+ #define BOOST_STATIC_MUTEX_INIT { }
150
+
151
+ class BOOST_REGEX_DECL scoped_static_mutex_lock
152
+ {
153
+ public:
154
+ scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
155
+ ~scoped_static_mutex_lock();
156
+ operator void const*()const;
157
+ bool locked()const;
158
+ void lock();
159
+ void unlock();
160
+ private:
161
+ boost::recursive_mutex::scoped_lock* m_plock;
162
+ bool m_have_lock;
163
+ };
164
+
165
+ inline scoped_static_mutex_lock::operator void const*()const
166
+ {
167
+ return locked() ? this : 0;
168
+ }
169
+
170
+ inline bool scoped_static_mutex_lock::locked()const
171
+ {
172
+ return m_have_lock;
173
+ }
174
+
175
+ } // namespace
176
+
177
+ #endif
178
+
179
+ #endif
@@ -0,0 +1,776 @@
1
+ /*
2
+ *
3
+ * Copyright (c) 2004
4
+ * John Maddock
5
+ *
6
+ * Use, modification and distribution are subject to the
7
+ * Boost Software License, Version 1.0. (See accompanying file
8
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
+ *
10
+ */
11
+
12
+ /*
13
+ * LOCATION: see http://www.boost.org for most recent version.
14
+ * FILE unicode_iterator.hpp
15
+ * VERSION see <boost/version.hpp>
16
+ * DESCRIPTION: Iterator adapters for converting between different Unicode encodings.
17
+ */
18
+
19
+ /****************************************************************************
20
+
21
+ Contents:
22
+ ~~~~~~~~~
23
+
24
+ 1) Read Only, Input Adapters:
25
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
26
+
27
+ template <class BaseIterator, class U8Type = ::boost::uint8_t>
28
+ class u32_to_u8_iterator;
29
+
30
+ Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8.
31
+
32
+ template <class BaseIterator, class U32Type = ::boost::uint32_t>
33
+ class u8_to_u32_iterator;
34
+
35
+ Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32.
36
+
37
+ template <class BaseIterator, class U16Type = ::boost::uint16_t>
38
+ class u32_to_u16_iterator;
39
+
40
+ Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16.
41
+
42
+ template <class BaseIterator, class U32Type = ::boost::uint32_t>
43
+ class u16_to_u32_iterator;
44
+
45
+ Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32.
46
+
47
+ 2) Single pass output iterator adapters:
48
+
49
+ template <class BaseIterator>
50
+ class utf8_output_iterator;
51
+
52
+ Accepts UTF-32 code points and forwards them on as UTF-8 code points.
53
+
54
+ template <class BaseIterator>
55
+ class utf16_output_iterator;
56
+
57
+ Accepts UTF-32 code points and forwards them on as UTF-16 code points.
58
+
59
+ ****************************************************************************/
60
+
61
+ #ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP
62
+ #define BOOST_REGEX_UNICODE_ITERATOR_HPP
63
+ #include <boost/cstdint.hpp>
64
+ #include <boost/assert.hpp>
65
+ #include <boost/iterator/iterator_facade.hpp>
66
+ #include <boost/static_assert.hpp>
67
+ #include <boost/throw_exception.hpp>
68
+ #include <stdexcept>
69
+ #ifndef BOOST_NO_STD_LOCALE
70
+ #include <sstream>
71
+ #include <ios>
72
+ #endif
73
+ #include <limits.h> // CHAR_BIT
74
+
75
+ namespace boost{
76
+
77
+ namespace detail{
78
+
79
+ static const ::boost::uint16_t high_surrogate_base = 0xD7C0u; // 0xD800 - (0x10000 >> 10): the encoder adds (v >> 10) to this for v >= 0x10000, which lands in 0xD800..0xDBFF
80
+ static const ::boost::uint16_t low_surrogate_base = 0xDC00u; // first low (trailing) surrogate; the encoder adds the low ten bits of the code point to it
81
+ static const ::boost::uint32_t ten_bit_mask = 0x3FFu; // selects the ten payload bits carried by a low surrogate
82
+
83
// Returns true when v is a UTF-16 high (leading) surrogate,
// i.e. lies in the range 0xD800..0xDBFF inclusive.
inline bool is_high_surrogate(::boost::uint16_t v)
{
   const unsigned code_unit = v;
   if(code_unit < 0xD800u)
      return false;
   return code_unit <= 0xDBFFu;
}
87
// Returns true when v is a UTF-16 low (trailing) surrogate,
// i.e. lies in the range 0xDC00..0xDFFF inclusive.
inline bool is_low_surrogate(::boost::uint16_t v)
{
   const unsigned code_unit = v;
   if(code_unit < 0xDC00u)
      return false;
   return code_unit <= 0xDFFFu;
}
91
// Returns true when v is any UTF-16 surrogate (high or low).
// Clearing the low eleven bits leaves 0xD800 for the values 0xD800..0xDFFF;
// the mask is a 32-bit constant, so this is meant for code-unit / code-point
// types no wider than that.
template <class T>
inline bool is_surrogate(T v)
{
   const bool in_surrogate_block = (0xd800 == (v & 0xFFFFF800u));
   return in_surrogate_block;
}
96
+
97
// Returns the length in bytes of the UTF-8 sequence introduced by lead byte c.
// The length is the number of leading 1 bits in c, except that a byte with no
// leading 1 bit counts as a one-byte sequence and the result never exceeds 4.
inline unsigned utf8_byte_count(boost::uint8_t c)
{
   unsigned leading_ones = 0;
   // walk from the top bit downwards until the first 0 bit (or all 8 bits are used up):
   for(unsigned bit = 0x80u; (bit != 0u) && ((c & bit) != 0u); bit >>= 1)
      ++leading_ones;

   if(leading_ones == 0)
      return 1;
   if(leading_ones > 4)
      return 4;
   return leading_ones;
}
110
+
111
+ inline unsigned utf8_trailing_byte_count(boost::uint8_t c)
112
+ {
113
+ return utf8_byte_count(c) - 1;
114
+ }
115
+
116
+ #ifdef BOOST_MSVC
117
+ #pragma warning(push)
118
+ #pragma warning(disable:4100)
119
+ #endif
120
+ inline void invalid_utf32_code_point(::boost::uint32_t val) // Reports a rejected UTF-32 value val by raising std::out_of_range through boost::throw_exception.
121
+ {
122
+ #ifndef BOOST_NO_STD_LOCALE
123
+ std::stringstream ss; // with stream support available the message includes the offending value in hex
124
+ ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; // NOTE(review): the text always says UTF-16, although u32_to_u8_iterator calls this function as well
125
+ std::out_of_range e(ss.str());
126
+ #else
127
+ std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence"); // fixed message: val is unused on this path (hence the MSVC warning 4100 suppression around this function)
128
+ #endif
129
+ boost::throw_exception(e);
130
+ }
131
+ #ifdef BOOST_MSVC
132
+ #pragma warning(pop)
133
+ #endif
134
+
135
+
136
+ } // namespace detail
137
+
138
+ template <class BaseIterator, class U16Type = ::boost::uint16_t> // Adapts a sequence of UTF-32 code points (BaseIterator) so that it reads as a sequence of UTF-16 values of type U16Type.
139
+ class u32_to_u16_iterator
140
+ : public boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type>
141
+ {
142
+ typedef boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type;
143
+
144
+ #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
145
+ typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
146
+
147
+ BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); // the underlying sequence must hold 32-bit values
148
+ BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16); // and the produced values must be 16 bits wide
149
+ #endif
150
+
151
+ public:
152
+ typename base_type::reference
153
+ dereference()const
154
+ {
155
+ if(m_current == 2) // 2 marks a pending read: the code point at m_position has not been converted into m_values yet
156
+ extract_current();
157
+ return m_values[m_current];
158
+ }
159
+ bool equal(const u32_to_u16_iterator& that)const
160
+ {
161
+ if(m_position == that.m_position)
162
+ {
163
+ // Both m_currents must be equal, or both even
164
+ // this is the same as saying their sum must be even:
165
+ return (m_current + that.m_current) & 1u ? false : true; // so 0 (first value) and 2 (pending read) compare equal, while 1 (second value) only equals 1
166
+ }
167
+ return false;
168
+ }
169
+ void increment()
170
+ {
171
+ // if we have a pending read then read now, so that we know whether
172
+ // to skip a position, or move to a low-surrogate:
173
+ if(m_current == 2)
174
+ {
175
+ // pending read:
176
+ extract_current();
177
+ }
178
+ // move to the next surrogate position:
179
+ ++m_current;
180
+ // if we've reached the end skip a position:
181
+ if(m_values[m_current] == 0) // a zero entry terminates the values of the current code point
182
+ {
183
+ m_current = 2; // back to "pending read" for the next code point
184
+ ++m_position;
185
+ }
186
+ }
187
+ void decrement()
188
+ {
189
+ if(m_current != 1) // on the first value of a code point, or nothing read yet: step back in the underlying sequence
190
+ {
191
+ // decrementing an iterator always leads to a valid position:
192
+ --m_position;
193
+ extract_current();
194
+ m_current = m_values[1] ? 1 : 0; // land on the last value of the previous code point: the second one if it produced two
195
+ }
196
+ else
197
+ {
198
+ m_current = 0; // from the second value back to the first value of the same code point
199
+ }
200
+ }
201
+ BaseIterator base()const // returns the current position in the underlying UTF-32 sequence
202
+ {
203
+ return m_position;
204
+ }
205
+ // construct:
206
+ u32_to_u16_iterator() : m_position(), m_current(0) // default: value-initialised position, all cached values zero
207
+ {
208
+ m_values[0] = 0;
209
+ m_values[1] = 0;
210
+ m_values[2] = 0;
211
+ }
212
+ u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) // starts at b in the pending-read state, so b is not dereferenced until needed
213
+ {
214
+ m_values[0] = 0;
215
+ m_values[1] = 0;
216
+ m_values[2] = 0;
217
+ }
218
+ private:
219
+
220
+ void extract_current()const // converts the code point at m_position into m_values; const because it is called from dereference(), the state it writes is mutable
221
+ {
222
+ // begin by checking for a code point out of range:
223
+ ::boost::uint32_t v = *m_position;
224
+ if(v >= 0x10000u)
225
+ {
226
+ if(v > 0x10FFFFu)
227
+ detail::invalid_utf32_code_point(*m_position);
228
+ // split into two surrogates:
229
+ m_values[0] = static_cast<U16Type>(v >> 10) + detail::high_surrogate_base;
230
+ m_values[1] = static_cast<U16Type>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
231
+ m_current = 0;
232
+ BOOST_ASSERT(detail::is_high_surrogate(m_values[0]));
233
+ BOOST_ASSERT(detail::is_low_surrogate(m_values[1]));
234
+ }
235
+ else
236
+ {
237
+ // 16-bit code point:
238
+ m_values[0] = static_cast<U16Type>(*m_position);
239
+ m_values[1] = 0; // no second value for this code point
240
+ m_current = 0;
241
+ // value must not be a surrogate:
242
+ if(detail::is_surrogate(m_values[0]))
243
+ detail::invalid_utf32_code_point(*m_position);
244
+ }
245
+ }
246
+ BaseIterator m_position; // position in the underlying UTF-32 sequence
247
+ mutable U16Type m_values[3]; // the one or two values for the current code point; m_values[2] is only ever set to zero and acts as the terminator
248
+ mutable unsigned m_current; // index into m_values: 0 or 1 for a value that has been read, 2 for a pending read
249
+ };
250
+
251
+ template <class BaseIterator, class U32Type = ::boost::uint32_t>
252
+ class u16_to_u32_iterator
253
+ : public boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
254
+ {
255
+ typedef boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
256
+ // special values for pending iterator reads:
257
+ BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
258
+
259
+ #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
260
+ typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
261
+
262
+ BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16);
263
+ BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
264
+ #endif
265
+
266
+ public:
267
+ typename base_type::reference
268
+ dereference()const
269
+ {
270
+ if(m_value == pending_read)
271
+ extract_current();
272
+ return m_value;
273
+ }
274
+ bool equal(const u16_to_u32_iterator& that)const
275
+ {
276
+ return m_position == that.m_position;
277
+ }
278
+ void increment()
279
+ {
280
+ // skip high surrogate first if there is one:
281
+ if(detail::is_high_surrogate(*m_position)) ++m_position;
282
+ ++m_position;
283
+ m_value = pending_read;
284
+ }
285
+ void decrement()
286
+ {
287
+ --m_position;
288
+ // if we have a low surrogate then go back one more:
289
+ if(detail::is_low_surrogate(*m_position))
290
+ --m_position;
291
+ m_value = pending_read;
292
+ }
293
+ BaseIterator base()const
294
+ {
295
+ return m_position;
296
+ }
297
+ // construct:
298
+ u16_to_u32_iterator() : m_position()
299
+ {
300
+ m_value = pending_read;
301
+ }
302
+ u16_to_u32_iterator(BaseIterator b) : m_position(b)
303
+ {
304
+ m_value = pending_read;
305
+ }
306
+ //
307
+ // Range checked version:
308
+ //
309
+ u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b)
310
+ {
311
+ m_value = pending_read;
312
+ //
313
+ // The range must not start with a low surrogate, or end in a high surrogate,
314
+ // otherwise we run the risk of running outside the underlying input range.
315
+ // Likewise b must not be located at a low surrogate.
316
+ //
317
+ boost::uint16_t val;
318
+ if(start != end)
319
+ {
320
+ if((b != start) && (b != end))
321
+ {
322
+ val = *b;
323
+ if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u))
324
+ invalid_code_point(val);
325
+ }
326
+ val = *start;
327
+ if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u))
328
+ invalid_code_point(val);
329
+ val = *--end;
330
+ if(detail::is_high_surrogate(val))
331
+ invalid_code_point(val);
332
+ }
333
+ }
334
+ private:
335
+ static void invalid_code_point(::boost::uint16_t val)
336
+ {
337
+ #ifndef BOOST_NO_STD_LOCALE
338
+ std::stringstream ss;
339
+ ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence";
340
+ std::out_of_range e(ss.str());
341
+ #else
342
+ std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence");
343
+ #endif
344
+ boost::throw_exception(e);
345
+ }
346
+ void extract_current()const
347
+ {
348
+ m_value = static_cast<U32Type>(static_cast< ::boost::uint16_t>(*m_position));
349
+ // if the last value is a high surrogate then adjust m_position and m_value as needed:
350
+ if(detail::is_high_surrogate(*m_position))
351
+ {
352
+ // precondition; next value must have be a low-surrogate:
353
+ BaseIterator next(m_position);
354
+ ::boost::uint16_t t = *++next;
355
+ if((t & 0xFC00u) != 0xDC00u)
356
+ invalid_code_point(t);
357
+ m_value = (m_value - detail::high_surrogate_base) << 10;
358
+ m_value |= (static_cast<U32Type>(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask);
359
+ }
360
+ // postcondition; result must not be a surrogate:
361
+ if(detail::is_surrogate(m_value))
362
+ invalid_code_point(static_cast< ::boost::uint16_t>(m_value));
363
+ }
364
+ BaseIterator m_position;
365
+ mutable U32Type m_value;
366
+ };
367
+
368
+ template <class BaseIterator, class U8Type = ::boost::uint8_t>
369
+ class u32_to_u8_iterator
370
+ : public boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type>
371
+ {
372
+ typedef boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type;
373
+
374
+ #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
375
+ typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
376
+
377
+ BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
378
+ BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8);
379
+ #endif
380
+
381
+ public:
382
+ typename base_type::reference
383
+ dereference()const
384
+ {
385
+ if(m_current == 4)
386
+ extract_current();
387
+ return m_values[m_current];
388
+ }
389
+ bool equal(const u32_to_u8_iterator& that)const
390
+ {
391
+ if(m_position == that.m_position)
392
+ {
393
+ // either the m_current's must be equal, or one must be 0 and
394
+ // the other 4: which means neither must have bits 1 or 2 set:
395
+ return (m_current == that.m_current)
396
+ || (((m_current | that.m_current) & 3) == 0);
397
+ }
398
+ return false;
399
+ }
400
+ void increment()
401
+ {
402
+ // if we have a pending read then read now, so that we know whether
403
+ // to skip a position, or move to a low-surrogate:
404
+ if(m_current == 4)
405
+ {
406
+ // pending read:
407
+ extract_current();
408
+ }
409
+ // move to the next surrogate position:
410
+ ++m_current;
411
+ // if we've reached the end skip a position:
412
+ if(m_values[m_current] == 0)
413
+ {
414
+ m_current = 4;
415
+ ++m_position;
416
+ }
417
+ }
418
+ void decrement()
419
+ {
420
+ if((m_current & 3) == 0)
421
+ {
422
+ --m_position;
423
+ extract_current();
424
+ m_current = 3;
425
+ while(m_current && (m_values[m_current] == 0))
426
+ --m_current;
427
+ }
428
+ else
429
+ --m_current;
430
+ }
431
+ BaseIterator base()const
432
+ {
433
+ return m_position;
434
+ }
435
+ // construct:
436
+ u32_to_u8_iterator() : m_position(), m_current(0)
437
+ {
438
+ m_values[0] = 0;
439
+ m_values[1] = 0;
440
+ m_values[2] = 0;
441
+ m_values[3] = 0;
442
+ m_values[4] = 0;
443
+ }
444
+ u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4)
445
+ {
446
+ m_values[0] = 0;
447
+ m_values[1] = 0;
448
+ m_values[2] = 0;
449
+ m_values[3] = 0;
450
+ m_values[4] = 0;
451
+ }
452
+ private:
453
+
454
+ void extract_current()const
455
+ {
456
+ boost::uint32_t c = *m_position;
457
+ if(c > 0x10FFFFu)
458
+ detail::invalid_utf32_code_point(c);
459
+ if(c < 0x80u)
460
+ {
461
+ m_values[0] = static_cast<unsigned char>(c);
462
+ m_values[1] = static_cast<unsigned char>(0u);
463
+ m_values[2] = static_cast<unsigned char>(0u);
464
+ m_values[3] = static_cast<unsigned char>(0u);
465
+ }
466
+ else if(c < 0x800u)
467
+ {
468
+ m_values[0] = static_cast<unsigned char>(0xC0u + (c >> 6));
469
+ m_values[1] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
470
+ m_values[2] = static_cast<unsigned char>(0u);
471
+ m_values[3] = static_cast<unsigned char>(0u);
472
+ }
473
+ else if(c < 0x10000u)
474
+ {
475
+ m_values[0] = static_cast<unsigned char>(0xE0u + (c >> 12));
476
+ m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
477
+ m_values[2] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
478
+ m_values[3] = static_cast<unsigned char>(0u);
479
+ }
480
+ else
481
+ {
482
+ m_values[0] = static_cast<unsigned char>(0xF0u + (c >> 18));
483
+ m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
484
+ m_values[2] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
485
+ m_values[3] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
486
+ }
487
+ m_current= 0;
488
+ }
489
+ BaseIterator m_position;
490
+ mutable U8Type m_values[5];
491
+ mutable unsigned m_current;
492
+ };
493
+
494
+ template <class BaseIterator, class U32Type = ::boost::uint32_t>
495
+ class u8_to_u32_iterator
496
+ : public boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
497
+ {
498
+ typedef boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
499
+ // special values for pending iterator reads:
500
+ BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
501
+
502
+ #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
503
+ typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
504
+
505
+ BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8);
506
+ BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
507
+ #endif
508
+
509
+ public:
510
+ typename base_type::reference
511
+ dereference()const
512
+ {
513
+ if(m_value == pending_read)
514
+ extract_current();
515
+ return m_value;
516
+ }
517
+ bool equal(const u8_to_u32_iterator& that)const
518
+ {
519
+ return m_position == that.m_position;
520
+ }
521
+ void increment()
522
+ {
523
+ // We must not start with a continuation character:
524
+ if((static_cast<boost::uint8_t>(*m_position) & 0xC0) == 0x80)
525
+ invalid_sequence();
526
+ // skip high surrogate first if there is one:
527
+ unsigned c = detail::utf8_byte_count(*m_position);
528
+ if(m_value == pending_read)
529
+ {
530
+ // Since we haven't read in a value, we need to validate the code points:
531
+ for(unsigned i = 0; i < c; ++i)
532
+ {
533
+ ++m_position;
534
+ // We must have a continuation byte:
535
+ if((i != c - 1) && ((static_cast<boost::uint8_t>(*m_position) & 0xC0) != 0x80))
536
+ invalid_sequence();
537
+ }
538
+ }
539
+ else
540
+ {
541
+ std::advance(m_position, c);
542
+ }
543
+ m_value = pending_read;
544
+ }
545
+ void decrement()
546
+ {
547
+ // Keep backtracking until we don't have a trailing character:
548
+ unsigned count = 0;
549
+ while((*--m_position & 0xC0u) == 0x80u) ++count;
550
+ // now check that the sequence was valid:
551
+ if(count != detail::utf8_trailing_byte_count(*m_position))
552
+ invalid_sequence();
553
+ m_value = pending_read;
554
+ }
555
+ BaseIterator base()const
556
+ {
557
+ return m_position;
558
+ }
559
+ // construct:
560
+ u8_to_u32_iterator() : m_position()
561
+ {
562
+ m_value = pending_read;
563
+ }
564
+ u8_to_u32_iterator(BaseIterator b) : m_position(b)
565
+ {
566
+ m_value = pending_read;
567
+ }
568
+ //
569
+ // Checked constructor:
570
+ //
571
+ u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b)
572
+ {
573
+ m_value = pending_read;
574
+ //
575
+ // We must not start with a continuation character, or end with a
576
+ // truncated UTF-8 sequence otherwise we run the risk of going past
577
+ // the start/end of the underlying sequence:
578
+ //
579
+ if(start != end)
580
+ {
581
+ unsigned char v = *start;
582
+ if((v & 0xC0u) == 0x80u)
583
+ invalid_sequence();
584
+ if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u))
585
+ invalid_sequence();
586
+ BaseIterator pos = end;
587
+ do
588
+ {
589
+ v = *--pos;
590
+ }
591
+ while((start != pos) && ((v & 0xC0u) == 0x80u));
592
+ std::ptrdiff_t extra = detail::utf8_byte_count(v);
593
+ if(std::distance(pos, end) < extra)
594
+ invalid_sequence();
595
+ }
596
+ }
597
+ private:
598
+ static void invalid_sequence()
599
+ {
600
+ std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character");
601
+ boost::throw_exception(e);
602
+ }
603
+ void extract_current()const
604
+ {
605
+ m_value = static_cast<U32Type>(static_cast< ::boost::uint8_t>(*m_position));
606
+ // we must not have a continuation character:
607
+ if((m_value & 0xC0u) == 0x80u)
608
+ invalid_sequence();
609
+ // see how many extra bytes we have:
610
+ unsigned extra = detail::utf8_trailing_byte_count(*m_position);
611
+ // extract the extra bits, 6 from each extra byte:
612
+ BaseIterator next(m_position);
613
+ for(unsigned c = 0; c < extra; ++c)
614
+ {
615
+ ++next;
616
+ m_value <<= 6;
617
+ // We must have a continuation byte:
618
+ if((static_cast<boost::uint8_t>(*next) & 0xC0) != 0x80)
619
+ invalid_sequence();
620
+ m_value += static_cast<boost::uint8_t>(*next) & 0x3Fu;
621
+ }
622
+ // we now need to remove a few of the leftmost bits, but how many depends
623
+ // upon how many extra bytes we've extracted:
624
+ static const boost::uint32_t masks[4] =
625
+ {
626
+ 0x7Fu,
627
+ 0x7FFu,
628
+ 0xFFFFu,
629
+ 0x1FFFFFu,
630
+ };
631
+ m_value &= masks[extra];
632
+ // check the result:
633
+ if(m_value > static_cast<U32Type>(0x10FFFFu))
634
+ invalid_sequence();
635
+ }
636
+ BaseIterator m_position;
637
+ mutable U32Type m_value;
638
+ };
639
+
640
+ template <class BaseIterator>
641
+ class utf16_output_iterator
642
+ {
643
+ public:
644
+ typedef void difference_type;
645
+ typedef void value_type;
646
+ typedef boost::uint32_t* pointer;
647
+ typedef boost::uint32_t& reference;
648
+ typedef std::output_iterator_tag iterator_category;
649
+
650
+ utf16_output_iterator(const BaseIterator& b)
651
+ : m_position(b){}
652
+ utf16_output_iterator(const utf16_output_iterator& that)
653
+ : m_position(that.m_position){}
654
+ utf16_output_iterator& operator=(const utf16_output_iterator& that)
655
+ {
656
+ m_position = that.m_position;
657
+ return *this;
658
+ }
659
+ const utf16_output_iterator& operator*()const
660
+ {
661
+ return *this;
662
+ }
663
+ void operator=(boost::uint32_t val)const
664
+ {
665
+ push(val);
666
+ }
667
+ utf16_output_iterator& operator++()
668
+ {
669
+ return *this;
670
+ }
671
+ utf16_output_iterator& operator++(int)
672
+ {
673
+ return *this;
674
+ }
675
+ BaseIterator base()const
676
+ {
677
+ return m_position;
678
+ }
679
+ private:
680
+ void push(boost::uint32_t v)const
681
+ {
682
+ if(v >= 0x10000u)
683
+ {
684
+ // begin by checking for a code point out of range:
685
+ if(v > 0x10FFFFu)
686
+ detail::invalid_utf32_code_point(v);
687
+ // split into two surrogates:
688
+ *m_position++ = static_cast<boost::uint16_t>(v >> 10) + detail::high_surrogate_base;
689
+ *m_position++ = static_cast<boost::uint16_t>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
690
+ }
691
+ else
692
+ {
693
+ // 16-bit code point:
694
+ // value must not be a surrogate:
695
+ if(detail::is_surrogate(v))
696
+ detail::invalid_utf32_code_point(v);
697
+ *m_position++ = static_cast<boost::uint16_t>(v);
698
+ }
699
+ }
700
+ mutable BaseIterator m_position;
701
+ };
702
+
703
+ template <class BaseIterator>
704
+ class utf8_output_iterator
705
+ {
706
+ public:
707
+ typedef void difference_type;
708
+ typedef void value_type;
709
+ typedef boost::uint32_t* pointer;
710
+ typedef boost::uint32_t& reference;
711
+ typedef std::output_iterator_tag iterator_category;
712
+
713
+ utf8_output_iterator(const BaseIterator& b)
714
+ : m_position(b){}
715
+ utf8_output_iterator(const utf8_output_iterator& that)
716
+ : m_position(that.m_position){}
717
+ utf8_output_iterator& operator=(const utf8_output_iterator& that)
718
+ {
719
+ m_position = that.m_position;
720
+ return *this;
721
+ }
722
+ const utf8_output_iterator& operator*()const
723
+ {
724
+ return *this;
725
+ }
726
+ void operator=(boost::uint32_t val)const
727
+ {
728
+ push(val);
729
+ }
730
+ utf8_output_iterator& operator++()
731
+ {
732
+ return *this;
733
+ }
734
+ utf8_output_iterator& operator++(int)
735
+ {
736
+ return *this;
737
+ }
738
+ BaseIterator base()const
739
+ {
740
+ return m_position;
741
+ }
742
+ private:
743
+ void push(boost::uint32_t c)const
744
+ {
745
+ if(c > 0x10FFFFu)
746
+ detail::invalid_utf32_code_point(c);
747
+ if(c < 0x80u)
748
+ {
749
+ *m_position++ = static_cast<unsigned char>(c);
750
+ }
751
+ else if(c < 0x800u)
752
+ {
753
+ *m_position++ = static_cast<unsigned char>(0xC0u + (c >> 6));
754
+ *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
755
+ }
756
+ else if(c < 0x10000u)
757
+ {
758
+ *m_position++ = static_cast<unsigned char>(0xE0u + (c >> 12));
759
+ *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
760
+ *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
761
+ }
762
+ else
763
+ {
764
+ *m_position++ = static_cast<unsigned char>(0xF0u + (c >> 18));
765
+ *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
766
+ *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
767
+ *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
768
+ }
769
+ }
770
+ mutable BaseIterator m_position;
771
+ };
772
+
773
+ } // namespace boost
774
+
775
+ #endif // BOOST_REGEX_UNICODE_ITERATOR_HPP
776
+