language_detection 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. data/.gitignore +19 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +85 -0
  5. data/Rakefile +11 -0
  6. data/ext/cld/Makefile +34 -0
  7. data/ext/cld/base/basictypes.h +348 -0
  8. data/ext/cld/base/build_config.h +124 -0
  9. data/ext/cld/base/casts.h +156 -0
  10. data/ext/cld/base/commandlineflags.h +443 -0
  11. data/ext/cld/base/crash.h +41 -0
  12. data/ext/cld/base/dynamic_annotations.h +358 -0
  13. data/ext/cld/base/global_strip_options.h +59 -0
  14. data/ext/cld/base/log_severity.h +46 -0
  15. data/ext/cld/base/logging.h +1403 -0
  16. data/ext/cld/base/macros.h +243 -0
  17. data/ext/cld/base/port.h +54 -0
  18. data/ext/cld/base/scoped_ptr.h +428 -0
  19. data/ext/cld/base/stl_decl.h +0 -0
  20. data/ext/cld/base/stl_decl_msvc.h +107 -0
  21. data/ext/cld/base/string_util.h +29 -0
  22. data/ext/cld/base/strtoint.h +93 -0
  23. data/ext/cld/base/template_util.h +96 -0
  24. data/ext/cld/base/type_traits.h +198 -0
  25. data/ext/cld/base/vlog_is_on.h +143 -0
  26. data/ext/cld/cld.so +0 -0
  27. data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
  28. data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
  29. data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
  30. data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
  31. data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
  32. data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
  33. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
  34. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
  35. data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
  36. data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
  37. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
  38. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
  39. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
  40. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
  41. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
  42. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
  43. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
  44. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
  45. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
  46. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
  47. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
  48. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
  49. data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
  50. data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
  51. data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
  52. data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
  53. data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
  54. data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
  55. data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
  56. data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
  57. data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
  58. data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
  59. data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
  60. data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
  61. data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
  62. data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
  63. data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
  64. data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
  65. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
  66. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
  67. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
  68. data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
  69. data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
  70. data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
  71. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
  72. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
  73. data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
  74. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
  75. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
  76. data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
  77. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
  78. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
  79. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
  80. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
  81. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
  82. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
  83. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
  84. data/ext/cld/encodings/internal/encodings.cc +12 -0
  85. data/ext/cld/encodings/lang_enc.h +254 -0
  86. data/ext/cld/encodings/proto/encodings.pb.h +169 -0
  87. data/ext/cld/encodings/public/encodings.h +301 -0
  88. data/ext/cld/extconf.rb +1 -0
  89. data/ext/cld/language_detection.cc +88 -0
  90. data/ext/cld/languages/internal/languages.cc +337 -0
  91. data/ext/cld/languages/proto/languages.pb.h +179 -0
  92. data/ext/cld/languages/public/languages.h +379 -0
  93. data/language_detection.gemspec +28 -0
  94. data/lib/language_detection/string.rb +1 -0
  95. data/lib/language_detection/version.rb +3 -0
  96. data/lib/language_detection.rb +54 -0
  97. data/test/_helper.rb +15 -0
  98. data/test/fixtures/languages.csv +80 -0
  99. data/test/language_detection_test.rb +88 -0
  100. metadata +250 -0
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ ext/cld/*.o
19
+ ext/cld/*.a
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in language_detection.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Vojtech Hyza
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,85 @@
1
+ # LanguageDetection
2
+
3
+ Ruby bindings for the Chromium Compact Language Detector ([source](http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/)). This gem uses source code from the [chromium-compact-language-detector](http://code.google.com/p/chromium-compact-language-detector/) port.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'language_detection'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install language_detection
18
+
19
+ ## Usage
20
+
21
+ ```ruby
22
+ >> require 'language_detection'
23
+ => true
24
+ >> language = LanguageDetection.perform("This is some example text for language detection")
25
+ => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>51, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]}
26
+ >> language.name
27
+ => "ENGLISH"
28
+ >> language.code
29
+ => "en"
30
+ >> language.reliable
31
+ => true
32
+ >> language.details # contains up to 3 languages sorted by score
33
+ => [{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]
34
+ >> language.details.first.percent
35
+ => 100
36
+ >> language.details.first.score
37
+ => 49.43273905996759
38
+ ```
39
+
40
+ Another way is to include the `LanguageDetection` module in your class:
41
+
42
+ ```ruby
43
+ class Article
44
+ include LanguageDetection
45
+
46
+ attr_accessor :title, :content
47
+
48
+ def initialize(params = {})
49
+ @title = params[:title]
50
+ @content = params[:content]
51
+ end
52
+
53
+ def to_s
54
+ "#{title}\n#{content}"
55
+ end
56
+ end
57
+ ```
58
+
59
+ which provides an `Article#language` method that uses the result of `Article#to_s` as the text to analyze:
60
+
61
+ ```ruby
62
+ >> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
63
+ >> article.language
64
+ => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>93, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>80.22690437601297}]}
65
+ ```
66
+
67
+ Or you can add a `String#language` method with `require 'language_detection/string'`:
68
+
69
+ ```ruby
70
+ >> require 'language_detection'
71
+ => true
72
+ >> require 'language_detection/string'
73
+ => true
74
+ >> "Web development that doesn't hurt".language
75
+ => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>36, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>39.70826580226905}]}
76
+ ```
77
+
78
+
79
+ ## Contributing
80
+
81
+ 1. Fork it
82
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
83
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
84
+ 4. Push to the branch (`git push origin my-new-feature`)
85
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task :default => :test
4
+
5
+ require 'rake/testtask'
6
+ Rake::TestTask.new(:test) do |test|
7
+ test.libs << 'lib' << 'test'
8
+ test.test_files = FileList['test/*_test.rb']
9
+ test.verbose = true
10
+ # test.warning = true
11
+ end
data/ext/cld/Makefile ADDED
@@ -0,0 +1,34 @@
1
+ # TODO: Generate Makefile
2
+
3
+ CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
4
+ LDFLAGS=-L.
5
+ CC=g++
6
+ AR=ar
7
+
8
+ SOURCES = encodings/compact_lang_det/cldutil.cc \
9
+ encodings/compact_lang_det/cldutil_dbg_empty.cc \
10
+ encodings/compact_lang_det/compact_lang_det.cc \
11
+ encodings/compact_lang_det/compact_lang_det_impl.cc \
12
+ encodings/compact_lang_det/ext_lang_enc.cc \
13
+ encodings/compact_lang_det/getonescriptspan.cc \
14
+ encodings/compact_lang_det/letterscript_enum.cc \
15
+ encodings/compact_lang_det/tote.cc \
16
+ encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc \
17
+ encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc \
18
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc \
19
+ encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc \
20
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc \
21
+ encodings/compact_lang_det/win/cld_htmlutils_windows.cc \
22
+ encodings/compact_lang_det/win/cld_unilib_windows.cc \
23
+ encodings/compact_lang_det/win/cld_utf8statetable.cc \
24
+ encodings/compact_lang_det/win/cld_utf8utils_windows.cc \
25
+ encodings/internal/encodings.cc \
26
+ languages/internal/languages.cc \
27
+ language_detection.cc
28
+
29
+ install:
30
+ rm -f *.o
31
+ rm -f libcld.a
32
+ $(CC) -c $(CFLAGS) $(SOURCES)
33
+ $(AR) rcs libcld.a *.o
34
+ $(CC) -DCLD_WINDOWS -I. -L. -shared -o cld.so -lstdc++ *.o
@@ -0,0 +1,348 @@
1
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #ifndef BASE_BASICTYPES_H_
6
+ #define BASE_BASICTYPES_H_
7
+
8
+ #include <limits.h> // So we can set the bounds of our types
9
+ #include <stddef.h> // For size_t
10
+ #include <string.h> // for memcpy
11
+
12
+ #include "base/port.h" // Types that only need exist on certain systems
13
+
14
+ #ifndef COMPILER_MSVC
15
+ // stdint.h is part of C99 but MSVC doesn't have it.
16
+ #include <stdint.h> // For intptr_t.
17
+ #endif
18
+
19
+ typedef signed char schar;
20
+ typedef signed char int8;
21
+ typedef short int16;
22
+ // TODO(mbelshe) Remove these type guards. These are
23
+ // temporary to avoid conflicts with npapi.h.
24
+ #ifndef _INT32
25
+ #define _INT32
26
+ typedef int int32;
27
+ #endif
28
+
29
+ // The NSPR system headers define 64-bit as |long| when possible. In order to
30
+ // not have typedef mismatches, we do the same on LP64.
31
+ #if __LP64__
32
+ typedef long int64;
33
+ #else
34
+ typedef long long int64;
35
+ #endif
36
+
37
+ // NOTE: unsigned types are DANGEROUS in loops and other arithmetical
38
+ // places. Use the signed types unless your variable represents a bit
39
+ // pattern (eg a hash value) or you really need the extra bit. Do NOT
40
+ // use 'unsigned' to express "this value should always be positive";
41
+ // use assertions for this.
42
+
43
+ typedef unsigned char uint8;
44
+ typedef unsigned short uint16;
45
+ // TODO(mbelshe) Remove these type guards. These are
46
+ // temporary to avoid conflicts with npapi.h.
47
+ #ifndef _UINT32
48
+ #define _UINT32
49
+ typedef unsigned int uint32;
50
+ #endif
51
+
52
+ // See the comment above about NSPR and 64-bit.
53
+ #if __LP64__
54
+ typedef unsigned long uint64;
55
+ #else
56
+ typedef unsigned long long uint64;
57
+ #endif
58
+
59
+ // A type to represent a Unicode code-point value. As of Unicode 4.0,
60
+ // such values require up to 21 bits.
61
+ // (For type-checking on pointers, make this explicitly signed,
62
+ // and it should always be the signed version of whatever int32 is.)
63
+ typedef signed int char32;
64
+
65
+ const uint8 kuint8max = (( uint8) 0xFF);
66
+ const uint16 kuint16max = ((uint16) 0xFFFF);
67
+ const uint32 kuint32max = ((uint32) 0xFFFFFFFF);
68
+ const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF));
69
+ const int8 kint8min = (( int8) 0x80);
70
+ const int8 kint8max = (( int8) 0x7F);
71
+ const int16 kint16min = (( int16) 0x8000);
72
+ const int16 kint16max = (( int16) 0x7FFF);
73
+ const int32 kint32min = (( int32) 0x80000000);
74
+ const int32 kint32max = (( int32) 0x7FFFFFFF);
75
+ const int64 kint64min = (( int64) GG_LONGLONG(0x8000000000000000));
76
+ const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF));
77
+
78
+ // A macro to disallow the copy constructor and operator= functions
79
+ // This should be used in the private: declarations for a class
80
+ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
81
+ TypeName(const TypeName&); \
82
+ void operator=(const TypeName&)
83
+
84
+ // An older, deprecated, politically incorrect name for the above.
85
+ #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
86
+
87
+ // A macro to disallow all the implicit constructors, namely the
88
+ // default constructor, copy constructor and operator= functions.
89
+ //
90
+ // This should be used in the private: declarations for a class
91
+ // that wants to prevent anyone from instantiating it. This is
92
+ // especially useful for classes containing only static methods.
93
+ #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
94
+ TypeName(); \
95
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
96
+
97
+ // The arraysize(arr) macro returns the # of elements in an array arr.
98
+ // The expression is a compile-time constant, and therefore can be
99
+ // used in defining new arrays, for example. If you use arraysize on
100
+ // a pointer by mistake, you will get a compile-time error.
101
+ //
102
+ // One caveat is that arraysize() doesn't accept any array of an
103
+ // anonymous type or a type defined inside a function. In these rare
104
+ // cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
105
+ // due to a limitation in C++'s template system. The limitation might
106
+ // eventually be removed, but it hasn't happened yet.
107
+
108
+ // This template function declaration is used in defining arraysize.
109
+ // Note that the function doesn't need an implementation, as we only
110
+ // use its type.
111
+ template <typename T, size_t N>
112
+ char (&ArraySizeHelper(T (&array)[N]))[N];
113
+
114
+ // That gcc wants both of these prototypes seems mysterious. VC, for
115
+ // its part, can't decide which to use (another mystery). Matching of
116
+ // template overloads: the final frontier.
117
+ #ifndef _MSC_VER
118
+ template <typename T, size_t N>
119
+ char (&ArraySizeHelper(const T (&array)[N]))[N];
120
+ #endif
121
+
122
+ #define arraysize(array) (sizeof(ArraySizeHelper(array)))
123
+
124
+ // ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
125
+ // but can be used on anonymous types or types defined inside
126
+ // functions. It's less safe than arraysize as it accepts some
127
+ // (although not all) pointers. Therefore, you should use arraysize
128
+ // whenever possible.
129
+ //
130
+ // The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
131
+ // size_t.
132
+ //
133
+ // ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error
134
+ //
135
+ // "warning: division by zero in ..."
136
+ //
137
+ // when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
138
+ // You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
139
+ //
140
+ // The following comments are on the implementation details, and can
141
+ // be ignored by the users.
142
+ //
143
+ // ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
144
+ // the array) and sizeof(*(arr)) (the # of bytes in one array
145
+ // element). If the former is divisible by the latter, perhaps arr is
146
+ // indeed an array, in which case the division result is the # of
147
+ // elements in the array. Otherwise, arr cannot possibly be an array,
148
+ // and we generate a compiler error to prevent the code from
149
+ // compiling.
150
+ //
151
+ // Since the size of bool is implementation-defined, we need to cast
152
+ // !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
153
+ // result has type size_t.
154
+ //
155
+ // This macro is not perfect as it wrongfully accepts certain
156
+ // pointers, namely where the pointer size is divisible by the pointee
157
+ // size. Since all our code has to go through a 32-bit compiler,
158
+ // where a pointer is 4 bytes, this means all pointers to a type whose
159
+ // size is 3 or greater than 4 will be (righteously) rejected.
160
+
161
+ #define ARRAYSIZE_UNSAFE(a) \
162
+ ((sizeof(a) / sizeof(*(a))) / \
163
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
164
+
165
+
166
+ // Use implicit_cast as a safe version of static_cast or const_cast
167
+ // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
168
+ // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
169
+ // a const pointer to Foo).
170
+ // When you use implicit_cast, the compiler checks that the cast is safe.
171
+ // Such explicit implicit_casts are necessary in surprisingly many
172
+ // situations where C++ demands an exact type match instead of an
173
+ // argument type convertible to a target type.
174
+ //
175
+ // The From type can be inferred, so the preferred syntax for using
176
+ // implicit_cast is the same as for static_cast etc.:
177
+ //
178
+ // implicit_cast<ToType>(expr)
179
+ //
180
+ // implicit_cast would have been part of the C++ standard library,
181
+ // but the proposal was submitted too late. It will probably make
182
+ // its way into the language in the future.
183
+ template<typename To, typename From>
184
+ inline To implicit_cast(From const &f) {
185
+ return f;
186
+ }
187
+
188
+ // The COMPILE_ASSERT macro can be used to verify that a compile time
189
+ // expression is true. For example, you could use it to verify the
190
+ // size of a static array:
191
+ //
192
+ // COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
193
+ // content_type_names_incorrect_size);
194
+ //
195
+ // or to make sure a struct is smaller than a certain size:
196
+ //
197
+ // COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
198
+ //
199
+ // The second argument to the macro is the name of the variable. If
200
+ // the expression is false, most compilers will issue a warning/error
201
+ // containing the name of the variable.
202
+
203
+ template <bool>
204
+ struct CompileAssert {
205
+ };
206
+
207
+ #undef COMPILE_ASSERT
208
+ #define COMPILE_ASSERT(expr, msg) \
209
+ typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
210
+
211
+ // Implementation details of COMPILE_ASSERT:
212
+ //
213
+ // - COMPILE_ASSERT works by defining an array type that has -1
214
+ // elements (and thus is invalid) when the expression is false.
215
+ //
216
+ // - The simpler definition
217
+ //
218
+ // #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
219
+ //
220
+ // does not work, as gcc supports variable-length arrays whose sizes
221
+ // are determined at run-time (this is gcc's extension and not part
222
+ // of the C++ standard). As a result, gcc fails to reject the
223
+ // following code with the simple definition:
224
+ //
225
+ // int foo;
226
+ // COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
227
+ // // not a compile-time constant.
228
+ //
229
+ // - By using the type CompileAssert<(bool(expr))>, we ensure that
230
+ // expr is a compile-time constant. (Template arguments must be
231
+ // determined at compile-time.)
232
+ //
233
+ // - The outer parentheses in CompileAssert<(bool(expr))> are necessary
234
+ // to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
235
+ //
236
+ // CompileAssert<bool(expr)>
237
+ //
238
+ // instead, these compilers will refuse to compile
239
+ //
240
+ // COMPILE_ASSERT(5 > 0, some_message);
241
+ //
242
+ // (They seem to think the ">" in "5 > 0" marks the end of the
243
+ // template argument list.)
244
+ //
245
+ // - The array size is (bool(expr) ? 1 : -1), instead of simply
246
+ //
247
+ // ((expr) ? 1 : -1).
248
+ //
249
+ // This is to avoid running into a bug in MS VC 7.1, which
250
+ // causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
251
+
252
+
253
+ // MetatagId refers to metatag-id that we assign to
254
+ // each metatag <name, value> pair.
255
+ typedef uint32 MetatagId;
256
+
257
+ // Argument type used in interfaces that can optionally take ownership
258
+ // of a passed in argument. If TAKE_OWNERSHIP is passed, the called
259
+ // object takes ownership of the argument. Otherwise it does not.
260
+ enum Ownership {
261
+ DO_NOT_TAKE_OWNERSHIP,
262
+ TAKE_OWNERSHIP
263
+ };
264
+
265
+ // bit_cast<Dest,Source> is a template function that implements the
266
+ // equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
267
+ // very low-level functions like the protobuf library and fast math
268
+ // support.
269
+ //
270
+ // float f = 3.14159265358979;
271
+ // int i = bit_cast<int32>(f);
272
+ // // i = 0x40490fdb
273
+ //
274
+ // The classical address-casting method is:
275
+ //
276
+ // // WRONG
277
+ // float f = 3.14159265358979; // WRONG
278
+ // int i = * reinterpret_cast<int*>(&f); // WRONG
279
+ //
280
+ // The address-casting method actually produces undefined behavior
281
+ // according to ISO C++ specification section 3.10 -15 -. Roughly, this
282
+ // section says: if an object in memory has one type, and a program
283
+ // accesses it with a different type, then the result is undefined
284
+ // behavior for most values of "different type".
285
+ //
286
+ // This is true for any cast syntax, either *(int*)&f or
287
+ // *reinterpret_cast<int*>(&f). And it is particularly true for
288
+ // conversions between integral lvalues and floating-point lvalues.
289
+ //
290
+ // The purpose of 3.10 -15- is to allow optimizing compilers to assume
291
+ // that expressions with different types refer to different memory. gcc
292
+ // 4.0.1 has an optimizer that takes advantage of this. So a
293
+ // non-conforming program quietly produces wildly incorrect output.
294
+ //
295
+ // The problem is not the use of reinterpret_cast. The problem is type
296
+ // punning: holding an object in memory of one type and reading its bits
297
+ // back using a different type.
298
+ //
299
+ // The C++ standard is more subtle and complex than this, but that
300
+ // is the basic idea.
301
+ //
302
+ // Anyways ...
303
+ //
304
+ // bit_cast<> calls memcpy() which is blessed by the standard,
305
+ // especially by the example in section 3.9 . Also, of course,
306
+ // bit_cast<> wraps up the nasty logic in one place.
307
+ //
308
+ // Fortunately memcpy() is very fast. In optimized mode, with a
309
+ // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
310
+ // code with the minimal amount of data movement. On a 32-bit system,
311
+ // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
312
+ // compiles to two loads and two stores.
313
+ //
314
+ // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
315
+ //
316
+ // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
317
+ // is likely to surprise you.
318
+
319
+ template <class Dest, class Source>
320
+ inline Dest bit_cast(const Source& source) {
321
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
322
+ // A compile error here means your Dest and Source have different sizes.
323
+ typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];
324
+
325
+ Dest dest;
326
+ memcpy(&dest, &source, sizeof(dest));
327
+ return dest;
328
+ }
329
+
330
+ // The following enum should be used only as a constructor argument to indicate
331
+ // that the variable has static storage class, and that the constructor should
332
+ // do nothing to its state. It indicates to the reader that it is legal to
333
+ // declare a static instance of the class, provided the constructor is given
334
+ // the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
335
+ // static variable that has a constructor or a destructor because invocation
336
+ // order is undefined. However, IF the type can be initialized by filling with
337
+ // zeroes (which the loader does for static variables), AND the destructor also
338
+ // does nothing to the storage, AND there are no virtual methods, then a
339
+ // constructor declared as
340
+ // explicit MyClass(base::LinkerInitialized x) {}
341
+ // and invoked as
342
+ // static MyClass my_variable_name(base::LINKER_INITIALIZED);
343
+ namespace base {
344
+ enum LinkerInitialized { LINKER_INITIALIZED };
345
+ } // base
346
+
347
+
348
+ #endif // BASE_BASICTYPES_H_
@@ -0,0 +1,124 @@
1
+ // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ // This file adds defines about the platform we're currently building on.
6
+ // Operating System:
7
+ // OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX)
8
+ // Compiler:
9
+ // COMPILER_MSVC / COMPILER_GCC
10
+ // Processor:
11
+ // ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64)
12
+ // ARCH_CPU_32_BITS / ARCH_CPU_64_BITS
13
+
14
+ #ifndef BUILD_BUILD_CONFIG_H_
15
+ #define BUILD_BUILD_CONFIG_H_
16
+
17
+ // A set of macros to use for platform detection.
18
+ #if defined(__APPLE__)
19
+ #define OS_MACOSX 1
20
+ #elif defined(__linux__)
21
+ #define OS_LINUX 1
22
+ // Use TOOLKIT_GTK on linux if TOOLKIT_VIEWS isn't defined.
23
+ #if !defined(TOOLKIT_VIEWS)
24
+ #define TOOLKIT_GTK
25
+ #endif
26
+ #elif defined(_WIN32)
27
+ #define OS_WIN 1
28
+ #define TOOLKIT_VIEWS 1
29
+ #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30
+ #define OS_FREEBSD 1
31
+ #define TOOLKIT_GTK
32
+ #elif defined(__NetBSD__)
33
+ #define OS_NETBSD 1
34
+ #define TOOLKIT_GTK
35
+ #elif defined(__OpenBSD__)
36
+ #define OS_OPENBSD 1
37
+ #define TOOLKIT_GTK
38
+ #elif defined(__DragonFly__)
39
+ #define OS_DRAGONFLY 1
40
+ #define TOOLKIT_GTK
41
+ #elif defined(__sun)
42
+ #define OS_SOLARIS 1
43
+ #define TOOLKIT_GTK
44
+ #else
45
+ #error Please add support for your platform in build/build_config.h
46
+ #endif
47
+
48
+ // A flag derived from the above flags, used to cover GTK code in
49
+ // both TOOLKIT_GTK and TOOLKIT_VIEWS.
50
+ #if defined(TOOLKIT_GTK) || (defined(TOOLKIT_VIEWS) && !defined(OS_WIN))
51
+ #define TOOLKIT_USES_GTK 1
52
+ #endif
53
+
54
+ #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
55
+ #define USE_NSS 1 // Use NSS for crypto.
56
+ #define USE_X11 1 // Use X for graphics.
57
+ #endif
58
+
59
+ // For access to standard POSIXish features, use OS_POSIX instead of a
60
+ // more specific macro.
61
+ #if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_SOLARIS) || defined(OS_DRAGONFLY)
62
+ #define OS_POSIX 1
63
+ // Use base::DataPack for name/value pairs.
64
+ #define USE_BASE_DATA_PACK 1
65
+ #endif
66
+
67
+ // Use tcmalloc
68
+ #if defined(OS_WIN) && ! defined(NO_TCMALLOC)
69
+ #define USE_TCMALLOC 1
70
+ #endif
71
+
72
+ // Compiler detection.
73
+ #if defined(__GNUC__)
74
+ #define COMPILER_GCC 1
75
+ #elif defined(_MSC_VER)
76
+ #define COMPILER_MSVC 1
77
+ #else
78
+ #error Please add support for your compiler in build/build_config.h
79
+ #endif
80
+
81
+ // Processor architecture detection. For more info on what's defined, see:
82
+ // http://msdn.microsoft.com/en-us/library/b0084kay.aspx
83
+ // http://www.agner.org/optimize/calling_conventions.pdf
84
+ // or with gcc, run: "echo | gcc -E -dM -"
85
+ #if defined(_M_X64) || defined(__x86_64__)
86
+ #define ARCH_CPU_X86_FAMILY 1
87
+ #define ARCH_CPU_X86_64 1
88
+ #define ARCH_CPU_64_BITS 1
89
+ #elif defined(_M_IX86) || defined(__i386__)
90
+ #define ARCH_CPU_X86_FAMILY 1
91
+ #define ARCH_CPU_X86 1
92
+ #define ARCH_CPU_32_BITS 1
93
+ #elif defined(__ARMEL__)
94
+ #define ARCH_CPU_ARM_FAMILY 1
95
+ #define ARCH_CPU_ARMEL 1
96
+ #define ARCH_CPU_32_BITS 1
97
+ #define WCHAR_T_IS_UNSIGNED 1
98
+ #elif defined(__ARCH_PPC) || defined(__ppc__)
99
+ #define ARCH_CPU_PPC_FAMILY 1
100
+ #define ARCH_CPU_32_BITS 1
101
+ #else
102
+ #error Please add support for your architecture in build/build_config.h
103
+ #endif
104
+
105
+ // Type detection for wchar_t.
106
+ #if defined(OS_WIN)
107
+ #define WCHAR_T_IS_UTF16
108
+ #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
109
+ defined(__WCHAR_MAX__) && \
110
+ (__WCHAR_MAX__ == 0x7fffffff || __WCHAR_MAX__ == 0xffffffff)
111
+ #define WCHAR_T_IS_UTF32
112
+ #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
113
+ defined(__WCHAR_MAX__) && \
114
+ (__WCHAR_MAX__ == 0x7fff || __WCHAR_MAX__ == 0xffff)
115
+ // On Posix, we'll detect short wchar_t, but projects aren't guaranteed to
116
+ // compile in this mode (in particular, Chrome doesn't). This is intended for
117
+ // other projects using base who manage their own dependencies and make sure
118
+ // short wchar works for them.
119
+ #define WCHAR_T_IS_UTF16
120
+ #else
121
+ #error Please add support for your compiler in build/build_config.h
122
+ #endif
123
+
124
+ #endif // BUILD_BUILD_CONFIG_H_