language_detection 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. data/.gitignore +19 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +85 -0
  5. data/Rakefile +11 -0
  6. data/ext/cld/Makefile +34 -0
  7. data/ext/cld/base/basictypes.h +348 -0
  8. data/ext/cld/base/build_config.h +124 -0
  9. data/ext/cld/base/casts.h +156 -0
  10. data/ext/cld/base/commandlineflags.h +443 -0
  11. data/ext/cld/base/crash.h +41 -0
  12. data/ext/cld/base/dynamic_annotations.h +358 -0
  13. data/ext/cld/base/global_strip_options.h +59 -0
  14. data/ext/cld/base/log_severity.h +46 -0
  15. data/ext/cld/base/logging.h +1403 -0
  16. data/ext/cld/base/macros.h +243 -0
  17. data/ext/cld/base/port.h +54 -0
  18. data/ext/cld/base/scoped_ptr.h +428 -0
  19. data/ext/cld/base/stl_decl.h +0 -0
  20. data/ext/cld/base/stl_decl_msvc.h +107 -0
  21. data/ext/cld/base/string_util.h +29 -0
  22. data/ext/cld/base/strtoint.h +93 -0
  23. data/ext/cld/base/template_util.h +96 -0
  24. data/ext/cld/base/type_traits.h +198 -0
  25. data/ext/cld/base/vlog_is_on.h +143 -0
  26. data/ext/cld/cld.so +0 -0
  27. data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
  28. data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
  29. data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
  30. data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
  31. data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
  32. data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
  33. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
  34. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
  35. data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
  36. data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
  37. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
  38. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
  39. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
  40. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
  41. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
  42. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
  43. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
  44. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
  45. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
  46. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
  47. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
  48. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
  49. data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
  50. data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
  51. data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
  52. data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
  53. data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
  54. data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
  55. data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
  56. data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
  57. data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
  58. data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
  59. data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
  60. data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
  61. data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
  62. data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
  63. data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
  64. data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
  65. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
  66. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
  67. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
  68. data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
  69. data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
  70. data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
  71. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
  72. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
  73. data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
  74. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
  75. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
  76. data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
  77. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
  78. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
  79. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
  80. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
  81. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
  82. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
  83. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
  84. data/ext/cld/encodings/internal/encodings.cc +12 -0
  85. data/ext/cld/encodings/lang_enc.h +254 -0
  86. data/ext/cld/encodings/proto/encodings.pb.h +169 -0
  87. data/ext/cld/encodings/public/encodings.h +301 -0
  88. data/ext/cld/extconf.rb +1 -0
  89. data/ext/cld/language_detection.cc +88 -0
  90. data/ext/cld/languages/internal/languages.cc +337 -0
  91. data/ext/cld/languages/proto/languages.pb.h +179 -0
  92. data/ext/cld/languages/public/languages.h +379 -0
  93. data/language_detection.gemspec +28 -0
  94. data/lib/language_detection/string.rb +1 -0
  95. data/lib/language_detection/version.rb +3 -0
  96. data/lib/language_detection.rb +54 -0
  97. data/test/_helper.rb +15 -0
  98. data/test/fixtures/languages.csv +80 -0
  99. data/test/language_detection_test.rb +88 -0
  100. metadata +250 -0
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ ext/cld/*.o
19
+ ext/cld/*.a
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in language_detection.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Vojtech Hyza
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,85 @@
1
+ # LanguageDetection
2
+
3
+ Ruby bindings for the Chromium Compact Language Detector ([source](http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/)). This gem uses source code from the [chromium-compact-language-detector](http://code.google.com/p/chromium-compact-language-detector/) port.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'language_detection'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install language_detection
18
+
19
+ ## Usage
20
+
21
+ ```ruby
22
+ >> require 'language_detection'
23
+ => true
24
+ >> language = LanguageDetection.perform("This is some example text for language detection")
25
+ => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>51, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]}
26
+ >> language.name
27
+ => "ENGLISH"
28
+ >> language.code
29
+ => "en"
30
+ >> language.reliable
31
+ => true
32
+ >> language.details # contains up to 3 languages sorted by score
33
+ => [{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]
34
+ >> language.details.first.percent
35
+ => 100
36
+ >> language.details.first.score
37
+ => 49.43273905996759
38
+ ```
39
+
40
+ The other way is to include the `LanguageDetection` module in your class:
41
+
42
+ ```ruby
43
+ class Article
44
+ include LanguageDetection
45
+
46
+ attr_accessor :title, :content
47
+
48
+ def initialize(params = {})
49
+ @title = params[:title]
50
+ @content = params[:content]
51
+ end
52
+
53
+ def to_s
54
+ "#{title}\n#{content}"
55
+ end
56
+ end
57
+ ```
58
+
59
+ which provides an `Article#language` method that uses the result of `Article#to_s` as its input:
60
+
61
+ ```ruby
62
+ >> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
63
+ >> article.language
64
+ => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>93, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>80.22690437601297}]}
65
+ ```
66
+
67
+ Or you can add a `String#language` method with `require 'language_detection/string'`:
68
+
69
+ ```ruby
70
+ >> require 'language_detection'
71
+ => true
72
+ >> require 'language_detection/string'
73
+ => true
74
+ >> "Web development that doesn't hurt".language
75
+ => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>36, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>39.70826580226905}]}
76
+ ```
77
+
78
+
79
+ ## Contributing
80
+
81
+ 1. Fork it
82
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
83
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
84
+ 4. Push to the branch (`git push origin my-new-feature`)
85
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task :default => :test
4
+
5
+ require 'rake/testtask'
6
+ Rake::TestTask.new(:test) do |test|
7
+ test.libs << 'lib' << 'test'
8
+ test.test_files = FileList['test/*_test.rb']
9
+ test.verbose = true
10
+ # test.warning = true
11
+ end
data/ext/cld/Makefile ADDED
@@ -0,0 +1,34 @@
1
+ # TODO: Generate Makefile
2
+
3
+ CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
4
+ LDFLAGS=-L.
5
+ CC=g++
6
+ AR=ar
7
+
8
+ SOURCES = encodings/compact_lang_det/cldutil.cc \
9
+ encodings/compact_lang_det/cldutil_dbg_empty.cc \
10
+ encodings/compact_lang_det/compact_lang_det.cc \
11
+ encodings/compact_lang_det/compact_lang_det_impl.cc \
12
+ encodings/compact_lang_det/ext_lang_enc.cc \
13
+ encodings/compact_lang_det/getonescriptspan.cc \
14
+ encodings/compact_lang_det/letterscript_enum.cc \
15
+ encodings/compact_lang_det/tote.cc \
16
+ encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc \
17
+ encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc \
18
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc \
19
+ encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc \
20
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc \
21
+ encodings/compact_lang_det/win/cld_htmlutils_windows.cc \
22
+ encodings/compact_lang_det/win/cld_unilib_windows.cc \
23
+ encodings/compact_lang_det/win/cld_utf8statetable.cc \
24
+ encodings/compact_lang_det/win/cld_utf8utils_windows.cc \
25
+ encodings/internal/encodings.cc \
26
+ languages/internal/languages.cc \
27
+ language_detection.cc
28
+
29
+ install:
30
+ rm -f *.o
31
+ rm -f libcld.a
32
+ $(CC) -c $(CFLAGS) $(SOURCES)
33
+ $(AR) rcs libcld.a *.o
34
+ $(CC) -DCLD_WINDOWS -I. -L. -shared -o cld.so -lstdc++ *.o
@@ -0,0 +1,348 @@
1
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #ifndef BASE_BASICTYPES_H_
6
+ #define BASE_BASICTYPES_H_
7
+
8
+ #include <limits.h> // So we can set the bounds of our types
9
+ #include <stddef.h> // For size_t
10
+ #include <string.h> // for memcpy
11
+
12
+ #include "base/port.h" // Types that only need exist on certain systems
13
+
14
+ #ifndef COMPILER_MSVC
15
+ // stdint.h is part of C99 but MSVC doesn't have it.
16
+ #include <stdint.h> // For intptr_t.
17
+ #endif
18
+
19
+ typedef signed char schar;
20
+ typedef signed char int8;
21
+ typedef short int16;
22
+ // TODO(mbelshe) Remove these type guards. These are
23
+ // temporary to avoid conflicts with npapi.h.
24
+ #ifndef _INT32
25
+ #define _INT32
26
+ typedef int int32;
27
+ #endif
28
+
29
+ // The NSPR system headers define 64-bit as |long| when possible. In order to
30
+ // not have typedef mismatches, we do the same on LP64.
31
+ #if __LP64__
32
+ typedef long int64;
33
+ #else
34
+ typedef long long int64;
35
+ #endif
36
+
37
+ // NOTE: unsigned types are DANGEROUS in loops and other arithmetical
38
+ // places. Use the signed types unless your variable represents a bit
39
+ // pattern (eg a hash value) or you really need the extra bit. Do NOT
40
+ // use 'unsigned' to express "this value should always be positive";
41
+ // use assertions for this.
42
+
43
+ typedef unsigned char uint8;
44
+ typedef unsigned short uint16;
45
+ // TODO(mbelshe) Remove these type guards. These are
46
+ // temporary to avoid conflicts with npapi.h.
47
+ #ifndef _UINT32
48
+ #define _UINT32
49
+ typedef unsigned int uint32;
50
+ #endif
51
+
52
+ // See the comment above about NSPR and 64-bit.
53
+ #if __LP64__
54
+ typedef unsigned long uint64;
55
+ #else
56
+ typedef unsigned long long uint64;
57
+ #endif
58
+
59
+ // A type to represent a Unicode code-point value. As of Unicode 4.0,
60
+ // such values require up to 21 bits.
61
+ // (For type-checking on pointers, make this explicitly signed,
62
+ // and it should always be the signed version of whatever int32 is.)
63
+ typedef signed int char32;
64
+
65
+ const uint8 kuint8max = (( uint8) 0xFF);
66
+ const uint16 kuint16max = ((uint16) 0xFFFF);
67
+ const uint32 kuint32max = ((uint32) 0xFFFFFFFF);
68
+ const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF));
69
+ const int8 kint8min = (( int8) 0x80);
70
+ const int8 kint8max = (( int8) 0x7F);
71
+ const int16 kint16min = (( int16) 0x8000);
72
+ const int16 kint16max = (( int16) 0x7FFF);
73
+ const int32 kint32min = (( int32) 0x80000000);
74
+ const int32 kint32max = (( int32) 0x7FFFFFFF);
75
+ const int64 kint64min = (( int64) GG_LONGLONG(0x8000000000000000));
76
+ const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF));
77
+
78
+ // A macro to disallow the copy constructor and operator= functions
79
+ // This should be used in the private: declarations for a class
80
+ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
81
+ TypeName(const TypeName&); \
82
+ void operator=(const TypeName&)
83
+
84
+ // An older, deprecated, politically incorrect name for the above.
85
+ #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
86
+
87
+ // A macro to disallow all the implicit constructors, namely the
88
+ // default constructor, copy constructor and operator= functions.
89
+ //
90
+ // This should be used in the private: declarations for a class
91
+ // that wants to prevent anyone from instantiating it. This is
92
+ // especially useful for classes containing only static methods.
93
+ #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
94
+ TypeName(); \
95
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
96
+
97
+ // The arraysize(arr) macro returns the # of elements in an array arr.
98
+ // The expression is a compile-time constant, and therefore can be
99
+ // used in defining new arrays, for example. If you use arraysize on
100
+ // a pointer by mistake, you will get a compile-time error.
101
+ //
102
+ // One caveat is that arraysize() doesn't accept any array of an
103
+ // anonymous type or a type defined inside a function. In these rare
104
+ // cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
105
+ // due to a limitation in C++'s template system. The limitation might
106
+ // eventually be removed, but it hasn't happened yet.
107
+
108
+ // This template function declaration is used in defining arraysize.
109
+ // Note that the function doesn't need an implementation, as we only
110
+ // use its type.
111
+ template <typename T, size_t N>
112
+ char (&ArraySizeHelper(T (&array)[N]))[N];
113
+
114
+ // That gcc wants both of these prototypes seems mysterious. VC, for
115
+ // its part, can't decide which to use (another mystery). Matching of
116
+ // template overloads: the final frontier.
117
+ #ifndef _MSC_VER
118
+ template <typename T, size_t N>
119
+ char (&ArraySizeHelper(const T (&array)[N]))[N];
120
+ #endif
121
+
122
+ #define arraysize(array) (sizeof(ArraySizeHelper(array)))
123
+
124
+ // ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
125
+ // but can be used on anonymous types or types defined inside
126
+ // functions. It's less safe than arraysize as it accepts some
127
+ // (although not all) pointers. Therefore, you should use arraysize
128
+ // whenever possible.
129
+ //
130
+ // The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
131
+ // size_t.
132
+ //
133
+ // ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error
134
+ //
135
+ // "warning: division by zero in ..."
136
+ //
137
+ // when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
138
+ // You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
139
+ //
140
+ // The following comments are on the implementation details, and can
141
+ // be ignored by the users.
142
+ //
143
+ // ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
144
+ // the array) and sizeof(*(arr)) (the # of bytes in one array
145
+ // element). If the former is divisible by the latter, perhaps arr is
146
+ // indeed an array, in which case the division result is the # of
147
+ // elements in the array. Otherwise, arr cannot possibly be an array,
148
+ // and we generate a compiler error to prevent the code from
149
+ // compiling.
150
+ //
151
+ // Since the size of bool is implementation-defined, we need to cast
152
+ // !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
153
+ // result has type size_t.
154
+ //
155
+ // This macro is not perfect as it wrongfully accepts certain
156
+ // pointers, namely where the pointer size is divisible by the pointee
157
+ // size. Since all our code has to go through a 32-bit compiler,
158
+ // where a pointer is 4 bytes, this means all pointers to a type whose
159
+ // size is 3 or greater than 4 will be (righteously) rejected.
160
+
161
+ #define ARRAYSIZE_UNSAFE(a) \
162
+ ((sizeof(a) / sizeof(*(a))) / \
163
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
164
+
165
+
166
+ // Use implicit_cast as a safe version of static_cast or const_cast
167
+ // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
168
+ // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
169
+ // a const pointer to Foo).
170
+ // When you use implicit_cast, the compiler checks that the cast is safe.
171
+ // Such explicit implicit_casts are necessary in surprisingly many
172
+ // situations where C++ demands an exact type match instead of an
173
+ // argument type convertible to a target type.
174
+ //
175
+ // The From type can be inferred, so the preferred syntax for using
176
+ // implicit_cast is the same as for static_cast etc.:
177
+ //
178
+ // implicit_cast<ToType>(expr)
179
+ //
180
+ // implicit_cast would have been part of the C++ standard library,
181
+ // but the proposal was submitted too late. It will probably make
182
+ // its way into the language in the future.
183
+ template<typename To, typename From>
184
+ inline To implicit_cast(From const &f) {
185
+ return f;
186
+ }
187
+
188
+ // The COMPILE_ASSERT macro can be used to verify that a compile time
189
+ // expression is true. For example, you could use it to verify the
190
+ // size of a static array:
191
+ //
192
+ // COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
193
+ // content_type_names_incorrect_size);
194
+ //
195
+ // or to make sure a struct is smaller than a certain size:
196
+ //
197
+ // COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
198
+ //
199
+ // The second argument to the macro is the name of the variable. If
200
+ // the expression is false, most compilers will issue a warning/error
201
+ // containing the name of the variable.
202
+
203
+ template <bool>
204
+ struct CompileAssert {
205
+ };
206
+
207
+ #undef COMPILE_ASSERT
208
+ #define COMPILE_ASSERT(expr, msg) \
209
+ typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
210
+
211
+ // Implementation details of COMPILE_ASSERT:
212
+ //
213
+ // - COMPILE_ASSERT works by defining an array type that has -1
214
+ // elements (and thus is invalid) when the expression is false.
215
+ //
216
+ // - The simpler definition
217
+ //
218
+ // #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
219
+ //
220
+ // does not work, as gcc supports variable-length arrays whose sizes
221
+ // are determined at run-time (this is gcc's extension and not part
222
+ // of the C++ standard). As a result, gcc fails to reject the
223
+ // following code with the simple definition:
224
+ //
225
+ // int foo;
226
+ // COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
227
+ // // not a compile-time constant.
228
+ //
229
+ // - By using the type CompileAssert<(bool(expr))>, we ensure that
230
+ // expr is a compile-time constant. (Template arguments must be
231
+ // determined at compile-time.)
232
+ //
233
+ // - The outer parentheses in CompileAssert<(bool(expr))> are necessary
234
+ // to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
235
+ //
236
+ // CompileAssert<bool(expr)>
237
+ //
238
+ // instead, these compilers will refuse to compile
239
+ //
240
+ // COMPILE_ASSERT(5 > 0, some_message);
241
+ //
242
+ // (They seem to think the ">" in "5 > 0" marks the end of the
243
+ // template argument list.)
244
+ //
245
+ // - The array size is (bool(expr) ? 1 : -1), instead of simply
246
+ //
247
+ // ((expr) ? 1 : -1).
248
+ //
249
+ // This is to avoid running into a bug in MS VC 7.1, which
250
+ // causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
251
+
252
+
253
+ // MetatagId refers to metatag-id that we assign to
254
+ // each metatag <name, value> pair.
255
+ typedef uint32 MetatagId;
256
+
257
+ // Argument type used in interfaces that can optionally take ownership
258
+ // of a passed in argument. If TAKE_OWNERSHIP is passed, the called
259
+ // object takes ownership of the argument. Otherwise it does not.
260
+ enum Ownership {
261
+ DO_NOT_TAKE_OWNERSHIP,
262
+ TAKE_OWNERSHIP
263
+ };
264
+
265
+ // bit_cast<Dest,Source> is a template function that implements the
266
+ // equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
267
+ // very low-level functions like the protobuf library and fast math
268
+ // support.
269
+ //
270
+ // float f = 3.14159265358979;
271
+ // int i = bit_cast<int32>(f);
272
+ // // i = 0x40490fdb
273
+ //
274
+ // The classical address-casting method is:
275
+ //
276
+ // // WRONG
277
+ // float f = 3.14159265358979; // WRONG
278
+ // int i = * reinterpret_cast<int*>(&f); // WRONG
279
+ //
280
+ // The address-casting method actually produces undefined behavior
281
+ // according to ISO C++ specification section 3.10 -15 -. Roughly, this
282
+ // section says: if an object in memory has one type, and a program
283
+ // accesses it with a different type, then the result is undefined
284
+ // behavior for most values of "different type".
285
+ //
286
+ // This is true for any cast syntax, either *(int*)&f or
287
+ // *reinterpret_cast<int*>(&f). And it is particularly true for
288
+ // conversions between integral lvalues and floating-point lvalues.
289
+ //
290
+ // The purpose of 3.10 -15- is to allow optimizing compilers to assume
291
+ // that expressions with different types refer to different memory. gcc
292
+ // 4.0.1 has an optimizer that takes advantage of this. So a
293
+ // non-conforming program quietly produces wildly incorrect output.
294
+ //
295
+ // The problem is not the use of reinterpret_cast. The problem is type
296
+ // punning: holding an object in memory of one type and reading its bits
297
+ // back using a different type.
298
+ //
299
+ // The C++ standard is more subtle and complex than this, but that
300
+ // is the basic idea.
301
+ //
302
+ // Anyways ...
303
+ //
304
+ // bit_cast<> calls memcpy() which is blessed by the standard,
305
+ // especially by the example in section 3.9 . Also, of course,
306
+ // bit_cast<> wraps up the nasty logic in one place.
307
+ //
308
+ // Fortunately memcpy() is very fast. In optimized mode, with a
309
+ // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
310
+ // code with the minimal amount of data movement. On a 32-bit system,
311
+ // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
312
+ // compiles to two loads and two stores.
313
+ //
314
+ // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
315
+ //
316
+ // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
317
+ // is likely to surprise you.
318
+
319
+ template <class Dest, class Source>
320
+ inline Dest bit_cast(const Source& source) {
321
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
322
+ // A compile error here means your Dest and Source have different sizes.
323
+ typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];
324
+
325
+ Dest dest;
326
+ memcpy(&dest, &source, sizeof(dest));
327
+ return dest;
328
+ }
329
+
330
+ // The following enum should be used only as a constructor argument to indicate
331
+ // that the variable has static storage class, and that the constructor should
332
+ // do nothing to its state. It indicates to the reader that it is legal to
333
+ // declare a static instance of the class, provided the constructor is given
334
+ // the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
335
+ // static variable that has a constructor or a destructor because invocation
336
+ // order is undefined. However, IF the type can be initialized by filling with
337
+ // zeroes (which the loader does for static variables), AND the destructor also
338
+ // does nothing to the storage, AND there are no virtual methods, then a
339
+ // constructor declared as
340
+ // explicit MyClass(base::LinkerInitialized x) {}
341
+ // and invoked as
342
+ // static MyClass my_variable_name(base::LINKER_INITIALIZED);
343
+ namespace base {
344
+ enum LinkerInitialized { LINKER_INITIALIZED };
345
+ } // base
346
+
347
+
348
+ #endif // BASE_BASICTYPES_H_
@@ -0,0 +1,124 @@
1
+ // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ // This file adds defines about the platform we're currently building on.
6
+ // Operating System:
7
+ // OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX)
8
+ // Compiler:
9
+ // COMPILER_MSVC / COMPILER_GCC
10
+ // Processor:
11
+ // ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64)
12
+ // ARCH_CPU_32_BITS / ARCH_CPU_64_BITS
13
+
14
+ #ifndef BUILD_BUILD_CONFIG_H_
15
+ #define BUILD_BUILD_CONFIG_H_
16
+
17
+ // A set of macros to use for platform detection.
18
+ #if defined(__APPLE__)
19
+ #define OS_MACOSX 1
20
+ #elif defined(__linux__)
21
+ #define OS_LINUX 1
22
+ // Use TOOLKIT_GTK on linux if TOOLKIT_VIEWS isn't defined.
23
+ #if !defined(TOOLKIT_VIEWS)
24
+ #define TOOLKIT_GTK
25
+ #endif
26
+ #elif defined(_WIN32)
27
+ #define OS_WIN 1
28
+ #define TOOLKIT_VIEWS 1
29
+ #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30
+ #define OS_FREEBSD 1
31
+ #define TOOLKIT_GTK
32
+ #elif defined(__NetBSD__)
33
+ #define OS_NETBSD 1
34
+ #define TOOLKIT_GTK
35
+ #elif defined(__OpenBSD__)
36
+ #define OS_OPENBSD 1
37
+ #define TOOLKIT_GTK
38
+ #elif defined(__DragonFly__)
39
+ #define OS_DRAGONFLY 1
40
+ #define TOOLKIT_GTK
41
+ #elif defined(__sun)
42
+ #define OS_SOLARIS 1
43
+ #define TOOLKIT_GTK
44
+ #else
45
+ #error Please add support for your platform in build/build_config.h
46
+ #endif
47
+
48
+ // A flag derived from the above flags, used to cover GTK code in
49
+ // both TOOLKIT_GTK and TOOLKIT_VIEWS.
50
+ #if defined(TOOLKIT_GTK) || (defined(TOOLKIT_VIEWS) && !defined(OS_WIN))
51
+ #define TOOLKIT_USES_GTK 1
52
+ #endif
53
+
54
+ #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
55
+ #define USE_NSS 1 // Use NSS for crypto.
56
+ #define USE_X11 1 // Use X for graphics.
57
+ #endif
58
+
59
+ // For access to standard POSIXish features, use OS_POSIX instead of a
60
+ // more specific macro.
61
+ #if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_SOLARIS) || defined(OS_DRAGONFLY)
62
+ #define OS_POSIX 1
63
+ // Use base::DataPack for name/value pairs.
64
+ #define USE_BASE_DATA_PACK 1
65
+ #endif
66
+
67
+ // Use tcmalloc
68
+ #if defined(OS_WIN) && ! defined(NO_TCMALLOC)
69
+ #define USE_TCMALLOC 1
70
+ #endif
71
+
72
+ // Compiler detection.
73
+ #if defined(__GNUC__)
74
+ #define COMPILER_GCC 1
75
+ #elif defined(_MSC_VER)
76
+ #define COMPILER_MSVC 1
77
+ #else
78
+ #error Please add support for your compiler in build/build_config.h
79
+ #endif
80
+
81
+ // Processor architecture detection. For more info on what's defined, see:
82
+ // http://msdn.microsoft.com/en-us/library/b0084kay.aspx
83
+ // http://www.agner.org/optimize/calling_conventions.pdf
84
+ // or with gcc, run: "echo | gcc -E -dM -"
85
+ #if defined(_M_X64) || defined(__x86_64__)
86
+ #define ARCH_CPU_X86_FAMILY 1
87
+ #define ARCH_CPU_X86_64 1
88
+ #define ARCH_CPU_64_BITS 1
89
+ #elif defined(_M_IX86) || defined(__i386__)
90
+ #define ARCH_CPU_X86_FAMILY 1
91
+ #define ARCH_CPU_X86 1
92
+ #define ARCH_CPU_32_BITS 1
93
+ #elif defined(__ARMEL__)
94
+ #define ARCH_CPU_ARM_FAMILY 1
95
+ #define ARCH_CPU_ARMEL 1
96
+ #define ARCH_CPU_32_BITS 1
97
+ #define WCHAR_T_IS_UNSIGNED 1
98
+ #elif defined(__ARCH_PPC) || defined(__ppc__)
99
+ #define ARCH_CPU_PPC_FAMILY 1
100
+ #define ARCH_CPU_32_BITS 1
101
+ #else
102
+ #error Please add support for your architecture in build/build_config.h
103
+ #endif
104
+
105
+ // Type detection for wchar_t.
106
+ #if defined(OS_WIN)
107
+ #define WCHAR_T_IS_UTF16
108
+ #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
109
+ defined(__WCHAR_MAX__) && \
110
+ (__WCHAR_MAX__ == 0x7fffffff || __WCHAR_MAX__ == 0xffffffff)
111
+ #define WCHAR_T_IS_UTF32
112
+ #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
113
+ defined(__WCHAR_MAX__) && \
114
+ (__WCHAR_MAX__ == 0x7fff || __WCHAR_MAX__ == 0xffff)
115
+ // On Posix, we'll detect short wchar_t, but projects aren't guaranteed to
116
+ // compile in this mode (in particular, Chrome doesn't). This is intended for
117
+ // other projects using base who manage their own dependencies and make sure
118
+ // short wchar works for them.
119
+ #define WCHAR_T_IS_UTF16
120
+ #else
121
+ #error Please add support for your compiler in build/build_config.h
122
+ #endif
123
+
124
+ #endif // BUILD_BUILD_CONFIG_H_