language_detection 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +85 -0
- data/Rakefile +11 -0
- data/ext/cld/Makefile +34 -0
- data/ext/cld/base/basictypes.h +348 -0
- data/ext/cld/base/build_config.h +124 -0
- data/ext/cld/base/casts.h +156 -0
- data/ext/cld/base/commandlineflags.h +443 -0
- data/ext/cld/base/crash.h +41 -0
- data/ext/cld/base/dynamic_annotations.h +358 -0
- data/ext/cld/base/global_strip_options.h +59 -0
- data/ext/cld/base/log_severity.h +46 -0
- data/ext/cld/base/logging.h +1403 -0
- data/ext/cld/base/macros.h +243 -0
- data/ext/cld/base/port.h +54 -0
- data/ext/cld/base/scoped_ptr.h +428 -0
- data/ext/cld/base/stl_decl.h +0 -0
- data/ext/cld/base/stl_decl_msvc.h +107 -0
- data/ext/cld/base/string_util.h +29 -0
- data/ext/cld/base/strtoint.h +93 -0
- data/ext/cld/base/template_util.h +96 -0
- data/ext/cld/base/type_traits.h +198 -0
- data/ext/cld/base/vlog_is_on.h +143 -0
- data/ext/cld/cld.so +0 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
- data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
- data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
- data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
- data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
- data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
- data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
- data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
- data/ext/cld/encodings/internal/encodings.cc +12 -0
- data/ext/cld/encodings/lang_enc.h +254 -0
- data/ext/cld/encodings/proto/encodings.pb.h +169 -0
- data/ext/cld/encodings/public/encodings.h +301 -0
- data/ext/cld/extconf.rb +1 -0
- data/ext/cld/language_detection.cc +88 -0
- data/ext/cld/languages/internal/languages.cc +337 -0
- data/ext/cld/languages/proto/languages.pb.h +179 -0
- data/ext/cld/languages/public/languages.h +379 -0
- data/language_detection.gemspec +28 -0
- data/lib/language_detection/string.rb +1 -0
- data/lib/language_detection/version.rb +3 -0
- data/lib/language_detection.rb +54 -0
- data/test/_helper.rb +15 -0
- data/test/fixtures/languages.csv +80 -0
- data/test/language_detection_test.rb +88 -0
- metadata +250 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Vojtech Hyza
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
# LanguageDetection
|
2
|
+
|
3
|
+
Ruby bindings for the Chromium Compact Language Detector ([source](http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/)). This gem uses source code from the [chromium-compact-language-detector](http://code.google.com/p/chromium-compact-language-detector/) port.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'language_detection'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install language_detection
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
>> require 'language_detection'
|
23
|
+
=> true
|
24
|
+
>> language = LanguageDetection.perform("This is some example text for language detection")
|
25
|
+
=> {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>51, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]}
|
26
|
+
>> language.name
|
27
|
+
=> "ENGLISH"
|
28
|
+
>> language.code
|
29
|
+
=> "en"
|
30
|
+
>> language.reliable
|
31
|
+
=> true
|
32
|
+
>> language.details # contains up to 3 languages sorted by score
|
33
|
+
=> [{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]
|
34
|
+
>> language.details.first.percent
|
35
|
+
=> 100
|
36
|
+
>> language.details.first.score
|
37
|
+
=> 49.43273905996759
|
38
|
+
```
|
39
|
+
|
40
|
+
Another way is to include the `LanguageDetection` module in your class:
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
class Article
|
44
|
+
include LanguageDetection
|
45
|
+
|
46
|
+
attr_accessor :title, :content
|
47
|
+
|
48
|
+
def initialize(params = {})
|
49
|
+
@title = params[:title]
|
50
|
+
@content = params[:content]
|
51
|
+
end
|
52
|
+
|
53
|
+
def to_s
|
54
|
+
"#{title}\n#{content}"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
```
|
58
|
+
|
59
|
+
This provides an `Article#language` method, which uses the result of `Article#to_s` as the text to analyze:
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
>> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
|
63
|
+
>> article.language
|
64
|
+
=> {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>93, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>80.22690437601297}]}
|
65
|
+
```
|
66
|
+
|
67
|
+
Or you can add a `String#language` method with `require 'language_detection/string'`:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
>> require 'language_detection'
|
71
|
+
=> true
|
72
|
+
>> require 'language_detection/string'
|
73
|
+
=> true
|
74
|
+
>> "Web development that doesn't hurt".language
|
75
|
+
=> {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>36, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>39.70826580226905}]}
|
76
|
+
```
|
77
|
+
|
78
|
+
|
79
|
+
## Contributing
|
80
|
+
|
81
|
+
1. Fork it
|
82
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
83
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
84
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
85
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/ext/cld/Makefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# TODO: Generate Makefile
|
2
|
+
|
3
|
+
CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
|
4
|
+
LDFLAGS=-L.
|
5
|
+
CC=g++
|
6
|
+
AR=ar
|
7
|
+
|
8
|
+
SOURCES = encodings/compact_lang_det/cldutil.cc \
|
9
|
+
encodings/compact_lang_det/cldutil_dbg_empty.cc \
|
10
|
+
encodings/compact_lang_det/compact_lang_det.cc \
|
11
|
+
encodings/compact_lang_det/compact_lang_det_impl.cc \
|
12
|
+
encodings/compact_lang_det/ext_lang_enc.cc \
|
13
|
+
encodings/compact_lang_det/getonescriptspan.cc \
|
14
|
+
encodings/compact_lang_det/letterscript_enum.cc \
|
15
|
+
encodings/compact_lang_det/tote.cc \
|
16
|
+
encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc \
|
17
|
+
encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc \
|
18
|
+
encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc \
|
19
|
+
encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc \
|
20
|
+
encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc \
|
21
|
+
encodings/compact_lang_det/win/cld_htmlutils_windows.cc \
|
22
|
+
encodings/compact_lang_det/win/cld_unilib_windows.cc \
|
23
|
+
encodings/compact_lang_det/win/cld_utf8statetable.cc \
|
24
|
+
encodings/compact_lang_det/win/cld_utf8utils_windows.cc \
|
25
|
+
encodings/internal/encodings.cc \
|
26
|
+
languages/internal/languages.cc \
|
27
|
+
language_detection.cc
|
28
|
+
|
29
|
+
install:
|
30
|
+
rm -f *.o
|
31
|
+
rm -f libcld.a
|
32
|
+
$(CC) -c $(CFLAGS) $(SOURCES)
|
33
|
+
$(AR) rcs libcld.a *.o
|
34
|
+
$(CC) -DCLD_WINDOWS -I. -L. -shared -o cld.so -lstdc++ *.o
|
@@ -0,0 +1,348 @@
|
|
1
|
+
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file.
|
4
|
+
|
5
|
+
#ifndef BASE_BASICTYPES_H_
|
6
|
+
#define BASE_BASICTYPES_H_
|
7
|
+
|
8
|
+
#include <limits.h> // So we can set the bounds of our types
|
9
|
+
#include <stddef.h> // For size_t
|
10
|
+
#include <string.h> // for memcpy
|
11
|
+
|
12
|
+
#include "base/port.h" // Types that only need exist on certain systems
|
13
|
+
|
14
|
+
#ifndef COMPILER_MSVC
|
15
|
+
// stdint.h is part of C99 but MSVC doesn't have it.
|
16
|
+
#include <stdint.h> // For intptr_t.
|
17
|
+
#endif
|
18
|
+
|
19
|
+
typedef signed char schar;
|
20
|
+
typedef signed char int8;
|
21
|
+
typedef short int16;
|
22
|
+
// TODO(mbelshe) Remove these type guards. These are
|
23
|
+
// temporary to avoid conflicts with npapi.h.
|
24
|
+
#ifndef _INT32
|
25
|
+
#define _INT32
|
26
|
+
typedef int int32;
|
27
|
+
#endif
|
28
|
+
|
29
|
+
// The NSPR system headers define 64-bit as |long| when possible. In order to
|
30
|
+
// not have typedef mismatches, we do the same on LP64.
|
31
|
+
#if __LP64__
|
32
|
+
typedef long int64;
|
33
|
+
#else
|
34
|
+
typedef long long int64;
|
35
|
+
#endif
|
36
|
+
|
37
|
+
// NOTE: unsigned types are DANGEROUS in loops and other arithmetical
|
38
|
+
// places. Use the signed types unless your variable represents a bit
|
39
|
+
// pattern (eg a hash value) or you really need the extra bit. Do NOT
|
40
|
+
// use 'unsigned' to express "this value should always be positive";
|
41
|
+
// use assertions for this.
|
42
|
+
|
43
|
+
typedef unsigned char uint8;
|
44
|
+
typedef unsigned short uint16;
|
45
|
+
// TODO(mbelshe) Remove these type guards. These are
|
46
|
+
// temporary to avoid conflicts with npapi.h.
|
47
|
+
#ifndef _UINT32
|
48
|
+
#define _UINT32
|
49
|
+
typedef unsigned int uint32;
|
50
|
+
#endif
|
51
|
+
|
52
|
+
// See the comment above about NSPR and 64-bit.
|
53
|
+
#if __LP64__
|
54
|
+
typedef unsigned long uint64;
|
55
|
+
#else
|
56
|
+
typedef unsigned long long uint64;
|
57
|
+
#endif
|
58
|
+
|
59
|
+
// A type to represent a Unicode code-point value. As of Unicode 4.0,
|
60
|
+
// such values require up to 21 bits.
|
61
|
+
// (For type-checking on pointers, make this explicitly signed,
|
62
|
+
// and it should always be the signed version of whatever int32 is.)
|
63
|
+
typedef signed int char32;
|
64
|
+
|
65
|
+
const uint8 kuint8max = (( uint8) 0xFF);
|
66
|
+
const uint16 kuint16max = ((uint16) 0xFFFF);
|
67
|
+
const uint32 kuint32max = ((uint32) 0xFFFFFFFF);
|
68
|
+
const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF));
|
69
|
+
const int8 kint8min = (( int8) 0x80);
|
70
|
+
const int8 kint8max = (( int8) 0x7F);
|
71
|
+
const int16 kint16min = (( int16) 0x8000);
|
72
|
+
const int16 kint16max = (( int16) 0x7FFF);
|
73
|
+
const int32 kint32min = (( int32) 0x80000000);
|
74
|
+
const int32 kint32max = (( int32) 0x7FFFFFFF);
|
75
|
+
const int64 kint64min = (( int64) GG_LONGLONG(0x8000000000000000));
|
76
|
+
const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF));
|
77
|
+
|
78
|
+
// A macro to disallow the copy constructor and operator= functions
|
79
|
+
// This should be used in the private: declarations for a class
|
80
|
+
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
81
|
+
TypeName(const TypeName&); \
|
82
|
+
void operator=(const TypeName&)
|
83
|
+
|
84
|
+
// An older, deprecated, politically incorrect name for the above.
|
85
|
+
#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
|
86
|
+
|
87
|
+
// A macro to disallow all the implicit constructors, namely the
|
88
|
+
// default constructor, copy constructor and operator= functions.
|
89
|
+
//
|
90
|
+
// This should be used in the private: declarations for a class
|
91
|
+
// that wants to prevent anyone from instantiating it. This is
|
92
|
+
// especially useful for classes containing only static methods.
|
93
|
+
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
|
94
|
+
TypeName(); \
|
95
|
+
DISALLOW_COPY_AND_ASSIGN(TypeName)
|
96
|
+
|
97
|
+
// The arraysize(arr) macro returns the # of elements in an array arr.
|
98
|
+
// The expression is a compile-time constant, and therefore can be
|
99
|
+
// used in defining new arrays, for example. If you use arraysize on
|
100
|
+
// a pointer by mistake, you will get a compile-time error.
|
101
|
+
//
|
102
|
+
// One caveat is that arraysize() doesn't accept any array of an
|
103
|
+
// anonymous type or a type defined inside a function. In these rare
|
104
|
+
// cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
|
105
|
+
// due to a limitation in C++'s template system. The limitation might
|
106
|
+
// eventually be removed, but it hasn't happened yet.
|
107
|
+
|
108
|
+
// This template function declaration is used in defining arraysize.
|
109
|
+
// Note that the function doesn't need an implementation, as we only
|
110
|
+
// use its type.
|
111
|
+
template <typename T, size_t N>
|
112
|
+
char (&ArraySizeHelper(T (&array)[N]))[N];
|
113
|
+
|
114
|
+
// That gcc wants both of these prototypes seems mysterious. VC, for
|
115
|
+
// its part, can't decide which to use (another mystery). Matching of
|
116
|
+
// template overloads: the final frontier.
|
117
|
+
#ifndef _MSC_VER
|
118
|
+
template <typename T, size_t N>
|
119
|
+
char (&ArraySizeHelper(const T (&array)[N]))[N];
|
120
|
+
#endif
|
121
|
+
|
122
|
+
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
|
123
|
+
|
124
|
+
// ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
|
125
|
+
// but can be used on anonymous types or types defined inside
|
126
|
+
// functions. It's less safe than arraysize as it accepts some
|
127
|
+
// (although not all) pointers. Therefore, you should use arraysize
|
128
|
+
// whenever possible.
|
129
|
+
//
|
130
|
+
// The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
|
131
|
+
// size_t.
|
132
|
+
//
|
133
|
+
// ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error
|
134
|
+
//
|
135
|
+
// "warning: division by zero in ..."
|
136
|
+
//
|
137
|
+
// when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
|
138
|
+
// You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
|
139
|
+
//
|
140
|
+
// The following comments are on the implementation details, and can
|
141
|
+
// be ignored by the users.
|
142
|
+
//
|
143
|
+
// ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
|
144
|
+
// the array) and sizeof(*(arr)) (the # of bytes in one array
|
145
|
+
// element). If the former is divisible by the latter, perhaps arr is
|
146
|
+
// indeed an array, in which case the division result is the # of
|
147
|
+
// elements in the array. Otherwise, arr cannot possibly be an array,
|
148
|
+
// and we generate a compiler error to prevent the code from
|
149
|
+
// compiling.
|
150
|
+
//
|
151
|
+
// Since the size of bool is implementation-defined, we need to cast
|
152
|
+
// !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
|
153
|
+
// result has type size_t.
|
154
|
+
//
|
155
|
+
// This macro is not perfect as it wrongfully accepts certain
|
156
|
+
// pointers, namely where the pointer size is divisible by the pointee
|
157
|
+
// size. Since all our code has to go through a 32-bit compiler,
|
158
|
+
// where a pointer is 4 bytes, this means all pointers to a type whose
|
159
|
+
// size is 3 or greater than 4 will be (righteously) rejected.
|
160
|
+
|
161
|
+
#define ARRAYSIZE_UNSAFE(a) \
|
162
|
+
((sizeof(a) / sizeof(*(a))) / \
|
163
|
+
static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
|
164
|
+
|
165
|
+
|
166
|
+
// Use implicit_cast as a safe version of static_cast or const_cast
|
167
|
+
// for upcasting in the type hierarchy (i.e. casting a pointer to Foo
|
168
|
+
// to a pointer to SuperclassOfFoo or casting a pointer to Foo to
|
169
|
+
// a const pointer to Foo).
|
170
|
+
// When you use implicit_cast, the compiler checks that the cast is safe.
|
171
|
+
// Such explicit implicit_casts are necessary in surprisingly many
|
172
|
+
// situations where C++ demands an exact type match instead of an
|
173
|
+
// argument type convertible to a target type.
|
174
|
+
//
|
175
|
+
// The From type can be inferred, so the preferred syntax for using
|
176
|
+
// implicit_cast is the same as for static_cast etc.:
|
177
|
+
//
|
178
|
+
// implicit_cast<ToType>(expr)
|
179
|
+
//
|
180
|
+
// implicit_cast would have been part of the C++ standard library,
|
181
|
+
// but the proposal was submitted too late. It will probably make
|
182
|
+
// its way into the language in the future.
|
183
|
+
template<typename To, typename From>
|
184
|
+
inline To implicit_cast(From const &f) {
|
185
|
+
return f;
|
186
|
+
}
|
187
|
+
|
188
|
+
// The COMPILE_ASSERT macro can be used to verify that a compile time
|
189
|
+
// expression is true. For example, you could use it to verify the
|
190
|
+
// size of a static array:
|
191
|
+
//
|
192
|
+
// COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
|
193
|
+
// content_type_names_incorrect_size);
|
194
|
+
//
|
195
|
+
// or to make sure a struct is smaller than a certain size:
|
196
|
+
//
|
197
|
+
// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
|
198
|
+
//
|
199
|
+
// The second argument to the macro is the name of the variable. If
|
200
|
+
// the expression is false, most compilers will issue a warning/error
|
201
|
+
// containing the name of the variable.
|
202
|
+
|
203
|
+
template <bool>
|
204
|
+
struct CompileAssert {
|
205
|
+
};
|
206
|
+
|
207
|
+
#undef COMPILE_ASSERT
|
208
|
+
#define COMPILE_ASSERT(expr, msg) \
|
209
|
+
typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
|
210
|
+
|
211
|
+
// Implementation details of COMPILE_ASSERT:
|
212
|
+
//
|
213
|
+
// - COMPILE_ASSERT works by defining an array type that has -1
|
214
|
+
// elements (and thus is invalid) when the expression is false.
|
215
|
+
//
|
216
|
+
// - The simpler definition
|
217
|
+
//
|
218
|
+
// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
|
219
|
+
//
|
220
|
+
// does not work, as gcc supports variable-length arrays whose sizes
|
221
|
+
// are determined at run-time (this is gcc's extension and not part
|
222
|
+
// of the C++ standard). As a result, gcc fails to reject the
|
223
|
+
// following code with the simple definition:
|
224
|
+
//
|
225
|
+
// int foo;
|
226
|
+
// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
|
227
|
+
// // not a compile-time constant.
|
228
|
+
//
|
229
|
+
// - By using the type CompileAssert<(bool(expr))>, we ensure that
|
230
|
+
// expr is a compile-time constant. (Template arguments must be
|
231
|
+
// determined at compile-time.)
|
232
|
+
//
|
233
|
+
// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
|
234
|
+
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
|
235
|
+
//
|
236
|
+
// CompileAssert<bool(expr)>
|
237
|
+
//
|
238
|
+
// instead, these compilers will refuse to compile
|
239
|
+
//
|
240
|
+
// COMPILE_ASSERT(5 > 0, some_message);
|
241
|
+
//
|
242
|
+
// (They seem to think the ">" in "5 > 0" marks the end of the
|
243
|
+
// template argument list.)
|
244
|
+
//
|
245
|
+
// - The array size is (bool(expr) ? 1 : -1), instead of simply
|
246
|
+
//
|
247
|
+
// ((expr) ? 1 : -1).
|
248
|
+
//
|
249
|
+
// This is to avoid running into a bug in MS VC 7.1, which
|
250
|
+
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
|
251
|
+
|
252
|
+
|
253
|
+
// MetatagId refers to metatag-id that we assign to
|
254
|
+
// each metatag <name, value> pair.
|
255
|
+
typedef uint32 MetatagId;
|
256
|
+
|
257
|
+
// Argument type used in interfaces that can optionally take ownership
|
258
|
+
// of a passed in argument. If TAKE_OWNERSHIP is passed, the called
|
259
|
+
// object takes ownership of the argument. Otherwise it does not.
|
260
|
+
enum Ownership {
|
261
|
+
DO_NOT_TAKE_OWNERSHIP,
|
262
|
+
TAKE_OWNERSHIP
|
263
|
+
};
|
264
|
+
|
265
|
+
// bit_cast<Dest,Source> is a template function that implements the
|
266
|
+
// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
|
267
|
+
// very low-level functions like the protobuf library and fast math
|
268
|
+
// support.
|
269
|
+
//
|
270
|
+
// float f = 3.14159265358979;
|
271
|
+
// int i = bit_cast<int32>(f);
|
272
|
+
// // i = 0x40490fdb
|
273
|
+
//
|
274
|
+
// The classical address-casting method is:
|
275
|
+
//
|
276
|
+
// // WRONG
|
277
|
+
// float f = 3.14159265358979; // WRONG
|
278
|
+
// int i = * reinterpret_cast<int*>(&f); // WRONG
|
279
|
+
//
|
280
|
+
// The address-casting method actually produces undefined behavior
|
281
|
+
// according to ISO C++ specification section 3.10 -15-. Roughly, this
|
282
|
+
// section says: if an object in memory has one type, and a program
|
283
|
+
// accesses it with a different type, then the result is undefined
|
284
|
+
// behavior for most values of "different type".
|
285
|
+
//
|
286
|
+
// This is true for any cast syntax, either *(int*)&f or
|
287
|
+
// *reinterpret_cast<int*>(&f). And it is particularly true for
|
288
|
+
// conversions between integral lvalues and floating-point lvalues.
|
289
|
+
//
|
290
|
+
// The purpose of 3.10 -15- is to allow optimizing compilers to assume
|
291
|
+
// that expressions with different types refer to different memory. gcc
|
292
|
+
// 4.0.1 has an optimizer that takes advantage of this. So a
|
293
|
+
// non-conforming program quietly produces wildly incorrect output.
|
294
|
+
//
|
295
|
+
// The problem is not the use of reinterpret_cast. The problem is type
|
296
|
+
// punning: holding an object in memory of one type and reading its bits
|
297
|
+
// back using a different type.
|
298
|
+
//
|
299
|
+
// The C++ standard is more subtle and complex than this, but that
|
300
|
+
// is the basic idea.
|
301
|
+
//
|
302
|
+
// Anyways ...
|
303
|
+
//
|
304
|
+
// bit_cast<> calls memcpy() which is blessed by the standard,
|
305
|
+
// especially by the example in section 3.9 . Also, of course,
|
306
|
+
// bit_cast<> wraps up the nasty logic in one place.
|
307
|
+
//
|
308
|
+
// Fortunately memcpy() is very fast. In optimized mode, with a
|
309
|
+
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
|
310
|
+
// code with the minimal amount of data movement. On a 32-bit system,
|
311
|
+
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
|
312
|
+
// compiles to two loads and two stores.
|
313
|
+
//
|
314
|
+
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
|
315
|
+
//
|
316
|
+
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
|
317
|
+
// is likely to surprise you.
|
318
|
+
|
319
|
+
template <class Dest, class Source>
|
320
|
+
inline Dest bit_cast(const Source& source) {
|
321
|
+
// Compile time assertion: sizeof(Dest) == sizeof(Source)
|
322
|
+
// A compile error here means your Dest and Source have different sizes.
|
323
|
+
typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];
|
324
|
+
|
325
|
+
Dest dest;
|
326
|
+
memcpy(&dest, &source, sizeof(dest));
|
327
|
+
return dest;
|
328
|
+
}
|
329
|
+
|
330
|
+
// The following enum should be used only as a constructor argument to indicate
|
331
|
+
// that the variable has static storage class, and that the constructor should
|
332
|
+
// do nothing to its state. It indicates to the reader that it is legal to
|
333
|
+
// declare a static instance of the class, provided the constructor is given
|
334
|
+
// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
|
335
|
+
// static variable that has a constructor or a destructor because invocation
|
336
|
+
// order is undefined. However, IF the type can be initialized by filling with
|
337
|
+
// zeroes (which the loader does for static variables), AND the destructor also
|
338
|
+
// does nothing to the storage, AND there are no virtual methods, then a
|
339
|
+
// constructor declared as
|
340
|
+
// explicit MyClass(base::LinkerInitialized x) {}
|
341
|
+
// and invoked as
|
342
|
+
// static MyClass my_variable_name(base::LINKER_INITIALIZED);
|
343
|
+
namespace base {
|
344
|
+
enum LinkerInitialized { LINKER_INITIALIZED };
|
345
|
+
} // base
|
346
|
+
|
347
|
+
|
348
|
+
#endif // BASE_BASICTYPES_H_
|
@@ -0,0 +1,124 @@
|
|
1
|
+
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file.
|
4
|
+
|
5
|
+
// This file adds defines about the platform we're currently building on.
|
6
|
+
// Operating System:
|
7
|
+
// OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX, LINUX, the BSDs or SOLARIS)
|
8
|
+
// Compiler:
|
9
|
+
// COMPILER_MSVC / COMPILER_GCC
|
10
|
+
// Processor:
|
11
|
+
// ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64)
|
12
|
+
// ARCH_CPU_32_BITS / ARCH_CPU_64_BITS
|
13
|
+
|
14
|
+
#ifndef BUILD_BUILD_CONFIG_H_
|
15
|
+
#define BUILD_BUILD_CONFIG_H_
|
16
|
+
|
17
|
+
// A set of macros to use for platform detection.
|
18
|
+
#if defined(__APPLE__)
|
19
|
+
#define OS_MACOSX 1
|
20
|
+
#elif defined(__linux__)
|
21
|
+
#define OS_LINUX 1
|
22
|
+
// Use TOOLKIT_GTK on linux if TOOLKIT_VIEWS isn't defined.
|
23
|
+
#if !defined(TOOLKIT_VIEWS)
|
24
|
+
#define TOOLKIT_GTK
|
25
|
+
#endif
|
26
|
+
#elif defined(_WIN32)
|
27
|
+
#define OS_WIN 1
|
28
|
+
#define TOOLKIT_VIEWS 1
|
29
|
+
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
|
30
|
+
#define OS_FREEBSD 1
|
31
|
+
#define TOOLKIT_GTK
|
32
|
+
#elif defined(__NetBSD__)
|
33
|
+
#define OS_NETBSD 1
|
34
|
+
#define TOOLKIT_GTK
|
35
|
+
#elif defined(__OpenBSD__)
|
36
|
+
#define OS_OPENBSD 1
|
37
|
+
#define TOOLKIT_GTK
|
38
|
+
#elif defined(__DragonFly__)
|
39
|
+
#define OS_DRAGONFLY 1
|
40
|
+
#define TOOLKIT_GTK
|
41
|
+
#elif defined(__sun)
|
42
|
+
#define OS_SOLARIS 1
|
43
|
+
#define TOOLKIT_GTK
|
44
|
+
#else
|
45
|
+
#error Please add support for your platform in build/build_config.h
|
46
|
+
#endif
|
47
|
+
|
48
|
+
// A flag derived from the above flags, used to cover GTK code in
|
49
|
+
// both TOOLKIT_GTK and TOOLKIT_VIEWS.
|
50
|
+
#if defined(TOOLKIT_GTK) || (defined(TOOLKIT_VIEWS) && !defined(OS_WIN))
|
51
|
+
#define TOOLKIT_USES_GTK 1
|
52
|
+
#endif
|
53
|
+
|
54
|
+
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
|
55
|
+
#define USE_NSS 1 // Use NSS for crypto.
|
56
|
+
#define USE_X11 1 // Use X for graphics.
|
57
|
+
#endif
|
58
|
+
|
59
|
+
// For access to standard POSIXish features, use OS_POSIX instead of a
|
60
|
+
// more specific macro.
|
61
|
+
#if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_SOLARIS) || defined(OS_DRAGONFLY)
|
62
|
+
#define OS_POSIX 1
|
63
|
+
// Use base::DataPack for name/value pairs.
|
64
|
+
#define USE_BASE_DATA_PACK 1
|
65
|
+
#endif
|
66
|
+
|
67
|
+
// Use tcmalloc
|
68
|
+
#if defined(OS_WIN) && ! defined(NO_TCMALLOC)
|
69
|
+
#define USE_TCMALLOC 1
|
70
|
+
#endif
|
71
|
+
|
72
|
+
// Compiler detection.
|
73
|
+
#if defined(__GNUC__)
|
74
|
+
#define COMPILER_GCC 1
|
75
|
+
#elif defined(_MSC_VER)
|
76
|
+
#define COMPILER_MSVC 1
|
77
|
+
#else
|
78
|
+
#error Please add support for your compiler in build/build_config.h
|
79
|
+
#endif
|
80
|
+
|
81
|
+
// Processor architecture detection. For more info on what's defined, see:
|
82
|
+
// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
|
83
|
+
// http://www.agner.org/optimize/calling_conventions.pdf
|
84
|
+
// or with gcc, run: "echo | gcc -E -dM -"
|
85
|
+
#if defined(_M_X64) || defined(__x86_64__)
|
86
|
+
#define ARCH_CPU_X86_FAMILY 1
|
87
|
+
#define ARCH_CPU_X86_64 1
|
88
|
+
#define ARCH_CPU_64_BITS 1
|
89
|
+
#elif defined(_M_IX86) || defined(__i386__)
|
90
|
+
#define ARCH_CPU_X86_FAMILY 1
|
91
|
+
#define ARCH_CPU_X86 1
|
92
|
+
#define ARCH_CPU_32_BITS 1
|
93
|
+
#elif defined(__ARMEL__)
|
94
|
+
#define ARCH_CPU_ARM_FAMILY 1
|
95
|
+
#define ARCH_CPU_ARMEL 1
|
96
|
+
#define ARCH_CPU_32_BITS 1
|
97
|
+
#define WCHAR_T_IS_UNSIGNED 1
|
98
|
+
#elif defined(__ARCH_PPC) || defined(__ppc__)
|
99
|
+
#define ARCH_CPU_PPC_FAMILY 1
|
100
|
+
#define ARCH_CPU_32_BITS 1
|
101
|
+
#else
|
102
|
+
#error Please add support for your architecture in build/build_config.h
|
103
|
+
#endif
|
104
|
+
|
105
|
+
// Type detection for wchar_t.
|
106
|
+
#if defined(OS_WIN)
|
107
|
+
#define WCHAR_T_IS_UTF16
|
108
|
+
#elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
|
109
|
+
defined(__WCHAR_MAX__) && \
|
110
|
+
(__WCHAR_MAX__ == 0x7fffffff || __WCHAR_MAX__ == 0xffffffff)
|
111
|
+
#define WCHAR_T_IS_UTF32
|
112
|
+
#elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
|
113
|
+
defined(__WCHAR_MAX__) && \
|
114
|
+
(__WCHAR_MAX__ == 0x7fff || __WCHAR_MAX__ == 0xffff)
|
115
|
+
// On Posix, we'll detect short wchar_t, but projects aren't guaranteed to
|
116
|
+
// compile in this mode (in particular, Chrome doesn't). This is intended for
|
117
|
+
// other projects using base who manage their own dependencies and make sure
|
118
|
+
// short wchar works for them.
|
119
|
+
#define WCHAR_T_IS_UTF16
|
120
|
+
#else
|
121
|
+
#error Please add support for your compiler in build/build_config.h
|
122
|
+
#endif
|
123
|
+
|
124
|
+
#endif // BUILD_BUILD_CONFIG_H_
|