cld 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. data/LICENSE +27 -0
  2. data/Manifest +106 -0
  3. data/README.rdoc +173 -0
  4. data/Rakefile +15 -0
  5. data/base/basictypes.h +348 -0
  6. data/base/build_config.h +115 -0
  7. data/base/casts.h +156 -0
  8. data/base/commandlineflags.h +443 -0
  9. data/base/crash.h +41 -0
  10. data/base/dynamic_annotations.h +358 -0
  11. data/base/global_strip_options.h +59 -0
  12. data/base/log_severity.h +46 -0
  13. data/base/logging.h +1403 -0
  14. data/base/macros.h +243 -0
  15. data/base/port.h +54 -0
  16. data/base/scoped_ptr.h +428 -0
  17. data/base/stl_decl.h +0 -0
  18. data/base/stl_decl_msvc.h +107 -0
  19. data/base/string_util.h +29 -0
  20. data/base/strtoint.h +93 -0
  21. data/base/template_util.h +96 -0
  22. data/base/type_traits.h +198 -0
  23. data/base/vlog_is_on.h +143 -0
  24. data/build.sh +48 -0
  25. data/build.win.cmd +28 -0
  26. data/cld.gemspec +30 -0
  27. data/cld_encodings.h +95 -0
  28. data/encodings/compact_lang_det/#cldutil.cc# +905 -0
  29. data/encodings/compact_lang_det/#cldutil.h# +1205 -0
  30. data/encodings/compact_lang_det/#compact_lang_det_impl.h# +171 -0
  31. data/encodings/compact_lang_det/#ext_lang_enc.cc# +545 -0
  32. data/encodings/compact_lang_det/#ext_lang_enc.h# +119 -0
  33. data/encodings/compact_lang_det/#getonescriptspan.cc# +570 -0
  34. data/encodings/compact_lang_det/#getonescriptspan.h# +131 -0
  35. data/encodings/compact_lang_det/#tote.cc# +299 -0
  36. data/encodings/compact_lang_det/#tote.h# +89 -0
  37. data/encodings/compact_lang_det/cldutil.cc +905 -0
  38. data/encodings/compact_lang_det/cldutil.h +1205 -0
  39. data/encodings/compact_lang_det/cldutil_dbg.h +76 -0
  40. data/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
  41. data/encodings/compact_lang_det/compact_lang_det.cc +62 -0
  42. data/encodings/compact_lang_det/compact_lang_det.h +145 -0
  43. data/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
  44. data/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
  45. data/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
  46. data/encodings/compact_lang_det/compile.cmd +1 -0
  47. data/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
  48. data/encodings/compact_lang_det/ext_lang_enc.h +119 -0
  49. data/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
  50. data/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
  51. data/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
  52. data/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
  53. data/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
  54. data/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
  55. data/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
  56. data/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
  57. data/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
  58. data/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
  59. data/encodings/compact_lang_det/getonescriptspan.cc +570 -0
  60. data/encodings/compact_lang_det/getonescriptspan.h +131 -0
  61. data/encodings/compact_lang_det/letterscript_enum.cc +117 -0
  62. data/encodings/compact_lang_det/letterscript_enum.h +99 -0
  63. data/encodings/compact_lang_det/subsetsequence.cc +259 -0
  64. data/encodings/compact_lang_det/subsetsequence.h +44 -0
  65. data/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
  66. data/encodings/compact_lang_det/tote.cc +299 -0
  67. data/encodings/compact_lang_det/tote.h +89 -0
  68. data/encodings/compact_lang_det/unittest_data.h +193 -0
  69. data/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
  70. data/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
  71. data/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
  72. data/encodings/compact_lang_det/win/#cld_unilib_windows.cc# +29 -0
  73. data/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
  74. data/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
  75. data/encodings/compact_lang_det/win/cld_google.h +18 -0
  76. data/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
  77. data/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
  78. data/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
  79. data/encodings/compact_lang_det/win/cld_logging.h +21 -0
  80. data/encodings/compact_lang_det/win/cld_macros.h +19 -0
  81. data/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
  82. data/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
  83. data/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
  84. data/encodings/compact_lang_det/win/cld_unilib.h +15 -0
  85. data/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
  86. data/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
  87. data/encodings/compact_lang_det/win/cld_utf.h +24 -0
  88. data/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
  89. data/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
  90. data/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
  91. data/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
  92. data/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
  93. data/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
  94. data/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
  95. data/encodings/internal/encodings.cc +12 -0
  96. data/encodings/lang_enc.h +254 -0
  97. data/encodings/proto/encodings.pb.h +169 -0
  98. data/encodings/public/encodings.h +301 -0
  99. data/ext/cld/extconf.rb +7 -0
  100. data/languages/internal/#languages.cc# +337 -0
  101. data/languages/internal/languages.cc +337 -0
  102. data/languages/proto/languages.pb.h +179 -0
  103. data/languages/public/languages.h +379 -0
  104. data/lib/cld.rb +12 -0
  105. data/test/test.rb +570 -0
  106. data/thunk.cc +131 -0
  107. metadata +168 -0
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,106 @@
1
+ LICENSE
2
+ README.rdoc
3
+ Rakefile
4
+ base/basictypes.h
5
+ base/build_config.h
6
+ base/casts.h
7
+ base/commandlineflags.h
8
+ base/crash.h
9
+ base/dynamic_annotations.h
10
+ base/global_strip_options.h
11
+ base/log_severity.h
12
+ base/logging.h
13
+ base/macros.h
14
+ base/port.h
15
+ base/scoped_ptr.h
16
+ base/stl_decl.h
17
+ base/stl_decl_msvc.h
18
+ base/string_util.h
19
+ base/strtoint.h
20
+ base/template_util.h
21
+ base/type_traits.h
22
+ base/vlog_is_on.h
23
+ build.sh
24
+ build.win.cmd
25
+ cld.gemspec
26
+ cld_encodings.h
27
+ encodings/compact_lang_det/#cldutil.cc#
28
+ encodings/compact_lang_det/#cldutil.h#
29
+ encodings/compact_lang_det/#compact_lang_det_impl.h#
30
+ encodings/compact_lang_det/#ext_lang_enc.cc#
31
+ encodings/compact_lang_det/#ext_lang_enc.h#
32
+ encodings/compact_lang_det/#getonescriptspan.cc#
33
+ encodings/compact_lang_det/#getonescriptspan.h#
34
+ encodings/compact_lang_det/#tote.cc#
35
+ encodings/compact_lang_det/#tote.h#
36
+ encodings/compact_lang_det/cldutil.cc
37
+ encodings/compact_lang_det/cldutil.h
38
+ encodings/compact_lang_det/cldutil_dbg.h
39
+ encodings/compact_lang_det/cldutil_dbg_empty.cc
40
+ encodings/compact_lang_det/compact_lang_det.cc
41
+ encodings/compact_lang_det/compact_lang_det.h
42
+ encodings/compact_lang_det/compact_lang_det_impl.cc
43
+ encodings/compact_lang_det/compact_lang_det_impl.h
44
+ encodings/compact_lang_det/compact_lang_det_unittest_small.cc
45
+ encodings/compact_lang_det/compile.cmd
46
+ encodings/compact_lang_det/ext_lang_enc.cc
47
+ encodings/compact_lang_det/ext_lang_enc.h
48
+ encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc
49
+ encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc
50
+ encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc
51
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc
52
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc
53
+ encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc
54
+ encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc
55
+ encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h
56
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc
57
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc
58
+ encodings/compact_lang_det/getonescriptspan.cc
59
+ encodings/compact_lang_det/getonescriptspan.h
60
+ encodings/compact_lang_det/letterscript_enum.cc
61
+ encodings/compact_lang_det/letterscript_enum.h
62
+ encodings/compact_lang_det/subsetsequence.cc
63
+ encodings/compact_lang_det/subsetsequence.h
64
+ encodings/compact_lang_det/subsetsequence_unittest.cc
65
+ encodings/compact_lang_det/tote.cc
66
+ encodings/compact_lang_det/tote.h
67
+ encodings/compact_lang_det/unittest_data.h
68
+ encodings/compact_lang_det/utf8propjustletter.h
69
+ encodings/compact_lang_det/utf8propletterscriptnum.h
70
+ encodings/compact_lang_det/utf8scannotjustletterspecial.h
71
+ encodings/compact_lang_det/win/#cld_unilib_windows.cc#
72
+ encodings/compact_lang_det/win/cld_basictypes.h
73
+ encodings/compact_lang_det/win/cld_commandlineflags.h
74
+ encodings/compact_lang_det/win/cld_google.h
75
+ encodings/compact_lang_det/win/cld_htmlutils.h
76
+ encodings/compact_lang_det/win/cld_htmlutils_google3.cc
77
+ encodings/compact_lang_det/win/cld_htmlutils_windows.cc
78
+ encodings/compact_lang_det/win/cld_logging.h
79
+ encodings/compact_lang_det/win/cld_macros.h
80
+ encodings/compact_lang_det/win/cld_strtoint.h
81
+ encodings/compact_lang_det/win/cld_unicodetext.cc
82
+ encodings/compact_lang_det/win/cld_unicodetext.h
83
+ encodings/compact_lang_det/win/cld_unilib.h
84
+ encodings/compact_lang_det/win/cld_unilib_google3.cc
85
+ encodings/compact_lang_det/win/cld_unilib_windows.cc
86
+ encodings/compact_lang_det/win/cld_utf.h
87
+ encodings/compact_lang_det/win/cld_utf8statetable.cc
88
+ encodings/compact_lang_det/win/cld_utf8statetable.h
89
+ encodings/compact_lang_det/win/cld_utf8utils.h
90
+ encodings/compact_lang_det/win/cld_utf8utils_google3.cc
91
+ encodings/compact_lang_det/win/cld_utf8utils_windows.cc
92
+ encodings/compact_lang_det/win/normalizedunicodetext.cc
93
+ encodings/compact_lang_det/win/normalizedunicodetext.h
94
+ encodings/internal/encodings.cc
95
+ encodings/lang_enc.h
96
+ encodings/proto/encodings.pb.h
97
+ encodings/public/encodings.h
98
+ ext/cld/extconf.rb
99
+ languages/internal/#languages.cc#
100
+ languages/internal/languages.cc
101
+ languages/proto/languages.pb.h
102
+ languages/public/languages.h
103
+ lib/cld.rb
104
+ test/test.rb
105
+ thunk.cc
106
+ Manifest
@@ -0,0 +1,173 @@
1
+ This is a wrapper of the Compact Language Detection library from Chrome.
2
+ To use:
3
+ require "cld"
4
+ language = CLD.detect_language("piece of text")
5
+ is_english = CLD.english?("我不是英文")
6
+
7
+
8
+ detect_language returns a unique integer identifying the detected language. The language codes are:
9
+ ENGLISH = 0,
10
+ DANISH = 1,
11
+ DUTCH = 2,
12
+ FINNISH = 3,
13
+ FRENCH = 4,
14
+ GERMAN = 5,
15
+ HEBREW = 6,
16
+ ITALIAN = 7,
17
+ JAPANESE = 8,
18
+ KOREAN = 9,
19
+ NORWEGIAN = 10,
20
+ POLISH = 11,
21
+ PORTUGUESE = 12,
22
+ RUSSIAN = 13,
23
+ SPANISH = 14,
24
+ SWEDISH = 15,
25
+ CHINESE = 16,
26
+ CZECH = 17,
27
+ GREEK = 18,
28
+ ICELANDIC = 19,
29
+ LATVIAN = 20,
30
+ LITHUANIAN = 21,
31
+ ROMANIAN = 22,
32
+ HUNGARIAN = 23,
33
+ ESTONIAN = 24,
34
+ TG_UNKNOWN_LANGUAGE = 25,
35
+ UNKNOWN_LANGUAGE = 26,
36
+ BULGARIAN = 27,
37
+ CROATIAN = 28,
38
+ SERBIAN = 29,
39
+ IRISH = 30,
40
+ GALICIAN = 31,
41
+ TAGALOG = 32,
42
+ TURKISH = 33,
43
+ UKRAINIAN = 34,
44
+ HINDI = 35,
45
+ MACEDONIAN = 36,
46
+ BENGALI = 37,
47
+ INDONESIAN = 38,
48
+ LATIN = 39,
49
+ MALAY = 40,
50
+ MALAYALAM = 41,
51
+ WELSH = 42,
52
+ NEPALI = 43,
53
+ TELUGU = 44,
54
+ ALBANIAN = 45,
55
+ TAMIL = 46,
56
+ BELARUSIAN = 47,
57
+ JAVANESE = 48,
58
+ OCCITAN = 49,
59
+ URDU = 50,
60
+ BIHARI = 51,
61
+ GUJARATI = 52,
62
+ THAI = 53,
63
+ ARABIC = 54,
64
+ CATALAN = 55,
65
+ ESPERANTO = 56,
66
+ BASQUE = 57,
67
+ INTERLINGUA = 58,
68
+ KANNADA = 59,
69
+ PUNJABI = 60,
70
+ SCOTS_GAELIC = 61,
71
+ SWAHILI = 62,
72
+ SLOVENIAN = 63,
73
+ MARATHI = 64,
74
+ MALTESE = 65,
75
+ VIETNAMESE = 66,
76
+ FRISIAN = 67,
77
+ SLOVAK = 68,
78
+ CHINESE_T = 69,
79
+ FAROESE = 70,
80
+ SUNDANESE = 71,
81
+ UZBEK = 72,
82
+ AMHARIC = 73,
83
+ AZERBAIJANI = 74,
84
+ GEORGIAN = 75,
85
+ TIGRINYA = 76,
86
+ PERSIAN = 77,
87
+ BOSNIAN = 78,
88
+ SINHALESE = 79,
89
+ NORWEGIAN_N = 80,
90
+ PORTUGUESE_P = 81,
91
+ PORTUGUESE_B = 82,
92
+ XHOSA = 83,
93
+ ZULU = 84,
94
+ GUARANI = 85,
95
+ SESOTHO = 86,
96
+ TURKMEN = 87,
97
+ KYRGYZ = 88,
98
+ BRETON = 89,
99
+ TWI = 90,
100
+ YIDDISH = 91,
101
+ SERBO_CROATIAN= 92,
102
+ SOMALI = 93,
103
+ UIGHUR = 94,
104
+ KURDISH = 95,
105
+ MONGOLIAN = 96,
106
+ ARMENIAN = 97,
107
+ LAOTHIAN = 98,
108
+ SINDHI = 99,
109
+ RHAETO_ROMANCE= 100,
110
+ AFRIKAANS = 101,
111
+ LUXEMBOURGISH = 102,
112
+ BURMESE = 103,
113
+ KHMER = 104,
114
+ TIBETAN = 105,
115
+ DHIVEHI = 106,
116
+ CHEROKEE = 107,
117
+ SYRIAC = 108,
118
+ LIMBU = 109,
119
+ ORIYA = 110,
120
+ ASSAMESE = 111,
121
+ CORSICAN = 112,
122
+ INTERLINGUE = 113,
123
+ KAZAKH = 114,
124
+ LINGALA = 115,
125
+ MOLDAVIAN = 116,
126
+ PASHTO = 117,
127
+ QUECHUA = 118,
128
+ SHONA = 119,
129
+ TAJIK = 120,
130
+ TATAR = 121,
131
+ TONGA = 122,
132
+ YORUBA = 123,
133
+ CREOLES_AND_PIDGINS_ENGLISH_BASED = 124,
134
+ CREOLES_AND_PIDGINS_FRENCH_BASED = 125,
135
+ CREOLES_AND_PIDGINS_PORTUGUESE_BASED = 126,
136
+ CREOLES_AND_PIDGINS_OTHER = 127,
137
+ MAORI = 128,
138
+ WOLOF = 129,
139
+ ABKHAZIAN = 130,
140
+ AFAR = 131,
141
+ AYMARA = 132,
142
+ BASHKIR = 133,
143
+ BISLAMA = 134,
144
+ DZONGKHA = 135,
145
+ FIJIAN = 136,
146
+ GREENLANDIC = 137,
147
+ HAUSA = 138,
148
+ HAITIAN_CREOLE= 139,
149
+ INUPIAK = 140,
150
+ INUKTITUT = 141,
151
+ KASHMIRI = 142,
152
+ KINYARWANDA = 143,
153
+ MALAGASY = 144,
154
+ NAURU = 145,
155
+ OROMO = 146,
156
+ RUNDI = 147,
157
+ SAMOAN = 148,
158
+ SANGO = 149,
159
+ SANSKRIT = 150,
160
+ SISWANT = 151,
161
+ TSONGA = 152,
162
+ TSWANA = 153,
163
+ VOLAPUK = 154,
164
+ ZHUANG = 155,
165
+ KHASI = 156,
166
+ SCOTS = 157,
167
+ GANDA = 158,
168
+ MANX = 159,
169
+ MONTENEGRIN = 160,
170
+ NUM_LANGUAGES = 161,
171
+
172
+ Thanks to Mike McCandless for finding this code and writing a Python version.
173
+ Thanks to the Chrome Authors.
@@ -0,0 +1,15 @@
1
+
2
+ require 'rubygems'
3
+ require 'rake'
4
+ require 'echoe'
5
+
6
+ Echoe.new('cld', '0.1.0') do |p|
7
+ p.description = "Compact Language Detection from chrome"
8
+ p.url = "http://github.com/jtoy/cld"
9
+ p.author = "Jason Toy"
10
+ p.email = "jtoy@jtoy.net"
11
+ p.ignore_pattern = ["tmp/*", "script/*"]
12
+ p.development_dependencies = []
13
+ end
14
+
15
+ #Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
@@ -0,0 +1,348 @@
1
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #ifndef BASE_BASICTYPES_H_
6
+ #define BASE_BASICTYPES_H_
7
+
8
+ #include <limits.h> // So we can set the bounds of our types
9
+ #include <stddef.h> // For size_t
10
+ #include <string.h> // for memcpy
11
+
12
+ #include "base/port.h" // Types that only need exist on certain systems
13
+
14
+ #ifndef COMPILER_MSVC
15
+ // stdint.h is part of C99 but MSVC doesn't have it.
16
+ #include <stdint.h> // For intptr_t.
17
+ #endif
18
+
19
+ typedef signed char schar;
20
+ typedef signed char int8;
21
+ typedef short int16;
22
+ // TODO(mbelshe) Remove these type guards. These are
23
+ // temporary to avoid conflicts with npapi.h.
24
+ #ifndef _INT32
25
+ #define _INT32
26
+ typedef int int32;
27
+ #endif
28
+
29
+ // The NSPR system headers define 64-bit as |long| when possible. In order to
30
+ // not have typedef mismatches, we do the same on LP64.
31
+ #if __LP64__
32
+ typedef long int64;
33
+ #else
34
+ typedef long long int64;
35
+ #endif
36
+
37
+ // NOTE: unsigned types are DANGEROUS in loops and other arithmetical
38
+ // places. Use the signed types unless your variable represents a bit
39
+ // pattern (eg a hash value) or you really need the extra bit. Do NOT
40
+ // use 'unsigned' to express "this value should always be positive";
41
+ // use assertions for this.
42
+
43
+ typedef unsigned char uint8;
44
+ typedef unsigned short uint16;
45
+ // TODO(mbelshe) Remove these type guards. These are
46
+ // temporary to avoid conflicts with npapi.h.
47
+ #ifndef _UINT32
48
+ #define _UINT32
49
+ typedef unsigned int uint32;
50
+ #endif
51
+
52
+ // See the comment above about NSPR and 64-bit.
53
+ #if __LP64__
54
+ typedef unsigned long uint64;
55
+ #else
56
+ typedef unsigned long long uint64;
57
+ #endif
58
+
59
+ // A type to represent a Unicode code-point value. As of Unicode 4.0,
60
+ // such values require up to 21 bits.
61
+ // (For type-checking on pointers, make this explicitly signed,
62
+ // and it should always be the signed version of whatever int32 is.)
63
+ typedef signed int char32;
64
+
65
+ const uint8 kuint8max = (( uint8) 0xFF);
66
+ const uint16 kuint16max = ((uint16) 0xFFFF);
67
+ const uint32 kuint32max = ((uint32) 0xFFFFFFFF);
68
+ const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF));
69
+ const int8 kint8min = (( int8) 0x80);
70
+ const int8 kint8max = (( int8) 0x7F);
71
+ const int16 kint16min = (( int16) 0x8000);
72
+ const int16 kint16max = (( int16) 0x7FFF);
73
+ const int32 kint32min = (( int32) 0x80000000);
74
+ const int32 kint32max = (( int32) 0x7FFFFFFF);
75
+ const int64 kint64min = (( int64) GG_LONGLONG(0x8000000000000000));
76
+ const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF));
77
+
78
+ // A macro to disallow the copy constructor and operator= functions
79
+ // This should be used in the private: declarations for a class
80
+ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
81
+ TypeName(const TypeName&); \
82
+ void operator=(const TypeName&)
83
+
84
+ // An older, deprecated, politically incorrect name for the above.
85
+ #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
86
+
87
+ // A macro to disallow all the implicit constructors, namely the
88
+ // default constructor, copy constructor and operator= functions.
89
+ //
90
+ // This should be used in the private: declarations for a class
91
+ // that wants to prevent anyone from instantiating it. This is
92
+ // especially useful for classes containing only static methods.
93
+ #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
94
+ TypeName(); \
95
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
96
+
97
+ // The arraysize(arr) macro returns the # of elements in an array arr.
98
+ // The expression is a compile-time constant, and therefore can be
99
+ // used in defining new arrays, for example. If you use arraysize on
100
+ // a pointer by mistake, you will get a compile-time error.
101
+ //
102
+ // One caveat is that arraysize() doesn't accept any array of an
103
+ // anonymous type or a type defined inside a function. In these rare
104
+ // cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
105
+ // due to a limitation in C++'s template system. The limitation might
106
+ // eventually be removed, but it hasn't happened yet.
107
+
108
+ // This template function declaration is used in defining arraysize.
109
+ // Note that the function doesn't need an implementation, as we only
110
+ // use its type.
111
+ template <typename T, size_t N>
112
+ char (&ArraySizeHelper(T (&array)[N]))[N];
113
+
114
+ // That gcc wants both of these prototypes seems mysterious. VC, for
115
+ // its part, can't decide which to use (another mystery). Matching of
116
+ // template overloads: the final frontier.
117
+ #ifndef _MSC_VER
118
+ template <typename T, size_t N>
119
+ char (&ArraySizeHelper(const T (&array)[N]))[N];
120
+ #endif
121
+
122
+ #define arraysize(array) (sizeof(ArraySizeHelper(array)))
123
+
124
+ // ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
125
+ // but can be used on anonymous types or types defined inside
126
+ // functions. It's less safe than arraysize as it accepts some
127
+ // (although not all) pointers. Therefore, you should use arraysize
128
+ // whenever possible.
129
+ //
130
+ // The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
131
+ // size_t.
132
+ //
133
+ // ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error
134
+ //
135
+ // "warning: division by zero in ..."
136
+ //
137
+ // when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
138
+ // You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
139
+ //
140
+ // The following comments are on the implementation details, and can
141
+ // be ignored by the users.
142
+ //
143
+ // ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
144
+ // the array) and sizeof(*(arr)) (the # of bytes in one array
145
+ // element). If the former is divisible by the latter, perhaps arr is
146
+ // indeed an array, in which case the division result is the # of
147
+ // elements in the array. Otherwise, arr cannot possibly be an array,
148
+ // and we generate a compiler error to prevent the code from
149
+ // compiling.
150
+ //
151
+ // Since the size of bool is implementation-defined, we need to cast
152
+ // !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
153
+ // result has type size_t.
154
+ //
155
+ // This macro is not perfect as it wrongfully accepts certain
156
+ // pointers, namely where the pointer size is divisible by the pointee
157
+ // size. Since all our code has to go through a 32-bit compiler,
158
+ // where a pointer is 4 bytes, this means all pointers to a type whose
159
+ // size is 3 or greater than 4 will be (righteously) rejected.
160
+
161
+ #define ARRAYSIZE_UNSAFE(a) \
162
+ ((sizeof(a) / sizeof(*(a))) / \
163
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
164
+
165
+
166
+ // Use implicit_cast as a safe version of static_cast or const_cast
167
+ // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
168
+ // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
169
+ // a const pointer to Foo).
170
+ // When you use implicit_cast, the compiler checks that the cast is safe.
171
+ // Such explicit implicit_casts are necessary in surprisingly many
172
+ // situations where C++ demands an exact type match instead of an
173
+ // argument type convertible to a target type.
174
+ //
175
+ // The From type can be inferred, so the preferred syntax for using
176
+ // implicit_cast is the same as for static_cast etc.:
177
+ //
178
+ // implicit_cast<ToType>(expr)
179
+ //
180
+ // implicit_cast would have been part of the C++ standard library,
181
+ // but the proposal was submitted too late. It will probably make
182
+ // its way into the language in the future.
183
+ template<typename To, typename From>
184
+ inline To implicit_cast(From const &f) {
185
+ return f;
186
+ }
187
+
188
+ // The COMPILE_ASSERT macro can be used to verify that a compile time
189
+ // expression is true. For example, you could use it to verify the
190
+ // size of a static array:
191
+ //
192
+ // COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
193
+ // content_type_names_incorrect_size);
194
+ //
195
+ // or to make sure a struct is smaller than a certain size:
196
+ //
197
+ // COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
198
+ //
199
+ // The second argument to the macro is the name of the variable. If
200
+ // the expression is false, most compilers will issue a warning/error
201
+ // containing the name of the variable.
202
+
203
+ template <bool>
204
+ struct CompileAssert {
205
+ };
206
+
207
+ #undef COMPILE_ASSERT
208
+ #define COMPILE_ASSERT(expr, msg) \
209
+ typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
210
+
211
+ // Implementation details of COMPILE_ASSERT:
212
+ //
213
+ // - COMPILE_ASSERT works by defining an array type that has -1
214
+ // elements (and thus is invalid) when the expression is false.
215
+ //
216
+ // - The simpler definition
217
+ //
218
+ // #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
219
+ //
220
+ // does not work, as gcc supports variable-length arrays whose sizes
221
+ // are determined at run-time (this is gcc's extension and not part
222
+ // of the C++ standard). As a result, gcc fails to reject the
223
+ // following code with the simple definition:
224
+ //
225
+ // int foo;
226
+ // COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
227
+ // // not a compile-time constant.
228
+ //
229
+ // - By using the type CompileAssert<(bool(expr))>, we ensure that
230
+ // expr is a compile-time constant. (Template arguments must be
231
+ // determined at compile-time.)
232
+ //
233
+ // - The outer parentheses in CompileAssert<(bool(expr))> are necessary
234
+ // to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
235
+ //
236
+ // CompileAssert<bool(expr)>
237
+ //
238
+ // instead, these compilers will refuse to compile
239
+ //
240
+ // COMPILE_ASSERT(5 > 0, some_message);
241
+ //
242
+ // (They seem to think the ">" in "5 > 0" marks the end of the
243
+ // template argument list.)
244
+ //
245
+ // - The array size is (bool(expr) ? 1 : -1), instead of simply
246
+ //
247
+ // ((expr) ? 1 : -1).
248
+ //
249
+ // This is to avoid running into a bug in MS VC 7.1, which
250
+ // causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
251
+
252
+
253
+ // MetatagId refers to metatag-id that we assign to
254
+ // each metatag <name, value> pair.
255
+ typedef uint32 MetatagId;
256
+
257
+ // Argument type used in interfaces that can optionally take ownership
258
+ // of a passed in argument. If TAKE_OWNERSHIP is passed, the called
259
+ // object takes ownership of the argument. Otherwise it does not.
260
+ enum Ownership {
261
+ DO_NOT_TAKE_OWNERSHIP,
262
+ TAKE_OWNERSHIP
263
+ };
264
+
265
+ // bit_cast<Dest,Source> is a template function that implements the
266
+ // equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
267
+ // very low-level functions like the protobuf library and fast math
268
+ // support.
269
+ //
270
+ // float f = 3.14159265358979;
271
+ // int i = bit_cast<int32>(f);
272
+ // // i = 0x40490fdb
273
+ //
274
+ // The classical address-casting method is:
275
+ //
276
+ // // WRONG
277
+ // float f = 3.14159265358979; // WRONG
278
+ // int i = * reinterpret_cast<int*>(&f); // WRONG
279
+ //
280
+ // The address-casting method actually produces undefined behavior
281
+ // according to ISO C++ specification section 3.10 -15-. Roughly, this
282
+ // section says: if an object in memory has one type, and a program
283
+ // accesses it with a different type, then the result is undefined
284
+ // behavior for most values of "different type".
285
+ //
286
+ // This is true for any cast syntax, either *(int*)&f or
287
+ // *reinterpret_cast<int*>(&f). And it is particularly true for
288
+ // conversions between integral lvalues and floating-point lvalues.
289
+ //
290
+ // The purpose of 3.10 -15- is to allow optimizing compilers to assume
291
+ // that expressions with different types refer to different memory. gcc
292
+ // 4.0.1 has an optimizer that takes advantage of this. So a
293
+ // non-conforming program quietly produces wildly incorrect output.
294
+ //
295
+ // The problem is not the use of reinterpret_cast. The problem is type
296
+ // punning: holding an object in memory of one type and reading its bits
297
+ // back using a different type.
298
+ //
299
+ // The C++ standard is more subtle and complex than this, but that
300
+ // is the basic idea.
301
+ //
302
+ // Anyways ...
303
+ //
304
+ // bit_cast<> calls memcpy() which is blessed by the standard,
305
+ // especially by the example in section 3.9 . Also, of course,
306
+ // bit_cast<> wraps up the nasty logic in one place.
307
+ //
308
+ // Fortunately memcpy() is very fast. In optimized mode, with a
309
+ // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
310
+ // code with the minimal amount of data movement. On a 32-bit system,
311
+ // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
312
+ // compiles to two loads and two stores.
313
+ //
314
+ // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
315
+ //
316
+ // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
317
+ // is likely to surprise you.
318
+
319
+ template <class Dest, class Source>
320
+ inline Dest bit_cast(const Source& source) {
321
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
322
+ // A compile error here means your Dest and Source have different sizes.
323
+ typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];
324
+
325
+ Dest dest;
326
+ memcpy(&dest, &source, sizeof(dest));
327
+ return dest;
328
+ }
329
+
330
+ // The following enum should be used only as a constructor argument to indicate
331
+ // that the variable has static storage class, and that the constructor should
332
+ // do nothing to its state. It indicates to the reader that it is legal to
333
+ // declare a static instance of the class, provided the constructor is given
334
+ // the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
335
+ // static variable that has a constructor or a destructor because invocation
336
+ // order is undefined. However, IF the type can be initialized by filling with
337
+ // zeroes (which the loader does for static variables), AND the destructor also
338
+ // does nothing to the storage, AND there are no virtual methods, then a
339
+ // constructor declared as
340
+ // explicit MyClass(base::LinkerInitialized x) {}
341
+ // and invoked as
342
+ // static MyClass my_variable_name(base::LINKER_INITIALIZED);
343
+ namespace base {
344
+ enum LinkerInitialized { LINKER_INITIALIZED };
345
+ } // base
346
+
347
+
348
+ #endif // BASE_BASICTYPES_H_