krukid-cld 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. data/LICENSE +27 -0
  2. data/Manifest +106 -0
  3. data/README.rdoc +173 -0
  4. data/Rakefile +15 -0
  5. data/base/basictypes.h +348 -0
  6. data/base/build_config.h +115 -0
  7. data/base/casts.h +156 -0
  8. data/base/commandlineflags.h +443 -0
  9. data/base/crash.h +41 -0
  10. data/base/dynamic_annotations.h +358 -0
  11. data/base/global_strip_options.h +59 -0
  12. data/base/log_severity.h +46 -0
  13. data/base/logging.h +1403 -0
  14. data/base/macros.h +243 -0
  15. data/base/port.h +54 -0
  16. data/base/scoped_ptr.h +428 -0
  17. data/base/stl_decl.h +0 -0
  18. data/base/stl_decl_msvc.h +107 -0
  19. data/base/string_util.h +29 -0
  20. data/base/strtoint.h +93 -0
  21. data/base/template_util.h +96 -0
  22. data/base/type_traits.h +198 -0
  23. data/base/vlog_is_on.h +143 -0
  24. data/build.sh +48 -0
  25. data/build.win.cmd +28 -0
  26. data/cld.gemspec +33 -0
  27. data/cld_encodings.h +95 -0
  28. data/encodings/compact_lang_det/#cldutil.cc# +905 -0
  29. data/encodings/compact_lang_det/#cldutil.h# +1205 -0
  30. data/encodings/compact_lang_det/#compact_lang_det_impl.h# +171 -0
  31. data/encodings/compact_lang_det/#ext_lang_enc.cc# +545 -0
  32. data/encodings/compact_lang_det/#ext_lang_enc.h# +119 -0
  33. data/encodings/compact_lang_det/#getonescriptspan.cc# +570 -0
  34. data/encodings/compact_lang_det/#getonescriptspan.h# +131 -0
  35. data/encodings/compact_lang_det/#tote.cc# +299 -0
  36. data/encodings/compact_lang_det/#tote.h# +89 -0
  37. data/encodings/compact_lang_det/cldutil.cc +905 -0
  38. data/encodings/compact_lang_det/cldutil.h +1205 -0
  39. data/encodings/compact_lang_det/cldutil_dbg.h +76 -0
  40. data/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
  41. data/encodings/compact_lang_det/compact_lang_det.cc +62 -0
  42. data/encodings/compact_lang_det/compact_lang_det.h +145 -0
  43. data/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
  44. data/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
  45. data/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
  46. data/encodings/compact_lang_det/compile.cmd +1 -0
  47. data/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
  48. data/encodings/compact_lang_det/ext_lang_enc.h +119 -0
  49. data/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
  50. data/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
  51. data/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
  52. data/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
  53. data/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
  54. data/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
  55. data/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
  56. data/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
  57. data/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
  58. data/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
  59. data/encodings/compact_lang_det/getonescriptspan.cc +570 -0
  60. data/encodings/compact_lang_det/getonescriptspan.h +131 -0
  61. data/encodings/compact_lang_det/letterscript_enum.cc +117 -0
  62. data/encodings/compact_lang_det/letterscript_enum.h +99 -0
  63. data/encodings/compact_lang_det/subsetsequence.cc +259 -0
  64. data/encodings/compact_lang_det/subsetsequence.h +44 -0
  65. data/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
  66. data/encodings/compact_lang_det/tote.cc +299 -0
  67. data/encodings/compact_lang_det/tote.h +89 -0
  68. data/encodings/compact_lang_det/unittest_data.h +193 -0
  69. data/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
  70. data/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
  71. data/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
  72. data/encodings/compact_lang_det/win/#cld_unilib_windows.cc# +29 -0
  73. data/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
  74. data/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
  75. data/encodings/compact_lang_det/win/cld_google.h +18 -0
  76. data/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
  77. data/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
  78. data/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
  79. data/encodings/compact_lang_det/win/cld_logging.h +21 -0
  80. data/encodings/compact_lang_det/win/cld_macros.h +19 -0
  81. data/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
  82. data/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
  83. data/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
  84. data/encodings/compact_lang_det/win/cld_unilib.h +15 -0
  85. data/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
  86. data/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
  87. data/encodings/compact_lang_det/win/cld_utf.h +24 -0
  88. data/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
  89. data/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
  90. data/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
  91. data/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
  92. data/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
  93. data/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
  94. data/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
  95. data/encodings/internal/encodings.cc +12 -0
  96. data/encodings/lang_enc.h +254 -0
  97. data/encodings/proto/encodings.pb.h +169 -0
  98. data/encodings/public/encodings.h +301 -0
  99. data/ext/cld/extconf.rb +8 -0
  100. data/krukid-cld.gemspec +33 -0
  101. data/languages/internal/#languages.cc# +337 -0
  102. data/languages/internal/languages.cc +337 -0
  103. data/languages/proto/languages.pb.h +179 -0
  104. data/languages/public/languages.h +379 -0
  105. data/lib/cld.rb +12 -0
  106. data/test/test.rb +570 -0
  107. data/thunk.cc +131 -0
  108. metadata +196 -0
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/Manifest ADDED
@@ -0,0 +1,106 @@
1
+ LICENSE
2
+ README.rdoc
3
+ Rakefile
4
+ base/basictypes.h
5
+ base/build_config.h
6
+ base/casts.h
7
+ base/commandlineflags.h
8
+ base/crash.h
9
+ base/dynamic_annotations.h
10
+ base/global_strip_options.h
11
+ base/log_severity.h
12
+ base/logging.h
13
+ base/macros.h
14
+ base/port.h
15
+ base/scoped_ptr.h
16
+ base/stl_decl.h
17
+ base/stl_decl_msvc.h
18
+ base/string_util.h
19
+ base/strtoint.h
20
+ base/template_util.h
21
+ base/type_traits.h
22
+ base/vlog_is_on.h
23
+ build.sh
24
+ build.win.cmd
25
+ cld.gemspec
26
+ cld_encodings.h
27
+ encodings/compact_lang_det/#cldutil.cc#
28
+ encodings/compact_lang_det/#cldutil.h#
29
+ encodings/compact_lang_det/#compact_lang_det_impl.h#
30
+ encodings/compact_lang_det/#ext_lang_enc.cc#
31
+ encodings/compact_lang_det/#ext_lang_enc.h#
32
+ encodings/compact_lang_det/#getonescriptspan.cc#
33
+ encodings/compact_lang_det/#getonescriptspan.h#
34
+ encodings/compact_lang_det/#tote.cc#
35
+ encodings/compact_lang_det/#tote.h#
36
+ encodings/compact_lang_det/cldutil.cc
37
+ encodings/compact_lang_det/cldutil.h
38
+ encodings/compact_lang_det/cldutil_dbg.h
39
+ encodings/compact_lang_det/cldutil_dbg_empty.cc
40
+ encodings/compact_lang_det/compact_lang_det.cc
41
+ encodings/compact_lang_det/compact_lang_det.h
42
+ encodings/compact_lang_det/compact_lang_det_impl.cc
43
+ encodings/compact_lang_det/compact_lang_det_impl.h
44
+ encodings/compact_lang_det/compact_lang_det_unittest_small.cc
45
+ encodings/compact_lang_det/compile.cmd
46
+ encodings/compact_lang_det/ext_lang_enc.cc
47
+ encodings/compact_lang_det/ext_lang_enc.h
48
+ encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc
49
+ encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc
50
+ encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc
51
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc
52
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc
53
+ encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc
54
+ encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc
55
+ encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h
56
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc
57
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc
58
+ encodings/compact_lang_det/getonescriptspan.cc
59
+ encodings/compact_lang_det/getonescriptspan.h
60
+ encodings/compact_lang_det/letterscript_enum.cc
61
+ encodings/compact_lang_det/letterscript_enum.h
62
+ encodings/compact_lang_det/subsetsequence.cc
63
+ encodings/compact_lang_det/subsetsequence.h
64
+ encodings/compact_lang_det/subsetsequence_unittest.cc
65
+ encodings/compact_lang_det/tote.cc
66
+ encodings/compact_lang_det/tote.h
67
+ encodings/compact_lang_det/unittest_data.h
68
+ encodings/compact_lang_det/utf8propjustletter.h
69
+ encodings/compact_lang_det/utf8propletterscriptnum.h
70
+ encodings/compact_lang_det/utf8scannotjustletterspecial.h
71
+ encodings/compact_lang_det/win/#cld_unilib_windows.cc#
72
+ encodings/compact_lang_det/win/cld_basictypes.h
73
+ encodings/compact_lang_det/win/cld_commandlineflags.h
74
+ encodings/compact_lang_det/win/cld_google.h
75
+ encodings/compact_lang_det/win/cld_htmlutils.h
76
+ encodings/compact_lang_det/win/cld_htmlutils_google3.cc
77
+ encodings/compact_lang_det/win/cld_htmlutils_windows.cc
78
+ encodings/compact_lang_det/win/cld_logging.h
79
+ encodings/compact_lang_det/win/cld_macros.h
80
+ encodings/compact_lang_det/win/cld_strtoint.h
81
+ encodings/compact_lang_det/win/cld_unicodetext.cc
82
+ encodings/compact_lang_det/win/cld_unicodetext.h
83
+ encodings/compact_lang_det/win/cld_unilib.h
84
+ encodings/compact_lang_det/win/cld_unilib_google3.cc
85
+ encodings/compact_lang_det/win/cld_unilib_windows.cc
86
+ encodings/compact_lang_det/win/cld_utf.h
87
+ encodings/compact_lang_det/win/cld_utf8statetable.cc
88
+ encodings/compact_lang_det/win/cld_utf8statetable.h
89
+ encodings/compact_lang_det/win/cld_utf8utils.h
90
+ encodings/compact_lang_det/win/cld_utf8utils_google3.cc
91
+ encodings/compact_lang_det/win/cld_utf8utils_windows.cc
92
+ encodings/compact_lang_det/win/normalizedunicodetext.cc
93
+ encodings/compact_lang_det/win/normalizedunicodetext.h
94
+ encodings/internal/encodings.cc
95
+ encodings/lang_enc.h
96
+ encodings/proto/encodings.pb.h
97
+ encodings/public/encodings.h
98
+ ext/cld/extconf.rb
99
+ languages/internal/#languages.cc#
100
+ languages/internal/languages.cc
101
+ languages/proto/languages.pb.h
102
+ languages/public/languages.h
103
+ lib/cld.rb
104
+ test/test.rb
105
+ thunk.cc
106
+ Manifest
data/README.rdoc ADDED
@@ -0,0 +1,173 @@
1
+ This is a wrapper of the Compact Language Detection library from Chrome.
2
+ To use:
3
+ require "cld"
4
+ language = CLD.detect_language("piece of text")
5
+ is_english = CLD.english?("我不是英文")
6
+
7
+
8
+ detect_language returns a unique integer representing each language; here are the languages:
9
+ ENGLISH = 0,
10
+ DANISH = 1,
11
+ DUTCH = 2,
12
+ FINNISH = 3,
13
+ FRENCH = 4,
14
+ GERMAN = 5,
15
+ HEBREW = 6,
16
+ ITALIAN = 7,
17
+ JAPANESE = 8,
18
+ KOREAN = 9,
19
+ NORWEGIAN = 10,
20
+ POLISH = 11,
21
+ PORTUGUESE = 12,
22
+ RUSSIAN = 13,
23
+ SPANISH = 14,
24
+ SWEDISH = 15,
25
+ CHINESE = 16,
26
+ CZECH = 17,
27
+ GREEK = 18,
28
+ ICELANDIC = 19,
29
+ LATVIAN = 20,
30
+ LITHUANIAN = 21,
31
+ ROMANIAN = 22,
32
+ HUNGARIAN = 23,
33
+ ESTONIAN = 24,
34
+ TG_UNKNOWN_LANGUAGE = 25,
35
+ UNKNOWN_LANGUAGE = 26,
36
+ BULGARIAN = 27,
37
+ CROATIAN = 28,
38
+ SERBIAN = 29,
39
+ IRISH = 30,
40
+ GALICIAN = 31,
41
+ TAGALOG = 32,
42
+ TURKISH = 33,
43
+ UKRAINIAN = 34,
44
+ HINDI = 35,
45
+ MACEDONIAN = 36,
46
+ BENGALI = 37,
47
+ INDONESIAN = 38,
48
+ LATIN = 39,
49
+ MALAY = 40,
50
+ MALAYALAM = 41,
51
+ WELSH = 42,
52
+ NEPALI = 43,
53
+ TELUGU = 44,
54
+ ALBANIAN = 45,
55
+ TAMIL = 46,
56
+ BELARUSIAN = 47,
57
+ JAVANESE = 48,
58
+ OCCITAN = 49,
59
+ URDU = 50,
60
+ BIHARI = 51,
61
+ GUJARATI = 52,
62
+ THAI = 53,
63
+ ARABIC = 54,
64
+ CATALAN = 55,
65
+ ESPERANTO = 56,
66
+ BASQUE = 57,
67
+ INTERLINGUA = 58,
68
+ KANNADA = 59,
69
+ PUNJABI = 60,
70
+ SCOTS_GAELIC = 61,
71
+ SWAHILI = 62,
72
+ SLOVENIAN = 63,
73
+ MARATHI = 64,
74
+ MALTESE = 65,
75
+ VIETNAMESE = 66,
76
+ FRISIAN = 67,
77
+ SLOVAK = 68,
78
+ CHINESE_T = 69,
79
+ FAROESE = 70,
80
+ SUNDANESE = 71,
81
+ UZBEK = 72,
82
+ AMHARIC = 73,
83
+ AZERBAIJANI = 74,
84
+ GEORGIAN = 75,
85
+ TIGRINYA = 76,
86
+ PERSIAN = 77,
87
+ BOSNIAN = 78,
88
+ SINHALESE = 79,
89
+ NORWEGIAN_N = 80,
90
+ PORTUGUESE_P = 81,
91
+ PORTUGUESE_B = 82,
92
+ XHOSA = 83,
93
+ ZULU = 84,
94
+ GUARANI = 85,
95
+ SESOTHO = 86,
96
+ TURKMEN = 87,
97
+ KYRGYZ = 88,
98
+ BRETON = 89,
99
+ TWI = 90,
100
+ YIDDISH = 91,
101
+ SERBO_CROATIAN= 92,
102
+ SOMALI = 93,
103
+ UIGHUR = 94,
104
+ KURDISH = 95,
105
+ MONGOLIAN = 96,
106
+ ARMENIAN = 97,
107
+ LAOTHIAN = 98,
108
+ SINDHI = 99,
109
+ RHAETO_ROMANCE= 100,
110
+ AFRIKAANS = 101,
111
+ LUXEMBOURGISH = 102,
112
+ BURMESE = 103,
113
+ KHMER = 104,
114
+ TIBETAN = 105,
115
+ DHIVEHI = 106,
116
+ CHEROKEE = 107,
117
+ SYRIAC = 108,
118
+ LIMBU = 109,
119
+ ORIYA = 110,
120
+ ASSAMESE = 111,
121
+ CORSICAN = 112,
122
+ INTERLINGUE = 113,
123
+ KAZAKH = 114,
124
+ LINGALA = 115,
125
+ MOLDAVIAN = 116,
126
+ PASHTO = 117,
127
+ QUECHUA = 118,
128
+ SHONA = 119,
129
+ TAJIK = 120,
130
+ TATAR = 121,
131
+ TONGA = 122,
132
+ YORUBA = 123,
133
+ CREOLES_AND_PIDGINS_ENGLISH_BASED = 124,
134
+ CREOLES_AND_PIDGINS_FRENCH_BASED = 125,
135
+ CREOLES_AND_PIDGINS_PORTUGUESE_BASED = 126,
136
+ CREOLES_AND_PIDGINS_OTHER = 127,
137
+ MAORI = 128,
138
+ WOLOF = 129,
139
+ ABKHAZIAN = 130,
140
+ AFAR = 131,
141
+ AYMARA = 132,
142
+ BASHKIR = 133,
143
+ BISLAMA = 134,
144
+ DZONGKHA = 135,
145
+ FIJIAN = 136,
146
+ GREENLANDIC = 137,
147
+ HAUSA = 138,
148
+ HAITIAN_CREOLE= 139,
149
+ INUPIAK = 140,
150
+ INUKTITUT = 141,
151
+ KASHMIRI = 142,
152
+ KINYARWANDA = 143,
153
+ MALAGASY = 144,
154
+ NAURU = 145,
155
+ OROMO = 146,
156
+ RUNDI = 147,
157
+ SAMOAN = 148,
158
+ SANGO = 149,
159
+ SANSKRIT = 150,
160
+ SISWANT = 151,
161
+ TSONGA = 152,
162
+ TSWANA = 153,
163
+ VOLAPUK = 154,
164
+ ZHUANG = 155,
165
+ KHASI = 156,
166
+ SCOTS = 157,
167
+ GANDA = 158,
168
+ MANX = 159,
169
+ MONTENEGRIN = 160,
170
+ NUM_LANGUAGES = 161,
171
+
172
+ Thanks to Mike McCandless for finding this code and writing a Python version
173
+ Thanks to the Chrome Authors.
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
+ Echoe.new('krukid-cld', '0.4.0') do |p|
6
+ p.description = "Compact Language Detection from chrome (jtoy fork/gemfix)"
7
+ p.url = "http://github.com/krukid/cld"
8
+ p.author = "krukid"
9
+ p.email = "krukid@gmail.com"
10
+ p.ignore_pattern = ["tmp/*", "script/*"]
11
+ p.runtime_dependencies = ["ffi"]
12
+ p.development_dependencies = []
13
+ end
14
+
15
+ #Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
data/base/basictypes.h ADDED
@@ -0,0 +1,348 @@
1
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #ifndef BASE_BASICTYPES_H_
6
+ #define BASE_BASICTYPES_H_
7
+
8
+ #include <limits.h> // So we can set the bounds of our types
9
+ #include <stddef.h> // For size_t
10
+ #include <string.h> // for memcpy
11
+
12
+ #include "base/port.h" // Types that only need exist on certain systems
13
+
14
+ #ifndef COMPILER_MSVC
15
+ // stdint.h is part of C99 but MSVC doesn't have it.
16
+ #include <stdint.h> // For intptr_t.
17
+ #endif
18
+
19
+ typedef signed char schar;
20
+ typedef signed char int8;
21
+ typedef short int16;
22
+ // TODO(mbelshe) Remove these type guards. These are
23
+ // temporary to avoid conflicts with npapi.h.
24
+ #ifndef _INT32
25
+ #define _INT32
26
+ typedef int int32;
27
+ #endif
28
+
29
+ // The NSPR system headers define 64-bit as |long| when possible. In order to
30
+ // not have typedef mismatches, we do the same on LP64.
31
+ #if __LP64__
32
+ typedef long int64;
33
+ #else
34
+ typedef long long int64;
35
+ #endif
36
+
37
+ // NOTE: unsigned types are DANGEROUS in loops and other arithmetical
38
+ // places. Use the signed types unless your variable represents a bit
39
+ // pattern (eg a hash value) or you really need the extra bit. Do NOT
40
+ // use 'unsigned' to express "this value should always be positive";
41
+ // use assertions for this.
42
+
43
+ typedef unsigned char uint8;
44
+ typedef unsigned short uint16;
45
+ // TODO(mbelshe) Remove these type guards. These are
46
+ // temporary to avoid conflicts with npapi.h.
47
+ #ifndef _UINT32
48
+ #define _UINT32
49
+ typedef unsigned int uint32;
50
+ #endif
51
+
52
+ // See the comment above about NSPR and 64-bit.
53
+ #if __LP64__
54
+ typedef unsigned long uint64;
55
+ #else
56
+ typedef unsigned long long uint64;
57
+ #endif
58
+
59
+ // A type to represent a Unicode code-point value. As of Unicode 4.0,
60
+ // such values require up to 21 bits.
61
+ // (For type-checking on pointers, make this explicitly signed,
62
+ // and it should always be the signed version of whatever int32 is.)
63
+ typedef signed int char32;
64
+
65
+ const uint8 kuint8max = (( uint8) 0xFF);
66
+ const uint16 kuint16max = ((uint16) 0xFFFF);
67
+ const uint32 kuint32max = ((uint32) 0xFFFFFFFF);
68
+ const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF));
69
+ const int8 kint8min = (( int8) 0x80);
70
+ const int8 kint8max = (( int8) 0x7F);
71
+ const int16 kint16min = (( int16) 0x8000);
72
+ const int16 kint16max = (( int16) 0x7FFF);
73
+ const int32 kint32min = (( int32) 0x80000000);
74
+ const int32 kint32max = (( int32) 0x7FFFFFFF);
75
+ const int64 kint64min = (( int64) GG_LONGLONG(0x8000000000000000));
76
+ const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF));
77
+
78
+ // A macro to disallow the copy constructor and operator= functions
79
+ // This should be used in the private: declarations for a class
80
+ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
81
+ TypeName(const TypeName&); \
82
+ void operator=(const TypeName&)
83
+
84
+ // An older, deprecated, politically incorrect name for the above.
85
+ #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
86
+
87
+ // A macro to disallow all the implicit constructors, namely the
88
+ // default constructor, copy constructor and operator= functions.
89
+ //
90
+ // This should be used in the private: declarations for a class
91
+ // that wants to prevent anyone from instantiating it. This is
92
+ // especially useful for classes containing only static methods.
93
+ #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
94
+ TypeName(); \
95
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
96
+
97
+ // The arraysize(arr) macro returns the # of elements in an array arr.
98
+ // The expression is a compile-time constant, and therefore can be
99
+ // used in defining new arrays, for example. If you use arraysize on
100
+ // a pointer by mistake, you will get a compile-time error.
101
+ //
102
+ // One caveat is that arraysize() doesn't accept any array of an
103
+ // anonymous type or a type defined inside a function. In these rare
104
+ // cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
105
+ // due to a limitation in C++'s template system. The limitation might
106
+ // eventually be removed, but it hasn't happened yet.
107
+
108
+ // This template function declaration is used in defining arraysize.
109
+ // Note that the function doesn't need an implementation, as we only
110
+ // use its type.
111
+ template <typename T, size_t N>
112
+ char (&ArraySizeHelper(T (&array)[N]))[N];
113
+
114
+ // That gcc wants both of these prototypes seems mysterious. VC, for
115
+ // its part, can't decide which to use (another mystery). Matching of
116
+ // template overloads: the final frontier.
117
+ #ifndef _MSC_VER
118
+ template <typename T, size_t N>
119
+ char (&ArraySizeHelper(const T (&array)[N]))[N];
120
+ #endif
121
+
122
+ #define arraysize(array) (sizeof(ArraySizeHelper(array)))
123
+
124
+ // ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
125
+ // but can be used on anonymous types or types defined inside
126
+ // functions. It's less safe than arraysize as it accepts some
127
+ // (although not all) pointers. Therefore, you should use arraysize
128
+ // whenever possible.
129
+ //
130
+ // The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
131
+ // size_t.
132
+ //
133
+ // ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error
134
+ //
135
+ // "warning: division by zero in ..."
136
+ //
137
+ // when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
138
+ // You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
139
+ //
140
+ // The following comments are on the implementation details, and can
141
+ // be ignored by the users.
142
+ //
143
+ // ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
144
+ // the array) and sizeof(*(arr)) (the # of bytes in one array
145
+ // element). If the former is divisible by the latter, perhaps arr is
146
+ // indeed an array, in which case the division result is the # of
147
+ // elements in the array. Otherwise, arr cannot possibly be an array,
148
+ // and we generate a compiler error to prevent the code from
149
+ // compiling.
150
+ //
151
+ // Since the size of bool is implementation-defined, we need to cast
152
+ // !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
153
+ // result has type size_t.
154
+ //
155
+ // This macro is not perfect as it wrongfully accepts certain
156
+ // pointers, namely where the pointer size is divisible by the pointee
157
+ // size. Since all our code has to go through a 32-bit compiler,
158
+ // where a pointer is 4 bytes, this means all pointers to a type whose
159
+ // size is 3 or greater than 4 will be (righteously) rejected.
160
+
161
+ #define ARRAYSIZE_UNSAFE(a) \
162
+ ((sizeof(a) / sizeof(*(a))) / \
163
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
164
+
165
+
166
+ // Use implicit_cast as a safe version of static_cast or const_cast
167
+ // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
168
+ // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
169
+ // a const pointer to Foo).
170
+ // When you use implicit_cast, the compiler checks that the cast is safe.
171
+ // Such explicit implicit_casts are necessary in surprisingly many
172
+ // situations where C++ demands an exact type match instead of an
173
+ // argument type convertible to a target type.
174
+ //
175
+ // The From type can be inferred, so the preferred syntax for using
176
+ // implicit_cast is the same as for static_cast etc.:
177
+ //
178
+ // implicit_cast<ToType>(expr)
179
+ //
180
+ // implicit_cast would have been part of the C++ standard library,
181
+ // but the proposal was submitted too late. It will probably make
182
+ // its way into the language in the future.
183
+ template<typename To, typename From>
184
+ inline To implicit_cast(From const &f) {
185
+ return f;
186
+ }
187
+
188
+ // The COMPILE_ASSERT macro can be used to verify that a compile time
189
+ // expression is true. For example, you could use it to verify the
190
+ // size of a static array:
191
+ //
192
+ // COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
193
+ // content_type_names_incorrect_size);
194
+ //
195
+ // or to make sure a struct is smaller than a certain size:
196
+ //
197
+ // COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
198
+ //
199
+ // The second argument to the macro is the name of the variable. If
200
+ // the expression is false, most compilers will issue a warning/error
201
+ // containing the name of the variable.
202
+
203
+ template <bool>
204
+ struct CompileAssert {
205
+ };
206
+
207
+ #undef COMPILE_ASSERT
208
+ #define COMPILE_ASSERT(expr, msg) \
209
+ typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
210
+
211
+ // Implementation details of COMPILE_ASSERT:
212
+ //
213
+ // - COMPILE_ASSERT works by defining an array type that has -1
214
+ // elements (and thus is invalid) when the expression is false.
215
+ //
216
+ // - The simpler definition
217
+ //
218
+ // #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
219
+ //
220
+ // does not work, as gcc supports variable-length arrays whose sizes
221
+ // are determined at run-time (this is gcc's extension and not part
222
+ // of the C++ standard). As a result, gcc fails to reject the
223
+ // following code with the simple definition:
224
+ //
225
+ // int foo;
226
+ // COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
227
+ // // not a compile-time constant.
228
+ //
229
+ // - By using the type CompileAssert<(bool(expr))>, we ensure that
230
+ // expr is a compile-time constant. (Template arguments must be
231
+ // determined at compile-time.)
232
+ //
233
+ // - The outer parentheses in CompileAssert<(bool(expr))> are necessary
234
+ // to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
235
+ //
236
+ // CompileAssert<bool(expr)>
237
+ //
238
+ // instead, these compilers will refuse to compile
239
+ //
240
+ // COMPILE_ASSERT(5 > 0, some_message);
241
+ //
242
+ // (They seem to think the ">" in "5 > 0" marks the end of the
243
+ // template argument list.)
244
+ //
245
+ // - The array size is (bool(expr) ? 1 : -1), instead of simply
246
+ //
247
+ // ((expr) ? 1 : -1).
248
+ //
249
+ // This is to avoid running into a bug in MS VC 7.1, which
250
+ // causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
251
+
252
+
253
+ // MetatagId refers to metatag-id that we assign to
254
+ // each metatag <name, value> pair.
255
+ typedef uint32 MetatagId;
256
+
257
+ // Argument type used in interfaces that can optionally take ownership
258
+ // of a passed in argument. If TAKE_OWNERSHIP is passed, the called
259
+ // object takes ownership of the argument. Otherwise it does not.
260
+ enum Ownership {
261
+ DO_NOT_TAKE_OWNERSHIP,
262
+ TAKE_OWNERSHIP
263
+ };
264
+
265
+ // bit_cast<Dest,Source> is a template function that implements the
266
+ // equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
267
+ // very low-level functions like the protobuf library and fast math
268
+ // support.
269
+ //
270
+ // float f = 3.14159265358979;
271
+ // int i = bit_cast<int32>(f);
272
+ // // i = 0x40490fdb
273
+ //
274
+ // The classical address-casting method is:
275
+ //
276
+ // // WRONG
277
+ // float f = 3.14159265358979; // WRONG
278
+ // int i = * reinterpret_cast<int*>(&f); // WRONG
279
+ //
280
+ // The address-casting method actually produces undefined behavior
281
+ // according to ISO C++ specification section 3.10 -15-. Roughly, this
282
+ // section says: if an object in memory has one type, and a program
283
+ // accesses it with a different type, then the result is undefined
284
+ // behavior for most values of "different type".
285
+ //
286
+ // This is true for any cast syntax, either *(int*)&f or
287
+ // *reinterpret_cast<int*>(&f). And it is particularly true for
288
+ // conversions between integral lvalues and floating-point lvalues.
289
+ //
290
+ // The purpose of 3.10 -15- is to allow optimizing compilers to assume
291
+ // that expressions with different types refer to different memory. gcc
292
+ // 4.0.1 has an optimizer that takes advantage of this. So a
293
+ // non-conforming program quietly produces wildly incorrect output.
294
+ //
295
+ // The problem is not the use of reinterpret_cast. The problem is type
296
+ // punning: holding an object in memory of one type and reading its bits
297
+ // back using a different type.
298
+ //
299
+ // The C++ standard is more subtle and complex than this, but that
300
+ // is the basic idea.
301
+ //
302
+ // Anyways ...
303
+ //
304
+ // bit_cast<> calls memcpy() which is blessed by the standard,
305
+ // especially by the example in section 3.9. Also, of course,
306
+ // bit_cast<> wraps up the nasty logic in one place.
307
+ //
308
+ // Fortunately memcpy() is very fast. In optimized mode, with a
309
+ // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
310
+ // code with the minimal amount of data movement. On a 32-bit system,
311
+ // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
312
+ // compiles to two loads and two stores.
313
+ //
314
+ // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
315
+ //
316
+ // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
317
+ // is likely to surprise you.
318
+
319
+ template <class Dest, class Source>
320
+ inline Dest bit_cast(const Source& source) {
321
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
322
+ // A compile error here means your Dest and Source have different sizes.
323
+ typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];
324
+
325
+ Dest dest;
326
+ memcpy(&dest, &source, sizeof(dest));
327
+ return dest;
328
+ }
329
+
330
+ // The following enum should be used only as a constructor argument to indicate
331
+ // that the variable has static storage class, and that the constructor should
332
+ // do nothing to its state. It indicates to the reader that it is legal to
333
+ // declare a static instance of the class, provided the constructor is given
334
+ // the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
335
+ // static variable that has a constructor or a destructor because invocation
336
+ // order is undefined. However, IF the type can be initialized by filling with
337
+ // zeroes (which the loader does for static variables), AND the destructor also
338
+ // does nothing to the storage, AND there are no virtual methods, then a
339
+ // constructor declared as
340
+ // explicit MyClass(base::LinkerInitialized x) {}
341
+ // and invoked as
342
+ // static MyClass my_variable_name(base::LINKER_INITIALIZED);
343
+ namespace base {
344
+ enum LinkerInitialized { LINKER_INITIALIZED };
345
+ } // namespace base
346
+
347
+
348
+ #endif // BASE_BASICTYPES_H_