language_detection 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. data/.gitignore +19 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +85 -0
  5. data/Rakefile +11 -0
  6. data/ext/cld/Makefile +34 -0
  7. data/ext/cld/base/basictypes.h +348 -0
  8. data/ext/cld/base/build_config.h +124 -0
  9. data/ext/cld/base/casts.h +156 -0
  10. data/ext/cld/base/commandlineflags.h +443 -0
  11. data/ext/cld/base/crash.h +41 -0
  12. data/ext/cld/base/dynamic_annotations.h +358 -0
  13. data/ext/cld/base/global_strip_options.h +59 -0
  14. data/ext/cld/base/log_severity.h +46 -0
  15. data/ext/cld/base/logging.h +1403 -0
  16. data/ext/cld/base/macros.h +243 -0
  17. data/ext/cld/base/port.h +54 -0
  18. data/ext/cld/base/scoped_ptr.h +428 -0
  19. data/ext/cld/base/stl_decl.h +0 -0
  20. data/ext/cld/base/stl_decl_msvc.h +107 -0
  21. data/ext/cld/base/string_util.h +29 -0
  22. data/ext/cld/base/strtoint.h +93 -0
  23. data/ext/cld/base/template_util.h +96 -0
  24. data/ext/cld/base/type_traits.h +198 -0
  25. data/ext/cld/base/vlog_is_on.h +143 -0
  26. data/ext/cld/cld.so +0 -0
  27. data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
  28. data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
  29. data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
  30. data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
  31. data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
  32. data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
  33. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
  34. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
  35. data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
  36. data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
  37. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
  38. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
  39. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
  40. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
  41. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
  42. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
  43. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
  44. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
  45. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
  46. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
  47. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
  48. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
  49. data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
  50. data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
  51. data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
  52. data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
  53. data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
  54. data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
  55. data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
  56. data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
  57. data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
  58. data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
  59. data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
  60. data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
  61. data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
  62. data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
  63. data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
  64. data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
  65. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
  66. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
  67. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
  68. data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
  69. data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
  70. data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
  71. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
  72. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
  73. data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
  74. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
  75. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
  76. data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
  77. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
  78. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
  79. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
  80. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
  81. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
  82. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
  83. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
  84. data/ext/cld/encodings/internal/encodings.cc +12 -0
  85. data/ext/cld/encodings/lang_enc.h +254 -0
  86. data/ext/cld/encodings/proto/encodings.pb.h +169 -0
  87. data/ext/cld/encodings/public/encodings.h +301 -0
  88. data/ext/cld/extconf.rb +1 -0
  89. data/ext/cld/language_detection.cc +88 -0
  90. data/ext/cld/languages/internal/languages.cc +337 -0
  91. data/ext/cld/languages/proto/languages.pb.h +179 -0
  92. data/ext/cld/languages/public/languages.h +379 -0
  93. data/language_detection.gemspec +28 -0
  94. data/lib/language_detection/string.rb +1 -0
  95. data/lib/language_detection/version.rb +3 -0
  96. data/lib/language_detection.rb +54 -0
  97. data/test/_helper.rb +15 -0
  98. data/test/fixtures/languages.csv +80 -0
  99. data/test/language_detection_test.rb +88 -0
  100. metadata +250 -0
@@ -0,0 +1,156 @@
1
+ // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #ifndef BASE_CASTS_H_
6
+ #define BASE_CASTS_H_
7
+
8
+ #include <assert.h> // for use with down_cast<>
9
+ #include <string.h> // for memcpy
10
+
11
+ #include "base/macros.h"
12
+
13
+
14
+ // Use implicit_cast as a safe version of static_cast or const_cast
15
+ // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
16
+ // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
17
+ // a const pointer to Foo).
18
+ // When you use implicit_cast, the compiler checks that the cast is safe.
19
+ // Such explicit implicit_casts are necessary in surprisingly many
20
+ // situations where C++ demands an exact type match instead of an
21
+ // argument type convertible to a target type.
22
+ //
23
+ // The From type can be inferred, so the preferred syntax for using
24
+ // implicit_cast is the same as for static_cast etc.:
25
+ //
26
+ // implicit_cast<ToType>(expr)
27
+ //
28
+ // implicit_cast would have been part of the C++ standard library,
29
+ // but the proposal was submitted too late. It will probably make
30
+ // its way into the language in the future.
31
+ template<typename To, typename From> // To is spelled by the caller; From is deduced from the argument
32
+ inline To implicit_cast(From const &f) {
33
+ return f; // compiles only when From converts implicitly to To
34
+ }
35
+
36
+
37
+ // When you upcast (that is, cast a pointer from type Foo to type
38
+ // SuperclassOfFoo), it's fine to use implicit_cast<>, since upcasts
39
+ // always succeed. When you downcast (that is, cast a pointer from
40
+ // type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
41
+ // how do you know the pointer is really of type SubclassOfFoo? It
42
+ // could be a bare Foo, or of type DifferentSubclassOfFoo. Thus,
43
+ // when you downcast, you should use this macro. In debug mode, we
44
+ // use dynamic_cast<> to double-check the downcast is legal (we die
45
+ // if it's not). In normal mode, we do the efficient static_cast<>
46
+ // instead. Thus, it's important to test in debug mode to make sure
47
+ // the cast is legal!
48
+ // This is the only place in the code we should use dynamic_cast<>.
49
+ // In particular, you SHOULDN'T be using dynamic_cast<> in order to
50
+ // do RTTI (eg code like this:
51
+ // if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
52
+ // if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
53
+ // You should design the code some other way not to need this.
54
+
55
+ template<typename To, typename From> // use like this: down_cast<T*>(foo);
56
+ inline To down_cast(From* f) { // so we only accept pointers
57
+ // Ensures that To is a sub-type of From *. This test is here only
58
+ // for compile-time type checking, and has no overhead in an
59
+ // optimized build at run-time, as it will be optimized away
60
+ // completely.
61
+ if (false) {
62
+ implicit_cast<From*, To>(0); // never executed; compiles only if To converts implicitly to From*
63
+ }
64
+
65
+ assert(f == NULL || dynamic_cast<To>(f) != NULL); // RTTI: debug mode only! A null f is accepted and passed through.
66
+ return static_cast<To>(f); // unchecked when assert() is compiled out (NDEBUG)
67
+ }
68
+
69
+ // Overload of down_cast for references. Use like this: down_cast<T&>(foo).
70
+ // The code is slightly convoluted because we're still using the pointer
71
+ // form of dynamic cast. (The reference form throws an exception if it
72
+ // fails.)
73
+ //
74
+ // There's no need for a special const overload either for the pointer
75
+ // or the reference form. If you call down_cast with a const T&, the
76
+ // compiler will just bind From to const T.
77
+ template<typename To, typename From> // use like this: down_cast<T&>(foo); From is deduced
78
+ inline To down_cast(From& f) {
79
+ COMPILE_ASSERT(base::is_reference<To>::value, target_type_not_a_reference); // To must be a reference type
80
+ typedef typename base::remove_reference<To>::type* ToAsPointer; // T& -> T*. NOTE(review): base::is_reference/remove_reference are not declared by the includes visible in this header - confirm base/macros.h provides them
81
+ if (false) {
82
+ // Compile-time check that To inherits from From: this dead branch
83
+ implicit_cast<From*, ToAsPointer>(0); // compiles only if ToAsPointer converts implicitly to From*
84
+ }
85
+
86
+ assert(dynamic_cast<ToAsPointer>(&f) != NULL); // RTTI: debug mode only; pointer form of dynamic_cast yields NULL on mismatch
87
+ return static_cast<To>(f); // unchecked when assert() is compiled out (NDEBUG)
88
+ }
89
+
90
+ // bit_cast<Dest,Source> is a template function that implements the
91
+ // equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
92
+ // very low-level functions like the protobuf library and fast math
93
+ // support.
94
+ //
95
+ // float f = 3.14159265358979;
96
+ // int i = bit_cast<int32>(f);
97
+ // // i = 0x40490fdb
98
+ //
99
+ // The classical address-casting method is:
100
+ //
101
+ // // WRONG
102
+ // float f = 3.14159265358979; // WRONG
103
+ // int i = * reinterpret_cast<int*>(&f); // WRONG
104
+ //
105
+ // The address-casting method actually produces undefined behavior
106
+ // according to ISO C++ specification section 3.10 -15 -. Roughly, this
107
+ // section says: if an object in memory has one type, and a program
108
+ // accesses it with a different type, then the result is undefined
109
+ // behavior for most values of "different type".
110
+ //
111
+ // This is true for any cast syntax, either *(int*)&f or
112
+ // *reinterpret_cast<int*>(&f). And it is particularly true for
113
+ // conversions between integral lvalues and floating-point lvalues.
114
+ //
115
+ // The purpose of 3.10 -15- is to allow optimizing compilers to assume
116
+ // that expressions with different types refer to different memory. gcc
117
+ // 4.0.1 has an optimizer that takes advantage of this. So a
118
+ // non-conforming program quietly produces wildly incorrect output.
119
+ //
120
+ // The problem is not the use of reinterpret_cast. The problem is type
121
+ // punning: holding an object in memory of one type and reading its bits
122
+ // back using a different type.
123
+ //
124
+ // The C++ standard is more subtle and complex than this, but that
125
+ // is the basic idea.
126
+ //
127
+ // Anyways ...
128
+ //
129
+ // bit_cast<> calls memcpy() which is blessed by the standard,
130
+ // especially by the example in section 3.9 . Also, of course,
131
+ // bit_cast<> wraps up the nasty logic in one place.
132
+ //
133
+ // Fortunately memcpy() is very fast. In optimized mode, with a
134
+ // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
135
+ // code with the minimal amount of data movement. On a 32-bit system,
136
+ // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
137
+ // compiles to two loads and two stores.
138
+ //
139
+ // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
140
+ //
141
+ // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
142
+ // is likely to surprise you.
143
+ //
144
+
145
+ template <class Dest, class Source> // Dest is spelled by the caller; Source is deduced
146
+ inline Dest bit_cast(const Source& source) {
147
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
148
+ // A compile error here means your Dest and Source have different sizes.
149
+ typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1]; // an array bound of -1 is ill-formed, which forces the error
150
+
151
+ Dest dest; // requires Dest to be default-constructible
152
+ memcpy(&dest, &source, sizeof(dest)); // byte-for-byte copy of source into dest; sizes were checked equal above
153
+ return dest;
154
+ }
155
+
156
+ #endif // BASE_CASTS_H_
@@ -0,0 +1,443 @@
1
+ // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ // This is the file that should be included by any file which declares
6
+ // or defines a command line flag or wants to parse command line flags
7
+ // or print a program usage message (which will include information about
8
+ // flags). Executive summary, in the form of an example foo.cc file:
9
+ //
10
+ // #include "foo.h" // foo.h has a line "DECLARE_int32(start);"
11
+ //
12
+ // DEFINE_int32(end, 1000, "The last record to read");
13
+ // DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...)
14
+ //
15
+ // void MyFunc() {
16
+ // if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end);
17
+ // }
18
+ //
19
+ // Then, at the command-line:
20
+ // ./foo --noverbose --start=5 --end=100
21
+
22
+ #ifndef BASE_COMMANDLINEFLAGS_H_
23
+ #define BASE_COMMANDLINEFLAGS_H_
24
+
25
+ #include <assert.h>
26
+ #include <string>
27
+ #include <vector>
28
+ #include "base/basictypes.h"
29
+ #include "base/port.h"
30
+ #include "base/stl_decl_msvc.h"
31
+ #include "base/global_strip_options.h"
32
+
33
+ // --------------------------------------------------------------------
34
+ // To actually define a flag in a file, use DEFINE_bool,
35
+ // DEFINE_string, etc. at the bottom of this file. You may also find
36
+ // it useful to register a validator with the flag. This ensures that
37
+ // when the flag is parsed from the commandline, or is later set via
38
+ // SetCommandLineOption, we call the validation function.
39
+ //
40
+ // The validation function should return true if the flag value is valid, and
41
+ // false otherwise. If the function returns false for the new setting of the
42
+ // flag, the flag will retain its current value. If it returns false for the
43
+ // default value, InitGoogle will die.
44
+ //
45
+ // This function is safe to call at global construct time (as in the
46
+ // example below).
47
+ //
48
+ // Example use:
49
+ // static bool ValidatePort(const char* flagname, int32 value) {
50
+ // if (value > 0 && value < 32768) // value is ok
51
+ // return true;
52
+ // printf("Invalid value for --%s: %d\n", flagname, (int)value);
53
+ // return false;
54
+ // }
55
+ // DEFINE_int32(port, 0, "What port to listen on");
56
+ // static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort);
57
+
58
+ // Returns true if successfully registered, false if not (because the
59
+ // first argument doesn't point to a command-line flag, or because a
60
+ // validator is already registered for this flag).
61
+ bool RegisterFlagValidator(const bool* flag,
62
+ bool (*validate_fn)(const char*, bool));
63
+ bool RegisterFlagValidator(const int32* flag,
64
+ bool (*validate_fn)(const char*, int32));
65
+ bool RegisterFlagValidator(const int64* flag,
66
+ bool (*validate_fn)(const char*, int64));
67
+ bool RegisterFlagValidator(const uint64* flag,
68
+ bool (*validate_fn)(const char*, uint64));
69
+ bool RegisterFlagValidator(const double* flag,
70
+ bool (*validate_fn)(const char*, double));
71
+ bool RegisterFlagValidator(const string* flag,
72
+ bool (*validate_fn)(const char*, const string&));
73
+
74
+
75
+ // --------------------------------------------------------------------
76
+ // These methods are the best way to get access to info about the
77
+ // list of commandline flags. Note that these routines are pretty slow.
78
+ // GetAllFlags: mostly-complete info about the list, sorted by file.
79
+ // ShowUsageWithFlags: pretty-prints the list to stdout (what --help does)
80
+ // ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr
81
+ //
82
+ // In addition to accessing flags, you can also access argv[0] (the program
83
+ // name) and argv (the entire commandline), which we sock away a copy of.
84
+ // These variables are static, so you should only set them once.
85
+
86
+ struct CommandLineFlagInfo { // description of one flag; filled in through the OUTPUT parameter of GetAllFlags / GetCommandLineFlagInfo
87
+ string name; // the name of the flag
88
+ string type; // the type of the flag: int32, etc
89
+ string description; // the "help text" associated with the flag
90
+ string current_value; // the current value, as a string
91
+ string default_value; // the default value, as a string
92
+ string filename; // 'cleaned' version of filename holding the flag
93
+ bool is_default; // true if the flag has default value
94
+ bool has_validator_fn; // true if RegisterFlagValidator called on this flag
95
+ };
96
+
97
+ extern void GetAllFlags(vector<CommandLineFlagInfo>* OUTPUT);
98
+ // These two are actually defined in commandlineflags_reporting.cc.
99
+ extern void ShowUsageWithFlags(const char *argv0); // what --help does
100
+ extern void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict);
101
+
102
+ // Create a descriptive string for a flag.
103
+ // Goes to some trouble to make pretty line breaks.
104
+ extern string DescribeOneFlag(const CommandLineFlagInfo& flag);
105
+
106
+ // Thread-hostile; meant to be called before any threads are spawned.
107
+ extern void SetArgv(int argc, const char** argv);
108
+ // The following functions are thread-safe as long as SetArgv() is
109
+ // only called before any threads start.
110
+ extern const vector<string>& GetArgvs(); // all of argv = vector of strings
111
+ extern const char* GetArgv(); // all of argv as a string
112
+ extern const char* GetArgv0(); // only argv0
113
+ extern uint32 GetArgvSum(); // simple checksum of argv
114
+ extern const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set
115
+ extern const char* ProgramInvocationShortName(); // basename(argv0)
116
+ // ProgramUsage() is thread-safe as long as SetUsageMessage() is only
117
+ // called before any threads start.
118
+ extern const char* ProgramUsage(); // string set by SetUsageMessage()
119
+
120
+
121
+ // --------------------------------------------------------------------
122
+ // Normally you access commandline flags by just saying "if (FLAGS_foo)"
123
+ // or whatever, and set them by calling "FLAGS_foo = bar" (or, more
124
+ // commonly, via the DEFINE_foo macro). But if you need a bit more
125
+ // control, we have programmatic ways to get/set the flags as well.
126
+ // These programmatic ways to access flags are thread-safe, but direct
127
+ // access is only thread-compatible.
128
+
129
+ // Return true iff the flagname was found.
130
+ // OUTPUT is set to the flag's value, or unchanged if we return false.
131
+ extern bool GetCommandLineOption(const char* name, string* OUTPUT);
132
+
133
+ // Return true iff the flagname was found. OUTPUT is set to the flag's
134
+ // CommandLineFlagInfo or unchanged if we return false.
135
+ extern bool GetCommandLineFlagInfo(const char* name,
136
+ CommandLineFlagInfo* OUTPUT);
137
+
138
+ // Return the CommandLineFlagInfo of the flagname. exit() if name not found.
139
+ // Example usage, to check if a flag's value is currently the default value:
140
+ // if (GetCommandLineFlagInfoOrDie("foo").is_default) ...
141
+ extern CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name);
142
+
143
+ enum FlagSettingMode { // selects how SetCommandLineOptionWithMode applies the given value
144
+ // update the flag's value (can call this multiple times).
145
+ SET_FLAGS_VALUE,
146
+ // update the flag's value, but *only if* it has not yet been updated
147
+ // with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef".
148
+ SET_FLAG_IF_DEFAULT,
149
+ // set the flag's default value to this. If the flag has not been updated
150
+ // yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef")
151
+ // change the flag's current value to the new default value as well.
152
+ SET_FLAGS_DEFAULT
153
+ };
154
+
155
+ // Set a particular flag ("command line option"). Returns a string
156
+ // describing the new value that the option has been set to. The
157
+ // return value API is not well-specified, so basically just depend on
158
+ // it to be empty if the setting failed for some reason -- the name is
159
+ // not a valid flag name, or the value is not a valid value -- and
160
+ // non-empty else.
161
+
162
+ // SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case)
163
+ extern string SetCommandLineOption(const char* name, const char* value);
164
+ extern string SetCommandLineOptionWithMode(const char* name, const char* value,
165
+ FlagSettingMode set_mode);
166
+
167
+
168
+ // --------------------------------------------------------------------
169
+ // Saves the states (value, default value, whether the user has set
170
+ // the flag, registered validators, etc) of all flags, and restores
171
+ // them when the FlagSaver is destroyed. This is very useful in
172
+ // tests, say, when you want to let your tests change the flags, but
173
+ // make sure that they get reverted to the original states when your
174
+ // test is complete.
175
+ //
176
+ // Example usage:
177
+ // void TestFoo() {
178
+ // FlagSaver s1;
179
+ // FLAGS_foo = false;
180
+ // FLAGS_bar = "some value";
181
+ //
182
+ // // test happens here. You can return at any time
183
+ // // without worrying about restoring the FLAG values.
184
+ // }
185
+ //
186
+ // Note: This class is marked with ATTRIBUTE_UNUSED because all the
187
+ // work is done in the constructor and destructor, so in the standard
188
+ // usage example above, the compiler would complain that it's an
189
+ // unused variable.
190
+ //
191
+ // This class is thread-safe.
192
+ /*
193
+ class FlagSaver {
194
+ public:
195
+ FlagSaver();
196
+ ~FlagSaver();
197
+
198
+ private:
199
+ class FlagSaverImpl* impl_; // we use pimpl here to keep API steady
200
+
201
+ FlagSaver(const FlagSaver&); // no copying!
202
+ void operator=(const FlagSaver&);
203
+ }
204
+ #ifndef SWIG // swig seems to have trouble with this for some reason
205
+ ATTRIBUTE_UNUSED
206
+ #endif
207
+ ;
208
+ */
209
+ // --------------------------------------------------------------------
210
+ // Some deprecated or hopefully-soon-to-be-deprecated functions.
211
+
212
+ // This is often used for logging. TODO(csilvers): figure out a better way
213
+ extern string CommandlineFlagsIntoString();
214
+ // Usually where this is used, a FlagSaver should be used instead.
215
+ extern bool ReadFlagsFromString(const string& flagfilecontents,
216
+ const char* prog_name,
217
+ bool errors_are_fatal); // uses SET_FLAGS_VALUE
218
+
219
+ // These let you manually implement --flagfile functionality.
220
+ // DEPRECATED.
221
+ extern bool AppendFlagsIntoFile(const string& filename, const char* prog_name);
222
+ extern bool SaveCommandFlags(); // actually defined in google.cc !
223
+ extern bool ReadFromFlagsFile(const string& filename, const char* prog_name,
224
+ bool errors_are_fatal); // uses SET_FLAGS_VALUE
225
+
226
+
227
+ // --------------------------------------------------------------------
228
+ // Useful routines for initializing flags from the environment.
229
+ // In each case, if 'varname' does not exist in the environment
230
+ // return defval. If 'varname' does exist but is not valid
231
+ // (e.g., not a number for an int32 flag), abort with an error.
232
+ // Otherwise, return the value. NOTE: for booleans, for true use
233
+ // 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'.
234
+
235
+ extern bool BoolFromEnv(const char *varname, bool defval);
236
+ extern int32 Int32FromEnv(const char *varname, int32 defval);
237
+ extern int64 Int64FromEnv(const char *varname, int64 defval);
238
+ extern uint64 Uint64FromEnv(const char *varname, uint64 defval);
239
+ extern double DoubleFromEnv(const char *varname, double defval);
240
+ extern const char *StringFromEnv(const char *varname, const char *defval);
241
+
242
+
243
+ // --------------------------------------------------------------------
244
+ // The next two functions parse commandlineflags from main():
245
+
246
+ // Set the "usage" message for this program. For example:
247
+ // string usage("This program does nothing. Sample usage:\n");
248
+ // usage += string(argv[0]) + " <uselessarg1> <uselessarg2>";
249
+ // SetUsageMessage(usage);
250
+ // Do not include commandline flags in the usage: we do that for you!
251
+ // Thread-hostile; meant to be called before any threads are spawned.
252
+ extern void SetUsageMessage(const string& usage);
253
+
254
+ // Looks for flags in argv and parses them. Rearranges argv to put
255
+ // flags first, or removes them entirely if remove_flags is true.
256
+ // If a flag is defined more than once in the command line or flag
257
+ // file, the last definition is used.
258
+ // See top-of-file for more details on this function.
259
+ #ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead.
260
+ extern uint32 ParseCommandLineFlags(int *argc, char*** argv,
261
+ bool remove_flags);
262
+ #endif
263
+
264
+
265
+ // Calls to ParseCommandLineNonHelpFlags and then to
266
+ // HandleCommandLineHelpFlags can be used instead of a call to
267
+ // ParseCommandLineFlags during initialization, in order to allow for
268
+ // changing default values for some FLAGS (via
269
+ // e.g. SetCommandLineOptionWithMode calls) between the time of
270
+ // command line parsing and the time of dumping help information for
271
+ // the flags as a result of command line parsing.
272
+ // If a flag is defined more than once in the command line or flag
273
+ // file, the last definition is used.
274
+ extern uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv,
275
+ bool remove_flags);
276
+ // This is actually defined in commandlineflags_reporting.cc.
277
+ // This function is misnamed (it also handles --version, etc.), but
278
+ // it's too late to change that now. :-(
279
+ extern void HandleCommandLineHelpFlags(); // in commandlineflags_reporting.cc
280
+
281
+ // Allow command line reparsing. Disables the error normally
282
+ // generated when an unknown flag is found, since it may be found in a
283
+ // later parse. Thread-hostile; meant to be called before any threads
284
+ // are spawned.
285
+ extern void AllowCommandLineReparsing();
286
+
287
+ // Reparse the flags that have not yet been recognized.
288
+ // Only flags registered since the last parse will be recognized.
289
+ // Any flag value must be provided as part of the argument using "=",
290
+ // not as a separate command line argument that follows the flag argument.
291
+ // Intended for handling flags from dynamically loaded libraries,
292
+ // since their flags are not registered until they are loaded.
293
+ extern uint32 ReparseCommandLineNonHelpFlags();
294
+
295
+
296
+ // --------------------------------------------------------------------
297
+ // Now come the command line flag declaration/definition macros that
298
+ // will actually be used. They're kind of hairy. A major reason
299
+ // for this is initialization: we want people to be able to access
300
+ // variables in global constructors and have that not crash, even if
301
+ // their global constructor runs before the global constructor here.
302
+ // (Obviously, we can't guarantee the flags will have the correct
303
+ // default value in that case, but at least accessing them is safe.)
304
+ // The only way to do that is have flags point to a static buffer.
305
+ // So we make one, using a union to ensure proper alignment, and
306
+ // then use placement-new to actually set up the flag with the
307
+ // correct default value. In the same vein, we have to worry about
308
+ // flag access in global destructors, so FlagRegisterer has to be
309
+ // careful never to destroy the flag-values it constructs.
310
+ //
311
+ // Note that when we define a flag variable FLAGS_<name>, we also
312
+ // preemptively define a junk variable, FLAGS_no<name>. This is to
313
+ // cause a link-time error if someone tries to define 2 flags with
314
+ // names like "logging" and "nologging". We do this because a bool
315
+ // flag FLAG can be set from the command line to true with a "-FLAG"
316
+ // argument, and to false with a "-noFLAG" argument, and so this can
317
+ // potentially avert confusion.
318
+ //
319
+ // We also put flags into their own namespace. It is purposefully
320
+ // named in an opaque way that people should have trouble typing
321
+ // directly. The idea is that DEFINE puts the flag in the weird
322
+ // namespace, and DECLARE imports the flag from there into the current
323
+ // namespace. The net result is to force people to use DECLARE to get
324
+ // access to a flag, rather than saying "extern bool FLAGS_whatever;"
325
+ // or some such instead. We want this so we can put extra
326
+ // functionality (like sanity-checking) in DECLARE if we want, and
327
+ // make sure it is picked up everywhere.
328
+ //
329
+ // We also put the type of the variable in the namespace, so that
330
+ // people can't DECLARE_int32 something that they DEFINE_bool'd
331
+ // elsewhere.
332
+
333
+ class FlagRegisterer { // one static instance is created per flag by the DEFINE_* macros in this header
334
+ public:
335
+ FlagRegisterer(const char* name, const char* type, // flag name and type name as strings (the macros pass #name and #type / "string")
336
+ const char* help, const char* filename, // help text (possibly stripped) and the defining file (the macros pass __FILE__)
337
+ void* current_storage, void* defvalue_storage); // addresses of the current-value and default-value variables for the flag
338
+ };
339
+
340
+ #ifndef SWIG // In swig, ignore the main flag declarations
341
+
342
+ // If STRIP_FLAG_HELP is defined and is non-zero, we remove the help
343
+ // message from the binary file. This is useful for security reasons
344
+ // when shipping a binary outside of Google (if the user cannot see
345
+ // the usage message by executing the program, they shouldn't be able
346
+ // to see it by running "strings binary_file").
347
+
348
+ extern const char kStrippedFlagHelp[];
349
+
350
+ #if STRIP_FLAG_HELP > 0
351
+ // Need this construct to avoid the 'defined but not used' warning.
352
+ #define MAYBE_STRIPPED_HELP(txt) (false ? (txt) : kStrippedFlagHelp)
353
+ #else
354
+ #define MAYBE_STRIPPED_HELP(txt) txt
355
+ #endif
356
+
357
+ // Each command-line flag has two variables associated with it: one
358
+ // with the current value, and one with the default value. However,
359
+ // we have a third variable, which is where value is assigned; it's a
360
+ // constant. This guarantees that FLAGS_##name is initialized at
361
+ // static initialization time (e.g. before program-start) rather than
362
+ // global construction time (which is after program-start but
363
+ // before main), at least when 'value' is a compile-time constant. We
364
+ // use a small trick for the "default value" variable, and call it
365
+ // FLAGS_no<name>. This serves the second purpose of assuring a
366
+ // compile error if someone tries to define a flag named no<name>
367
+ // which is illegal (--foo and --nofoo both affect the "foo" flag).
368
+ #define DEFINE_VARIABLE(type, shorttype, name, value, help) \
369
+ namespace fL##shorttype { \
370
+ static const type FLAGS_nono##name = value; \
371
+ type FLAGS_##name = FLAGS_nono##name; \
372
+ type FLAGS_no##name = FLAGS_nono##name; \
373
+ static FlagRegisterer o_##name( \
374
+ #name, #type, MAYBE_STRIPPED_HELP(help), __FILE__, \
375
+ &FLAGS_##name, &FLAGS_no##name); \
376
+ } \
377
+ using fL##shorttype::FLAGS_##name
378
+
379
+ #define DECLARE_VARIABLE(type, shorttype, name) \
380
+ namespace fL##shorttype { \
381
+ extern type FLAGS_##name; \
382
+ } \
383
+ using fL##shorttype::FLAGS_##name
384
+
385
+ // For boolean flags, we want to do the extra check that the passed-in
386
+ // value is actually a bool, and not a string or something that can be
387
+ // coerced to a bool. These declarations (no definition needed!) will
388
+ // help us do that, and never evaluate from, which is important.
389
+ // We'll use 'sizeof(IsBoolFlag(val))' to distinguish.
390
+ namespace fLB { // declarations only; used inside sizeof() by DEFINE_bool, so never called
391
+ template<typename From> double IsBoolFlag(const From& from); // selected for any non-bool argument; result has sizeof(double)
392
+ bool IsBoolFlag(bool from); // exact match for a bool argument; result has sizeof(bool)
393
+ }
394
+ extern bool FlagsTypeWarn(const char *name);
395
+
396
+ #define DECLARE_bool(name) DECLARE_VARIABLE(bool,B, name)
397
+ // We have extra code here to make sure 'val' is actually a boolean.
398
+ #define DEFINE_bool(name,val,txt) namespace fLB { \
399
+ const bool FLAGS_nonono##name = \
400
+ (sizeof(::fLB::IsBoolFlag(val)) \
401
+ == sizeof(double)) \
402
+ ? FlagsTypeWarn(#name) : true; \
403
+ } \
404
+ DEFINE_VARIABLE(bool,B, name, val, txt)
405
+ #define DECLARE_int32(name) DECLARE_VARIABLE(int32,I, name)
406
+ #define DEFINE_int32(name,val,txt) DEFINE_VARIABLE(int32,I, name, val, txt)
407
+
408
+ #define DECLARE_int64(name) DECLARE_VARIABLE(int64,I64, name)
409
+ #define DEFINE_int64(name,val,txt) DEFINE_VARIABLE(int64,I64, name, val, txt)
410
+
411
+ #define DECLARE_uint64(name) DECLARE_VARIABLE(uint64,U64, name)
412
+ #define DEFINE_uint64(name,val,txt) DEFINE_VARIABLE(uint64,U64, name, val, txt)
413
+
414
+ #define DECLARE_double(name) DECLARE_VARIABLE(double,D, name)
415
+ #define DEFINE_double(name,val,txt) DEFINE_VARIABLE(double,D, name, val, txt)
416
+
417
+ // Strings are trickier, because they're not a POD, so we can't
418
+ // construct them at static-initialization time (instead they get
419
+ // constructed at global-constructor time, which is much later). To
420
+ // try to avoid crashes in that case, we use a char buffer to store
421
+ // the string, which we can static-initialize, and then placement-new
422
+ // into it later. It's not perfect, but the best we can do.
423
+ #define DECLARE_string(name) namespace fLS { extern string& FLAGS_##name; } \
424
+ using fLS::FLAGS_##name
425
+
426
+ // We need to define a var named FLAGS_no##name so people don't define
427
+ // --string and --nostring. And we need a temporary place to put val
428
+ // so we don't have to evaluate it twice. Two great needs that go
429
+ // great together!
430
+ #define DEFINE_string(name, val, txt) \
431
+ namespace fLS { \
432
+ static union { void* align; char s[sizeof(string)]; } s_##name[2]; \
433
+ const string* const FLAGS_no##name = new (s_##name[0].s) string(val); \
434
+ static FlagRegisterer o_##name( \
435
+ #name, "string", MAYBE_STRIPPED_HELP(txt), __FILE__, \
436
+ s_##name[0].s, new (s_##name[1].s) string(*FLAGS_no##name)); \
437
+ string& FLAGS_##name = *(reinterpret_cast<string*>(s_##name[0].s)); \
438
+ } \
439
+ using fLS::FLAGS_##name
440
+
441
+ #endif // SWIG
442
+
443
+ #endif // BASE_COMMANDLINEFLAGS_H_
@@ -0,0 +1,41 @@
1
+ // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ // Support for collecting useful information when crashing.
6
+
7
+ #ifndef BASE_CRASH_H_
8
+ #define BASE_CRASH_H_
9
+
10
+ namespace base {
11
+
12
+ struct CrashReason { // record describing why the process is about to crash
13
+ CrashReason() : filename(0), line_number(0), message(0), depth(0) {} // zeroes the pointers and counters; stack[] and trace_info[] are not initialized here
14
+
15
+ const char* filename; // not owned by this struct: no destructor frees it
16
+ int line_number;
17
+ const char* message; // not owned by this struct: no destructor frees it
18
+
19
+ // We'll also store a bit of stack trace context at the time of crash as
20
+ // it may not be available later on.
21
+ void* stack[32];
22
+ int depth; // presumably the number of valid entries in stack - confirm against callers
23
+
24
+ // We'll try to store some trace information if it's available - this should
25
+ // reflect information from TraceContext::Thread()->tracer()->ToString().
26
+ // This field should probably not be set from within a signal handler or
27
+ // low-level code unless absolutely safe to do so.
28
+ char trace_info[512]; // NOTE(review): left uninitialized by the constructor - confirm writers always NUL-terminate before it is read
29
+ };
30
+
31
+ // Stores "reason" as an explanation for why the process is about to
32
+ // crash. The reason and its contents must remain live for the life
33
+ // of the process. Only the first reason is kept.
34
+ void SetCrashReason(const CrashReason* reason);
35
+
36
+ // Returns first reason passed to SetCrashReason(), or NULL.
37
+ const CrashReason* GetCrashReason();
38
+
39
+ } // namespace base
40
+
41
+ #endif // BASE_CRASH_H_