language_detection 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. data/.gitignore +19 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +85 -0
  5. data/Rakefile +11 -0
  6. data/ext/cld/Makefile +34 -0
  7. data/ext/cld/base/basictypes.h +348 -0
  8. data/ext/cld/base/build_config.h +124 -0
  9. data/ext/cld/base/casts.h +156 -0
  10. data/ext/cld/base/commandlineflags.h +443 -0
  11. data/ext/cld/base/crash.h +41 -0
  12. data/ext/cld/base/dynamic_annotations.h +358 -0
  13. data/ext/cld/base/global_strip_options.h +59 -0
  14. data/ext/cld/base/log_severity.h +46 -0
  15. data/ext/cld/base/logging.h +1403 -0
  16. data/ext/cld/base/macros.h +243 -0
  17. data/ext/cld/base/port.h +54 -0
  18. data/ext/cld/base/scoped_ptr.h +428 -0
  19. data/ext/cld/base/stl_decl.h +0 -0
  20. data/ext/cld/base/stl_decl_msvc.h +107 -0
  21. data/ext/cld/base/string_util.h +29 -0
  22. data/ext/cld/base/strtoint.h +93 -0
  23. data/ext/cld/base/template_util.h +96 -0
  24. data/ext/cld/base/type_traits.h +198 -0
  25. data/ext/cld/base/vlog_is_on.h +143 -0
  26. data/ext/cld/cld.so +0 -0
  27. data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
  28. data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
  29. data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
  30. data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
  31. data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
  32. data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
  33. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
  34. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
  35. data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
  36. data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
  37. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
  38. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
  39. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
  40. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
  41. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
  42. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
  43. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
  44. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
  45. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
  46. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
  47. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
  48. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
  49. data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
  50. data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
  51. data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
  52. data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
  53. data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
  54. data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
  55. data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
  56. data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
  57. data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
  58. data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
  59. data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
  60. data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
  61. data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
  62. data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
  63. data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
  64. data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
  65. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
  66. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
  67. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
  68. data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
  69. data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
  70. data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
  71. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
  72. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
  73. data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
  74. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
  75. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
  76. data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
  77. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
  78. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
  79. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
  80. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
  81. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
  82. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
  83. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
  84. data/ext/cld/encodings/internal/encodings.cc +12 -0
  85. data/ext/cld/encodings/lang_enc.h +254 -0
  86. data/ext/cld/encodings/proto/encodings.pb.h +169 -0
  87. data/ext/cld/encodings/public/encodings.h +301 -0
  88. data/ext/cld/extconf.rb +1 -0
  89. data/ext/cld/language_detection.cc +88 -0
  90. data/ext/cld/languages/internal/languages.cc +337 -0
  91. data/ext/cld/languages/proto/languages.pb.h +179 -0
  92. data/ext/cld/languages/public/languages.h +379 -0
  93. data/language_detection.gemspec +28 -0
  94. data/lib/language_detection/string.rb +1 -0
  95. data/lib/language_detection/version.rb +3 -0
  96. data/lib/language_detection.rb +54 -0
  97. data/test/_helper.rb +15 -0
  98. data/test/fixtures/languages.csv +80 -0
  99. data/test/language_detection_test.rb +88 -0
  100. metadata +250 -0
@@ -0,0 +1,156 @@
1
+ // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #ifndef BASE_CASTS_H_
6
+ #define BASE_CASTS_H_
7
+
8
+ #include <assert.h> // for use with down_cast<>
9
+ #include <string.h> // for memcpy
10
+
11
+ #include "base/macros.h"
12
+
13
+
14
+ // Use implicit_cast as a safe version of static_cast or const_cast
15
+ // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
16
+ // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
17
+ // a const pointer to Foo).
18
+ // When you use implicit_cast, the compiler checks that the cast is safe.
19
+ // Such explicit implicit_casts are necessary in surprisingly many
20
+ // situations where C++ demands an exact type match instead of an
21
+ // argument type convertible to a target type.
22
+ //
23
+ // The From type can be inferred, so the preferred syntax for using
24
+ // implicit_cast is the same as for static_cast etc.:
25
+ //
26
+ // implicit_cast<ToType>(expr)
27
+ //
28
+ // implicit_cast would have been part of the C++ standard library,
29
+ // but the proposal was submitted too late. It will probably make
30
+ // its way into the language in the future.
31
+ template<typename To, typename From> // To is named by the caller; From is deduced from the argument
32
+ inline To implicit_cast(From const &f) {
33
+ return f; // only compiles when From converts implicitly to To
34
+ }
35
+
36
+
37
+ // When you upcast (that is, cast a pointer from type Foo to type
38
+ // SuperclassOfFoo), it's fine to use implicit_cast<>, since upcasts
39
+ // always succeed. When you downcast (that is, cast a pointer from
40
+ // type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
41
+ // how do you know the pointer is really of type SubclassOfFoo? It
42
+ // could be a bare Foo, or of type DifferentSubclassOfFoo. Thus,
43
+ // when you downcast, you should use this macro. In debug mode, we
44
+ // use dynamic_cast<> to double-check the downcast is legal (we die
45
+ // if it's not). In normal mode, we do the efficient static_cast<>
46
+ // instead. Thus, it's important to test in debug mode to make sure
47
+ // the cast is legal!
48
+ // This is the only place in the code we should use dynamic_cast<>.
49
+ // In particular, you SHOULDN'T be using dynamic_cast<> in order to
50
+ // do RTTI, e.g. code like this:
51
+ // if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
52
+ // if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
53
+ // You should design the code some other way not to need this.
54
+
55
+ template<typename To, typename From> // use like this: down_cast<T*>(foo);
56
+ inline To down_cast(From* f) { // so we only accept pointers
57
+ // Ensures that To is a sub-type of From *. This test is here only
58
+ // for compile-time type checking, and has no overhead in an
59
+ // optimized build at run-time, as it will be optimized away
60
+ // completely.
61
+ if (false) {
62
+ implicit_cast<From*, To>(0); // never executed; it only has to type-check (To must convert to From*)
63
+ }
64
+
65
+ assert(f == NULL || dynamic_cast<To>(f) != NULL); // RTTI: debug mode only! A null f is accepted as-is.
66
+ return static_cast<To>(f); // unchecked cast; the assert above is the only run-time verification
67
+ }
68
+
69
+ // Overload of down_cast for references. Use like this: down_cast<T&>(foo).
70
+ // The code is slightly convoluted because we're still using the pointer
71
+ // form of dynamic cast. (The reference form throws an exception if it
72
+ // fails.)
73
+ //
74
+ // There's no need for a special const overload either for the pointer
75
+ // or the reference form. If you call down_cast with a const T&, the
76
+ // compiler will just bind From to const T.
77
+ template<typename To, typename From> // use like this: down_cast<T&>(foo);
78
+ inline To down_cast(From& f) {
79
+ COMPILE_ASSERT(base::is_reference<To>::value, target_type_not_a_reference); // reject a non-reference To at compile time
80
+ typedef typename base::remove_reference<To>::type* ToAsPointer; // pointer counterpart of To (T* when To is T&)
81
+ if (false) {
82
+ // Compile-time check that To inherits from From. Never executed; the
83
+ implicit_cast<From*, ToAsPointer>(0); // call only has to type-check (ToAsPointer must convert to From*)
84
+ }
85
+
86
+ assert(dynamic_cast<ToAsPointer>(&f) != NULL); // RTTI: debug mode only; pointer form, so a failed cast yields NULL
87
+ return static_cast<To>(f); // unchecked cast; the assert above is the only run-time verification
88
+ }
89
+
90
+ // bit_cast<Dest,Source> is a template function that implements the
91
+ // equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
92
+ // very low-level functions like the protobuf library and fast math
93
+ // support.
94
+ //
95
+ // float f = 3.14159265358979;
96
+ // int i = bit_cast<int32>(f);
97
+ // // i = 0x40490fdb
98
+ //
99
+ // The classical address-casting method is:
100
+ //
101
+ // // WRONG
102
+ // float f = 3.14159265358979; // WRONG
103
+ // int i = * reinterpret_cast<int*>(&f); // WRONG
104
+ //
105
+ // The address-casting method actually produces undefined behavior
106
+ // according to ISO C++ specification section 3.10 -15-. Roughly, this
107
+ // section says: if an object in memory has one type, and a program
108
+ // accesses it with a different type, then the result is undefined
109
+ // behavior for most values of "different type".
110
+ //
111
+ // This is true for any cast syntax, either *(int*)&f or
112
+ // *reinterpret_cast<int*>(&f). And it is particularly true for
113
+ // conversions between integral lvalues and floating-point lvalues.
114
+ //
115
+ // The purpose of 3.10 -15- is to allow optimizing compilers to assume
116
+ // that expressions with different types refer to different memory. gcc
117
+ // 4.0.1 has an optimizer that takes advantage of this. So a
118
+ // non-conforming program quietly produces wildly incorrect output.
119
+ //
120
+ // The problem is not the use of reinterpret_cast. The problem is type
121
+ // punning: holding an object in memory of one type and reading its bits
122
+ // back using a different type.
123
+ //
124
+ // The C++ standard is more subtle and complex than this, but that
125
+ // is the basic idea.
126
+ //
127
+ // Anyways ...
128
+ //
129
+ // bit_cast<> calls memcpy() which is blessed by the standard,
130
+ // especially by the example in section 3.9 . Also, of course,
131
+ // bit_cast<> wraps up the nasty logic in one place.
132
+ //
133
+ // Fortunately memcpy() is very fast. In optimized mode, with a
134
+ // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
135
+ // code with the minimal amount of data movement. On a 32-bit system,
136
+ // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
137
+ // compiles to two loads and two stores.
138
+ //
139
+ // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
140
+ //
141
+ // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
142
+ // is likely to surprise you.
143
+ //
144
+
145
+ template <class Dest, class Source> // use like this: bit_cast<Dest>(source); Source is deduced
146
+ inline Dest bit_cast(const Source& source) {
147
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
148
+ // A compile error here means your Dest and Source have different sizes.
149
+ typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1]; // a mismatch gives array size -1, which is ill-formed
150
+
151
+ Dest dest; // filled in by the memcpy below
152
+ memcpy(&dest, &source, sizeof(dest)); // copy the bytes of source into dest unchanged
153
+ return dest;
154
+ }
155
+
156
+ #endif // BASE_CASTS_H_
@@ -0,0 +1,443 @@
1
+ // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ // This is the file that should be included by any file which declares
6
+ // or defines a command line flag or wants to parse command line flags
7
+ // or print a program usage message (which will include information about
8
+ // flags). Executive summary, in the form of an example foo.cc file:
9
+ //
10
+ // #include "foo.h" // foo.h has a line "DECLARE_int32(start);"
11
+ //
12
+ // DEFINE_int32(end, 1000, "The last record to read");
13
+ // DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...)
14
+ //
15
+ // void MyFunc() {
16
+ // if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end);
17
+ // }
18
+ //
19
+ // Then, at the command-line:
20
+ // ./foo --noverbose --start=5 --end=100
21
+
22
+ #ifndef BASE_COMMANDLINEFLAGS_H_
23
+ #define BASE_COMMANDLINEFLAGS_H_
24
+
25
+ #include <assert.h>
26
+ #include <string>
27
+ #include <vector>
28
+ #include "base/basictypes.h"
29
+ #include "base/port.h"
30
+ #include "base/stl_decl_msvc.h"
31
+ #include "base/global_strip_options.h"
32
+
33
+ // --------------------------------------------------------------------
34
+ // To actually define a flag in a file, use DEFINE_bool,
35
+ // DEFINE_string, etc. at the bottom of this file. You may also find
36
+ // it useful to register a validator with the flag. This ensures that
37
+ // when the flag is parsed from the commandline, or is later set via
38
+ // SetCommandLineOption, we call the validation function.
39
+ //
40
+ // The validation function should return true if the flag value is valid, and
41
+ // false otherwise. If the function returns false for the new setting of the
42
+ // flag, the flag will retain its current value. If it returns false for the
43
+ // default value, InitGoogle will die.
44
+ //
45
+ // This function is safe to call at global construct time (as in the
46
+ // example below).
47
+ //
48
+ // Example use:
49
+ // static bool ValidatePort(const char* flagname, int32 value) {
50
+ // if (value > 0 && value < 32768) // value is ok
51
+ // return true;
52
+ // printf("Invalid value for --%s: %d\n", flagname, (int)value);
53
+ // return false;
54
+ // }
55
+ // DEFINE_int32(port, 0, "What port to listen on");
56
+ // static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort);
57
+
58
+ // Returns true if successfully registered, false if not (because the
59
+ // first argument doesn't point to a command-line flag, or because a
60
+ // validator is already registered for this flag).
61
+ bool RegisterFlagValidator(const bool* flag,
62
+ bool (*validate_fn)(const char*, bool));
63
+ bool RegisterFlagValidator(const int32* flag,
64
+ bool (*validate_fn)(const char*, int32));
65
+ bool RegisterFlagValidator(const int64* flag,
66
+ bool (*validate_fn)(const char*, int64));
67
+ bool RegisterFlagValidator(const uint64* flag,
68
+ bool (*validate_fn)(const char*, uint64));
69
+ bool RegisterFlagValidator(const double* flag,
70
+ bool (*validate_fn)(const char*, double));
71
+ bool RegisterFlagValidator(const string* flag,
72
+ bool (*validate_fn)(const char*, const string&));
73
+
74
+
75
+ // --------------------------------------------------------------------
76
+ // These methods are the best way to get access to info about the
77
+ // list of commandline flags. Note that these routines are pretty slow.
78
+ // GetAllFlags: mostly-complete info about the list, sorted by file.
79
+ // ShowUsageWithFlags: pretty-prints the list to stdout (what --help does)
80
+ // ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr
81
+ //
82
+ // In addition to accessing flags, you can also access argv[0] (the program
83
+ // name) and argv (the entire commandline), which we sock away a copy of.
84
+ // These variables are static, so you should only set them once.
85
+
86
+ struct CommandLineFlagInfo { // describes one flag; output type of GetAllFlags() and GetCommandLineFlagInfo()
87
+ string name; // the name of the flag
88
+ string type; // the type of the flag: int32, etc
89
+ string description; // the "help text" associated with the flag
90
+ string current_value; // the current value, as a string
91
+ string default_value; // the default value, as a string
92
+ string filename; // 'cleaned' version of filename holding the flag
93
+ bool is_default; // true if the flag has default value
94
+ bool has_validator_fn; // true if RegisterFlagValidator called on this flag
95
+ };
96
+
97
+ extern void GetAllFlags(vector<CommandLineFlagInfo>* OUTPUT);
98
+ // These two are actually defined in commandlineflags_reporting.cc.
99
+ extern void ShowUsageWithFlags(const char *argv0); // what --help does
100
+ extern void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict);
101
+
102
+ // Create a descriptive string for a flag.
103
+ // Goes to some trouble to make pretty line breaks.
104
+ extern string DescribeOneFlag(const CommandLineFlagInfo& flag);
105
+
106
+ // Thread-hostile; meant to be called before any threads are spawned.
107
+ extern void SetArgv(int argc, const char** argv);
108
+ // The following functions are thread-safe as long as SetArgv() is
109
+ // only called before any threads start.
110
+ extern const vector<string>& GetArgvs(); // all of argv = vector of strings
111
+ extern const char* GetArgv(); // all of argv as a string
112
+ extern const char* GetArgv0(); // only argv0
113
+ extern uint32 GetArgvSum(); // simple checksum of argv
114
+ extern const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set
115
+ extern const char* ProgramInvocationShortName(); // basename(argv0)
116
+ // ProgramUsage() is thread-safe as long as SetUsageMessage() is only
117
+ // called before any threads start.
118
+ extern const char* ProgramUsage(); // string set by SetUsageMessage()
119
+
120
+
121
+ // --------------------------------------------------------------------
122
+ // Normally you access commandline flags by just saying "if (FLAGS_foo)"
123
+ // or whatever, and set them by calling "FLAGS_foo = bar" (or, more
124
+ // commonly, via the DEFINE_foo macro). But if you need a bit more
125
+ // control, we have programmatic ways to get/set the flags as well.
126
+ // These programmatic ways to access flags are thread-safe, but direct
127
+ // access is only thread-compatible.
128
+
129
+ // Return true iff the flagname was found.
130
+ // OUTPUT is set to the flag's value, or unchanged if we return false.
131
+ extern bool GetCommandLineOption(const char* name, string* OUTPUT);
132
+
133
+ // Return true iff the flagname was found. OUTPUT is set to the flag's
134
+ // CommandLineFlagInfo or unchanged if we return false.
135
+ extern bool GetCommandLineFlagInfo(const char* name,
136
+ CommandLineFlagInfo* OUTPUT);
137
+
138
+ // Return the CommandLineFlagInfo of the flagname. exit() if name not found.
139
+ // Example usage, to check if a flag's value is currently the default value:
140
+ // if (GetCommandLineFlagInfoOrDie("foo").is_default) ...
141
+ extern CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name);
142
+
143
+ enum FlagSettingMode { // the set_mode argument of SetCommandLineOptionWithMode()
144
+ // update the flag's value (can call this multiple times).
145
+ SET_FLAGS_VALUE,
146
+ // update the flag's value, but *only if* it has not yet been updated
147
+ // with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef".
148
+ SET_FLAG_IF_DEFAULT,
149
+ // set the flag's default value to this. If the flag has not been updated
150
+ // yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef")
151
+ // change the flag's current value to the new default value as well.
152
+ SET_FLAGS_DEFAULT
153
+ };
154
+
155
+ // Set a particular flag ("command line option"). Returns a string
156
+ // describing the new value that the option has been set to. The
157
+ // return value API is not well-specified, so basically just depend on
158
+ // it to be empty if the setting failed for some reason -- the name is
159
+ // not a valid flag name, or the value is not a valid value -- and
160
+ // non-empty else.
161
+
162
+ // SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case)
163
+ extern string SetCommandLineOption(const char* name, const char* value);
164
+ extern string SetCommandLineOptionWithMode(const char* name, const char* value,
165
+ FlagSettingMode set_mode);
166
+
167
+
168
+ // --------------------------------------------------------------------
169
+ // Saves the states (value, default value, whether the user has set
170
+ // the flag, registered validators, etc) of all flags, and restores
171
+ // them when the FlagSaver is destroyed. This is very useful in
172
+ // tests, say, when you want to let your tests change the flags, but
173
+ // make sure that they get reverted to the original states when your
174
+ // test is complete.
175
+ //
176
+ // Example usage:
177
+ // void TestFoo() {
178
+ // FlagSaver s1;
179
+ // FLAGS_foo = false;
180
+ // FLAGS_bar = "some value";
181
+ //
182
+ // // test happens here. You can return at any time
183
+ // // without worrying about restoring the FLAG values.
184
+ // }
185
+ //
186
+ // Note: This class is marked with ATTRIBUTE_UNUSED because all the
187
+ // work is done in the constructor and destructor, so in the standard
188
+ // usage example above, the compiler would complain that it's an
189
+ // unused variable.
190
+ //
191
+ // This class is thread-safe.
192
+ /*
193
+ class FlagSaver {
194
+ public:
195
+ FlagSaver();
196
+ ~FlagSaver();
197
+
198
+ private:
199
+ class FlagSaverImpl* impl_; // we use pimpl here to keep API steady
200
+
201
+ FlagSaver(const FlagSaver&); // no copying!
202
+ void operator=(const FlagSaver&);
203
+ }
204
+ #ifndef SWIG // swig seems to have trouble with this for some reason
205
+ ATTRIBUTE_UNUSED
206
+ #endif
207
+ ;
208
+ */
209
+ // --------------------------------------------------------------------
210
+ // Some deprecated or hopefully-soon-to-be-deprecated functions.
211
+
212
+ // This is often used for logging. TODO(csilvers): figure out a better way
213
+ extern string CommandlineFlagsIntoString();
214
+ // Usually where this is used, a FlagSaver should be used instead.
215
+ extern bool ReadFlagsFromString(const string& flagfilecontents,
216
+ const char* prog_name,
217
+ bool errors_are_fatal); // uses SET_FLAGS_VALUE
218
+
219
+ // These let you manually implement --flagfile functionality.
220
+ // DEPRECATED.
221
+ extern bool AppendFlagsIntoFile(const string& filename, const char* prog_name);
222
+ extern bool SaveCommandFlags(); // actually defined in google.cc !
223
+ extern bool ReadFromFlagsFile(const string& filename, const char* prog_name,
224
+ bool errors_are_fatal); // uses SET_FLAGS_VALUE
225
+
226
+
227
+ // --------------------------------------------------------------------
228
+ // Useful routines for initializing flags from the environment.
229
+ // In each case, if 'varname' does not exist in the environment
230
+ // return defval. If 'varname' does exist but is not valid
231
+ // (e.g., not a number for an int32 flag), abort with an error.
232
+ // Otherwise, return the value. NOTE: for booleans, for true use
233
+ // 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'.
234
+
235
+ extern bool BoolFromEnv(const char *varname, bool defval);
236
+ extern int32 Int32FromEnv(const char *varname, int32 defval);
237
+ extern int64 Int64FromEnv(const char *varname, int64 defval);
238
+ extern uint64 Uint64FromEnv(const char *varname, uint64 defval);
239
+ extern double DoubleFromEnv(const char *varname, double defval);
240
+ extern const char *StringFromEnv(const char *varname, const char *defval);
241
+
242
+
243
+ // --------------------------------------------------------------------
244
+ // The next two functions parse commandlineflags from main():
245
+
246
+ // Set the "usage" message for this program. For example:
247
+ // string usage("This program does nothing. Sample usage:\n");
248
+ // usage += string(argv[0]) + " <uselessarg1> <uselessarg2>";
249
+ // SetUsageMessage(usage);
250
+ // Do not include commandline flags in the usage: we do that for you!
251
+ // Thread-hostile; meant to be called before any threads are spawned.
252
+ extern void SetUsageMessage(const string& usage);
253
+
254
+ // Looks for flags in argv and parses them. Rearranges argv to put
255
+ // flags first, or removes them entirely if remove_flags is true.
256
+ // If a flag is defined more than once in the command line or flag
257
+ // file, the last definition is used.
258
+ // See top-of-file for more details on this function.
259
+ #ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead.
260
+ extern uint32 ParseCommandLineFlags(int *argc, char*** argv,
261
+ bool remove_flags);
262
+ #endif
263
+
264
+
265
+ // Calls to ParseCommandLineNonHelpFlags and then to
266
+ // HandleCommandLineHelpFlags can be used instead of a call to
267
+ // ParseCommandLineFlags during initialization, in order to allow for
268
+ // changing default values for some FLAGS (via
269
+ // e.g. SetCommandLineOptionWithMode calls) between the time of
270
+ // command line parsing and the time of dumping help information for
271
+ // the flags as a result of command line parsing.
272
+ // If a flag is defined more than once in the command line or flag
273
+ // file, the last definition is used.
274
+ extern uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv,
275
+ bool remove_flags);
276
+ // This is actually defined in commandlineflags_reporting.cc.
277
+ // This function is misnamed (it also handles --version, etc.), but
278
+ // it's too late to change that now. :-(
279
+ extern void HandleCommandLineHelpFlags(); // in commandlineflags_reporting.cc
280
+
281
+ // Allow command line reparsing. Disables the error normally
282
+ // generated when an unknown flag is found, since it may be found in a
283
+ // later parse. Thread-hostile; meant to be called before any threads
284
+ // are spawned.
285
+ extern void AllowCommandLineReparsing();
286
+
287
+ // Reparse the flags that have not yet been recognized.
288
+ // Only flags registered since the last parse will be recognized.
289
+ // Any flag value must be provided as part of the argument using "=",
290
+ // not as a separate command line argument that follows the flag argument.
291
+ // Intended for handling flags from dynamically loaded libraries,
292
+ // since their flags are not registered until they are loaded.
293
+ extern uint32 ReparseCommandLineNonHelpFlags();
294
+
295
+
296
+ // --------------------------------------------------------------------
297
+ // Now come the command line flag declaration/definition macros that
298
+ // will actually be used. They're kind of hairy. A major reason
299
+ // for this is initialization: we want people to be able to access
300
+ // variables in global constructors and have that not crash, even if
301
+ // their global constructor runs before the global constructor here.
302
+ // (Obviously, we can't guarantee the flags will have the correct
303
+ // default value in that case, but at least accessing them is safe.)
304
+ // The only way to do that is have flags point to a static buffer.
305
+ // So we make one, using a union to ensure proper alignment, and
306
+ // then use placement-new to actually set up the flag with the
307
+ // correct default value. In the same vein, we have to worry about
308
+ // flag access in global destructors, so FlagRegisterer has to be
309
+ // careful never to destroy the flag-values it constructs.
310
+ //
311
+ // Note that when we define a flag variable FLAGS_<name>, we also
312
+ // preemptively define a junk variable, FLAGS_no<name>. This is to
313
+ // cause a link-time error if someone tries to define 2 flags with
314
+ // names like "logging" and "nologging". We do this because a bool
315
+ // flag FLAG can be set from the command line to true with a "-FLAG"
316
+ // argument, and to false with a "-noFLAG" argument, and so this can
317
+ // potentially avert confusion.
318
+ //
319
+ // We also put flags into their own namespace. It is purposefully
320
+ // named in an opaque way that people should have trouble typing
321
+ // directly. The idea is that DEFINE puts the flag in the weird
322
+ // namespace, and DECLARE imports the flag from there into the current
323
+ // namespace. The net result is to force people to use DECLARE to get
324
+ // access to a flag, rather than saying "extern bool FLAGS_whatever;"
325
+ // or some such instead. We want this so we can put extra
326
+ // functionality (like sanity-checking) in DECLARE if we want, and
327
+ // make sure it is picked up everywhere.
328
+ //
329
+ // We also put the type of the variable in the namespace, so that
330
+ // people can't DECLARE_int32 something that they DEFINE_bool'd
331
+ // elsewhere.
332
+
333
+ class FlagRegisterer { // one static instance is created per flag by DEFINE_VARIABLE
334
+ public:
335
+ FlagRegisterer(const char* name, const char* type, // DEFINE_VARIABLE passes the stringified flag name and C++ type
336
+ const char* help, const char* filename, // help text (possibly stripped) and the defining file (__FILE__)
337
+ void* current_storage, void* defvalue_storage); // DEFINE_VARIABLE passes &FLAGS_<name> and &FLAGS_no<name>
338
+ };
339
+
340
+ #ifndef SWIG // In swig, ignore the main flag declarations
341
+
342
+ // If STRIP_FLAG_HELP is defined and is non-zero, we remove the help
343
+ // message from the binary file. This is useful for security reasons
344
+ // when shipping a binary outside of Google (if the user cannot see
345
+ // the usage message by executing the program, they shouldn't be able
346
+ // to see it by running "strings binary_file").
347
+
348
+ extern const char kStrippedFlagHelp[];
349
+
350
+ #if STRIP_FLAG_HELP > 0
351
+ // Need this construct to avoid the 'defined but not used' warning.
352
+ #define MAYBE_STRIPPED_HELP(txt) (false ? (txt) : kStrippedFlagHelp)
353
+ #else
354
+ #define MAYBE_STRIPPED_HELP(txt) txt
355
+ #endif
356
+
357
+ // Each command-line flag has two variables associated with it: one
358
+ // with the current value (FLAGS_<name>), and one with the default value.
359
+ // However, we have a third variable, FLAGS_nono<name>, which is where
360
+ // 'value' is assigned; it's a constant. This guarantees that FLAGS_<name> is initialized at
361
+ // static initialization time (i.e. before program-start) rather
362
+ // than at global construction time (which is after program-start but
363
+ // before main), at least when 'value' is a compile-time constant. We
364
+ // use a small trick for the "default value" variable, and call it
365
+ // FLAGS_no<name>. This serves the second purpose of assuring a
366
+ // compile error if someone tries to define a flag named no<name>,
367
+ // which is illegal (--foo and --nofoo both affect the "foo" flag). Everything lives in namespace fL<shorttype>; only FLAGS_<name> is re-exported by the trailing using-declaration.
368
+ #define DEFINE_VARIABLE(type, shorttype, name, value, help) \
369
+ namespace fL##shorttype { \
370
+ static const type FLAGS_nono##name = value; \
371
+ type FLAGS_##name = FLAGS_nono##name; \
372
+ type FLAGS_no##name = FLAGS_nono##name; \
373
+ static FlagRegisterer o_##name( \
374
+ #name, #type, MAYBE_STRIPPED_HELP(help), __FILE__, \
375
+ &FLAGS_##name, &FLAGS_no##name); \
376
+ } \
377
+ using fL##shorttype::FLAGS_##name
378
+
379
+ #define DECLARE_VARIABLE(type, shorttype, name) \
380
+ namespace fL##shorttype { \
381
+ extern type FLAGS_##name; \
382
+ } \
383
+ using fL##shorttype::FLAGS_##name  // imports the extern flag from fL<shorttype> into the current scope; the macro ends without ';', so the invocation presumably supplies it
384
+
385
+ // For boolean flags, we want to do the extra check that the passed-in
386
+ // value is actually a bool, and not a string or something that can be
387
+ // coerced to a bool. These declarations (no definition needed!) will
388
+ // help us do that, and never evaluate 'from', which is important.
389
+ // We'll use 'sizeof(IsBoolFlag(val))' to distinguish: the template overload returns double, the bool overload returns bool.
390
+ namespace fLB {
391
+ template<typename From> double IsBoolFlag(const From& from);  // selected for any non-bool argument
392
+ bool IsBoolFlag(bool from);  // selected when the argument is already a bool
393
+ }
394
+ extern bool FlagsTypeWarn(const char *name);  // called from DEFINE_bool's initializer when the value is not a bool; only declared here
395
+
396
+ #define DECLARE_bool(name) DECLARE_VARIABLE(bool,B, name)  // bool flags live in namespace fLB
397
+ // We have extra code here to make sure 'val' is actually a boolean: FLAGS_nonono<name> is initialized from FlagsTypeWarn(#name) when sizeof(IsBoolFlag(val)) == sizeof(double), and from true otherwise.
398
+ #define DEFINE_bool(name,val,txt) namespace fLB { \
399
+ const bool FLAGS_nonono##name = \
400
+ (sizeof(::fLB::IsBoolFlag(val)) \
401
+ == sizeof(double)) \
402
+ ? FlagsTypeWarn(#name) : true; \
403
+ } \
404
+ DEFINE_VARIABLE(bool,B, name, val, txt)
405
+ #define DECLARE_int32(name) DECLARE_VARIABLE(int32,I, name)  // int32 flags live in namespace fLI
406
+ #define DEFINE_int32(name,val,txt) DEFINE_VARIABLE(int32,I, name, val, txt)  // unlike DEFINE_bool, no extra type check on val
407
+
408
+ #define DECLARE_int64(name) DECLARE_VARIABLE(int64,I64, name)  // int64 flags live in namespace fLI64
409
+ #define DEFINE_int64(name,val,txt) DEFINE_VARIABLE(int64,I64, name, val, txt)
410
+
411
+ #define DECLARE_uint64(name) DECLARE_VARIABLE(uint64,U64, name)  // uint64 flags live in namespace fLU64
412
+ #define DEFINE_uint64(name,val,txt) DEFINE_VARIABLE(uint64,U64, name, val, txt)
413
+
414
+ #define DECLARE_double(name) DECLARE_VARIABLE(double,D, name)  // double flags live in namespace fLD
415
+ #define DEFINE_double(name,val,txt) DEFINE_VARIABLE(double,D, name, val, txt)
416
+
417
+ // Strings are trickier, because they're not a POD, so we can't
418
+ // construct them at static-initialization time (instead they get
419
+ // constructed at global-constructor time, which is much later). To
420
+ // try to avoid crashes in that case, we use a char buffer to store
421
+ // the string, which we can static-initialize, and then placement-new
422
+ // into it later. It's not perfect, but the best we can do. DECLARE_string only declares the string& that DEFINE_string binds to that buffer.
423
+ #define DECLARE_string(name) namespace fLS { extern string& FLAGS_##name; } \
424
+ using fLS::FLAGS_##name
425
+
426
+ // We need to define a var named FLAGS_no##name so people don't define
427
+ // --string and --nostring. And we need a temporary place to put val
428
+ // so we don't have to evaluate it twice. Two great needs that go
429
+ // great together! FLAGS_no##name points at the string built from val in s_##name[0] (the current value, which FLAGS_##name refers to); s_##name[1] receives a copy of it and is passed as the default-value storage.
430
+ #define DEFINE_string(name, val, txt) \
431
+ namespace fLS { \
432
+ static union { void* align; char s[sizeof(string)]; } s_##name[2]; \
433
+ const string* const FLAGS_no##name = new (s_##name[0].s) string(val); \
434
+ static FlagRegisterer o_##name( \
435
+ #name, "string", MAYBE_STRIPPED_HELP(txt), __FILE__, \
436
+ s_##name[0].s, new (s_##name[1].s) string(*FLAGS_no##name)); \
437
+ string& FLAGS_##name = *(reinterpret_cast<string*>(s_##name[0].s)); \
438
+ } \
439
+ using fLS::FLAGS_##name
440
+
441
+ #endif // SWIG
442
+
443
+ #endif // BASE_COMMANDLINEFLAGS_H_
@@ -0,0 +1,41 @@
1
+ // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ // Support for collecting useful information when crashing.
6
+
7
+ #ifndef BASE_CRASH_H_
8
+ #define BASE_CRASH_H_
9
+
10
+ namespace base {
11
+
12
+ struct CrashReason {  // Plain record describing why the process is about to crash; passed to SetCrashReason().
13
+ CrashReason() : filename(0), line_number(0), message(0), depth(0) {}  // zeroes the scalar members only; stack[] and trace_info[] are not initialized here
14
+
15
+ const char* filename;  // presumably the source file reporting the crash - confirm against callers
16
+ int line_number;  // presumably the line within filename - confirm against callers
17
+ const char* message;  // not copied by this struct; the pointed-to text must outlive it (see SetCrashReason's lifetime note)
18
+
19
+ // We'll also store a bit of stack trace context at the time of crash as
20
+ // it may not be available later on.
21
+ void* stack[32];  // room for at most 32 entries
22
+ int depth;  // presumably the number of valid entries in stack[] - TODO confirm
23
+
24
+ // We'll try to store some trace information if it's available - this should
25
+ // reflect information from TraceContext::Thread()->tracer()->ToString().
26
+ // This field should probably not be set from within a signal handler or
27
+ // low-level code unless absolutely safe to do so.
28
+ char trace_info[512];  // fixed-size buffer; contents are indeterminate until a caller fills it
29
+ };
30
+
31
+ // Stores "reason" as an explanation for why the process is about to
32
+ // crash. The reason and its contents must remain live for the life
33
+ // of the process. Only the first reason is kept (it is what GetCrashReason() returns).
34
+ void SetCrashReason(const CrashReason* reason);  // declaration only; the definition is not in this header
35
+
36
+ // Returns the first reason passed to SetCrashReason(), or NULL (presumably when no reason has been set yet).
37
+ const CrashReason* GetCrashReason();  // declaration only; the definition is not in this header
38
+
39
+ } // namespace base
40
+
41
+ #endif // BASE_CRASH_H_