language_detection 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +85 -0
- data/Rakefile +11 -0
- data/ext/cld/Makefile +34 -0
- data/ext/cld/base/basictypes.h +348 -0
- data/ext/cld/base/build_config.h +124 -0
- data/ext/cld/base/casts.h +156 -0
- data/ext/cld/base/commandlineflags.h +443 -0
- data/ext/cld/base/crash.h +41 -0
- data/ext/cld/base/dynamic_annotations.h +358 -0
- data/ext/cld/base/global_strip_options.h +59 -0
- data/ext/cld/base/log_severity.h +46 -0
- data/ext/cld/base/logging.h +1403 -0
- data/ext/cld/base/macros.h +243 -0
- data/ext/cld/base/port.h +54 -0
- data/ext/cld/base/scoped_ptr.h +428 -0
- data/ext/cld/base/stl_decl.h +0 -0
- data/ext/cld/base/stl_decl_msvc.h +107 -0
- data/ext/cld/base/string_util.h +29 -0
- data/ext/cld/base/strtoint.h +93 -0
- data/ext/cld/base/template_util.h +96 -0
- data/ext/cld/base/type_traits.h +198 -0
- data/ext/cld/base/vlog_is_on.h +143 -0
- data/ext/cld/cld.so +0 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
- data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
- data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
- data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
- data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
- data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
- data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
- data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
- data/ext/cld/encodings/internal/encodings.cc +12 -0
- data/ext/cld/encodings/lang_enc.h +254 -0
- data/ext/cld/encodings/proto/encodings.pb.h +169 -0
- data/ext/cld/encodings/public/encodings.h +301 -0
- data/ext/cld/extconf.rb +1 -0
- data/ext/cld/language_detection.cc +88 -0
- data/ext/cld/languages/internal/languages.cc +337 -0
- data/ext/cld/languages/proto/languages.pb.h +179 -0
- data/ext/cld/languages/public/languages.h +379 -0
- data/language_detection.gemspec +28 -0
- data/lib/language_detection/string.rb +1 -0
- data/lib/language_detection/version.rb +3 -0
- data/lib/language_detection.rb +54 -0
- data/test/_helper.rb +15 -0
- data/test/fixtures/languages.csv +80 -0
- data/test/language_detection_test.rb +88 -0
- metadata +250 -0
@@ -0,0 +1,156 @@
|
|
1
|
+
// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file.
|
4
|
+
|
5
|
+
#ifndef BASE_CASTS_H_
|
6
|
+
#define BASE_CASTS_H_
|
7
|
+
|
8
|
+
#include <assert.h> // for use with down_cast<>
|
9
|
+
#include <string.h> // for memcpy
|
10
|
+
|
11
|
+
#include "base/macros.h"
|
12
|
+
|
13
|
+
|
14
|
+
// Use implicit_cast as a safe version of static_cast or const_cast
|
15
|
+
// for upcasting in the type hierarchy (i.e. casting a pointer to Foo
|
16
|
+
// to a pointer to SuperclassOfFoo or casting a pointer to Foo to
|
17
|
+
// a const pointer to Foo).
|
18
|
+
// When you use implicit_cast, the compiler checks that the cast is safe.
|
19
|
+
// Such explicit implicit_casts are necessary in surprisingly many
|
20
|
+
// situations where C++ demands an exact type match instead of an
|
21
|
+
// argument type convertible to a target type.
|
22
|
+
//
|
23
|
+
// The From type can be inferred, so the preferred syntax for using
|
24
|
+
// implicit_cast is the same as for static_cast etc.:
|
25
|
+
//
|
26
|
+
// implicit_cast<ToType>(expr)
|
27
|
+
//
|
28
|
+
// implicit_cast would have been part of the C++ standard library,
|
29
|
+
// but the proposal was submitted too late. It will probably make
|
30
|
+
// its way into the language in the future.
|
31
|
+
// Converts f to To using only the conversions the compiler would apply
// implicitly (the body is a plain "return f;"), so a call that would need
// an explicit cast fails to compile. From is deduced from the argument;
// callers spell only the target type: implicit_cast<ToType>(expr).
template<typename To, typename From>
inline To implicit_cast(From const &f) {
  return f;
}
|
35
|
+
|
36
|
+
|
37
|
+
// When you upcast (that is, cast a pointer from type Foo to type
|
38
|
+
// SuperclassOfFoo), it's fine to use implicit_cast<>, since upcasts
|
39
|
+
// always succeed. When you downcast (that is, cast a pointer from
|
40
|
+
// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
|
41
|
+
// how do you know the pointer is really of type SubclassOfFoo? It
|
42
|
+
// could be a bare Foo, or of type DifferentSubclassOfFoo. Thus,
|
43
|
+
// when you downcast, you should use this macro. In debug mode, we
|
44
|
+
// use dynamic_cast<> to double-check the downcast is legal (we die
|
45
|
+
// if it's not). In normal mode, we do the efficient static_cast<>
|
46
|
+
// instead. Thus, it's important to test in debug mode to make sure
|
47
|
+
// the cast is legal!
|
48
|
+
// This is the only place in the code we should use dynamic_cast<>.
|
49
|
+
// In particular, you SHOULDN'T be using dynamic_cast<> in order to
|
50
|
+
// do RTTI (eg code like this:
|
51
|
+
// if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
|
52
|
+
// if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
|
53
|
+
// You should design the code some other way not to need this.
|
54
|
+
|
55
|
+
template<typename To, typename From> // use like this: down_cast<T*>(foo);
|
56
|
+
inline To down_cast(From* f) { // so we only accept pointers
|
57
|
+
// Ensures that To is a sub-type of From *. This test is here only
|
58
|
+
// for compile-time type checking, and has no overhead in an
|
59
|
+
// optimized build at run-time, as it will be optimized away
|
60
|
+
// completely.
|
61
|
+
if (false) {
|
62
|
+
implicit_cast<From*, To>(0);
|
63
|
+
}
|
64
|
+
|
65
|
+
assert(f == NULL || dynamic_cast<To>(f) != NULL); // RTTI: debug mode only!
|
66
|
+
return static_cast<To>(f);
|
67
|
+
}
|
68
|
+
|
69
|
+
// Overload of down_cast for references. Use like this: down_cast<T&>(foo).
|
70
|
+
// The code is slightly convoluted because we're still using the pointer
|
71
|
+
// form of dynamic cast. (The reference form throws an exception if it
|
72
|
+
// fails.)
|
73
|
+
//
|
74
|
+
// There's no need for a special const overload either for the pointer
|
75
|
+
// or the reference form. If you call down_cast with a const T&, the
|
76
|
+
// compiler will just bind From to const T.
|
77
|
+
template<typename To, typename From>
|
78
|
+
inline To down_cast(From& f) {
|
79
|
+
COMPILE_ASSERT(base::is_reference<To>::value, target_type_not_a_reference);
|
80
|
+
typedef typename base::remove_reference<To>::type* ToAsPointer;
|
81
|
+
if (false) {
|
82
|
+
// Compile-time check that To inherits from From. See above for details.
|
83
|
+
implicit_cast<From*, ToAsPointer>(0);
|
84
|
+
}
|
85
|
+
|
86
|
+
assert(dynamic_cast<ToAsPointer>(&f) != NULL); // RTTI: debug mode only
|
87
|
+
return static_cast<To>(f);
|
88
|
+
}
|
89
|
+
|
90
|
+
// bit_cast<Dest,Source> is a template function that implements the
|
91
|
+
// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
|
92
|
+
// very low-level functions like the protobuf library and fast math
|
93
|
+
// support.
|
94
|
+
//
|
95
|
+
// float f = 3.14159265358979;
|
96
|
+
// int i = bit_cast<int32>(f);
|
97
|
+
// // i = 0x40490fdb
|
98
|
+
//
|
99
|
+
// The classical address-casting method is:
|
100
|
+
//
|
101
|
+
// // WRONG
|
102
|
+
// float f = 3.14159265358979; // WRONG
|
103
|
+
// int i = * reinterpret_cast<int*>(&f); // WRONG
|
104
|
+
//
|
105
|
+
// The address-casting method actually produces undefined behavior
|
106
|
+
// according to ISO C++ specification section 3.10 -15 -. Roughly, this
|
107
|
+
// section says: if an object in memory has one type, and a program
|
108
|
+
// accesses it with a different type, then the result is undefined
|
109
|
+
// behavior for most values of "different type".
|
110
|
+
//
|
111
|
+
// This is true for any cast syntax, either *(int*)&f or
|
112
|
+
// *reinterpret_cast<int*>(&f). And it is particularly true for
|
113
|
+
// conversions between integral lvalues and floating-point lvalues.
|
114
|
+
//
|
115
|
+
// The purpose of 3.10 -15- is to allow optimizing compilers to assume
|
116
|
+
// that expressions with different types refer to different memory. gcc
|
117
|
+
// 4.0.1 has an optimizer that takes advantage of this. So a
|
118
|
+
// non-conforming program quietly produces wildly incorrect output.
|
119
|
+
//
|
120
|
+
// The problem is not the use of reinterpret_cast. The problem is type
|
121
|
+
// punning: holding an object in memory of one type and reading its bits
|
122
|
+
// back using a different type.
|
123
|
+
//
|
124
|
+
// The C++ standard is more subtle and complex than this, but that
|
125
|
+
// is the basic idea.
|
126
|
+
//
|
127
|
+
// Anyways ...
|
128
|
+
//
|
129
|
+
// bit_cast<> calls memcpy() which is blessed by the standard,
|
130
|
+
// especially by the example in section 3.9 . Also, of course,
|
131
|
+
// bit_cast<> wraps up the nasty logic in one place.
|
132
|
+
//
|
133
|
+
// Fortunately memcpy() is very fast. In optimized mode, with a
|
134
|
+
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
|
135
|
+
// code with the minimal amount of data movement. On a 32-bit system,
|
136
|
+
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
|
137
|
+
// compiles to two loads and two stores.
|
138
|
+
//
|
139
|
+
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
|
140
|
+
//
|
141
|
+
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
|
142
|
+
// is likely to surprise you.
|
143
|
+
//
|
144
|
+
|
145
|
+
// Returns a Dest whose bytes are a memcpy() of the bytes of source.
// Dest and Source must have the same size; a mismatch is a compile error.
// Dest must be default-constructible, because a local Dest is created
// before being overwritten by the copy.
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // Compile time assertion: sizeof(Dest) == sizeof(Source)
  // A compile error here means your Dest and Source have different sizes.
  typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];
  // Reference the typedef in an unevaluated context so compilers do not
  // warn about an unused local typedef; this generates no code.
  (void)sizeof(VerifySizesAreEqual);

  Dest dest;
  memcpy(&dest, &source, sizeof(dest));
  return dest;
}
|
155
|
+
|
156
|
+
#endif // BASE_CASTS_H_
|
@@ -0,0 +1,443 @@
|
|
1
|
+
// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file.
|
4
|
+
|
5
|
+
// This is the file that should be included by any file which declares
|
6
|
+
// or defines a command line flag or wants to parse command line flags
|
7
|
+
// or print a program usage message (which will include information about
|
8
|
+
// flags). Executive summary, in the form of an example foo.cc file:
|
9
|
+
//
|
10
|
+
// #include "foo.h" // foo.h has a line "DECLARE_int32(start);"
|
11
|
+
//
|
12
|
+
// DEFINE_int32(end, 1000, "The last record to read");
|
13
|
+
// DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...)
|
14
|
+
//
|
15
|
+
// void MyFunc() {
|
16
|
+
// if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end);
|
17
|
+
// }
|
18
|
+
//
|
19
|
+
// Then, at the command-line:
|
20
|
+
// ./foo --noverbose --start=5 --end=100
|
21
|
+
|
22
|
+
#ifndef BASE_COMMANDLINEFLAGS_H_
|
23
|
+
#define BASE_COMMANDLINEFLAGS_H_
|
24
|
+
|
25
|
+
#include <assert.h>
|
26
|
+
#include <string>
|
27
|
+
#include <vector>
|
28
|
+
#include "base/basictypes.h"
|
29
|
+
#include "base/port.h"
|
30
|
+
#include "base/stl_decl_msvc.h"
|
31
|
+
#include "base/global_strip_options.h"
|
32
|
+
|
33
|
+
// --------------------------------------------------------------------
|
34
|
+
// To actually define a flag in a file, use DEFINE_bool,
|
35
|
+
// DEFINE_string, etc. at the bottom of this file. You may also find
|
36
|
+
// it useful to register a validator with the flag. This ensures that
|
37
|
+
// when the flag is parsed from the commandline, or is later set via
|
38
|
+
// SetCommandLineOption, we call the validation function.
|
39
|
+
//
|
40
|
+
// The validation function should return true if the flag value is valid, and
|
41
|
+
// false otherwise. If the function returns false for the new setting of the
|
42
|
+
// flag, the flag will retain its current value. If it returns false for the
|
43
|
+
// default value, InitGoogle will die.
|
44
|
+
//
|
45
|
+
// This function is safe to call at global construct time (as in the
|
46
|
+
// example below).
|
47
|
+
//
|
48
|
+
// Example use:
|
49
|
+
// static bool ValidatePort(const char* flagname, int32 value) {
|
50
|
+
// if (value > 0 && value < 32768) // value is ok
|
51
|
+
// return true;
|
52
|
+
// printf("Invalid value for --%s: %d\n", flagname, (int)value);
|
53
|
+
// return false;
|
54
|
+
// }
|
55
|
+
// DEFINE_int32(port, 0, "What port to listen on");
|
56
|
+
// static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort);
|
57
|
+
|
58
|
+
// Returns true if successfully registered, false if not (because the
|
59
|
+
// first argument doesn't point to a command-line flag, or because a
|
60
|
+
// validator is already registered for this flag).
|
61
|
+
bool RegisterFlagValidator(const bool* flag,
|
62
|
+
bool (*validate_fn)(const char*, bool));
|
63
|
+
bool RegisterFlagValidator(const int32* flag,
|
64
|
+
bool (*validate_fn)(const char*, int32));
|
65
|
+
bool RegisterFlagValidator(const int64* flag,
|
66
|
+
bool (*validate_fn)(const char*, int64));
|
67
|
+
bool RegisterFlagValidator(const uint64* flag,
|
68
|
+
bool (*validate_fn)(const char*, uint64));
|
69
|
+
bool RegisterFlagValidator(const double* flag,
|
70
|
+
bool (*validate_fn)(const char*, double));
|
71
|
+
bool RegisterFlagValidator(const string* flag,
|
72
|
+
bool (*validate_fn)(const char*, const string&));
|
73
|
+
|
74
|
+
|
75
|
+
// --------------------------------------------------------------------
|
76
|
+
// These methods are the best way to get access to info about the
|
77
|
+
// list of commandline flags. Note that these routines are pretty slow.
|
78
|
+
// GetAllFlags: mostly-complete info about the list, sorted by file.
|
79
|
+
// ShowUsageWithFlags: pretty-prints the list to stdout (what --help does)
|
80
|
+
// ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr
|
81
|
+
//
|
82
|
+
// In addition to accessing flags, you can also access argv[0] (the program
|
83
|
+
// name) and argv (the entire commandline), which we sock away a copy of.
|
84
|
+
// These variables are static, so you should only set them once.
|
85
|
+
|
86
|
+
struct CommandLineFlagInfo {
|
87
|
+
string name; // the name of the flag
|
88
|
+
string type; // the type of the flag: int32, etc
|
89
|
+
string description; // the "help text" associated with the flag
|
90
|
+
string current_value; // the current value, as a string
|
91
|
+
string default_value; // the default value, as a string
|
92
|
+
string filename; // 'cleaned' version of filename holding the flag
|
93
|
+
bool is_default; // true if the flag has default value
|
94
|
+
bool has_validator_fn; // true if RegisterFlagValidator called on this flag
|
95
|
+
};
|
96
|
+
|
97
|
+
extern void GetAllFlags(vector<CommandLineFlagInfo>* OUTPUT);
|
98
|
+
// These two are actually defined in commandlineflags_reporting.cc.
|
99
|
+
extern void ShowUsageWithFlags(const char *argv0); // what --help does
|
100
|
+
extern void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict);
|
101
|
+
|
102
|
+
// Create a descriptive string for a flag.
|
103
|
+
// Goes to some trouble to make pretty line breaks.
|
104
|
+
extern string DescribeOneFlag(const CommandLineFlagInfo& flag);
|
105
|
+
|
106
|
+
// Thread-hostile; meant to be called before any threads are spawned.
|
107
|
+
extern void SetArgv(int argc, const char** argv);
|
108
|
+
// The following functions are thread-safe as long as SetArgv() is
|
109
|
+
// only called before any threads start.
|
110
|
+
extern const vector<string>& GetArgvs(); // all of argv = vector of strings
|
111
|
+
extern const char* GetArgv(); // all of argv as a string
|
112
|
+
extern const char* GetArgv0(); // only argv0
|
113
|
+
extern uint32 GetArgvSum(); // simple checksum of argv
|
114
|
+
extern const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set
|
115
|
+
extern const char* ProgramInvocationShortName(); // basename(argv0)
|
116
|
+
// ProgramUsage() is thread-safe as long as SetUsageMessage() is only
|
117
|
+
// called before any threads start.
|
118
|
+
extern const char* ProgramUsage(); // string set by SetUsageMessage()
|
119
|
+
|
120
|
+
|
121
|
+
// --------------------------------------------------------------------
|
122
|
+
// Normally you access commandline flags by just saying "if (FLAGS_foo)"
|
123
|
+
// or whatever, and set them by calling "FLAGS_foo = bar" (or, more
|
124
|
+
// commonly, via the DEFINE_foo macro). But if you need a bit more
|
125
|
+
// control, we have programmatic ways to get/set the flags as well.
|
126
|
+
// These programmatic ways to access flags are thread-safe, but direct
|
127
|
+
// access is only thread-compatible.
|
128
|
+
|
129
|
+
// Return true iff the flagname was found.
|
130
|
+
// OUTPUT is set to the flag's value, or unchanged if we return false.
|
131
|
+
extern bool GetCommandLineOption(const char* name, string* OUTPUT);
|
132
|
+
|
133
|
+
// Return true iff the flagname was found. OUTPUT is set to the flag's
|
134
|
+
// CommandLineFlagInfo or unchanged if we return false.
|
135
|
+
extern bool GetCommandLineFlagInfo(const char* name,
|
136
|
+
CommandLineFlagInfo* OUTPUT);
|
137
|
+
|
138
|
+
// Return the CommandLineFlagInfo of the flagname. exit() if name not found.
|
139
|
+
// Example usage, to check if a flag's value is currently the default value:
|
140
|
+
// if (GetCommandLineFlagInfoOrDie("foo").is_default) ...
|
141
|
+
extern CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name);
|
142
|
+
|
143
|
+
// Selects how a new value is applied to a flag; passed as set_mode to
// SetCommandLineOptionWithMode.
enum FlagSettingMode {
  // update the flag's value (can call this multiple times).
  SET_FLAGS_VALUE,
  // update the flag's value, but *only if* it has not yet been updated
  // with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef".
  SET_FLAG_IF_DEFAULT,
  // set the flag's default value to this.  If the flag has not been updated
  // yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef")
  // change the flag's current value to the new default value as well.
  SET_FLAGS_DEFAULT
};
|
154
|
+
|
155
|
+
// Set a particular flag ("command line option"). Returns a string
|
156
|
+
// describing the new value that the option has been set to. The
|
157
|
+
// return value API is not well-specified, so basically just depend on
|
158
|
+
// it to be empty if the setting failed for some reason -- the name is
|
159
|
+
// not a valid flag name, or the value is not a valid value -- and
|
160
|
+
// non-empty else.
|
161
|
+
|
162
|
+
// SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case)
|
163
|
+
extern string SetCommandLineOption(const char* name, const char* value);
|
164
|
+
extern string SetCommandLineOptionWithMode(const char* name, const char* value,
|
165
|
+
FlagSettingMode set_mode);
|
166
|
+
|
167
|
+
|
168
|
+
// --------------------------------------------------------------------
|
169
|
+
// Saves the states (value, default value, whether the user has set
|
170
|
+
// the flag, registered validators, etc) of all flags, and restores
|
171
|
+
// them when the FlagSaver is destroyed. This is very useful in
|
172
|
+
// tests, say, when you want to let your tests change the flags, but
|
173
|
+
// make sure that they get reverted to the original states when your
|
174
|
+
// test is complete.
|
175
|
+
//
|
176
|
+
// Example usage:
|
177
|
+
// void TestFoo() {
|
178
|
+
// FlagSaver s1;
|
179
|
+
// FLAGS_foo = false;
|
180
|
+
// FLAGS_bar = "some value";
|
181
|
+
//
|
182
|
+
// // test happens here. You can return at any time
|
183
|
+
// // without worrying about restoring the FLAG values.
|
184
|
+
// }
|
185
|
+
//
|
186
|
+
// Note: This class is marked with ATTRIBUTE_UNUSED because all the
|
187
|
+
// work is done in the constructor and destructor, so in the standard
|
188
|
+
// usage example above, the compiler would complain that it's an
|
189
|
+
// unused variable.
|
190
|
+
//
|
191
|
+
// This class is thread-safe.
|
192
|
+
/*
|
193
|
+
class FlagSaver {
|
194
|
+
public:
|
195
|
+
FlagSaver();
|
196
|
+
~FlagSaver();
|
197
|
+
|
198
|
+
private:
|
199
|
+
class FlagSaverImpl* impl_; // we use pimpl here to keep API steady
|
200
|
+
|
201
|
+
FlagSaver(const FlagSaver&); // no copying!
|
202
|
+
void operator=(const FlagSaver&);
|
203
|
+
}
|
204
|
+
#ifndef SWIG // swig seems to have trouble with this for some reason
|
205
|
+
ATTRIBUTE_UNUSED
|
206
|
+
#endif
|
207
|
+
;
|
208
|
+
*/
|
209
|
+
// --------------------------------------------------------------------
|
210
|
+
// Some deprecated or hopefully-soon-to-be-deprecated functions.
|
211
|
+
|
212
|
+
// This is often used for logging. TODO(csilvers): figure out a better way
|
213
|
+
extern string CommandlineFlagsIntoString();
|
214
|
+
// Usually where this is used, a FlagSaver should be used instead.
|
215
|
+
extern bool ReadFlagsFromString(const string& flagfilecontents,
|
216
|
+
const char* prog_name,
|
217
|
+
bool errors_are_fatal); // uses SET_FLAGS_VALUE
|
218
|
+
|
219
|
+
// These let you manually implement --flagfile functionality.
|
220
|
+
// DEPRECATED.
|
221
|
+
extern bool AppendFlagsIntoFile(const string& filename, const char* prog_name);
|
222
|
+
extern bool SaveCommandFlags(); // actually defined in google.cc !
|
223
|
+
extern bool ReadFromFlagsFile(const string& filename, const char* prog_name,
|
224
|
+
bool errors_are_fatal); // uses SET_FLAGS_VALUE
|
225
|
+
|
226
|
+
|
227
|
+
// --------------------------------------------------------------------
|
228
|
+
// Useful routines for initializing flags from the environment.
|
229
|
+
// In each case, if 'varname' does not exist in the environment
|
230
|
+
// return defval. If 'varname' does exist but is not valid
|
231
|
+
// (e.g., not a number for an int32 flag), abort with an error.
|
232
|
+
// Otherwise, return the value. NOTE: for booleans, for true use
|
233
|
+
// 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'.
|
234
|
+
|
235
|
+
extern bool BoolFromEnv(const char *varname, bool defval);
|
236
|
+
extern int32 Int32FromEnv(const char *varname, int32 defval);
|
237
|
+
extern int64 Int64FromEnv(const char *varname, int64 defval);
|
238
|
+
extern uint64 Uint64FromEnv(const char *varname, uint64 defval);
|
239
|
+
extern double DoubleFromEnv(const char *varname, double defval);
|
240
|
+
extern const char *StringFromEnv(const char *varname, const char *defval);
|
241
|
+
|
242
|
+
|
243
|
+
// --------------------------------------------------------------------
|
244
|
+
// The next two functions parse commandlineflags from main():
|
245
|
+
|
246
|
+
// Set the "usage" message for this program. For example:
|
247
|
+
// string usage("This program does nothing. Sample usage:\n");
|
248
|
+
// usage += argv[0] + " <uselessarg1> <uselessarg2>";
|
249
|
+
// SetUsageMessage(usage);
|
250
|
+
// Do not include commandline flags in the usage: we do that for you!
|
251
|
+
// Thread-hostile; meant to be called before any threads are spawned.
|
252
|
+
extern void SetUsageMessage(const string& usage);
|
253
|
+
|
254
|
+
// Looks for flags in argv and parses them. Rearranges argv to put
|
255
|
+
// flags first, or removes them entirely if remove_flags is true.
|
256
|
+
// If a flag is defined more than once in the command line or flag
|
257
|
+
// file, the last definition is used.
|
258
|
+
// See top-of-file for more details on this function.
|
259
|
+
#ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead.
|
260
|
+
extern uint32 ParseCommandLineFlags(int *argc, char*** argv,
|
261
|
+
bool remove_flags);
|
262
|
+
#endif
|
263
|
+
|
264
|
+
|
265
|
+
// Calls to ParseCommandLineNonHelpFlags and then to
|
266
|
+
// HandleCommandLineHelpFlags can be used instead of a call to
|
267
|
+
// ParseCommandLineFlags during initialization, in order to allow for
|
268
|
+
// changing default values for some FLAGS (via
|
269
|
+
// e.g. SetCommandLineOptionWithMode calls) between the time of
|
270
|
+
// command line parsing and the time of dumping help information for
|
271
|
+
// the flags as a result of command line parsing.
|
272
|
+
// If a flag is defined more than once in the command line or flag
|
273
|
+
// file, the last definition is used.
|
274
|
+
extern uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv,
|
275
|
+
bool remove_flags);
|
276
|
+
// This is actually defined in commandlineflags_reporting.cc.
|
277
|
+
// This function is misnamed (it also handles --version, etc.), but
|
278
|
+
// it's too late to change that now. :-(
|
279
|
+
extern void HandleCommandLineHelpFlags(); // in commandlineflags_reporting.cc
|
280
|
+
|
281
|
+
// Allow command line reparsing. Disables the error normally
|
282
|
+
// generated when an unknown flag is found, since it may be found in a
|
283
|
+
// later parse. Thread-hostile; meant to be called before any threads
|
284
|
+
// are spawned.
|
285
|
+
extern void AllowCommandLineReparsing();
|
286
|
+
|
287
|
+
// Reparse the flags that have not yet been recognized.
|
288
|
+
// Only flags registered since the last parse will be recognized.
|
289
|
+
// Any flag value must be provided as part of the argument using "=",
|
290
|
+
// not as a separate command line argument that follows the flag argument.
|
291
|
+
// Intended for handling flags from dynamically loaded libraries,
|
292
|
+
// since their flags are not registered until they are loaded.
|
293
|
+
extern uint32 ReparseCommandLineNonHelpFlags();
|
294
|
+
|
295
|
+
|
296
|
+
// --------------------------------------------------------------------
|
297
|
+
// Now come the command line flag declaration/definition macros that
|
298
|
+
// will actually be used. They're kind of hairy. A major reason
|
299
|
+
// for this is initialization: we want people to be able to access
|
300
|
+
// variables in global constructors and have that not crash, even if
|
301
|
+
// their global constructor runs before the global constructor here.
|
302
|
+
// (Obviously, we can't guarantee the flags will have the correct
|
303
|
+
// default value in that case, but at least accessing them is safe.)
|
304
|
+
// The only way to do that is have flags point to a static buffer.
|
305
|
+
// So we make one, using a union to ensure proper alignment, and
|
306
|
+
// then use placement-new to actually set up the flag with the
|
307
|
+
// correct default value. In the same vein, we have to worry about
|
308
|
+
// flag access in global destructors, so FlagRegisterer has to be
|
309
|
+
// careful never to destroy the flag-values it constructs.
|
310
|
+
//
|
311
|
+
// Note that when we define a flag variable FLAGS_<name>, we also
|
312
|
+
// preemptively define a junk variable, FLAGS_no<name>. This is to
|
313
|
+
// cause a link-time error if someone tries to define 2 flags with
|
314
|
+
// names like "logging" and "nologging". We do this because a bool
|
315
|
+
// flag FLAG can be set from the command line to true with a "-FLAG"
|
316
|
+
// argument, and to false with a "-noFLAG" argument, and so this can
|
317
|
+
// potentially avert confusion.
|
318
|
+
//
|
319
|
+
// We also put flags into their own namespace. It is purposefully
|
320
|
+
// named in an opaque way that people should have trouble typing
|
321
|
+
// directly. The idea is that DEFINE puts the flag in the weird
|
322
|
+
// namespace, and DECLARE imports the flag from there into the current
|
323
|
+
// namespace. The net result is to force people to use DECLARE to get
|
324
|
+
// access to a flag, rather than saying "extern bool FLAGS_whatever;"
|
325
|
+
// or some such instead. We want this so we can put extra
|
326
|
+
// functionality (like sanity-checking) in DECLARE if we want, and
|
327
|
+
// make sure it is picked up everywhere.
|
328
|
+
//
|
329
|
+
// We also put the type of the variable in the namespace, so that
|
330
|
+
// people can't DECLARE_int32 something that they DEFINE_bool'd
|
331
|
+
// elsewhere.
|
332
|
+
|
333
|
+
// Helper type whose constructor is run by the static o_<name> object that
// each DEFINE_* macro in this header creates. Only the declaration is
// visible here; the constructor is defined elsewhere.
//
// Constructor arguments, as supplied by the DEFINE_* macros below:
//   name             - the flag name, stringified (#name)
//   type             - the flag type, stringified (#type, or "string")
//   help             - the help text, after MAYBE_STRIPPED_HELP
//   filename         - __FILE__ of the translation unit defining the flag
//   current_storage  - address of the storage holding the current value
//   defvalue_storage - address of the storage holding the default value
class FlagRegisterer {
|
334
|
+
public:
|
335
|
+
FlagRegisterer(const char* name, const char* type,
|
336
|
+
const char* help, const char* filename,
|
337
|
+
void* current_storage, void* defvalue_storage);
|
338
|
+
};
|
339
|
+
|
340
|
+
#ifndef SWIG // In swig, ignore the main flag declarations
|
341
|
+
|
342
|
+
// If STRIP_FLAG_HELP is defined and is non-zero, we remove the help
|
343
|
+
// message from the binary file. This is useful for security reasons
|
344
|
+
// when shipping a binary outside of Google (if the user cannot see
|
345
|
+
// the usage message by executing the program, they shouldn't be able
|
346
|
+
// to see it by running "strings binary_file").
|
347
|
+
|
348
|
+
extern const char kStrippedFlagHelp[];
|
349
|
+
|
350
|
+
// MAYBE_STRIPPED_HELP(txt) produces the help string handed to
// FlagRegisterer. When STRIP_FLAG_HELP > 0 the conditional expression
// always selects kStrippedFlagHelp while still mentioning txt; otherwise
// the macro expands to txt unchanged.
#if STRIP_FLAG_HELP > 0
|
351
|
+
// Need this construct to avoid the 'defined but not used' warning.
|
352
|
+
#define MAYBE_STRIPPED_HELP(txt) (false ? (txt) : kStrippedFlagHelp)
|
353
|
+
#else
|
354
|
+
#define MAYBE_STRIPPED_HELP(txt) txt
|
355
|
+
#endif
|
356
|
+
|
357
|
+
// Each command-line flag has two variables associated with it: one
|
358
|
+
// with the current value, and one with the default value. However,
|
359
|
+
// we have a third variable, which is where value is assigned; it's a
|
360
|
+
// constant. This guarantees that FLAG_##value is initialized at
|
361
|
+
// static initialization time (e.g. before program-start) rather than
|
362
|
+
// global construction time (which is after program-start but
|
363
|
+
// before main), at least when 'value' is a compile-time constant. We
|
364
|
+
// use a small trick for the "default value" variable, and call it
|
365
|
+
// FLAGS_no<name>. This serves the second purpose of assuring a
|
366
|
+
// compile error if someone tries to define a flag named no<name>
|
367
|
+
// which is illegal (--foo and --nofoo both affect the "foo" flag).
|
368
|
+
// DEFINE_VARIABLE(type, shorttype, name, value, help):
//   type      - C++ type of the flag variable; also stringified (#type)
//               for the FlagRegisterer
//   shorttype - suffix that forms the namespace fL<shorttype> holding the
//               flag
//   name      - flag name; yields FLAGS_<name> (current value),
//               FLAGS_no<name> (default value) and the constant
//               FLAGS_nono<name> that both are copied from
//   value     - initial value, assigned to FLAGS_nono<name>
//   help      - help text, passed through MAYBE_STRIPPED_HELP
// The expansion ends in a using-declaration without a semicolon, so the
// invocation must be followed by ';'.
#define DEFINE_VARIABLE(type, shorttype, name, value, help) \
|
369
|
+
namespace fL##shorttype { \
|
370
|
+
static const type FLAGS_nono##name = value; \
|
371
|
+
type FLAGS_##name = FLAGS_nono##name; \
|
372
|
+
type FLAGS_no##name = FLAGS_nono##name; \
|
373
|
+
static FlagRegisterer o_##name( \
|
374
|
+
#name, #type, MAYBE_STRIPPED_HELP(help), __FILE__, \
|
375
|
+
&FLAGS_##name, &FLAGS_no##name); \
|
376
|
+
} \
|
377
|
+
using fL##shorttype::FLAGS_##name
|
378
|
+
|
379
|
+
// DECLARE_VARIABLE(type, shorttype, name): declares FLAGS_<name> as an
// extern variable of the given type inside namespace fL<shorttype>, then
// imports it into the current scope with a using-declaration. The
// expansion has no trailing semicolon, so the invocation must supply it.
#define DECLARE_VARIABLE(type, shorttype, name) \
|
380
|
+
namespace fL##shorttype { \
|
381
|
+
extern type FLAGS_##name; \
|
382
|
+
} \
|
383
|
+
using fL##shorttype::FLAGS_##name
|
384
|
+
|
385
|
+
// For boolean flags, we want to do the extra check that the passed-in
|
386
|
+
// value is actually a bool, and not a string or something that can be
|
387
|
+
// coerced to a bool. These declarations (no definition needed!) will
|
388
|
+
// help us do that, and never evaluate from, which is important.
|
389
|
+
// We'll use 'sizeof(IsBoolFlag(val))' to distinguish.
|
390
|
+
// Declaration-only overload pair, used by DEFINE_bool inside sizeof().
// The non-template overload takes a bool and returns bool; the template
// overload accepts any type by const reference and returns double, so the
// size of the call expression's type reveals which overload was chosen.
namespace fLB {
|
391
|
+
template<typename From> double IsBoolFlag(const From& from);
|
392
|
+
bool IsBoolFlag(bool from);
|
393
|
+
}
|
394
|
+
extern bool FlagsTypeWarn(const char *name);
|
395
|
+
|
396
|
+
// Bool flags live in namespace fLB (shorttype "B").
#define DECLARE_bool(name) DECLARE_VARIABLE(bool,B, name)
|
397
|
+
// We have extra code here to make sure 'val' is actually a boolean.
// FLAGS_nonono<name> is initialized from a sizeof() on the IsBoolFlag
// overload selected for 'val' (so 'val' is not evaluated there): when the
// result type has sizeof(double), the template (non-bool) overload was
// picked and FlagsTypeWarn(#name) supplies the value; otherwise the
// constant is simply true. The flag itself is then defined through
// DEFINE_VARIABLE.
|
398
|
+
#define DEFINE_bool(name,val,txt) namespace fLB { \
|
399
|
+
const bool FLAGS_nonono##name = \
|
400
|
+
(sizeof(::fLB::IsBoolFlag(val)) \
|
401
|
+
== sizeof(double)) \
|
402
|
+
? FlagsTypeWarn(#name) : true; \
|
403
|
+
} \
|
404
|
+
DEFINE_VARIABLE(bool,B, name, val, txt)
|
405
|
+
// Typed convenience wrappers around DECLARE_VARIABLE / DEFINE_VARIABLE.
// The second macro argument selects the per-type namespace: fLI for
// int32, fLI64 for int64, fLU64 for uint64 and fLD for double.
#define DECLARE_int32(name) DECLARE_VARIABLE(int32,I, name)
|
406
|
+
#define DEFINE_int32(name,val,txt) DEFINE_VARIABLE(int32,I, name, val, txt)
|
407
|
+
|
408
|
+
#define DECLARE_int64(name) DECLARE_VARIABLE(int64,I64, name)
|
409
|
+
#define DEFINE_int64(name,val,txt) DEFINE_VARIABLE(int64,I64, name, val, txt)
|
410
|
+
|
411
|
+
#define DECLARE_uint64(name) DECLARE_VARIABLE(uint64,U64, name)
|
412
|
+
#define DEFINE_uint64(name,val,txt) DEFINE_VARIABLE(uint64,U64, name, val, txt)
|
413
|
+
|
414
|
+
#define DECLARE_double(name) DECLARE_VARIABLE(double,D, name)
|
415
|
+
#define DEFINE_double(name,val,txt) DEFINE_VARIABLE(double,D, name, val, txt)
|
416
|
+
|
417
|
+
// Strings are trickier, because they're not a POD, so we can't
|
418
|
+
// construct them at static-initialization time (instead they get
|
419
|
+
// constructed at global-constructor time, which is much later). To
|
420
|
+
// try to avoid crashes in that case, we use a char buffer to store
|
421
|
+
// the string, which we can static-initialize, and then placement-new
|
422
|
+
// into it later. It's not perfect, but the best we can do.
|
423
|
+
// DECLARE_string(name): declares FLAGS_<name> as an extern reference to a
// string inside namespace fLS and imports it into the current scope. The
// expansion has no trailing semicolon, so the invocation must supply it.
#define DECLARE_string(name) namespace fLS { extern string& FLAGS_##name; } \
|
424
|
+
using fLS::FLAGS_##name
|
425
|
+
|
426
|
+
// We need to define a var named FLAGS_no##name so people don't define
|
427
|
+
// --string and --nostring. And we need a temporary place to put val
|
428
|
+
// so we don't have to evaluate it twice. Two great needs that go
|
429
|
+
// great together!
|
430
|
+
// DEFINE_string(name, val, txt) creates, inside namespace fLS:
//   s_<name>[2]    - two static raw buffers, each large enough for a
//                    string; the void* union member is there for alignment
//   FLAGS_no<name> - pointer to a string(val) placement-new'ed into
//                    s_<name>[0]; 'val' appears (and is evaluated) only here
//   o_<name>       - FlagRegisterer given s_<name>[0].s as current storage
//                    and, as default storage, a copy of *FLAGS_no<name>
//                    placement-new'ed into s_<name>[1]
//   FLAGS_<name>   - reference to the string living in s_<name>[0]
// The macro never destroys either string. The expansion ends in a
// using-declaration without a semicolon, so the invocation must supply it.
#define DEFINE_string(name, val, txt) \
|
431
|
+
namespace fLS { \
|
432
|
+
static union { void* align; char s[sizeof(string)]; } s_##name[2]; \
|
433
|
+
const string* const FLAGS_no##name = new (s_##name[0].s) string(val); \
|
434
|
+
static FlagRegisterer o_##name( \
|
435
|
+
#name, "string", MAYBE_STRIPPED_HELP(txt), __FILE__, \
|
436
|
+
s_##name[0].s, new (s_##name[1].s) string(*FLAGS_no##name)); \
|
437
|
+
string& FLAGS_##name = *(reinterpret_cast<string*>(s_##name[0].s)); \
|
438
|
+
} \
|
439
|
+
using fLS::FLAGS_##name
|
440
|
+
|
441
|
+
#endif // SWIG
|
442
|
+
|
443
|
+
#endif // BASE_COMMANDLINEFLAGS_H_
|
@@ -0,0 +1,41 @@
|
|
1
|
+
// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file.
|
4
|
+
|
5
|
+
// Support for collecting useful information when crashing.
|
6
|
+
|
7
|
+
#ifndef BASE_CRASH_H_
|
8
|
+
#define BASE_CRASH_H_
|
9
|
+
|
10
|
+
namespace base {
|
11
|
+
|
12
|
+
struct CrashReason {
|
13
|
+
CrashReason() : filename(0), line_number(0), message(0), depth(0) {}
|
14
|
+
|
15
|
+
const char* filename;
|
16
|
+
int line_number;
|
17
|
+
const char* message;
|
18
|
+
|
19
|
+
// We'll also store a bit of stack trace context at the time of crash as
|
20
|
+
// it may not be available later on.
|
21
|
+
void* stack[32];
|
22
|
+
int depth;
|
23
|
+
|
24
|
+
// We'll try to store some trace information if it's available - this should
|
25
|
+
// reflect information from TraceContext::Thread()->tracer()->ToString().
|
26
|
+
// This field should probably not be set from within a signal handler or
|
27
|
+
// low-level code unless absolutely safe to do so.
|
28
|
+
char trace_info[512];
|
29
|
+
};
|
30
|
+
|
31
|
+
// Stores "reason" as an explanation for why the process is about to
|
32
|
+
// crash. The reason and its contents must remain live for the life
|
33
|
+
// of the process. Only the first reason is kept.
|
34
|
+
void SetCrashReason(const CrashReason* reason);
|
35
|
+
|
36
|
+
// Returns first reason passed to SetCrashReason(), or NULL.
|
37
|
+
const CrashReason* GetCrashReason();
|
38
|
+
|
39
|
+
} // namespace base
|
40
|
+
|
41
|
+
#endif // BASE_CRASH_H_
|