language_detection 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +85 -0
- data/Rakefile +11 -0
- data/ext/cld/Makefile +34 -0
- data/ext/cld/base/basictypes.h +348 -0
- data/ext/cld/base/build_config.h +124 -0
- data/ext/cld/base/casts.h +156 -0
- data/ext/cld/base/commandlineflags.h +443 -0
- data/ext/cld/base/crash.h +41 -0
- data/ext/cld/base/dynamic_annotations.h +358 -0
- data/ext/cld/base/global_strip_options.h +59 -0
- data/ext/cld/base/log_severity.h +46 -0
- data/ext/cld/base/logging.h +1403 -0
- data/ext/cld/base/macros.h +243 -0
- data/ext/cld/base/port.h +54 -0
- data/ext/cld/base/scoped_ptr.h +428 -0
- data/ext/cld/base/stl_decl.h +0 -0
- data/ext/cld/base/stl_decl_msvc.h +107 -0
- data/ext/cld/base/string_util.h +29 -0
- data/ext/cld/base/strtoint.h +93 -0
- data/ext/cld/base/template_util.h +96 -0
- data/ext/cld/base/type_traits.h +198 -0
- data/ext/cld/base/vlog_is_on.h +143 -0
- data/ext/cld/cld.so +0 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
- data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
- data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
- data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
- data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
- data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
- data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
- data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
- data/ext/cld/encodings/internal/encodings.cc +12 -0
- data/ext/cld/encodings/lang_enc.h +254 -0
- data/ext/cld/encodings/proto/encodings.pb.h +169 -0
- data/ext/cld/encodings/public/encodings.h +301 -0
- data/ext/cld/extconf.rb +1 -0
- data/ext/cld/language_detection.cc +88 -0
- data/ext/cld/languages/internal/languages.cc +337 -0
- data/ext/cld/languages/proto/languages.pb.h +179 -0
- data/ext/cld/languages/public/languages.h +379 -0
- data/language_detection.gemspec +28 -0
- data/lib/language_detection/string.rb +1 -0
- data/lib/language_detection/version.rb +3 -0
- data/lib/language_detection.rb +54 -0
- data/test/_helper.rb +15 -0
- data/test/fixtures/languages.csv +80 -0
- data/test/language_detection_test.rb +88 -0
- metadata +250 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file.
|
|
4
|
+
|
|
5
|
+
#ifndef BASE_CASTS_H_
|
|
6
|
+
#define BASE_CASTS_H_
|
|
7
|
+
|
|
8
|
+
#include <assert.h> // for use with down_cast<>
|
|
9
|
+
#include <string.h> // for memcpy
|
|
10
|
+
|
|
11
|
+
#include "base/macros.h"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
// Use implicit_cast as a safe version of static_cast or const_cast
|
|
15
|
+
// for upcasting in the type hierarchy (i.e. casting a pointer to Foo
|
|
16
|
+
// to a pointer to SuperclassOfFoo or casting a pointer to Foo to
|
|
17
|
+
// a const pointer to Foo).
|
|
18
|
+
// When you use implicit_cast, the compiler checks that the cast is safe.
|
|
19
|
+
// Such explicit implicit_casts are necessary in surprisingly many
|
|
20
|
+
// situations where C++ demands an exact type match instead of an
|
|
21
|
+
// argument type convertible to a target type.
|
|
22
|
+
//
|
|
23
|
+
// The From type can be inferred, so the preferred syntax for using
|
|
24
|
+
// implicit_cast is the same as for static_cast etc.:
|
|
25
|
+
//
|
|
26
|
+
// implicit_cast<ToType>(expr)
|
|
27
|
+
//
|
|
28
|
+
// implicit_cast would have been part of the C++ standard library,
|
|
29
|
+
// but the proposal was submitted too late. It will probably make
|
|
30
|
+
// its way into the language in the future.
|
|
31
|
+
// Converts `value` to type To using only the conversions the compiler is
// willing to apply implicitly; anything that would need an explicit cast
// is rejected at compile time.
//
// To has to be named by the caller, From is deduced from the argument:
//
//   implicit_cast<ToType>(expr)
template <typename To, typename From>
inline To implicit_cast(const From& value) {
  return value;
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
// When you upcast (that is, cast a pointer from type Foo to type
|
|
38
|
+
// SuperclassOfFoo), it's fine to use implicit_cast<>, since upcasts
|
|
39
|
+
// always succeed. When you downcast (that is, cast a pointer from
|
|
40
|
+
// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
|
|
41
|
+
// how do you know the pointer is really of type SubclassOfFoo? It
|
|
42
|
+
// could be a bare Foo, or of type DifferentSubclassOfFoo. Thus,
|
|
43
|
+
// when you downcast, you should use down_cast<>. In debug mode, we
|
|
44
|
+
// use dynamic_cast<> to double-check the downcast is legal (we die
|
|
45
|
+
// if it's not). In normal mode, we do the efficient static_cast<>
|
|
46
|
+
// instead. Thus, it's important to test in debug mode to make sure
|
|
47
|
+
// the cast is legal!
|
|
48
|
+
// This is the only place in the code we should use dynamic_cast<>.
|
|
49
|
+
// In particular, you SHOULDN'T be using dynamic_cast<> in order to
|
|
50
|
+
// do RTTI, e.g. code like this:
|
|
51
|
+
// if (dynamic_cast<Subclass1*>(foo)) HandleASubclass1Object(foo);
|
|
52
|
+
// if (dynamic_cast<Subclass2*>(foo)) HandleASubclass2Object(foo);
|
|
53
|
+
// You should design the code some other way not to need this.
|
|
54
|
+
|
|
55
|
+
template<typename To, typename From> // use like this: down_cast<T*>(foo);
|
|
56
|
+
inline To down_cast(From* f) { // so we only accept pointers
|
|
57
|
+
// Ensures that To is a sub-type of From *. This test is here only
|
|
58
|
+
// for compile-time type checking, and has no overhead in an
|
|
59
|
+
// optimized build at run-time, as it will be optimized away
|
|
60
|
+
// completely.
|
|
61
|
+
if (false) {
|
|
62
|
+
implicit_cast<From*, To>(0);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
assert(f == NULL || dynamic_cast<To>(f) != NULL); // RTTI: debug mode only!
|
|
66
|
+
return static_cast<To>(f);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Overload of down_cast for references. Use like this: down_cast<T&>(foo).
|
|
70
|
+
// The code is slightly convoluted because we're still using the pointer
|
|
71
|
+
// form of dynamic cast. (The reference form throws an exception if it
|
|
72
|
+
// fails.)
|
|
73
|
+
//
|
|
74
|
+
// There's no need for a special const overload either for the pointer
|
|
75
|
+
// or the reference form. If you call down_cast with a const T&, the
|
|
76
|
+
// compiler will just bind From to const T.
|
|
77
|
+
template<typename To, typename From>
|
|
78
|
+
inline To down_cast(From& f) {
|
|
79
|
+
COMPILE_ASSERT(base::is_reference<To>::value, target_type_not_a_reference);
|
|
80
|
+
typedef typename base::remove_reference<To>::type* ToAsPointer;
|
|
81
|
+
if (false) {
|
|
82
|
+
// Compile-time check that To inherits from From. See above for details.
|
|
83
|
+
implicit_cast<From*, ToAsPointer>(0);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
assert(dynamic_cast<ToAsPointer>(&f) != NULL); // RTTI: debug mode only
|
|
87
|
+
return static_cast<To>(f);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// bit_cast<Dest,Source> is a template function that implements the
|
|
91
|
+
// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
|
|
92
|
+
// very low-level functions like the protobuf library and fast math
|
|
93
|
+
// support.
|
|
94
|
+
//
|
|
95
|
+
// float f = 3.14159265358979;
|
|
96
|
+
// int i = bit_cast<int32>(f);
|
|
97
|
+
// // i = 0x40490fdb
|
|
98
|
+
//
|
|
99
|
+
// The classical address-casting method is:
|
|
100
|
+
//
|
|
101
|
+
// // WRONG
|
|
102
|
+
// float f = 3.14159265358979; // WRONG
|
|
103
|
+
// int i = * reinterpret_cast<int*>(&f); // WRONG
|
|
104
|
+
//
|
|
105
|
+
// The address-casting method actually produces undefined behavior
|
|
106
|
+
// according to ISO C++ specification section 3.10 -15-. Roughly, this
|
|
107
|
+
// section says: if an object in memory has one type, and a program
|
|
108
|
+
// accesses it with a different type, then the result is undefined
|
|
109
|
+
// behavior for most values of "different type".
|
|
110
|
+
//
|
|
111
|
+
// This is true for any cast syntax, either *(int*)&f or
|
|
112
|
+
// *reinterpret_cast<int*>(&f). And it is particularly true for
|
|
113
|
+
// conversions between integral lvalues and floating-point lvalues.
|
|
114
|
+
//
|
|
115
|
+
// The purpose of 3.10 -15- is to allow optimizing compilers to assume
|
|
116
|
+
// that expressions with different types refer to different memory. gcc
|
|
117
|
+
// 4.0.1 has an optimizer that takes advantage of this. So a
|
|
118
|
+
// non-conforming program quietly produces wildly incorrect output.
|
|
119
|
+
//
|
|
120
|
+
// The problem is not the use of reinterpret_cast. The problem is type
|
|
121
|
+
// punning: holding an object in memory of one type and reading its bits
|
|
122
|
+
// back using a different type.
|
|
123
|
+
//
|
|
124
|
+
// The C++ standard is more subtle and complex than this, but that
|
|
125
|
+
// is the basic idea.
|
|
126
|
+
//
|
|
127
|
+
// Anyways ...
|
|
128
|
+
//
|
|
129
|
+
// bit_cast<> calls memcpy() which is blessed by the standard,
|
|
130
|
+
// especially by the example in section 3.9 . Also, of course,
|
|
131
|
+
// bit_cast<> wraps up the nasty logic in one place.
|
|
132
|
+
//
|
|
133
|
+
// Fortunately memcpy() is very fast. In optimized mode, with a
|
|
134
|
+
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
|
|
135
|
+
// code with the minimal amount of data movement. On a 32-bit system,
|
|
136
|
+
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
|
|
137
|
+
// compiles to two loads and two stores.
|
|
138
|
+
//
|
|
139
|
+
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
|
|
140
|
+
//
|
|
141
|
+
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
|
|
142
|
+
// is likely to surprise you.
|
|
143
|
+
//
|
|
144
|
+
|
|
145
|
+
// Returns a Dest whose bytes are a memcpy() of the bytes of `source`.
// Dest and Source must have the same size; a mismatch is a compile error.
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // Compile-time size check: when the sizes differ the array bound becomes
  // -1, which makes this typedef ill-formed and stops compilation here.
  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];

  Dest result;
  memcpy(&result, &source, sizeof(result));
  return result;
}
|
|
155
|
+
|
|
156
|
+
#endif // BASE_CASTS_H_
|
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file.
|
|
4
|
+
|
|
5
|
+
// This is the file that should be included by any file which declares
|
|
6
|
+
// or defines a command line flag or wants to parse command line flags
|
|
7
|
+
// or print a program usage message (which will include information about
|
|
8
|
+
// flags). Executive summary, in the form of an example foo.cc file:
|
|
9
|
+
//
|
|
10
|
+
// #include "foo.h" // foo.h has a line "DECLARE_int32(start);"
|
|
11
|
+
//
|
|
12
|
+
// DEFINE_int32(end, 1000, "The last record to read");
|
|
13
|
+
// DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...)
|
|
14
|
+
//
|
|
15
|
+
// void MyFunc() {
|
|
16
|
+
// if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end);
|
|
17
|
+
// }
|
|
18
|
+
//
|
|
19
|
+
// Then, at the command-line:
|
|
20
|
+
// ./foo --noverbose --start=5 --end=100
|
|
21
|
+
|
|
22
|
+
#ifndef BASE_COMMANDLINEFLAGS_H_
|
|
23
|
+
#define BASE_COMMANDLINEFLAGS_H_
|
|
24
|
+
|
|
25
|
+
#include <assert.h>
|
|
26
|
+
#include <string>
|
|
27
|
+
#include <vector>
|
|
28
|
+
#include "base/basictypes.h"
|
|
29
|
+
#include "base/port.h"
|
|
30
|
+
#include "base/stl_decl_msvc.h"
|
|
31
|
+
#include "base/global_strip_options.h"
|
|
32
|
+
|
|
33
|
+
// --------------------------------------------------------------------
|
|
34
|
+
// To actually define a flag in a file, use DEFINE_bool,
|
|
35
|
+
// DEFINE_string, etc. at the bottom of this file. You may also find
|
|
36
|
+
// it useful to register a validator with the flag. This ensures that
|
|
37
|
+
// when the flag is parsed from the commandline, or is later set via
|
|
38
|
+
// SetCommandLineOption, we call the validation function.
|
|
39
|
+
//
|
|
40
|
+
// The validation function should return true if the flag value is valid, and
|
|
41
|
+
// false otherwise. If the function returns false for the new setting of the
|
|
42
|
+
// flag, the flag will retain its current value. If it returns false for the
|
|
43
|
+
// default value, InitGoogle will die.
|
|
44
|
+
//
|
|
45
|
+
// This function is safe to call at global construct time (as in the
|
|
46
|
+
// example below).
|
|
47
|
+
//
|
|
48
|
+
// Example use:
|
|
49
|
+
// static bool ValidatePort(const char* flagname, int32 value) {
|
|
50
|
+
// if (value > 0 && value < 32768) // value is ok
|
|
51
|
+
// return true;
|
|
52
|
+
// printf("Invalid value for --%s: %d\n", flagname, (int)value);
|
|
53
|
+
// return false;
|
|
54
|
+
// }
|
|
55
|
+
// DEFINE_int32(port, 0, "What port to listen on");
|
|
56
|
+
// static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort);
|
|
57
|
+
|
|
58
|
+
// Returns true if successfully registered, false if not (because the
|
|
59
|
+
// first argument doesn't point to a command-line flag, or because a
|
|
60
|
+
// validator is already registered for this flag).
|
|
61
|
+
bool RegisterFlagValidator(const bool* flag,
|
|
62
|
+
bool (*validate_fn)(const char*, bool));
|
|
63
|
+
bool RegisterFlagValidator(const int32* flag,
|
|
64
|
+
bool (*validate_fn)(const char*, int32));
|
|
65
|
+
bool RegisterFlagValidator(const int64* flag,
|
|
66
|
+
bool (*validate_fn)(const char*, int64));
|
|
67
|
+
bool RegisterFlagValidator(const uint64* flag,
|
|
68
|
+
bool (*validate_fn)(const char*, uint64));
|
|
69
|
+
bool RegisterFlagValidator(const double* flag,
|
|
70
|
+
bool (*validate_fn)(const char*, double));
|
|
71
|
+
bool RegisterFlagValidator(const string* flag,
|
|
72
|
+
bool (*validate_fn)(const char*, const string&));
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
// --------------------------------------------------------------------
|
|
76
|
+
// These methods are the best way to get access to info about the
|
|
77
|
+
// list of commandline flags. Note that these routines are pretty slow.
|
|
78
|
+
// GetAllFlags: mostly-complete info about the list, sorted by file.
|
|
79
|
+
// ShowUsageWithFlags: pretty-prints the list to stdout (what --help does)
|
|
80
|
+
// ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr
|
|
81
|
+
//
|
|
82
|
+
// In addition to accessing flags, you can also access argv[0] (the program
|
|
83
|
+
// name) and argv (the entire commandline), which we sock away a copy of.
|
|
84
|
+
// These variables are static, so you should only set them once.
|
|
85
|
+
|
|
86
|
+
// Plain record describing one command-line flag. All textual attributes
// are held as strings, followed by two boolean status fields.
struct CommandLineFlagInfo {
  // Flag name.
  string name;
  // Flag type, e.g. int32.
  string type;
  // The "help text" that goes with the flag.
  string description;
  // Current value, rendered as a string.
  string current_value;
  // Default value, rendered as a string.
  string default_value;
  // 'Cleaned' version of the name of the file holding the flag.
  string filename;
  // True if the flag has its default value.
  bool is_default;
  // True if RegisterFlagValidator has been called on this flag.
  bool has_validator_fn;
};
|
|
96
|
+
|
|
97
|
+
extern void GetAllFlags(vector<CommandLineFlagInfo>* OUTPUT);
|
|
98
|
+
// These two are actually defined in commandlineflags_reporting.cc.
|
|
99
|
+
extern void ShowUsageWithFlags(const char *argv0); // what --help does
|
|
100
|
+
extern void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict);
|
|
101
|
+
|
|
102
|
+
// Create a descriptive string for a flag.
|
|
103
|
+
// Goes to some trouble to make pretty line breaks.
|
|
104
|
+
extern string DescribeOneFlag(const CommandLineFlagInfo& flag);
|
|
105
|
+
|
|
106
|
+
// Thread-hostile; meant to be called before any threads are spawned.
|
|
107
|
+
extern void SetArgv(int argc, const char** argv);
|
|
108
|
+
// The following functions are thread-safe as long as SetArgv() is
|
|
109
|
+
// only called before any threads start.
|
|
110
|
+
extern const vector<string>& GetArgvs(); // all of argv = vector of strings
|
|
111
|
+
extern const char* GetArgv(); // all of argv as a string
|
|
112
|
+
extern const char* GetArgv0(); // only argv0
|
|
113
|
+
extern uint32 GetArgvSum(); // simple checksum of argv
|
|
114
|
+
extern const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set
|
|
115
|
+
extern const char* ProgramInvocationShortName(); // basename(argv0)
|
|
116
|
+
// ProgramUsage() is thread-safe as long as SetUsageMessage() is only
|
|
117
|
+
// called before any threads start.
|
|
118
|
+
extern const char* ProgramUsage(); // string set by SetUsageMessage()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
// --------------------------------------------------------------------
|
|
122
|
+
// Normally you access commandline flags by just saying "if (FLAGS_foo)"
|
|
123
|
+
// or whatever, and set them by calling "FLAGS_foo = bar" (or, more
|
|
124
|
+
// commonly, via the DEFINE_foo macro). But if you need a bit more
|
|
125
|
+
// control, we have programmatic ways to get/set the flags as well.
|
|
126
|
+
// These programmatic ways to access flags are thread-safe, but direct
|
|
127
|
+
// access is only thread-compatible.
|
|
128
|
+
|
|
129
|
+
// Return true iff the flagname was found.
|
|
130
|
+
// OUTPUT is set to the flag's value, or unchanged if we return false.
|
|
131
|
+
extern bool GetCommandLineOption(const char* name, string* OUTPUT);
|
|
132
|
+
|
|
133
|
+
// Return true iff the flagname was found. OUTPUT is set to the flag's
|
|
134
|
+
// CommandLineFlagInfo or unchanged if we return false.
|
|
135
|
+
extern bool GetCommandLineFlagInfo(const char* name,
|
|
136
|
+
CommandLineFlagInfo* OUTPUT);
|
|
137
|
+
|
|
138
|
+
// Return the CommandLineFlagInfo of the flagname. exit() if name not found.
|
|
139
|
+
// Example usage, to check if a flag's value is currently the default value:
|
|
140
|
+
// if (GetCommandLineFlagInfoOrDie("foo").is_default) ...
|
|
141
|
+
extern CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name);
|
|
142
|
+
|
|
143
|
+
// Selects how a new value is applied to a flag.
enum FlagSettingMode {
  // Update the flag's value. May be used any number of times.
  SET_FLAGS_VALUE,

  // Update the flag's value, but *only if* it has not already been updated
  // with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef".
  SET_FLAG_IF_DEFAULT,

  // Make this the flag's default value. If the flag has not been updated
  // yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef"),
  // its current value is changed to the new default as well.
  SET_FLAGS_DEFAULT
};
|
|
154
|
+
|
|
155
|
+
// Set a particular flag ("command line option"). Returns a string
|
|
156
|
+
// describing the new value that the option has been set to. The
|
|
157
|
+
// return value API is not well-specified, so basically just depend on
|
|
158
|
+
// it to be empty if the setting failed for some reason -- the name is
|
|
159
|
+
// not a valid flag name, or the value is not a valid value -- and
|
|
160
|
+
// non-empty else.
|
|
161
|
+
|
|
162
|
+
// SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case)
|
|
163
|
+
extern string SetCommandLineOption(const char* name, const char* value);
|
|
164
|
+
extern string SetCommandLineOptionWithMode(const char* name, const char* value,
|
|
165
|
+
FlagSettingMode set_mode);
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
// --------------------------------------------------------------------
|
|
169
|
+
// Saves the states (value, default value, whether the user has set
|
|
170
|
+
// the flag, registered validators, etc) of all flags, and restores
|
|
171
|
+
// them when the FlagSaver is destroyed. This is very useful in
|
|
172
|
+
// tests, say, when you want to let your tests change the flags, but
|
|
173
|
+
// make sure that they get reverted to the original states when your
|
|
174
|
+
// test is complete.
|
|
175
|
+
//
|
|
176
|
+
// Example usage:
|
|
177
|
+
// void TestFoo() {
|
|
178
|
+
// FlagSaver s1;
|
|
179
|
+
// FLAGS_foo = false;
|
|
180
|
+
// FLAGS_bar = "some value";
|
|
181
|
+
//
|
|
182
|
+
// // test happens here. You can return at any time
|
|
183
|
+
// // without worrying about restoring the FLAG values.
|
|
184
|
+
// }
|
|
185
|
+
//
|
|
186
|
+
// Note: This class is marked with ATTRIBUTE_UNUSED because all the
|
|
187
|
+
// work is done in the constructor and destructor, so in the standard
|
|
188
|
+
// usage example above, the compiler would complain that it's an
|
|
189
|
+
// unused variable.
|
|
190
|
+
//
|
|
191
|
+
// This class is thread-safe.
|
|
192
|
+
/*
|
|
193
|
+
class FlagSaver {
|
|
194
|
+
public:
|
|
195
|
+
FlagSaver();
|
|
196
|
+
~FlagSaver();
|
|
197
|
+
|
|
198
|
+
private:
|
|
199
|
+
class FlagSaverImpl* impl_; // we use pimpl here to keep API steady
|
|
200
|
+
|
|
201
|
+
FlagSaver(const FlagSaver&); // no copying!
|
|
202
|
+
void operator=(const FlagSaver&);
|
|
203
|
+
}
|
|
204
|
+
#ifndef SWIG // swig seems to have trouble with this for some reason
|
|
205
|
+
ATTRIBUTE_UNUSED
|
|
206
|
+
#endif
|
|
207
|
+
;
|
|
208
|
+
*/
|
|
209
|
+
// --------------------------------------------------------------------
|
|
210
|
+
// Some deprecated or hopefully-soon-to-be-deprecated functions.
|
|
211
|
+
|
|
212
|
+
// This is often used for logging. TODO(csilvers): figure out a better way
|
|
213
|
+
extern string CommandlineFlagsIntoString();
|
|
214
|
+
// Usually where this is used, a FlagSaver should be used instead.
|
|
215
|
+
extern bool ReadFlagsFromString(const string& flagfilecontents,
|
|
216
|
+
const char* prog_name,
|
|
217
|
+
bool errors_are_fatal); // uses SET_FLAGS_VALUE
|
|
218
|
+
|
|
219
|
+
// These let you manually implement --flagfile functionality.
|
|
220
|
+
// DEPRECATED.
|
|
221
|
+
extern bool AppendFlagsIntoFile(const string& filename, const char* prog_name);
|
|
222
|
+
extern bool SaveCommandFlags(); // actually defined in google.cc !
|
|
223
|
+
extern bool ReadFromFlagsFile(const string& filename, const char* prog_name,
|
|
224
|
+
bool errors_are_fatal); // uses SET_FLAGS_VALUE
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
// --------------------------------------------------------------------
|
|
228
|
+
// Useful routines for initializing flags from the environment.
|
|
229
|
+
// In each case, if 'varname' does not exist in the environment
|
|
230
|
+
// return defval. If 'varname' does exist but is not valid
|
|
231
|
+
// (e.g., not a number for an int32 flag), abort with an error.
|
|
232
|
+
// Otherwise, return the value. NOTE: for booleans, for true use
|
|
233
|
+
// 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'.
|
|
234
|
+
|
|
235
|
+
extern bool BoolFromEnv(const char *varname, bool defval);
|
|
236
|
+
extern int32 Int32FromEnv(const char *varname, int32 defval);
|
|
237
|
+
extern int64 Int64FromEnv(const char *varname, int64 defval);
|
|
238
|
+
extern uint64 Uint64FromEnv(const char *varname, uint64 defval);
|
|
239
|
+
extern double DoubleFromEnv(const char *varname, double defval);
|
|
240
|
+
extern const char *StringFromEnv(const char *varname, const char *defval);
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
// --------------------------------------------------------------------
|
|
244
|
+
// The next two functions parse commandlineflags from main():
|
|
245
|
+
|
|
246
|
+
// Set the "usage" message for this program. For example:
|
|
247
|
+
// string usage("This program does nothing. Sample usage:\n");
|
|
248
|
+
// usage += string(argv[0]) + " <uselessarg1> <uselessarg2>";
|
|
249
|
+
// SetUsageMessage(usage);
|
|
250
|
+
// Do not include commandline flags in the usage: we do that for you!
|
|
251
|
+
// Thread-hostile; meant to be called before any threads are spawned.
|
|
252
|
+
extern void SetUsageMessage(const string& usage);
|
|
253
|
+
|
|
254
|
+
// Looks for flags in argv and parses them. Rearranges argv to put
|
|
255
|
+
// flags first, or removes them entirely if remove_flags is true.
|
|
256
|
+
// If a flag is defined more than once in the command line or flag
|
|
257
|
+
// file, the last definition is used.
|
|
258
|
+
// See top-of-file for more details on this function.
|
|
259
|
+
#ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead.
|
|
260
|
+
extern uint32 ParseCommandLineFlags(int *argc, char*** argv,
|
|
261
|
+
bool remove_flags);
|
|
262
|
+
#endif
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
// Calls to ParseCommandLineNonHelpFlags and then to
|
|
266
|
+
// HandleCommandLineHelpFlags can be used instead of a call to
|
|
267
|
+
// ParseCommandLineFlags during initialization, in order to allow for
|
|
268
|
+
// changing default values for some FLAGS (via
|
|
269
|
+
// e.g. SetCommandLineOptionWithMode calls) between the time of
|
|
270
|
+
// command line parsing and the time of dumping help information for
|
|
271
|
+
// the flags as a result of command line parsing.
|
|
272
|
+
// If a flag is defined more than once in the command line or flag
|
|
273
|
+
// file, the last definition is used.
|
|
274
|
+
extern uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv,
|
|
275
|
+
bool remove_flags);
|
|
276
|
+
// This is actually defined in commandlineflags_reporting.cc.
|
|
277
|
+
// This function is misnamed (it also handles --version, etc.), but
|
|
278
|
+
// it's too late to change that now. :-(
|
|
279
|
+
extern void HandleCommandLineHelpFlags(); // in commandlineflags_reporting.cc
|
|
280
|
+
|
|
281
|
+
// Allow command line reparsing. Disables the error normally
|
|
282
|
+
// generated when an unknown flag is found, since it may be found in a
|
|
283
|
+
// later parse. Thread-hostile; meant to be called before any threads
|
|
284
|
+
// are spawned.
|
|
285
|
+
extern void AllowCommandLineReparsing();
|
|
286
|
+
|
|
287
|
+
// Reparse the flags that have not yet been recognized.
|
|
288
|
+
// Only flags registered since the last parse will be recognized.
|
|
289
|
+
// Any flag value must be provided as part of the argument using "=",
|
|
290
|
+
// not as a separate command line argument that follows the flag argument.
|
|
291
|
+
// Intended for handling flags from dynamically loaded libraries,
|
|
292
|
+
// since their flags are not registered until they are loaded.
|
|
293
|
+
extern uint32 ReparseCommandLineNonHelpFlags();
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
// --------------------------------------------------------------------
|
|
297
|
+
// Now come the command line flag declaration/definition macros that
|
|
298
|
+
// will actually be used. They're kind of hairy. A major reason
|
|
299
|
+
// for this is initialization: we want people to be able to access
|
|
300
|
+
// variables in global constructors and have that not crash, even if
|
|
301
|
+
// their global constructor runs before the global constructor here.
|
|
302
|
+
// (Obviously, we can't guarantee the flags will have the correct
|
|
303
|
+
// default value in that case, but at least accessing them is safe.)
|
|
304
|
+
// The only way to do that is have flags point to a static buffer.
|
|
305
|
+
// So we make one, using a union to ensure proper alignment, and
|
|
306
|
+
// then use placement-new to actually set up the flag with the
|
|
307
|
+
// correct default value. In the same vein, we have to worry about
|
|
308
|
+
// flag access in global destructors, so FlagRegisterer has to be
|
|
309
|
+
// careful never to destroy the flag-values it constructs.
|
|
310
|
+
//
|
|
311
|
+
// Note that when we define a flag variable FLAGS_<name>, we also
|
|
312
|
+
// preemptively define a junk variable, FLAGS_no<name>. This is to
|
|
313
|
+
// cause a link-time error if someone tries to define 2 flags with
|
|
314
|
+
// names like "logging" and "nologging". We do this because a bool
|
|
315
|
+
// flag FLAG can be set from the command line to true with a "-FLAG"
|
|
316
|
+
// argument, and to false with a "-noFLAG" argument, and so this can
|
|
317
|
+
// potentially avert confusion.
|
|
318
|
+
//
|
|
319
|
+
// We also put flags into their own namespace. It is purposefully
|
|
320
|
+
// named in an opaque way that people should have trouble typing
|
|
321
|
+
// directly. The idea is that DEFINE puts the flag in the weird
|
|
322
|
+
// namespace, and DECLARE imports the flag from there into the current
|
|
323
|
+
// namespace. The net result is to force people to use DECLARE to get
|
|
324
|
+
// access to a flag, rather than saying "extern bool FLAGS_whatever;"
|
|
325
|
+
// or some such instead. We want this so we can put extra
|
|
326
|
+
// functionality (like sanity-checking) in DECLARE if we want, and
|
|
327
|
+
// make sure it is picked up everywhere.
|
|
328
|
+
//
|
|
329
|
+
// We also put the type of the variable in the namespace, so that
|
|
330
|
+
// people can't DECLARE_int32 something that they DEFINE_bool'd
|
|
331
|
+
// elsewhere.
|
|
332
|
+
|
|
333
|
+
class FlagRegisterer {  // Only a constructor is declared; the DEFINE_* macros create one static instance per flag.
|
|
334
|
+
public:
|
|
335
|
+
FlagRegisterer(const char* name, const char* type,  // flag name and type name, passed as strings by the macros
|
|
336
|
+
const char* help, const char* filename,  // help text (possibly stripped) and __FILE__ of the DEFINE site
|
|
337
|
+
void* current_storage, void* defvalue_storage);  // storage holding the current value and the default value
|
|
338
|
+
};
|
|
339
|
+
|
|
340
|
+
#ifndef SWIG // In swig, ignore the main flag declarations
|
|
341
|
+
|
|
342
|
+
// If STRIP_FLAG_HELP is defined and is non-zero, we remove the help
|
|
343
|
+
// message from the binary file. This is useful for security reasons
|
|
344
|
+
// when shipping a binary outside of Google (if the user cannot see
|
|
345
|
+
// the usage message by executing the program, they shouldn't be able
|
|
346
|
+
// to see it by running "strings binary_file").
|
|
347
|
+
|
|
348
|
+
extern const char kStrippedFlagHelp[];
|
|
349
|
+
|
|
350
|
+
#if STRIP_FLAG_HELP > 0  // stripping requested: substitute the shared placeholder string
|
|
351
|
+
// Need this construct to avoid the 'defined but not used' warning.
|
|
352
|
+
#define MAYBE_STRIPPED_HELP(txt) (false ? (txt) : kStrippedFlagHelp)  // (txt) sits in the never-taken branch; result is always kStrippedFlagHelp
|
|
353
|
+
#else
|
|
354
|
+
#define MAYBE_STRIPPED_HELP(txt) txt  // no stripping: help text is passed through unchanged
|
|
355
|
+
#endif
|
|
356
|
+
|
|
357
|
+
// Each command-line flag has two variables associated with it: one
|
|
358
|
+
// with the current value, and one with the default value. However,
|
|
359
|
+
// we have a third variable, which is where value is assigned; it's a
|
|
360
|
+
// constant. This guarantees that FLAGS_##name is initialized at
|
|
361
|
+
// static initialization time (e.g. before program-start) rather than
|
|
362
|
+
// global construction time (which is after program-start but
|
|
363
|
+
// before main), at least when 'value' is a compile-time constant. We
|
|
364
|
+
// use a small trick for the "default value" variable, and call it
|
|
365
|
+
// FLAGS_no<name>. This serves the second purpose of assuring a
|
|
366
|
+
// compile error if someone tries to define a flag named no<name>
|
|
367
|
+
// which is illegal (--foo and --nofoo both affect the "foo" flag).
|
|
368
|
+
#define DEFINE_VARIABLE(type, shorttype, name, value, help) /* defines flag storage and its registerer in namespace fL<shorttype> */ \
|
|
369
|
+
namespace fL##shorttype { \
|
|
370
|
+
static const type FLAGS_nono##name = value; /* constant that holds 'value' and initializes both variables below */ \
|
|
371
|
+
type FLAGS_##name = FLAGS_nono##name; /* current value of the flag */ \
|
|
372
|
+
type FLAGS_no##name = FLAGS_nono##name; /* default value; also occupies the no<name> identifier */ \
|
|
373
|
+
static FlagRegisterer o_##name( /* one file-local registerer object per flag */ \
|
|
374
|
+
#name, #type, MAYBE_STRIPPED_HELP(help), __FILE__, \
|
|
375
|
+
&FLAGS_##name, &FLAGS_no##name); /* current_storage, defvalue_storage */ \
|
|
376
|
+
} \
|
|
377
|
+
using fL##shorttype::FLAGS_##name /* make the flag visible in the enclosing namespace; caller supplies the ';' */
|
|
378
|
+
|
|
379
|
+
#define DECLARE_VARIABLE(type, shorttype, name) /* declares a flag that DEFINE_VARIABLE defines elsewhere */ \
|
|
380
|
+
namespace fL##shorttype { \
|
|
381
|
+
extern type FLAGS_##name; /* same namespace and name as DEFINE_VARIABLE uses for this shorttype */ \
|
|
382
|
+
} \
|
|
383
|
+
using fL##shorttype::FLAGS_##name /* import into the enclosing namespace; caller supplies the ';' */
|
|
384
|
+
|
|
385
|
+
// For boolean flags, we want to do the extra check that the passed-in
|
|
386
|
+
// value is actually a bool, and not a string or something that can be
|
|
387
|
+
// coerced to a bool. These declarations (no definition needed!) will
|
|
388
|
+
// help us do that, and never evaluate 'from', which is important.
|
|
389
|
+
// We'll use 'sizeof(IsBoolFlag(val))' to distinguish.
|
|
390
|
+
namespace fLB {  // same namespace that DEFINE_bool / DECLARE_bool place bool flags in
|
|
391
|
+
template<typename From> double IsBoolFlag(const From& from);  // declaration only; DEFINE_bool treats a sizeof(double) result as "val is not a bool"
|
|
392
|
+
bool IsBoolFlag(bool from);  // declaration only; the overload intended to match a genuine bool argument
|
|
393
|
+
}
|
|
394
|
+
extern bool FlagsTypeWarn(const char *name);
|
|
395
|
+
|
|
396
|
+
#define DECLARE_bool(name) DECLARE_VARIABLE(bool,B, name)  // bool flags live in namespace fLB
|
|
397
|
+
// We have extra code here to make sure 'val' is actually a boolean.
|
|
398
|
+
#define DEFINE_bool(name,val,txt) namespace fLB { \
|
|
399
|
+
const bool FLAGS_nonono##name = /* helper constant: true for a bool val, else whatever FlagsTypeWarn returns */ \
|
|
400
|
+
(sizeof(::fLB::IsBoolFlag(val)) /* sizeof does not evaluate val */ \
|
|
401
|
+
== sizeof(double)) /* double result means the template overload was chosen */ \
|
|
402
|
+
? FlagsTypeWarn(#name) : true; \
|
|
403
|
+
} \
|
|
404
|
+
DEFINE_VARIABLE(bool,B, name, val, txt) /* the actual flag definition */
|
|
405
|
+
#define DECLARE_int32(name) DECLARE_VARIABLE(int32,I, name)  // int32 flags live in namespace fLI
|
|
406
|
+
#define DEFINE_int32(name,val,txt) DEFINE_VARIABLE(int32,I, name, val, txt)
|
|
407
|
+
|
|
408
|
+
#define DECLARE_int64(name) DECLARE_VARIABLE(int64,I64, name)  // int64 flags live in namespace fLI64
|
|
409
|
+
#define DEFINE_int64(name,val,txt) DEFINE_VARIABLE(int64,I64, name, val, txt)
|
|
410
|
+
|
|
411
|
+
#define DECLARE_uint64(name) DECLARE_VARIABLE(uint64,U64, name)  // uint64 flags live in namespace fLU64
|
|
412
|
+
#define DEFINE_uint64(name,val,txt) DEFINE_VARIABLE(uint64,U64, name, val, txt)
|
|
413
|
+
|
|
414
|
+
#define DECLARE_double(name) DECLARE_VARIABLE(double,D, name)  // double flags live in namespace fLD
|
|
415
|
+
#define DEFINE_double(name,val,txt) DEFINE_VARIABLE(double,D, name, val, txt)
|
|
416
|
+
|
|
417
|
+
// Strings are trickier, because they're not a POD, so we can't
|
|
418
|
+
// construct them at static-initialization time (instead they get
|
|
419
|
+
// constructed at global-constructor time, which is much later). To
|
|
420
|
+
// try to avoid crashes in that case, we use a char buffer to store
|
|
421
|
+
// the string, which we can static-initialize, and then placement-new
|
|
422
|
+
// into it later. It's not perfect, but the best we can do.
|
|
423
|
+
#define DECLARE_string(name) namespace fLS { extern string& FLAGS_##name; } /* string flags are references, matching DEFINE_string */ \
|
|
424
|
+
using fLS::FLAGS_##name /* import into the enclosing namespace; caller supplies the ';' */
|
|
425
|
+
|
|
426
|
+
// We need to define a var named FLAGS_no##name so people don't define
|
|
427
|
+
// --string and --nostring. And we need a temporary place to put val
|
|
428
|
+
// so we don't have to evaluate it twice. Two great needs that go
|
|
429
|
+
// great together!
|
|
430
|
+
#define DEFINE_string(name, val, txt) /* defines a string flag backed by static raw storage */ \
|
|
431
|
+
namespace fLS { \
|
|
432
|
+
static union { void* align; char s[sizeof(string)]; } s_##name[2]; /* raw buffers: [0] current value, [1] default value */ \
|
|
433
|
+
const string* const FLAGS_no##name = new (s_##name[0].s) string(val); /* constructs slot 0 from val (val evaluated once) and claims the no<name> identifier */ \
|
|
434
|
+
static FlagRegisterer o_##name( \
|
|
435
|
+
#name, "string", MAYBE_STRIPPED_HELP(txt), __FILE__, \
|
|
436
|
+
s_##name[0].s, new (s_##name[1].s) string(*FLAGS_no##name)); /* slot 1 is copy-constructed from slot 0 as the default */ \
|
|
437
|
+
string& FLAGS_##name = *(reinterpret_cast<string*>(s_##name[0].s)); /* the flag is a reference to the string in slot 0 */ \
|
|
438
|
+
} \
|
|
439
|
+
using fLS::FLAGS_##name /* import into the enclosing namespace; caller supplies the ';' */
|
|
440
|
+
|
|
441
|
+
#endif // SWIG
|
|
442
|
+
|
|
443
|
+
#endif // BASE_COMMANDLINEFLAGS_H_
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file.
|
|
4
|
+
|
|
5
|
+
// Support for collecting useful information when crashing.
|
|
6
|
+
|
|
7
|
+
#ifndef BASE_CRASH_H_
|
|
8
|
+
#define BASE_CRASH_H_
|
|
9
|
+
|
|
10
|
+
namespace base {
|
|
11
|
+
|
|
12
|
+
struct CrashReason {  // Record passed to SetCrashReason() and returned by GetCrashReason().
|
|
13
|
+
CrashReason() : filename(0), line_number(0), message(0), depth(0) {}  // stack[] and trace_info[] are not in the initializer list
|
|
14
|
+
|
|
15
|
+
const char* filename;  // not owned here; SetCrashReason requires the contents to stay live
|
|
16
|
+
int line_number;
|
|
17
|
+
const char* message;  // not owned here; same lifetime requirement as filename
|
|
18
|
+
|
|
19
|
+
// We'll also store a bit of stack trace context at the time of crash as
|
|
20
|
+
// it may not be available later on.
|
|
21
|
+
void* stack[32];
|
|
22
|
+
int depth;  // presumably the number of valid entries in stack[] — TODO confirm
|
|
23
|
+
|
|
24
|
+
// We'll try to store some trace information if it's available - this should
|
|
25
|
+
// reflect information from TraceContext::Thread()->tracer()->ToString().
|
|
26
|
+
// This field should probably not be set from within a signal handler or
|
|
27
|
+
// low-level code unless absolutely safe to do so.
|
|
28
|
+
char trace_info[512];
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
// Stores "reason" as an explanation for why the process is about to
|
|
32
|
+
// crash. The reason and its contents must remain live for the life
|
|
33
|
+
// of the process. Only the first reason is kept.
|
|
34
|
+
void SetCrashReason(const CrashReason* reason);
|
|
35
|
+
|
|
36
|
+
// Returns first reason passed to SetCrashReason(), or NULL.
|
|
37
|
+
const CrashReason* GetCrashReason();
|
|
38
|
+
|
|
39
|
+
} // namespace base
|
|
40
|
+
|
|
41
|
+
#endif // BASE_CRASH_H_
|