chipper 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +51 -0
- data/ext/extconf.rb +58 -0
- data/ext/libstemmer_c/Makefile +10 -0
- data/ext/libstemmer_c/examples/stemwords.c +209 -0
- data/ext/libstemmer_c/include/libstemmer.h +79 -0
- data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
- data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
- data/ext/libstemmer_c/libstemmer/modules.h +190 -0
- data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
- data/ext/libstemmer_c/mkinc.mak +82 -0
- data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
- data/ext/libstemmer_c/runtime/api.c +66 -0
- data/ext/libstemmer_c/runtime/api.h +26 -0
- data/ext/libstemmer_c/runtime/header.h +58 -0
- data/ext/libstemmer_c/runtime/utilities.c +478 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
- data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
- data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
- data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
- data/ext/re2/bitstate.cc +378 -0
- data/ext/re2/compile.cc +1138 -0
- data/ext/re2/dfa.cc +2086 -0
- data/ext/re2/filtered_re2.cc +100 -0
- data/ext/re2/filtered_re2.h +99 -0
- data/ext/re2/hash.cc +231 -0
- data/ext/re2/mimics_pcre.cc +185 -0
- data/ext/re2/nfa.cc +709 -0
- data/ext/re2/onepass.cc +614 -0
- data/ext/re2/parse.cc +2202 -0
- data/ext/re2/perl_groups.cc +119 -0
- data/ext/re2/prefilter.cc +671 -0
- data/ext/re2/prefilter.h +105 -0
- data/ext/re2/prefilter_tree.cc +398 -0
- data/ext/re2/prefilter_tree.h +130 -0
- data/ext/re2/prog.cc +341 -0
- data/ext/re2/prog.h +376 -0
- data/ext/re2/re2.cc +1180 -0
- data/ext/re2/re2.h +837 -0
- data/ext/re2/regexp.cc +920 -0
- data/ext/re2/regexp.h +632 -0
- data/ext/re2/rune.cc +258 -0
- data/ext/re2/set.cc +113 -0
- data/ext/re2/set.h +55 -0
- data/ext/re2/simplify.cc +393 -0
- data/ext/re2/stringpiece.cc +87 -0
- data/ext/re2/stringpiece.h +182 -0
- data/ext/re2/tostring.cc +341 -0
- data/ext/re2/unicode_casefold.cc +469 -0
- data/ext/re2/unicode_casefold.h +75 -0
- data/ext/re2/unicode_groups.cc +4851 -0
- data/ext/re2/unicode_groups.h +64 -0
- data/ext/re2/valgrind.cc +24 -0
- data/ext/re2/variadic_function.h +346 -0
- data/ext/re2/walker-inl.h +244 -0
- data/ext/src/chipper.cc +626 -0
- data/ext/src/version.h +1 -0
- data/ext/stemmer.rb +40 -0
- data/ext/util/arena.h +103 -0
- data/ext/util/atomicops.h +79 -0
- data/ext/util/benchmark.h +41 -0
- data/ext/util/flags.h +27 -0
- data/ext/util/logging.h +78 -0
- data/ext/util/mutex.h +190 -0
- data/ext/util/pcre.h +679 -0
- data/ext/util/random.h +29 -0
- data/ext/util/sparse_array.h +451 -0
- data/ext/util/sparse_set.h +177 -0
- data/ext/util/test.h +57 -0
- data/ext/util/thread.h +26 -0
- data/ext/util/utf.h +43 -0
- data/ext/util/util.h +127 -0
- data/ext/util/valgrind.h +4517 -0
- data/test/helper.rb +5 -0
- data/test/test_entities.rb +57 -0
- data/test/test_tokens.rb +118 -0
- metadata +199 -0
data/ext/util/arena.h
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
// Copyright 2000 The RE2 Authors. All Rights Reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style
|
3
|
+
// license that can be found in the LICENSE file.
|
4
|
+
|
5
|
+
// Sometimes it is necessary to allocate a large number of small
|
6
|
+
// objects. Doing this the usual way (malloc, new) is slow,
|
7
|
+
// especially for multithreaded programs. An UnsafeArena provides a
|
8
|
+
// mark/release method of memory management: it asks for a large chunk
|
9
|
+
// from the operating system and doles it out bit by bit as required.
|
10
|
+
// Then you free all the memory at once by calling UnsafeArena::Reset().
|
11
|
+
// The "Unsafe" refers to the fact that UnsafeArena is not safe to
|
12
|
+
// call from multiple threads.
|
13
|
+
//
|
14
|
+
// The global operator new can be used as follows:
|
15
|
+
//
|
16
|
+
// #include "lib/arena-inl.h"
|
17
|
+
//
|
18
|
+
// UnsafeArena arena(1000);
|
19
|
+
// Foo* foo = new (AllocateInArena, &arena) Foo;
|
20
|
+
//
|
21
|
+
|
22
|
+
#ifndef RE2_UTIL_ARENA_H_
|
23
|
+
#define RE2_UTIL_ARENA_H_
|
24
|
+
|
25
|
+
namespace re2 {
|
26
|
+
|
27
|
+
// This class is thread-compatible.
|
28
|
+
class UnsafeArena {
|
29
|
+
public:
|
30
|
+
UnsafeArena(const size_t block_size);
|
31
|
+
virtual ~UnsafeArena();
|
32
|
+
|
33
|
+
void Reset();
|
34
|
+
|
35
|
+
// This should be the worst-case alignment for any type. This is
|
36
|
+
// good for IA-32, SPARC version 7 (the last one I know), and
|
37
|
+
// supposedly Alpha. i386 would be more time-efficient with a
|
38
|
+
// default alignment of 8, but ::operator new() uses alignment of 4,
|
39
|
+
// and an assertion will fail below after the call to MakeNewBlock()
|
40
|
+
// if you try to use a larger alignment.
|
41
|
+
#ifdef __i386__
|
42
|
+
static const int kDefaultAlignment = 4;
|
43
|
+
#else
|
44
|
+
static const int kDefaultAlignment = 8;
|
45
|
+
#endif
|
46
|
+
|
47
|
+
private:
|
48
|
+
void* GetMemoryFallback(const size_t size, const int align);
|
49
|
+
|
50
|
+
public:
|
51
|
+
void* GetMemory(const size_t size, const int align) {
|
52
|
+
if ( size > 0 && size < remaining_ && align == 1 ) { // common case
|
53
|
+
last_alloc_ = freestart_;
|
54
|
+
freestart_ += size;
|
55
|
+
remaining_ -= size;
|
56
|
+
return reinterpret_cast<void*>(last_alloc_);
|
57
|
+
}
|
58
|
+
return GetMemoryFallback(size, align);
|
59
|
+
}
|
60
|
+
|
61
|
+
private:
|
62
|
+
struct AllocatedBlock {
|
63
|
+
char *mem;
|
64
|
+
size_t size;
|
65
|
+
};
|
66
|
+
|
67
|
+
// The returned AllocatedBlock* is valid until the next call to AllocNewBlock
|
68
|
+
// or Reset (i.e. anything that might affect overflow_blocks_).
|
69
|
+
AllocatedBlock *AllocNewBlock(const size_t block_size);
|
70
|
+
|
71
|
+
const AllocatedBlock *IndexToBlock(int index) const;
|
72
|
+
|
73
|
+
const size_t block_size_;
|
74
|
+
char* freestart_; // beginning of the free space in most recent block
|
75
|
+
char* freestart_when_empty_; // beginning of the free space when we're empty
|
76
|
+
char* last_alloc_; // used to make sure ReturnBytes() is safe
|
77
|
+
size_t remaining_;
|
78
|
+
// STL vector isn't as efficient as it could be, so we use an array at first
|
79
|
+
int blocks_alloced_; // how many of the first_blocks_ have been alloced
|
80
|
+
AllocatedBlock first_blocks_[16]; // the length of this array is arbitrary
|
81
|
+
// if the first_blocks_ aren't enough, expand into overflow_blocks_.
|
82
|
+
vector<AllocatedBlock>* overflow_blocks_;
|
83
|
+
|
84
|
+
void FreeBlocks(); // Frees all except first block
|
85
|
+
|
86
|
+
DISALLOW_EVIL_CONSTRUCTORS(UnsafeArena);
|
87
|
+
};
|
88
|
+
|
89
|
+
// Operators for allocation on the arena
|
90
|
+
// Syntax: new (AllocateInArena, arena) MyClass;
|
91
|
+
// STL containers, etc.
|
92
|
+
enum AllocateInArenaType { AllocateInArena };
|
93
|
+
|
94
|
+
} // namespace re2
|
95
|
+
|
96
|
+
inline void* operator new(size_t size,
|
97
|
+
re2::AllocateInArenaType /* unused */,
|
98
|
+
re2::UnsafeArena *arena) {
|
99
|
+
return reinterpret_cast<char*>(arena->GetMemory(size, 1));
|
100
|
+
}
|
101
|
+
|
102
|
+
#endif // RE2_UTIL_ARENA_H_
|
103
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style
|
3
|
+
// license that can be found in the LICENSE file.
|
4
|
+
|
5
|
+
#ifndef RE2_UTIL_ATOMICOPS_H__
|
6
|
+
#define RE2_UTIL_ATOMICOPS_H__
|
7
|
+
|
8
|
+
// WriteMemoryBarrier(): architecture-specific write barrier, with a
// mutex-based fallback for architectures that have no dedicated branch.
#if defined(__i386__)

static inline void WriteMemoryBarrier() {
  // xchg with a memory operand carries an implicit lock prefix.
  // Both operands are declared read-write because the instruction
  // modifies them, and the "memory" clobber keeps the compiler from
  // moving memory accesses across the barrier.
  int slot = 0;
  int scratch = 0;
  __asm__ __volatile__("xchgl %1,%0"
                       : "+m" (slot), "+r" (scratch)
                       :
                       : "memory");
}

#elif defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "sfence" is guaranteed to exist.
static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("sfence" : : : "memory");
}

#elif defined(__ppc__)

static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("eieio" : : : "memory");
}

#elif defined(__alpha__)

static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("wmb" : : : "memory");
}

#else

#include "util/mutex.h"

static inline void WriteMemoryBarrier() {
  // Slight overkill, but good enough:
  // any mutex implementation must have
  // a read barrier after the lock operation and
  // a write barrier before the unlock operation.
  //
  // It may be worthwhile to write architecture-specific
  // barriers for the common platforms, as above, but
  // this is a correct fallback.
  re2::Mutex mu;
  re2::MutexLock l(&mu);
}

/*
#error Need WriteMemoryBarrier for architecture.

// Windows
inline void WriteMemoryBarrier() {
  LONG x;
  ::InterlockedExchange(&x, 0);
}
*/

#endif
|
64
|
+
|
65
|
+
// Alpha has very weak memory ordering.  Code that relies on write
// barriers must also use read barriers on the reader side there.
#if !defined(__alpha__)

// Nothing to do on architectures other than Alpha.
static inline void MaybeReadMemoryBarrier() {
}

#else

// Alpha: issue an explicit read barrier.
static inline void MaybeReadMemoryBarrier() {
  __asm__ __volatile__("rmb" : : : "memory");
}

#endif  // !__alpha__
|
78
|
+
|
79
|
+
#endif // RE2_UTIL_ATOMICOPS_H__
|
@@ -0,0 +1,41 @@
|
|
1
|
+
// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style
|
3
|
+
// license that can be found in the LICENSE file.
|
4
|
+
|
5
|
+
#ifndef RE2_UTIL_BENCHMARK_H__
|
6
|
+
#define RE2_UTIL_BENCHMARK_H__
|
7
|
+
|
8
|
+
namespace testing {

// Describes one benchmark: its name, the function to run (either the
// one-argument or the two-argument form), an argument range and a thread
// range.  Both constructors call Register(), which is defined elsewhere.
struct Benchmark {
  const char* name;
  void (*fn)(int);        // one-argument form; 0 when unused
  void (*fnr)(int, int);  // two-argument form; 0 when unused
  int lo;
  int hi;
  int threadlo;
  int threadhi;

  void Register();

  // Benchmark with the one-argument function form.
  Benchmark(const char* name, void (*f)(int)) {
    Clear(name);
    this->fn = f;
    Register();
  }

  // Benchmark with the two-argument function form over the range [l, h].
  Benchmark(const char* name, void (*f)(int, int), int l, int h) {
    Clear(name);
    this->fnr = f;
    this->lo = l;
    this->hi = h;
    Register();
  }

  // Stores the name and zeroes every other field.
  void Clear(const char* n) {
    name = n;
    fn = 0;
    fnr = 0;
    lo = hi = 0;
    threadlo = threadhi = 0;
  }

  // Records the thread range and returns this object.
  Benchmark* ThreadRange(int lo, int hi) {
    this->threadlo = lo;
    this->threadhi = hi;
    return this;
  }
};

}  // namespace testing
|
25
|
+
|
26
|
+
void SetBenchmarkBytesProcessed(long long);
|
27
|
+
void StopBenchmarkTiming();
|
28
|
+
void StartBenchmarkTiming();
|
29
|
+
void BenchmarkMemoryUsage();
|
30
|
+
void SetBenchmarkItemsProcessed(int);
|
31
|
+
|
32
|
+
int NumCPUs();
|
33
|
+
|
34
|
+
// BENCHMARK(f): defines a pointer named _benchmark_<f> initialised with a
// heap-allocated ::testing::Benchmark built from the function's name and
// the function itself.
#define BENCHMARK(bench_fn) \
	::testing::Benchmark* _benchmark_##bench_fn = \
	(new ::testing::Benchmark(#bench_fn, bench_fn))

// BENCHMARK_RANGE(f, lo, hi): same, using the two-argument function form
// together with the argument range [lo, hi].
#define BENCHMARK_RANGE(bench_fn, range_lo, range_hi) \
	::testing::Benchmark* _benchmark_##bench_fn = \
	(new ::testing::Benchmark(#bench_fn, bench_fn, range_lo, range_hi))
|
40
|
+
|
41
|
+
#endif // RE2_UTIL_BENCHMARK_H__
|
data/ext/util/flags.h
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style
|
3
|
+
// license that can be found in the LICENSE file.
|
4
|
+
|
5
|
+
// Simplified version of Google's command line flags.
|
6
|
+
// Does not support parsing the command line.
|
7
|
+
// If you want to do that, see
|
8
|
+
// http://code.google.com/p/google-gflags
|
9
|
+
|
10
|
+
#ifndef RE2_UTIL_FLAGS_H__
|
11
|
+
#define RE2_UTIL_FLAGS_H__
|
12
|
+
|
13
|
+
// DEFINE_flag defines the variable re2::FLAGS_<name> with the given type
// and default value.  The description argument is accepted but unused.
#define DEFINE_flag(flag_type, flag_name, default_value, description) \
	namespace re2 { flag_type FLAGS_##flag_name = default_value; }

// DECLARE_flag declares re2::FLAGS_<name> as extern so that other files
// can refer to it.
#define DECLARE_flag(flag_type, flag_name) \
	namespace re2 { extern flag_type FLAGS_##flag_name; }

// Typed shorthands for defining flags.
#define DEFINE_bool(flag_name, default_value, description) \
	DEFINE_flag(bool, flag_name, default_value, description)
#define DEFINE_int32(flag_name, default_value, description) \
	DEFINE_flag(int32, flag_name, default_value, description)
#define DEFINE_string(flag_name, default_value, description) \
	DEFINE_flag(string, flag_name, default_value, description)

// Typed shorthands for declaring flags.
#define DECLARE_bool(flag_name) DECLARE_flag(bool, flag_name)
#define DECLARE_int32(flag_name) DECLARE_flag(int32, flag_name)
#define DECLARE_string(flag_name) DECLARE_flag(string, flag_name)
|
26
|
+
|
27
|
+
#endif // RE2_UTIL_FLAGS_H__
|
data/ext/util/logging.h
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style
|
3
|
+
// license that can be found in the LICENSE file.
|
4
|
+
|
5
|
+
// Simplified version of Google's logging.
|
6
|
+
|
7
|
+
#ifndef RE2_UTIL_LOGGING_H__
|
8
|
+
#define RE2_UTIL_LOGGING_H__
|
9
|
+
|
10
|
+
#include <unistd.h> /* for write */
|
11
|
+
#include <sstream>
|
12
|
+
|
13
|
+
// Debug-only checks: each one forwards to assert().
#define DCHECK(cond)         assert(cond)
#define DCHECK_EQ(lhs, rhs)  assert((lhs) == (rhs))
#define DCHECK_NE(lhs, rhs)  assert((lhs) != (rhs))
#define DCHECK_LE(lhs, rhs)  assert((lhs) <= (rhs))
#define DCHECK_LT(lhs, rhs)  assert((lhs) < (rhs))
#define DCHECK_GE(lhs, rhs)  assert((lhs) >= (rhs))
#define DCHECK_GT(lhs, rhs)  assert((lhs) > (rhs))

// Always-on checks.  The if/else shape lets callers append extra text
// with << after the macro; that text is only evaluated when the check
// fails.
#define CHECK(cond) \
	if(cond){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #cond
#define CHECK_LT(lhs, rhs)  CHECK((lhs) < (rhs))
#define CHECK_GT(lhs, rhs)  CHECK((lhs) > (rhs))
#define CHECK_LE(lhs, rhs)  CHECK((lhs) <= (rhs))
#define CHECK_GE(lhs, rhs)  CHECK((lhs) >= (rhs))
#define CHECK_EQ(lhs, rhs)  CHECK((lhs) == (rhs))
#define CHECK_NE(lhs, rhs)  CHECK((lhs) != (rhs))

// Severities.  INFO, ERROR and WARNING all map to LogMessage; FATAL and
// QFATAL map to LogMessageFatal.
#define LOG_INFO     LogMessage(__FILE__, __LINE__)
#define LOG_ERROR    LOG_INFO
#define LOG_WARNING  LOG_INFO
#define LOG_FATAL    LogMessageFatal(__FILE__, __LINE__)
#define LOG_QFATAL   LOG_FATAL

// Streams to LOG_INFO only when the level is not greater than zero.
#define VLOG(level) if((level)>0){}else LOG_INFO.stream()

// DEBUG_MODE is 1 unless NDEBUG is defined; LOG_DFATAL is fatal only in
// debug builds.
#ifndef NDEBUG
#define DEBUG_MODE 1
#define LOG_DFATAL LOG_FATAL
#else
#define DEBUG_MODE 0
#define LOG_DFATAL LOG_ERROR
#endif

// LOG(severity) << ... streams into the message object for that severity.
#define LOG(severity) LOG_ ## severity.stream()
|
48
|
+
|
49
|
+
// Collects one log line in a string stream and writes it, followed by a
// newline, to file descriptor 2.  The line is written either by an
// explicit Flush() or, if that has not happened, by the destructor.
class LogMessage {
 public:
  // Starts the line with the "file:line: " prefix.
  LogMessage(const char* file, int line) : flushed_(false) {
    stream() << file << ":" << line << ": ";
  }

  // Appends the trailing newline and writes the buffered text to fd 2.
  // A failed write is deliberately ignored.
  void Flush() {
    stream() << "\n";
    const std::string s = str_.str();
    if(write(2, s.data(), s.size()) < 0) {}  // shut up gcc
    flushed_ = true;
  }

  // Writes the line unless Flush() already did.
  ~LogMessage() {
    if (!flushed_)
      Flush();
  }

  // Stream that callers append the message text to.
  std::ostream& stream() { return str_; }

 private:
  bool flushed_;  // true once the line has been written
  std::ostringstream str_;

  // Disallow "evil" constructors
  LogMessage(const LogMessage&);
  void operator=(const LogMessage&);
};

// Like LogMessage, but aborts the process once the line has been written.
class LogMessageFatal : public LogMessage {
 public:
  LogMessageFatal(const char* file, int line)
    : LogMessage(file, line) { }

  // The base-class destructor only runs after this body, and abort()
  // never returns, so the message has to be flushed here or it is lost.
  ~LogMessageFatal() {
    Flush();
    abort();
  }

 private:
  // Disallow "evil" constructors
  LogMessageFatal(const LogMessageFatal&);
  void operator=(const LogMessageFatal&);
};
|
77
|
+
|
78
|
+
#endif // RE2_UTIL_LOGGING_H__
|
data/ext/util/mutex.h
ADDED
@@ -0,0 +1,190 @@
|
|
1
|
+
// Copyright 2007 The RE2 Authors. All Rights Reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style
|
3
|
+
// license that can be found in the LICENSE file.
|
4
|
+
|
5
|
+
/*
|
6
|
+
* A simple mutex wrapper, supporting locks and read-write locks.
|
7
|
+
* You should assume the locks are *not* re-entrant.
|
8
|
+
*/
|
9
|
+
|
10
|
+
#ifndef RE2_UTIL_MUTEX_H_
|
11
|
+
#define RE2_UTIL_MUTEX_H_
|
12
|
+
|
13
|
+
#define HAVE_PTHREAD 1
#define HAVE_RWLOCK 1

// System headers are included here, at global scope and before namespace
// re2 is opened, so that their declarations do not end up inside the
// namespace.
#if defined(NO_THREADS)
# include <assert.h>
#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
// Needed for pthread_rwlock_*.  If it causes problems, you could take it
// out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
// *does* cause problems for FreeBSD, or MacOSX, but isn't needed
// for locking there.)
# ifdef __linux__
#   undef _XOPEN_SOURCE
#   define _XOPEN_SOURCE 500  // may be needed to get the rwlock calls
# endif
# include <pthread.h>
# include <stdlib.h>  // for abort()
#elif defined(HAVE_PTHREAD)
# include <pthread.h>
# include <stdlib.h>  // for abort()
#elif defined(WIN32)
# define WIN32_LEAN_AND_MEAN  // We only need minimal includes
# ifdef GMUTEX_TRYLOCK
  // We need Windows NT or later for TryEnterCriticalSection().  If you
  // don't need that functionality, you can remove these _WIN32_WINNT
  // lines, and change TryLock() to assert(0) or something.
#   ifndef _WIN32_WINNT
#     define _WIN32_WINNT 0x0400
#   endif
# endif
# include <windows.h>
#else
# error Need to implement mutex.h for your architecture, or #define NO_THREADS
#endif

namespace re2 {

// Underlying lock representation for the selected threading model.
#if defined(NO_THREADS)
  typedef int MutexType;      // to keep a lock-count
#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
  typedef pthread_rwlock_t MutexType;
#elif defined(HAVE_PTHREAD)
  typedef pthread_mutex_t MutexType;
#elif defined(WIN32)
  typedef CRITICAL_SECTION MutexType;
#endif

// A simple mutex with exclusive and reader (shared) locking.
// Assume the locks are *not* re-entrant.
class Mutex {
 public:
  // Create a Mutex that is not held by anybody.
  inline Mutex();

  // Destructor
  inline ~Mutex();

  inline void Lock();    // Block if needed until free then acquire exclusively
  inline void Unlock();  // Release a lock acquired via Lock()
  inline bool TryLock(); // If free, Lock() and return true, else return false
  // Note that on systems that don't support read-write locks, these may
  // be implemented as synonyms to Lock() and Unlock().  So you can use
  // these for efficiency, but don't use them anyplace where being able
  // to do shared reads is necessary to avoid deadlock.
  inline void ReaderLock();   // Block until free or shared then acquire a share
  inline void ReaderUnlock(); // Release a read share of this Mutex
  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
  inline void AssertHeld() { }

 private:
  MutexType mutex_;

  // Catch the error of writing Mutex when intending MutexLock.
  Mutex(Mutex *ignored) {}
  // Disallow "evil" constructors
  Mutex(const Mutex&);
  void operator=(const Mutex&);
};

// Now the implementation of Mutex for various systems
#if defined(NO_THREADS)

// When we don't have threads, we can be either reading or writing,
// but not both.  We can have lots of readers at once (in no-threads
// mode, that's most likely to happen in recursive function calls),
// but only one writer.  We represent this by having mutex_ be -1 when
// writing and a number > 0 when reading (and 0 when no lock is held).
//
// In debug mode, we assert these invariants, while in non-debug mode
// we do nothing, for efficiency.  That's why everything is in an
// assert.

Mutex::Mutex() : mutex_(0) { }
Mutex::~Mutex()            { assert(mutex_ == 0); }
void Mutex::Lock()         { assert(--mutex_ == -1); }
void Mutex::Unlock()       { assert(mutex_++ == -1); }
bool Mutex::TryLock()      { if (mutex_) return false; Lock(); return true; }
void Mutex::ReaderLock()   { assert(++mutex_ > 0); }
void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }

#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)

// Aborts the process when a pthread call reports an error.
#define SAFE_PTHREAD(fncall)  do { if ((fncall) != 0) abort(); } while (0)

Mutex::Mutex()             { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
Mutex::~Mutex()            { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
void Mutex::Lock()         { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
void Mutex::Unlock()       { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
bool Mutex::TryLock()      { return pthread_rwlock_trywrlock(&mutex_) == 0; }
void Mutex::ReaderLock()   { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }

#undef SAFE_PTHREAD

#elif defined(HAVE_PTHREAD)

// Aborts the process when a pthread call reports an error.
#define SAFE_PTHREAD(fncall)  do { if ((fncall) != 0) abort(); } while (0)

Mutex::Mutex()             { SAFE_PTHREAD(pthread_mutex_init(&mutex_, NULL)); }
Mutex::~Mutex()            { SAFE_PTHREAD(pthread_mutex_destroy(&mutex_)); }
void Mutex::Lock()         { SAFE_PTHREAD(pthread_mutex_lock(&mutex_)); }
void Mutex::Unlock()       { SAFE_PTHREAD(pthread_mutex_unlock(&mutex_)); }
bool Mutex::TryLock()      { return pthread_mutex_trylock(&mutex_) == 0; }
void Mutex::ReaderLock()   { Lock(); }      // we don't have read-write locks
void Mutex::ReaderUnlock() { Unlock(); }
#undef SAFE_PTHREAD

#elif defined(WIN32)

Mutex::Mutex()             { InitializeCriticalSection(&mutex_); }
Mutex::~Mutex()            { DeleteCriticalSection(&mutex_); }
void Mutex::Lock()         { EnterCriticalSection(&mutex_); }
void Mutex::Unlock()       { LeaveCriticalSection(&mutex_); }
bool Mutex::TryLock()      { return TryEnterCriticalSection(&mutex_) != 0; }
void Mutex::ReaderLock()   { Lock(); }      // we don't have read-write locks
void Mutex::ReaderUnlock() { Unlock(); }

#endif


// --------------------------------------------------------------------------
// Some helper classes

// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
class MutexLock {
 public:
  explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
  ~MutexLock() { mu_->Unlock(); }
 private:
  Mutex * const mu_;
  // Disallow "evil" constructors
  MutexLock(const MutexLock&);
  void operator=(const MutexLock&);
};

// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
class ReaderMutexLock {
 public:
  explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
  ~ReaderMutexLock() { mu_->ReaderUnlock(); }
 private:
  Mutex * const mu_;
  // Disallow "evil" constructors
  ReaderMutexLock(const ReaderMutexLock&);
  void operator=(const ReaderMutexLock&);
};

class WriterMutexLock {
 public:
  explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
  ~WriterMutexLock() { mu_->WriterUnlock(); }
 private:
  Mutex * const mu_;
  // Disallow "evil" constructors
  WriterMutexLock(const WriterMutexLock&);
  void operator=(const WriterMutexLock&);
};

// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name)
#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)

}  // namespace re2
|
189
|
+
|
190
|
+
#endif /* #define RE2_UTIL_MUTEX_H_ */
|