chipper 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. data/README.rdoc +51 -0
  2. data/ext/extconf.rb +58 -0
  3. data/ext/libstemmer_c/Makefile +10 -0
  4. data/ext/libstemmer_c/examples/stemwords.c +209 -0
  5. data/ext/libstemmer_c/include/libstemmer.h +79 -0
  6. data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
  7. data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
  8. data/ext/libstemmer_c/libstemmer/modules.h +190 -0
  9. data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
  10. data/ext/libstemmer_c/mkinc.mak +82 -0
  11. data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
  12. data/ext/libstemmer_c/runtime/api.c +66 -0
  13. data/ext/libstemmer_c/runtime/api.h +26 -0
  14. data/ext/libstemmer_c/runtime/header.h +58 -0
  15. data/ext/libstemmer_c/runtime/utilities.c +478 -0
  16. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  17. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  18. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  19. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  20. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  21. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  22. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  24. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  25. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  26. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
  27. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  28. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  29. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  30. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  31. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  32. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  33. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  34. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  35. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  36. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  37. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  38. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  39. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  40. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  41. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  42. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  43. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  44. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  45. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  46. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  47. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  48. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  49. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  50. data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  51. data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  52. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  53. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  54. data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  55. data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  56. data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
  57. data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  58. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  59. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  60. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  61. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  62. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  63. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  64. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  65. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  66. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  67. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  68. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  69. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  70. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  71. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  72. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  73. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  74. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  75. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  76. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  77. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  78. data/ext/re2/bitstate.cc +378 -0
  79. data/ext/re2/compile.cc +1138 -0
  80. data/ext/re2/dfa.cc +2086 -0
  81. data/ext/re2/filtered_re2.cc +100 -0
  82. data/ext/re2/filtered_re2.h +99 -0
  83. data/ext/re2/hash.cc +231 -0
  84. data/ext/re2/mimics_pcre.cc +185 -0
  85. data/ext/re2/nfa.cc +709 -0
  86. data/ext/re2/onepass.cc +614 -0
  87. data/ext/re2/parse.cc +2202 -0
  88. data/ext/re2/perl_groups.cc +119 -0
  89. data/ext/re2/prefilter.cc +671 -0
  90. data/ext/re2/prefilter.h +105 -0
  91. data/ext/re2/prefilter_tree.cc +398 -0
  92. data/ext/re2/prefilter_tree.h +130 -0
  93. data/ext/re2/prog.cc +341 -0
  94. data/ext/re2/prog.h +376 -0
  95. data/ext/re2/re2.cc +1180 -0
  96. data/ext/re2/re2.h +837 -0
  97. data/ext/re2/regexp.cc +920 -0
  98. data/ext/re2/regexp.h +632 -0
  99. data/ext/re2/rune.cc +258 -0
  100. data/ext/re2/set.cc +113 -0
  101. data/ext/re2/set.h +55 -0
  102. data/ext/re2/simplify.cc +393 -0
  103. data/ext/re2/stringpiece.cc +87 -0
  104. data/ext/re2/stringpiece.h +182 -0
  105. data/ext/re2/tostring.cc +341 -0
  106. data/ext/re2/unicode_casefold.cc +469 -0
  107. data/ext/re2/unicode_casefold.h +75 -0
  108. data/ext/re2/unicode_groups.cc +4851 -0
  109. data/ext/re2/unicode_groups.h +64 -0
  110. data/ext/re2/valgrind.cc +24 -0
  111. data/ext/re2/variadic_function.h +346 -0
  112. data/ext/re2/walker-inl.h +244 -0
  113. data/ext/src/chipper.cc +626 -0
  114. data/ext/src/version.h +1 -0
  115. data/ext/stemmer.rb +40 -0
  116. data/ext/util/arena.h +103 -0
  117. data/ext/util/atomicops.h +79 -0
  118. data/ext/util/benchmark.h +41 -0
  119. data/ext/util/flags.h +27 -0
  120. data/ext/util/logging.h +78 -0
  121. data/ext/util/mutex.h +190 -0
  122. data/ext/util/pcre.h +679 -0
  123. data/ext/util/random.h +29 -0
  124. data/ext/util/sparse_array.h +451 -0
  125. data/ext/util/sparse_set.h +177 -0
  126. data/ext/util/test.h +57 -0
  127. data/ext/util/thread.h +26 -0
  128. data/ext/util/utf.h +43 -0
  129. data/ext/util/util.h +127 -0
  130. data/ext/util/valgrind.h +4517 -0
  131. data/test/helper.rb +5 -0
  132. data/test/test_entities.rb +57 -0
  133. data/test/test_tokens.rb +118 -0
  134. metadata +199 -0
data/ext/util/arena.h ADDED
@@ -0,0 +1,103 @@
1
+ // Copyright 2000 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Sometimes it is necessary to allocate a large number of small
6
+ // objects. Doing this the usual way (malloc, new) is slow,
7
+ // especially for multithreaded programs. An UnsafeArena provides a
8
+ // mark/release method of memory management: it asks for a large chunk
9
+ // from the operating system and doles it out bit by bit as required.
10
+ // Then you free all the memory at once by calling UnsafeArena::Reset().
11
+ // The "Unsafe" refers to the fact that UnsafeArena is not safe to
12
+ // call from multiple threads.
13
+ //
14
+ // The global operator new can be used as follows:
15
+ //
16
+ // #include "lib/arena-inl.h"
17
+ //
18
+ // UnsafeArena arena(1000);
19
+ // Foo* foo = new (AllocateInArena, &arena) Foo;
20
+ //
21
+
22
+ #ifndef RE2_UTIL_ARENA_H_
23
+ #define RE2_UTIL_ARENA_H_
24
+
25
+ namespace re2 {
26
+
27
+ // This class is thread-compatible.
28
+ class UnsafeArena {
29
+ public:
30
+ UnsafeArena(const size_t block_size);
31
+ virtual ~UnsafeArena();
32
+
33
+ void Reset();
34
+
35
+ // This should be the worst-case alignment for any type. This is
36
+ // good for IA-32, SPARC version 7 (the last one I know), and
37
+ // supposedly Alpha. i386 would be more time-efficient with a
38
+ // default alignment of 8, but ::operator new() uses alignment of 4,
39
+ // and an assertion will fail below after the call to MakeNewBlock()
40
+ // if you try to use a larger alignment.
41
+ #ifdef __i386__
42
+ static const int kDefaultAlignment = 4;
43
+ #else
44
+ static const int kDefaultAlignment = 8;
45
+ #endif
46
+
47
+ private:
48
+ void* GetMemoryFallback(const size_t size, const int align);
49
+
50
+ public:
51
+ void* GetMemory(const size_t size, const int align) {
52
+ if ( size > 0 && size < remaining_ && align == 1 ) { // common case
53
+ last_alloc_ = freestart_;
54
+ freestart_ += size;
55
+ remaining_ -= size;
56
+ return reinterpret_cast<void*>(last_alloc_);
57
+ }
58
+ return GetMemoryFallback(size, align);
59
+ }
60
+
61
+ private:
62
+ struct AllocatedBlock {
63
+ char *mem;
64
+ size_t size;
65
+ };
66
+
67
+ // The returned AllocatedBlock* is valid until the next call to AllocNewBlock
68
+ // or Reset (i.e. anything that might affect overflow_blocks_).
69
+ AllocatedBlock *AllocNewBlock(const size_t block_size);
70
+
71
+ const AllocatedBlock *IndexToBlock(int index) const;
72
+
73
+ const size_t block_size_;
74
+ char* freestart_; // beginning of the free space in most recent block
75
+ char* freestart_when_empty_; // beginning of the free space when we're empty
76
+ char* last_alloc_; // used to make sure ReturnBytes() is safe
77
+ size_t remaining_;
78
+ // STL vector isn't as efficient as it could be, so we use an array at first
79
+ int blocks_alloced_; // how many of the first_blocks_ have been alloced
80
+ AllocatedBlock first_blocks_[16]; // the length of this array is arbitrary
81
+ // if the first_blocks_ aren't enough, expand into overflow_blocks_.
82
+ vector<AllocatedBlock>* overflow_blocks_;
83
+
84
+ void FreeBlocks(); // Frees all except first block
85
+
86
+ DISALLOW_EVIL_CONSTRUCTORS(UnsafeArena);
87
+ };
88
+
89
+ // Operators for allocation on the arena
90
+ // Syntax: new (AllocateInArena, arena) MyClass;
91
+ // STL containers, etc.
92
// Tag type: its single enumerator is passed as the first placement
// argument of the arena form of operator new.
enum AllocateInArenaType {
  AllocateInArena
};
93
+
94
+ } // namespace re2
95
+
96
+ inline void* operator new(size_t size,
97
+ re2::AllocateInArenaType /* unused */,
98
+ re2::UnsafeArena *arena) {
99
+ return reinterpret_cast<char*>(arena->GetMemory(size, 1));
100
+ }
101
+
102
+ #endif // RE2_UTIL_ARENA_H_
103
+
@@ -0,0 +1,79 @@
1
+ // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_UTIL_ATOMICOPS_H__
6
+ #define RE2_UTIL_ATOMICOPS_H__
7
+
8
// WriteMemoryBarrier(): one definition is selected per target
// architecture; unknown targets fall back to a mutex lock/unlock pair.
#if defined(__i386__)

static inline void WriteMemoryBarrier() {
  int x = 0;
  int scratch = 0;
  // xchg with a memory operand carries an implicit lock prefix.
  // Both operands are declared read-write because the instruction
  // modifies them, and the "memory" clobber keeps the compiler from
  // moving memory accesses across the barrier (as in the other variants).
  __asm__ __volatile__("xchgl %0,%1"
                       : "+r" (scratch), "+m" (x)
                       :
                       : "memory");
}

#elif defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "sfence" is guaranteed to exist.
static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("sfence" : : : "memory");
}

#elif defined(__ppc__)

static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("eieio" : : : "memory");
}

#elif defined(__alpha__)

static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("wmb" : : : "memory");
}

#else

#include "util/mutex.h"

static inline void WriteMemoryBarrier() {
  // Slight overkill, but good enough:
  // any mutex implementation must have
  // a read barrier after the lock operation and
  // a write barrier before the unlock operation.
  //
  // It may be worthwhile to write architecture-specific
  // barriers for the common platforms, as above, but
  // this is a correct fallback.
  re2::Mutex mu;
  re2::MutexLock l(&mu);
}

/*
#error Need WriteMemoryBarrier for architecture.

// Windows
inline void WriteMemoryBarrier() {
  LONG x;
  ::InterlockedExchange(&x, 0);
}
*/

#endif
64
+
65
// Alpha has very weak memory ordering. If relying on write barriers, one
// must use read barriers for the readers too. On every other
// architecture this function is empty.
static inline void MaybeReadMemoryBarrier() {
#if defined(__alpha__)
  __asm__ __volatile__("rmb" : : : "memory");
#endif  // __alpha__
}
78
+
79
+ #endif // RE2_UTIL_ATOMICOPS_H__
@@ -0,0 +1,41 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_UTIL_BENCHMARK_H__
6
+ #define RE2_UTIL_BENCHMARK_H__
7
+
8
namespace testing {

// Record describing one benchmark. Both constructors reset every field
// through Clear(), store the function pointer (and range) they were
// given, and then call Register(), which is defined elsewhere.
struct Benchmark {
  const char* name;
  void (*fn)(int);
  void (*fnr)(int, int);
  int lo;
  int hi;
  int threadlo;
  int threadhi;

  void Register();

  // Benchmark taking a single int argument.
  Benchmark(const char* name, void (*f)(int)) {
    Clear(name);
    fn = f;
    Register();
  }

  // Benchmark taking two int arguments, with the range [l, h] stored in
  // lo / hi.
  Benchmark(const char* name, void (*f)(int, int), int l, int h) {
    Clear(name);
    fnr = f;
    lo = l;
    hi = h;
    Register();
  }

  // Stores the name and zeroes every other field.
  void Clear(const char* n) {
    name = n;
    fn = 0;
    fnr = 0;
    lo = hi = 0;
    threadlo = threadhi = 0;
  }

  // Records the thread range and returns this object so calls can chain.
  Benchmark* ThreadRange(int lo, int hi) {
    this->threadlo = lo;
    this->threadhi = hi;
    return this;
  }
};

}  // namespace testing
25
+
26
+ void SetBenchmarkBytesProcessed(long long);
27
+ void StopBenchmarkTiming();
28
+ void StartBenchmarkTiming();
29
+ void BenchmarkMemoryUsage();
30
+ void SetBenchmarkItemsProcessed(int);
31
+
32
+ int NumCPUs();
33
+
34
+ #define BENCHMARK(f) \
35
+ ::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f))
36
+
37
+ #define BENCHMARK_RANGE(f, lo, hi) \
38
+ ::testing::Benchmark* _benchmark_##f = \
39
+ (new ::testing::Benchmark(#f, f, lo, hi))
40
+
41
+ #endif // RE2_UTIL_BENCHMARK_H__
data/ext/util/flags.h ADDED
@@ -0,0 +1,27 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Simplified version of Google's command line flags.
6
+ // Does not support parsing the command line.
7
+ // If you want to do that, see
8
+ // http://code.google.com/p/google-gflags
9
+
10
+ #ifndef RE2_UTIL_FLAGS_H__
11
+ #define RE2_UTIL_FLAGS_H__
12
+
13
+ #define DEFINE_flag(type, name, deflt, desc) \
14
+ namespace re2 { type FLAGS_##name = deflt; }
15
+
16
+ #define DECLARE_flag(type, name) \
17
+ namespace re2 { extern type FLAGS_##name; }
18
+
19
+ #define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc)
20
+ #define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32, name, deflt, desc)
21
+ #define DEFINE_string(name, deflt, desc) DEFINE_flag(string, name, deflt, desc)
22
+
23
+ #define DECLARE_bool(name) DECLARE_flag(bool, name)
24
+ #define DECLARE_int32(name) DECLARE_flag(int32, name)
25
+ #define DECLARE_string(name) DECLARE_flag(string, name)
26
+
27
+ #endif // RE2_UTIL_FLAGS_H__
@@ -0,0 +1,78 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Simplified version of Google's logging.
6
+
7
+ #ifndef RE2_UTIL_LOGGING_H__
8
+ #define RE2_UTIL_LOGGING_H__
9
+
10
+ #include <unistd.h> /* for write */
11
+ #include <sstream>
12
+
13
+ // Debug-only checking.
14
+ #define DCHECK(condition) assert(condition)
15
+ #define DCHECK_EQ(val1, val2) assert((val1) == (val2))
16
+ #define DCHECK_NE(val1, val2) assert((val1) != (val2))
17
+ #define DCHECK_LE(val1, val2) assert((val1) <= (val2))
18
+ #define DCHECK_LT(val1, val2) assert((val1) < (val2))
19
+ #define DCHECK_GE(val1, val2) assert((val1) >= (val2))
20
+ #define DCHECK_GT(val1, val2) assert((val1) > (val2))
21
+
22
+ // Always-on checking
23
+ #define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
24
+ #define CHECK_LT(x, y) CHECK((x) < (y))
25
+ #define CHECK_GT(x, y) CHECK((x) > (y))
26
+ #define CHECK_LE(x, y) CHECK((x) <= (y))
27
+ #define CHECK_GE(x, y) CHECK((x) >= (y))
28
+ #define CHECK_EQ(x, y) CHECK((x) == (y))
29
+ #define CHECK_NE(x, y) CHECK((x) != (y))
30
+
31
+ #define LOG_INFO LogMessage(__FILE__, __LINE__)
32
+ #define LOG_ERROR LOG_INFO
33
+ #define LOG_WARNING LOG_INFO
34
+ #define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
35
+ #define LOG_QFATAL LOG_FATAL
36
+
37
+ #define VLOG(x) if((x)>0){}else LOG_INFO.stream()
38
+
39
+ #ifdef NDEBUG
40
+ #define DEBUG_MODE 0
41
+ #define LOG_DFATAL LOG_ERROR
42
+ #else
43
+ #define DEBUG_MODE 1
44
+ #define LOG_DFATAL LOG_FATAL
45
+ #endif
46
+
47
+ #define LOG(severity) LOG_ ## severity.stream()
48
+
49
// Collects one log line in memory and writes it to file descriptor 2.
//
// The constructor starts the line with "<file>:<line>: ". The line is
// written exactly once: either by an explicit Flush() or, if Flush() was
// never called, by the destructor.
class LogMessage {
 public:
  LogMessage(const char* file, int line) : flushed_(false) {
    stream() << file << ":" << line << ": ";
  }

  // Appends the trailing newline and writes the whole line to fd 2.
  void Flush() {
    stream() << "\n";
    std::string s = str_.str();
    if(write(2, s.data(), s.size()) < 0) {}  // shut up gcc
    flushed_ = true;
  }

  ~LogMessage() {
    if (!flushed_) {
      Flush();
    }
  }

  // Stream that callers append the message text to.
  std::ostream& stream() { return str_; }

 private:
  bool flushed_;  // true once the line has been written
  std::ostringstream str_;

  // Disallow "evil" constructors
  LogMessage(const LogMessage&);
  void operator=(const LogMessage&);
};

// Like LogMessage, but aborts the process once the message is destroyed.
class LogMessageFatal : public LogMessage {
 public:
  LogMessageFatal(const char* file, int line)
    : LogMessage(file, line) { }

  ~LogMessageFatal() {
    // The base-class destructor only runs after this body, i.e. after
    // abort(), so the message has to be written here or it is lost.
    Flush();
    abort();
  }

 private:
  // Disallow "evil" constructors
  LogMessageFatal(const LogMessageFatal&);
  void operator=(const LogMessageFatal&);
};
77
+
78
+ #endif // RE2_UTIL_LOGGING_H__
data/ext/util/mutex.h ADDED
@@ -0,0 +1,190 @@
1
+ // Copyright 2007 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ /*
6
+ * A simple mutex wrapper, supporting locks and read-write locks.
7
+ * You should assume the locks are *not* re-entrant.
8
+ */
9
+
10
+ #ifndef RE2_UTIL_MUTEX_H_
11
+ #define RE2_UTIL_MUTEX_H_
12
+
13
#define HAVE_PTHREAD 1
#define HAVE_RWLOCK 1

// System headers are included here, at global scope, before namespace
// re2 is opened, so that their declarations are not nested inside re2.
#if defined(NO_THREADS)
# include <assert.h>
#elif defined(HAVE_PTHREAD)
# if defined(HAVE_RWLOCK) && defined(__linux__)
   // Needed for pthread_rwlock_*.  If it causes problems, you could take it
   // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
   // *does* cause problems for FreeBSD, or MacOSX, but isn't needed
   // for locking there.)
#  undef _XOPEN_SOURCE
#  define _XOPEN_SOURCE 500  // may be needed to get the rwlock calls
# endif
# include <pthread.h>
# include <stdlib.h>  // for abort()
#elif defined(WIN32)
# define WIN32_LEAN_AND_MEAN  // We only need minimal includes
# ifdef GMUTEX_TRYLOCK
   // We need Windows NT or later for TryEnterCriticalSection().  If you
   // don't need that functionality, you can remove these _WIN32_WINNT
   // lines, and change TryLock() to assert(0) or something.
#  ifndef _WIN32_WINNT
#   define _WIN32_WINNT 0x0400
#  endif
# endif
# include <windows.h>
#else
# error Need to implement mutex.h for your architecture, or #define NO_THREADS
#endif

namespace re2 {

// Underlying lock representation for the selected threading model.
#if defined(NO_THREADS)
  typedef int MutexType;      // to keep a lock-count
#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
  typedef pthread_rwlock_t MutexType;
#elif defined(HAVE_PTHREAD)
  typedef pthread_mutex_t MutexType;
#elif defined(WIN32)
  typedef CRITICAL_SECTION MutexType;
#endif

// A simple lock supporting exclusive and shared (reader) acquisition.
// Assume the lock is *not* re-entrant.
class Mutex {
 public:
  // Create a Mutex that is not held by anybody.
  inline Mutex();

  // Destructor
  inline ~Mutex();

  inline void Lock();    // Block if needed until free then acquire exclusively
  inline void Unlock();  // Release a lock acquired via Lock()
  inline bool TryLock(); // If free, Lock() and return true, else return false
  // Note that on systems that don't support read-write locks, these may
  // be implemented as synonyms to Lock() and Unlock().  So you can use
  // these for efficiency, but don't use them anyplace where being able
  // to do shared reads is necessary to avoid deadlock.
  inline void ReaderLock();   // Block until free or shared then acquire a share
  inline void ReaderUnlock(); // Release a read share of this Mutex
  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
  inline void AssertHeld() { }             // No-op in this implementation

 private:
  MutexType mutex_;

  // Catch the error of writing Mutex when intending MutexLock.
  Mutex(Mutex *ignored) {}
  // Disallow "evil" constructors
  Mutex(const Mutex&);
  void operator=(const Mutex&);
};

// Now the implementation of Mutex for various systems
#if defined(NO_THREADS)

// When we don't have threads, we can be either reading or writing,
// but not both.  We can have lots of readers at once (in no-threads
// mode, that's most likely to happen in recursive function calls),
// but only one writer.  We represent this by having mutex_ be -1 when
// writing and a number > 0 when reading (and 0 when no lock is held).
//
// In debug mode, we assert these invariants, while in non-debug mode
// we do nothing, for efficiency.  That's why everything is in an
// assert.

Mutex::Mutex() : mutex_(0) { }
Mutex::~Mutex()            { assert(mutex_ == 0); }
void Mutex::Lock()         { assert(--mutex_ == -1); }
void Mutex::Unlock()       { assert(mutex_++ == -1); }
bool Mutex::TryLock()      { if (mutex_) return false; Lock(); return true; }
void Mutex::ReaderLock()   { assert(++mutex_ > 0); }
void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }

#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)

// Any pthread call that reports an error aborts the process.
#define SAFE_PTHREAD(fncall)  do { if ((fncall) != 0) abort(); } while (0)

Mutex::Mutex()             { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
Mutex::~Mutex()            { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
void Mutex::Lock()         { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
void Mutex::Unlock()       { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
bool Mutex::TryLock()      { return pthread_rwlock_trywrlock(&mutex_) == 0; }
void Mutex::ReaderLock()   { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }

#undef SAFE_PTHREAD

#elif defined(HAVE_PTHREAD)

// Any pthread call that reports an error aborts the process.
#define SAFE_PTHREAD(fncall)  do { if ((fncall) != 0) abort(); } while (0)

Mutex::Mutex()             { SAFE_PTHREAD(pthread_mutex_init(&mutex_, NULL)); }
Mutex::~Mutex()            { SAFE_PTHREAD(pthread_mutex_destroy(&mutex_)); }
void Mutex::Lock()         { SAFE_PTHREAD(pthread_mutex_lock(&mutex_)); }
void Mutex::Unlock()       { SAFE_PTHREAD(pthread_mutex_unlock(&mutex_)); }
bool Mutex::TryLock()      { return pthread_mutex_trylock(&mutex_) == 0; }
void Mutex::ReaderLock()   { Lock(); }      // we don't have read-write locks
void Mutex::ReaderUnlock() { Unlock(); }
#undef SAFE_PTHREAD

#elif defined(WIN32)

Mutex::Mutex()             { InitializeCriticalSection(&mutex_); }
Mutex::~Mutex()            { DeleteCriticalSection(&mutex_); }
void Mutex::Lock()         { EnterCriticalSection(&mutex_); }
void Mutex::Unlock()       { LeaveCriticalSection(&mutex_); }
bool Mutex::TryLock()      { return TryEnterCriticalSection(&mutex_) != 0; }
void Mutex::ReaderLock()   { Lock(); }      // we don't have read-write locks
void Mutex::ReaderUnlock() { Unlock(); }

#endif


// --------------------------------------------------------------------------
// Some helper classes

// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
class MutexLock {
 public:
  explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
  ~MutexLock() { mu_->Unlock(); }
 private:
  Mutex * const mu_;
  // Disallow "evil" constructors
  MutexLock(const MutexLock&);
  void operator=(const MutexLock&);
};

// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
class ReaderMutexLock {
 public:
  explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
  ~ReaderMutexLock() { mu_->ReaderUnlock(); }
 private:
  Mutex * const mu_;
  // Disallow "evil" constructors
  ReaderMutexLock(const ReaderMutexLock&);
  void operator=(const ReaderMutexLock&);
};

class WriterMutexLock {
 public:
  explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
  ~WriterMutexLock() { mu_->WriterUnlock(); }
 private:
  Mutex * const mu_;
  // Disallow "evil" constructors
  WriterMutexLock(const WriterMutexLock&);
  void operator=(const WriterMutexLock&);
};

// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name)
#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)

}  // namespace re2
189
+
190
+ #endif /* #define RE2_UTIL_MUTEX_H_ */