chipper 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. data/README.rdoc +51 -0
  2. data/ext/extconf.rb +58 -0
  3. data/ext/libstemmer_c/Makefile +10 -0
  4. data/ext/libstemmer_c/examples/stemwords.c +209 -0
  5. data/ext/libstemmer_c/include/libstemmer.h +79 -0
  6. data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
  7. data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
  8. data/ext/libstemmer_c/libstemmer/modules.h +190 -0
  9. data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
  10. data/ext/libstemmer_c/mkinc.mak +82 -0
  11. data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
  12. data/ext/libstemmer_c/runtime/api.c +66 -0
  13. data/ext/libstemmer_c/runtime/api.h +26 -0
  14. data/ext/libstemmer_c/runtime/header.h +58 -0
  15. data/ext/libstemmer_c/runtime/utilities.c +478 -0
  16. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  17. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  18. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  19. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  20. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  21. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  22. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  24. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  25. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  26. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
  27. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  28. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  29. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  30. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  31. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  32. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  33. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  34. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  35. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  36. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  37. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  38. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  39. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  40. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  41. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  42. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  43. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  44. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  45. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  46. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  47. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  48. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  49. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  50. data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  51. data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  52. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  53. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  54. data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  55. data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  56. data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
  57. data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  58. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  59. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  60. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  61. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  62. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  63. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  64. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  65. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  66. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  67. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  68. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  69. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  70. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  71. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  72. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  73. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  74. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  75. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  76. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  77. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  78. data/ext/re2/bitstate.cc +378 -0
  79. data/ext/re2/compile.cc +1138 -0
  80. data/ext/re2/dfa.cc +2086 -0
  81. data/ext/re2/filtered_re2.cc +100 -0
  82. data/ext/re2/filtered_re2.h +99 -0
  83. data/ext/re2/hash.cc +231 -0
  84. data/ext/re2/mimics_pcre.cc +185 -0
  85. data/ext/re2/nfa.cc +709 -0
  86. data/ext/re2/onepass.cc +614 -0
  87. data/ext/re2/parse.cc +2202 -0
  88. data/ext/re2/perl_groups.cc +119 -0
  89. data/ext/re2/prefilter.cc +671 -0
  90. data/ext/re2/prefilter.h +105 -0
  91. data/ext/re2/prefilter_tree.cc +398 -0
  92. data/ext/re2/prefilter_tree.h +130 -0
  93. data/ext/re2/prog.cc +341 -0
  94. data/ext/re2/prog.h +376 -0
  95. data/ext/re2/re2.cc +1180 -0
  96. data/ext/re2/re2.h +837 -0
  97. data/ext/re2/regexp.cc +920 -0
  98. data/ext/re2/regexp.h +632 -0
  99. data/ext/re2/rune.cc +258 -0
  100. data/ext/re2/set.cc +113 -0
  101. data/ext/re2/set.h +55 -0
  102. data/ext/re2/simplify.cc +393 -0
  103. data/ext/re2/stringpiece.cc +87 -0
  104. data/ext/re2/stringpiece.h +182 -0
  105. data/ext/re2/tostring.cc +341 -0
  106. data/ext/re2/unicode_casefold.cc +469 -0
  107. data/ext/re2/unicode_casefold.h +75 -0
  108. data/ext/re2/unicode_groups.cc +4851 -0
  109. data/ext/re2/unicode_groups.h +64 -0
  110. data/ext/re2/valgrind.cc +24 -0
  111. data/ext/re2/variadic_function.h +346 -0
  112. data/ext/re2/walker-inl.h +244 -0
  113. data/ext/src/chipper.cc +626 -0
  114. data/ext/src/version.h +1 -0
  115. data/ext/stemmer.rb +40 -0
  116. data/ext/util/arena.h +103 -0
  117. data/ext/util/atomicops.h +79 -0
  118. data/ext/util/benchmark.h +41 -0
  119. data/ext/util/flags.h +27 -0
  120. data/ext/util/logging.h +78 -0
  121. data/ext/util/mutex.h +190 -0
  122. data/ext/util/pcre.h +679 -0
  123. data/ext/util/random.h +29 -0
  124. data/ext/util/sparse_array.h +451 -0
  125. data/ext/util/sparse_set.h +177 -0
  126. data/ext/util/test.h +57 -0
  127. data/ext/util/thread.h +26 -0
  128. data/ext/util/utf.h +43 -0
  129. data/ext/util/util.h +127 -0
  130. data/ext/util/valgrind.h +4517 -0
  131. data/test/helper.rb +5 -0
  132. data/test/test_entities.rb +57 -0
  133. data/test/test_tokens.rb +118 -0
  134. metadata +199 -0
data/ext/util/arena.h ADDED
@@ -0,0 +1,103 @@
1
+ // Copyright 2000 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Sometimes it is necessary to allocate a large number of small
6
+ // objects. Doing this the usual way (malloc, new) is slow,
7
+ // especially for multithreaded programs. An UnsafeArena provides a
8
+ // mark/release method of memory management: it asks for a large chunk
9
+ // from the operating system and doles it out bit by bit as required.
10
+ // Then you free all the memory at once by calling UnsafeArena::Reset().
11
+ // The "Unsafe" refers to the fact that UnsafeArena is not safe to
12
+ // call from multiple threads.
13
+ //
14
+ // A global operator new is provided; it can be used as follows:
15
+ //
16
+ // #include "lib/arena-inl.h"
17
+ //
18
+ // UnsafeArena arena(1000);
19
+ // Foo* foo = new (AllocateInArena, &arena) Foo;
20
+ //
21
+
22
+ #ifndef RE2_UTIL_ARENA_H_
23
+ #define RE2_UTIL_ARENA_H_
24
+
25
+ namespace re2 {
26
+
27
+ // This class is thread-compatible.
28
+ class UnsafeArena {
29
+ public:
30
+ UnsafeArena(const size_t block_size);
31
+ virtual ~UnsafeArena();
32
+
33
+ void Reset();
34
+
35
+ // This should be the worst-case alignment for any type. This is
36
+ // good for IA-32, SPARC version 7 (the last one I know), and
37
+ // supposedly Alpha. i386 would be more time-efficient with a
38
+ // default alignment of 8, but ::operator new() uses alignment of 4,
39
+ // and an assertion will fail below after the call to MakeNewBlock()
40
+ // if you try to use a larger alignment.
41
+ #ifdef __i386__
42
+ static const int kDefaultAlignment = 4;
43
+ #else
44
+ static const int kDefaultAlignment = 8;
45
+ #endif
46
+
47
+ private:
48
+ void* GetMemoryFallback(const size_t size, const int align);
49
+
50
+ public:
51
+ void* GetMemory(const size_t size, const int align) {
52
+ if ( size > 0 && size < remaining_ && align == 1 ) { // common case
53
+ last_alloc_ = freestart_;
54
+ freestart_ += size;
55
+ remaining_ -= size;
56
+ return reinterpret_cast<void*>(last_alloc_);
57
+ }
58
+ return GetMemoryFallback(size, align);
59
+ }
60
+
61
+ private:
62
+ struct AllocatedBlock {
63
+ char *mem;
64
+ size_t size;
65
+ };
66
+
67
+ // The returned AllocatedBlock* is valid until the next call to AllocNewBlock
68
+ // or Reset (i.e. anything that might affect overflow_blocks_).
69
+ AllocatedBlock *AllocNewBlock(const size_t block_size);
70
+
71
+ const AllocatedBlock *IndexToBlock(int index) const;
72
+
73
+ const size_t block_size_;
74
+ char* freestart_; // beginning of the free space in most recent block
75
+ char* freestart_when_empty_; // beginning of the free space when we're empty
76
+ char* last_alloc_; // used to make sure ReturnBytes() is safe
77
+ size_t remaining_;
78
+ // STL vector isn't as efficient as it could be, so we use an array at first
79
+ int blocks_alloced_; // how many of the first_blocks_ have been alloced
80
+ AllocatedBlock first_blocks_[16]; // the length of this array is arbitrary
81
+ // if the first_blocks_ aren't enough, expand into overflow_blocks_.
82
+ vector<AllocatedBlock>* overflow_blocks_;
83
+
84
+ void FreeBlocks(); // Frees all except first block
85
+
86
+ DISALLOW_EVIL_CONSTRUCTORS(UnsafeArena);
87
+ };
88
+
89
+ // Operators for allocation on the arena
90
+ // Syntax: new (AllocateInArena, arena) MyClass;
91
+ // STL containers, etc.
92
// Tag type with a single enumerator, used to select the arena-aware form of
// operator new.
enum AllocateInArenaType {
  AllocateInArena
};
93
+
94
+ } // namespace re2
95
+
96
+ inline void* operator new(size_t size,
97
+ re2::AllocateInArenaType /* unused */,
98
+ re2::UnsafeArena *arena) {
99
+ return reinterpret_cast<char*>(arena->GetMemory(size, 1));
100
+ }
101
+
102
+ #endif // RE2_UTIL_ARENA_H_
103
+
@@ -0,0 +1,79 @@
1
+ // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_UTIL_ATOMICOPS_H__
6
+ #define RE2_UTIL_ATOMICOPS_H__
7
+
8
// WriteMemoryBarrier(): one definition is selected per architecture at
// preprocessing time; anything not listed uses a mutex-based fallback.
#if defined(__i386__)

// An xchg with a memory operand carries an implicit lock prefix, which is
// what makes it usable as a barrier.  xchg modifies both of its operands, so
// the scratch word and the register are declared read-write ("+m" / "+r")
// rather than as plain inputs, and the "memory" clobber keeps the compiler
// from moving loads and stores across the statement.
static inline void WriteMemoryBarrier() {
  int scratch = 0;
  int reg = 0;
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "+m" (scratch), "+r" (reg)
                       :
                       : "memory");
}

#elif defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "sfence" is guaranteed to exist.
static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("sfence" : : : "memory");
}

#elif defined(__ppc__)

static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("eieio" : : : "memory");
}

#elif defined(__alpha__)

static inline void WriteMemoryBarrier() {
  __asm__ __volatile__("wmb" : : : "memory");
}

#else

#include "util/mutex.h"

static inline void WriteMemoryBarrier() {
  // Slight overkill, but good enough:
  // any mutex implementation must have
  // a read barrier after the lock operation and
  // a write barrier before the unlock operation.
  //
  // It may be worthwhile to write architecture-specific
  // barriers for the common platforms, as above, but
  // this is a correct fallback.
  re2::Mutex mu;
  re2::MutexLock l(&mu);
}

/*
#error Need WriteMemoryBarrier for architecture.

// Windows
inline void WriteMemoryBarrier() {
  LONG x;
  ::InterlockedExchange(&x, 0);
}
*/

#endif
65
// Alpha has very weak memory ordering: code that relies on write barriers
// must use read barriers on the reader side too.  On every other
// architecture this function is empty.
static inline void MaybeReadMemoryBarrier() {
#if defined(__alpha__)
  __asm__ __volatile__("rmb" : : : "memory");
#endif  // __alpha__
}
78
+
79
+ #endif // RE2_UTIL_ATOMICOPS_H__
@@ -0,0 +1,41 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_UTIL_BENCHMARK_H__
6
+ #define RE2_UTIL_BENCHMARK_H__
7
+
8
+ namespace testing {
9
// Record describing one benchmark.  Exactly one of fn / fnr is set by the
// constructors; lo/hi and threadlo/threadhi are stored here and presumably
// interpreted by whatever Register() hands the record to (defined elsewhere).
struct Benchmark {
  const char* name;
  void (*fn)(int);
  void (*fnr)(int, int);
  int lo;
  int hi;
  int threadlo;
  int threadhi;

  void Register();

  // Benchmark taking a single int argument; all ranges start out as 0.
  Benchmark(const char* name, void (*f)(int))
      : name(name), fn(f), fnr(0),
        lo(0), hi(0), threadlo(0), threadhi(0) {
    Register();
  }

  // Benchmark taking two int arguments, with the range [l, h] recorded in
  // lo / hi.
  Benchmark(const char* name, void (*f)(int, int), int l, int h)
      : name(name), fn(0), fnr(f),
        lo(l), hi(h), threadlo(0), threadhi(0) {
    Register();
  }

  // Sets the name and zeroes every other field.
  void Clear(const char* n) {
    name = n;
    fn = 0;
    fnr = 0;
    lo = 0;
    hi = 0;
    threadlo = 0;
    threadhi = 0;
  }

  // Records the thread range and returns this so calls can be chained.
  Benchmark* ThreadRange(int lo, int hi) {
    threadlo = lo;
    threadhi = hi;
    return this;
  }
};
24
+ } // namespace testing
25
+
26
+ void SetBenchmarkBytesProcessed(long long);
27
+ void StopBenchmarkTiming();
28
+ void StartBenchmarkTiming();
29
+ void BenchmarkMemoryUsage();
30
+ void SetBenchmarkItemsProcessed(int);
31
+
32
+ int NumCPUs();
33
+
34
// BENCHMARK(f) defines a pointer variable named _benchmark_<f>, initialised
// with a new ::testing::Benchmark built from the function's spelled-out name
// and the function itself.
#define BENCHMARK(f) \
  ::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f))

// BENCHMARK_RANGE(f, lo, hi) does the same through the four-argument
// constructor, passing lo and hi along.
#define BENCHMARK_RANGE(f, lo, hi) \
  ::testing::Benchmark* _benchmark_##f = \
  (new ::testing::Benchmark(#f, f, lo, hi))
40
+
41
+ #endif // RE2_UTIL_BENCHMARK_H__
data/ext/util/flags.h ADDED
@@ -0,0 +1,27 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Simplified version of Google's command line flags.
6
+ // Does not support parsing the command line.
7
+ // If you want to do that, see
8
+ // http://code.google.com/p/google-gflags
9
+
10
+ #ifndef RE2_UTIL_FLAGS_H__
11
+ #define RE2_UTIL_FLAGS_H__
12
+
13
// DEFINE_flag(type, name, deflt, desc) defines the variable
// re2::FLAGS_<name> of the given type, initialised to deflt.  The desc
// argument is accepted but does not appear in the expansion.
#define DEFINE_flag(type, name, deflt, desc) \
  namespace re2 { type FLAGS_##name = deflt; }

// DECLARE_flag(type, name) emits the matching extern declaration.
#define DECLARE_flag(type, name) \
  namespace re2 { extern type FLAGS_##name; }

// Per-type shorthands for defining a flag.
#define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc)
#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32, name, deflt, desc)
#define DEFINE_string(name, deflt, desc) DEFINE_flag(string, name, deflt, desc)

// Per-type shorthands for declaring a flag defined in another file.
#define DECLARE_bool(name) DECLARE_flag(bool, name)
#define DECLARE_int32(name) DECLARE_flag(int32, name)
#define DECLARE_string(name) DECLARE_flag(string, name)
26
+
27
+ #endif // RE2_UTIL_FLAGS_H__
@@ -0,0 +1,78 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Simplified version of Google's logging.
6
+
7
+ #ifndef RE2_UTIL_LOGGING_H__
8
+ #define RE2_UTIL_LOGGING_H__
9
+
10
+ #include <unistd.h> /* for write */
11
+ #include <sstream>
12
+
13
// Debug-only checking: every DCHECK form ends up in assert().
#define DCHECK(condition) assert(condition)
#define DCHECK_EQ(val1, val2) DCHECK((val1) == (val2))
#define DCHECK_NE(val1, val2) DCHECK((val1) != (val2))
#define DCHECK_LE(val1, val2) DCHECK((val1) <= (val2))
#define DCHECK_LT(val1, val2) DCHECK((val1) < (val2))
#define DCHECK_GE(val1, val2) DCHECK((val1) >= (val2))
#define DCHECK_GT(val1, val2) DCHECK((val1) > (val2))

// Always-on checking.  When x is false, a LogMessageFatal is created and the
// caller may stream further text into it; the empty "if" branch keeps a
// following "else" in user code from binding to the macro.
#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
#define CHECK_LT(x, y) CHECK((x) < (y))
#define CHECK_GT(x, y) CHECK((x) > (y))
#define CHECK_LE(x, y) CHECK((x) <= (y))
#define CHECK_GE(x, y) CHECK((x) >= (y))
#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_NE(x, y) CHECK((x) != (y))

// Severity names.  INFO, ERROR and WARNING all map to a plain LogMessage;
// FATAL and QFATAL map to LogMessageFatal.
#define LOG_INFO LogMessage(__FILE__, __LINE__)
#define LOG_ERROR LOG_INFO
#define LOG_WARNING LOG_INFO
#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
#define LOG_QFATAL LOG_FATAL

// Streams to LOG_INFO only when (x) > 0 is false.
#define VLOG(x) if((x)>0){}else LOG_INFO.stream()

// DEBUG_MODE is 0 when NDEBUG is defined and 1 otherwise; LOG_DFATAL is
// LOG_ERROR in the former case and LOG_FATAL in the latter.
#ifdef NDEBUG
#define DEBUG_MODE 0
#define LOG_DFATAL LOG_ERROR
#else
#define DEBUG_MODE 1
#define LOG_DFATAL LOG_FATAL
#endif

// LOG(severity) yields the stream of the message object for that severity.
#define LOG(severity) LOG_ ## severity.stream()
48
+
49
+ class LogMessage {
50
+ public:
51
+ LogMessage(const char* file, int line) {
52
+ stream() << file << ":" << line << ": ";
53
+ }
54
+ ~LogMessage() {
55
+ stream() << "\n";
56
+ string s = str_.str();
57
+ if(write(2, s.data(), s.size()) < 0) {} // shut up gcc
58
+ }
59
+ ostream& stream() { return str_; }
60
+
61
+ private:
62
+ std::ostringstream str_;
63
+ DISALLOW_EVIL_CONSTRUCTORS(LogMessage);
64
+ };
65
+
66
+ class LogMessageFatal : public LogMessage {
67
+ public:
68
+ LogMessageFatal(const char* file, int line)
69
+ : LogMessage(file, line) { }
70
+ ~LogMessageFatal() {
71
+ std::cerr << "\n";
72
+ abort();
73
+ }
74
+ private:
75
+ DISALLOW_EVIL_CONSTRUCTORS(LogMessageFatal);
76
+ };
77
+
78
+ #endif // RE2_UTIL_LOGGING_H__
data/ext/util/mutex.h ADDED
@@ -0,0 +1,190 @@
1
+ // Copyright 2007 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ /*
6
+ * A simple mutex wrapper, supporting locks and read-write locks.
7
+ * You should assume the locks are *not* re-entrant.
8
+ */
9
+
10
+ #ifndef RE2_UTIL_MUTEX_H_
11
+ #define RE2_UTIL_MUTEX_H_
12
+
13
+ namespace re2 {
14
+
15
+ #define HAVE_PTHREAD 1 // defined unconditionally in this header
16
+ #define HAVE_RWLOCK 1 // likewise, so the pthread rwlock branch below is taken unless NO_THREADS is defined
17
+
18
+ #if defined(NO_THREADS)
19
+ typedef int MutexType; // a plain int, used to keep a lock-count
20
+ #elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
21
+ // _XOPEN_SOURCE is needed for pthread_rwlock_*. If it causes problems, it
22
+ // can be taken out, but then HAVE_RWLOCK has to be unset too (at least on
23
+ // linux -- the define *does* cause problems for FreeBSD and MacOSX, but
24
+ // isn't needed for locking there.)
25
+ # ifdef __linux__
26
+ # undef _XOPEN_SOURCE
27
+ # define _XOPEN_SOURCE 500 // may be needed to get the rwlock calls
28
+ # endif
29
+ # include <pthread.h>
30
+ typedef pthread_rwlock_t MutexType;
31
+ #elif defined(HAVE_PTHREAD)
32
+ # include <pthread.h>
33
+ typedef pthread_mutex_t MutexType;
34
+ #elif defined(WIN32)
35
+ # define WIN32_LEAN_AND_MEAN // We only need minimal includes
36
+ # ifdef GMUTEX_TRYLOCK
37
+ // Windows NT or later is needed for TryEnterCriticalSection(). If that
38
+ // functionality isn't needed, these _WIN32_WINNT lines can be removed
39
+ // and TryLock() changed to assert(0) or something similar.
40
+ # ifndef _WIN32_WINNT
41
+ # define _WIN32_WINNT 0x0400
42
+ # endif
43
+ # endif
44
+ # include <windows.h>
45
+ typedef CRITICAL_SECTION MutexType;
46
+ #else
47
+ # error Need to implement mutex.h for your architecture, or #define NO_THREADS
48
+ #endif
49
+
50
+ class Mutex {
51
+ public:
52
+ // Create a Mutex that is not held by anybody.
53
+ inline Mutex();
54
+
55
+ // Destructor
56
+ inline ~Mutex();
57
+
58
+ inline void Lock(); // Block if needed until free then acquire exclusively
59
+ inline void Unlock(); // Release a lock acquired via Lock()
60
+ inline bool TryLock(); // If free, Lock() and return true, else return false
61
+ // Note that on systems that don't support read-write locks, these may
62
+ // be implemented as synonyms to Lock() and Unlock(). So you can use
63
+ // these for efficiency, but don't use them anyplace where being able
64
+ // to do shared reads is necessary to avoid deadlock.
65
+ inline void ReaderLock(); // Block until free or shared then acquire a share
66
+ inline void ReaderUnlock(); // Release a read share of this Mutex
67
+ inline void WriterLock() { Lock(); } // Acquire an exclusive lock
68
+ inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
69
+ inline void AssertHeld() { }
70
+
71
+ private:
72
+ MutexType mutex_;
73
+
74
+ // Catch the error of writing Mutex when intending MutexLock.
75
+ Mutex(Mutex *ignored) {}
76
+ // Disallow "evil" constructors
77
+ Mutex(const Mutex&);
78
+ void operator=(const Mutex&);
79
+ };
80
+
81
+ // Now the implementation of Mutex for various systems
82
+ #if defined(NO_THREADS)
83
+
84
+ // Without threads, a Mutex can be held for reading or for writing,
85
+ // but not both. There can be lots of readers at once (in no-threads
86
+ // mode, that's most likely to happen in recursive function calls),
87
+ // but only one writer. This is represented by mutex_ being -1 when
88
+ // writing and a number > 0 when reading (and 0 when no lock is held).
89
+ //
90
+ // In debug mode these invariants are asserted, while in non-debug mode
91
+ // nothing is done at all, for efficiency. That's why every state change
92
+ // lives inside an assert.
93
+ #include <assert.h>
94
+
95
+ Mutex::Mutex() : mutex_(0) { }
96
+ Mutex::~Mutex() { assert(mutex_ == 0); }
97
+ void Mutex::Lock() { assert(--mutex_ == -1); }
98
+ void Mutex::Unlock() { assert(mutex_++ == -1); }
99
+ bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } // NOTE: with NDEBUG mutex_ never changes, so this always returns true
100
+ void Mutex::ReaderLock() { assert(++mutex_ > 0); }
101
+ void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
102
+
103
+ #elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
104
+
105
+ #include <stdlib.h> // for abort()
106
+ #define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) // abort on any non-zero pthread return value
107
+
108
+ Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
109
+ Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
110
+ void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
111
+ void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
112
+ bool Mutex::TryLock() { return pthread_rwlock_trywrlock(&mutex_) == 0; } // not wrapped in SAFE_PTHREAD: a non-zero result just means false
113
+ void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
114
+ void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
115
+
116
+ #undef SAFE_PTHREAD
117
+
118
+ #elif defined(HAVE_PTHREAD)
119
+
120
+ #include <stdlib.h> // for abort()
121
+ #define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) // abort on any non-zero pthread return value
122
+
123
+ Mutex::Mutex() { SAFE_PTHREAD(pthread_mutex_init(&mutex_, NULL)); }
124
+ Mutex::~Mutex() { SAFE_PTHREAD(pthread_mutex_destroy(&mutex_)); }
125
+ void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock(&mutex_)); }
126
+ void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock(&mutex_)); }
127
+ bool Mutex::TryLock() { return pthread_mutex_trylock(&mutex_) == 0; }
128
+ void Mutex::ReaderLock() { Lock(); } // no read-write locks here: readers take the exclusive lock
129
+ void Mutex::ReaderUnlock() { Unlock(); }
130
+ #undef SAFE_PTHREAD
131
+
132
+ #elif defined(WIN32)
133
+
134
+ Mutex::Mutex() { InitializeCriticalSection(&mutex_); }
135
+ Mutex::~Mutex() { DeleteCriticalSection(&mutex_); }
136
+ void Mutex::Lock() { EnterCriticalSection(&mutex_); }
137
+ void Mutex::Unlock() { LeaveCriticalSection(&mutex_); }
138
+ bool Mutex::TryLock() { return TryEnterCriticalSection(&mutex_) != 0; }
139
+ void Mutex::ReaderLock() { Lock(); } // no read-write locks here: readers take the exclusive lock
140
+ void Mutex::ReaderUnlock() { Unlock(); }
141
+
142
+ #endif
143
+
144
+
145
+ // --------------------------------------------------------------------------
146
+ // Some helper classes
147
+
148
+ // MutexLock(mu) acquires mu when constructed and releases it when destroyed.
149
+ class MutexLock {
150
+ public:
151
+ explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
152
+ ~MutexLock() { mu_->Unlock(); }
153
+ private:
154
+ Mutex * const mu_;
155
+ // Disallow "evil" constructors
156
+ MutexLock(const MutexLock&);
157
+ void operator=(const MutexLock&);
158
+ };
159
+
160
+ // ReaderMutexLock and WriterMutexLock do the same, for rwlocks
161
+ class ReaderMutexLock {
162
+ public:
163
+ explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
164
+ ~ReaderMutexLock() { mu_->ReaderUnlock(); }
165
+ private:
166
+ Mutex * const mu_;
167
+ // Disallow "evil" constructors
168
+ ReaderMutexLock(const ReaderMutexLock&);
169
+ void operator=(const ReaderMutexLock&);
170
+ };
171
+
172
+ class WriterMutexLock {
173
+ public:
174
+ explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
175
+ ~WriterMutexLock() { mu_->WriterUnlock(); }
176
+ private:
177
+ Mutex * const mu_;
178
+ // Disallow "evil" constructors
179
+ WriterMutexLock(const WriterMutexLock&);
180
+ void operator=(const WriterMutexLock&);
181
+ };
182
+
183
// Catch the bug where the variable name is omitted, e.g. MutexLock (&mu);
// These macros are function-like, so they only expand where the class name
// is followed directly by an opening parenthesis; a declaration that names
// its variable, such as "MutexLock l(&mu);", is left alone.
#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name)
#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
187
+
188
+ } // namespace re2
189
+
190
+ #endif /* #define RE2_UTIL_MUTEX_H_ */