fairy 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
@@ -0,0 +1,21 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ module Fairy
7
module Debug
  # Every method defined below is exposed as a module function, i.e. callable
  # as Debug.<name>, with the instance-method copy made private.
  module_function

  # Turns on status watching for the backend class registered as "CFilter".
  #
  # fairy:: object answering #name2backend_class(name); the object it returns
  #         must accept #watch_status=.
  #
  # Returns the assigned value (true).
  def njob_status_monitor_on(fairy)
    # require "backend/bjob"

    bjob = fairy.name2backend_class("CFilter")
    bjob.watch_status = true
  end
end
20
+ end
21
+
@@ -0,0 +1,17 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
module Fairy
  # Apply the encodings named in the configuration, external first and then
  # internal; a setting that is nil/false is skipped.
  #
  # A NameError raised while doing so is reported as
  # ERR::NoSupportRubyEncoding together with the running RUBY_VERSION.
  # NOTE(review): presumably this targets interpreters without the Encoding
  # constant - confirm.
  begin
    Encoding.default_external = CONF.DEFAULT_EXTERNAL if CONF.DEFAULT_EXTERNAL
    Encoding.default_internal = CONF.DEFAULT_INTERNAL if CONF.DEFAULT_INTERNAL
  rescue NameError
    ERR.Raise ERR::NoSupportRubyEncoding, RUBY_VERSION
  end
end
@@ -0,0 +1,93 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "thread"
7
+ require "forwardable"
8
+
9
+ require "fairy/share/conf.rb"
10
+
11
+ module Fairy
12
# Lightweight temporary file. Names are built from a date, the process id and
# a per-process sequence number; the file is removed by #close! or, failing
# that, by a finalizer registered on the instance.
class FastTempfile
  extend Forwardable

  # State shared between an instance and its finalizer proc.
  Entry = Struct.new(:path, :io)

  # (Re)initializes the per-process naming state: default directory, pid,
  # name head ("YYYYMMDD-<pid>-") and sequence counter.
  def self.reset
    @DEFAULT_TMPDIR = CONF.TMP_DIR
    @PID = $$
    @HEAD = Time.now.strftime("%Y%m%d")+format("-%05d-", @PID)
    @Seq = "00000"
  end
  reset
  @Mutex = Mutex.new

  # Creates a new temporary file.
  #
  # prefix:: leading part of the file name
  # tmpdir:: directory to create the file in (defaults to CONF.TMP_DIR as
  #          captured by the last reset)
  def self.open(prefix, tmpdir = @DEFAULT_TMPDIR)
    new(prefix, tmpdir)
  end

  # Returns the next unused file name under +tmpdir+. Serialized by a mutex.
  def self.gen_tmpname(prefix, tmpdir)
    @Mutex.synchronize do
      # in case this process was forked since the last reset
      reset if @PID != $$
      name = "#{tmpdir}/#{prefix}#{@HEAD}#{@Seq}"
      @Seq = @Seq.succ
      name
    end
  end

  # Creates the file exclusively (fails if the name already exists).
  #
  # Raises ERR::NoTmpDir (via ERR::Fail) when +tmpdir+ is not a directory;
  # any other Errno::ENOENT is re-raised unchanged.
  def initialize(prefix, tmpdir)
    @entry = Entry.new
    ObjectSpace.define_finalizer(self, FastTempfile.terminate_proc(@entry))

    @entry.path = FastTempfile.gen_tmpname(prefix, tmpdir)

    begin
      @entry.io = File.open(path, File::RDWR|File::CREAT|File::EXCL)
    rescue Errno::ENOENT
      ERR::Fail ERR::NoTmpDir, tmpdir unless File.directory?(tmpdir)
      raise
    end
  end

  def_delegator :@entry, :path
  def_delegator :@entry, :io

  # Closes the current handle (if any) and reopens the file with File.open's
  # default mode. Returns the new IO.
  def open
    @entry.io.close if @entry.io
    @entry.io = File.open(path)
  end

  # Closes the current handle. Safe to call when the file is already closed.
  def close
    @entry.io.close if @entry.io
    @entry.io = nil
  end

  # Closes the handle, deletes the file and drops the finalizer.
  def close!
    if @entry.io
      @entry.io.close
      # forget the closed handle so later #open/#close!/#close calls do not
      # touch it again
      @entry.io = nil
    end
    File.unlink(@entry.path) if File.exist?(@entry.path)
    ObjectSpace.undefine_finalizer(self)
  end

  # Builds the finalizer for +entry+. It is built in a class method and only
  # references +entry+, never the FastTempfile instance itself.
  #
  # The proc only cleans up when it runs in the process that created it.
  # The creating pid is read from `$$` rather than @PID: #initialize registers
  # the finalizer before gen_tmpname has had a chance to call reset, so right
  # after a fork @PID still holds the parent's pid and the child's first
  # temporary file would never be cleaned up.
  #
  # Kept as Proc.new (not a lambda) so that whatever argument the finalizer
  # is invoked with is ignored.
  def self.terminate_proc(entry)
    pid = $$
    Proc.new {
      if pid == $$
        entry.io.close if entry.io

        # keep this order for thread safeness
        if entry.path
          File.unlink(entry.path) if File.exist?(entry.path)
        end
      end
    }
  end
end
91
+ end
92
+
93
+
@@ -0,0 +1,176 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "thread"
7
+ require "uri"
8
+
9
+ module Fairy
10
+ URI_REGEXP = /:\/\//
11
+
12
# Hands out one PFilePlace per entry of a vfile's real file names.
class CFilePlace
  # vfile:: object answering #real_file_names with an array of locations
  def initialize(vfile)
    @vfile = vfile

    # for next_filter
    @no = 0
    # Work on a copy: the queue is consumed with #shift, which would
    # otherwise empty the array handed back by the vfile.
    @nfileplaces = @vfile.real_file_names.dup
    @nfileplaces_mutex = Mutex.new
  end

  # Calls +block+ with a PFilePlace for each remaining file, numbered
  # consecutively from 0 across all calls on this object. The block runs
  # outside the mutex. Returns nil once the queue is empty.
  def each_assigned_filter(&block)
    while (place = next_place)
      block.call place
    end
  end

  private

  # Takes the next file off the queue under the mutex and wraps it in a
  # PFilePlace; returns nil when nothing is left.
  def next_place
    @nfileplaces_mutex.synchronize do
      file = @nfileplaces.shift
      next nil unless file

      place = PFilePlace.new(file, @no)
      @no += 1
      place
    end
  end
end
36
+
37
# One input file location: either a plain path (host defaults to
# "localhost") or a URL, from which host and path are taken.
class PFilePlace
  attr_reader :url
  attr_reader :no
  attr_reader :host
  attr_reader :path

  # url:: plain path, or a string containing "://" that URI() can parse
  # no::  sequence number of this place
  def initialize(url, no)
    @url = url
    @no = no

    @host = "localhost"
    @path = @url
    return unless URI_REGEXP =~ @url

    uri = URI(@url)
    @host = uri.host
    # URI#host keeps the brackets of an IPv6 literal ("[fe80::1]"); strip
    # them. Hex digits may be upper or lower case, and the whole host string
    # must match.
    @host = $1 if /\A\[([0-9a-f.:]*)\]\z/i =~ @host
    @path = uri.path
  end
end
59
+
60
# Wraps every io produced by a job in a numbered PLocalIOPlace.
class CLocalIOPlace
  # job:: object answering #each_assigned_filter and yielding io objects
  def initialize(job)
    @job, @no = job, 0
  end

  # Passes one PLocalIOPlace per io to +block+. The counter is advanced only
  # after the block has returned, and it keeps counting across calls.
  def each_assigned_filter(&block)
    @job.each_assigned_filter do |io|
      place = PLocalIOPlace.new(io, @no)
      block.call(place)
      @no += 1
    end
  end
end
73
+
74
# Pairs an io object with its sequence number.
class PLocalIOPlace
  attr_reader :no, :io

  # io:: the io object carried by this place
  # no:: sequence number of this place
  def initialize(io, no)
    @io, @no = io, no
  end
end
83
+
84
# Wraps every array of a varray in a numbered PVarrayPlace.
class CVarrayPlace
  # varray:: object answering #arrays_each and yielding arrays
  def initialize(varray)
    @varray, @no = varray, 0
  end

  # Passes one PVarrayPlace per array to +block+. Numbering starts at 0 on
  # every call (a local counter is used, not @no).
  def each_assigned_filter(&block)
    index = -1
    @varray.arrays_each do |ary|
      block.call(PVarrayPlace.new(ary, index += 1))
    end
  end
end
99
+
100
# Pairs one array of a varray with its sequence number.
class PVarrayPlace
  attr_reader :ary
  attr_reader :no

  # ary:: the array carried by this place
  # no::  sequence number of this place
  def initialize(ary, no)
    @ary = ary
    @no = no

    # The former URL-parsing code here read @url, which this class never
    # assigns, so it always ended with exactly these two values. They are
    # kept so the object's instance-variable state stays the same.
    @host = "localhost"
    @path = nil
  end
end
120
+
121
# Cuts the integer range from offset up to last into at most split_no
# consecutive PIotaPlace chunks.
class CIotaPlace
  # last::     upper bound; no chunk ends beyond it
  # offset::   first value of the first chunk
  # split_no:: maximum number of chunks
  def initialize(last, offset, split_no)
    @last, @offset, @split_no = last, offset, split_no
  end

  # Passes PIotaPlace(no, first, last) objects to +block+. Each chunk ends
  # @last.div(@split_no) after its first value, capped at @last, and
  # iteration stops as soon as the next chunk would start past @last.
  def each_assigned_filter(&block)
    head = @offset

    @split_no.times do |no|
      Log::debug self, "NO: #{no}"
      tail = [head + @last.div(@split_no), @last].min
      block.call PIotaPlace.new(no, head, tail)
      head = tail + 1
      break if head > @last
    end
  end
end
143
+
144
# One chunk of an integer range: its sequence number plus its first and
# last value.
class PIotaPlace
  attr_reader :no, :first, :last

  # no::    sequence number of this chunk
  # first:: first value of the chunk
  # last::  last value of the chunk
  def initialize(no, first, last)
    @no, @first, @last = no, first, last
  end
end
155
+
156
+
157
# Exposes a single enumerable as exactly one PTherePlace.
class CTherePlace
  # enum:: the object handed on to the PTherePlace
  def initialize(enum)
    @enumerable = enum
  end

  # Calls +block+ once with PTherePlace number 0 and returns the block's
  # result.
  def each_assigned_filter(&block)
    place = PTherePlace.new(0, @enumerable)
    block.call(place)
  end
end
166
+
167
# Pairs an enumerable with its sequence number.
class PTherePlace
  attr_reader :no, :enumerable

  # no::   sequence number of this place
  # enum:: the enumerable carried by this place
  def initialize(no, enum)
    @no, @enumerable = no, enum
  end
end
176
+ end
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ module Fairy
7
# Hash value generator whose value is simply key.ord.
module HValueGenerator
  module Hash1
    # Returns key.ord.
    def self.value(key)
      key.ord
    end
  end

  class << self
    # This generator needs no seed; always returns nil.
    def create_seed; end

    # Returns the Hash1 module itself; +seed+ is ignored.
    def new(seed)
      Hash1
    end
  end
end
19
+ end
20
+
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "digest/md5"
7
+
8
+ module Fairy
9
# Hash value generator based on the MD5 digest of the key.
module HValueGenerator
  module MD5
    # Returns the last four bytes of the key's 16-byte MD5 digest, read as a
    # big-endian unsigned 32-bit integer. Keys that are not Strings are
    # reported through ERR::Raise as ERR::NoImpliment.
    def self.value(key)
      if String === key
        Digest::MD5.digest(key).unpack("N4").last
      else
        ERR::Raise ERR::NoImpliment, "non-string key(#{key.inspect})"
      end
    end
  end

  class << self
    # This generator needs no seed; always returns nil.
    def create_seed; end

    # Returns the MD5 module itself; +seed+ is ignored.
    def new(seed)
      HValueGenerator::MD5
    end
  end
end
27
+ end
28
+
@@ -0,0 +1,69 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ module Fairy
7
+
8
# Seeded hash value generator for String keys.
module HValueGenerator

  # The hash is modified from MurmurHash (http://murmurhash.googlepages.com/).
  # All intermediate and final values are kept within FIX_MASK (30 bits).
  class MurMur
    FIX_MASK = 0x3fff_ffff
    MASK24 = 0xffff_ffff_ffff
    MAGIC = 0x7fd652ad & FIX_MASK
    R = 18

    # h:: integer seed, e.g. a value from MurMur.create_seed
    def initialize(h)
      @seed = (h + 0xdeadbeef) & FIX_MASK
      # @postfix = [@seed].pack("N")[1..3]
      # Three padding bytes: unpack("N*") ignores an incomplete trailing
      # group, so without padding the final 1-3 bytes of the key would be
      # dropped.
      @postfix = "000"
    end

    # Returns the hash of +data+. Only String keys are supported; anything
    # else is reported through ERR::Raise as ERR::NoImpliment.
    def value(data)
      case data
      when String
        str_hash(data)
      else
        ERR::Raise ERR::NoImpliment, "non-string key(#{data.inspect})"
      end
    end

    # Hashes +data+ four bytes (one big-endian 32-bit word) at a time and
    # returns an Integer in 0..FIX_MASK.
    def str_hash(data)
      h = @seed

      (data + @postfix).unpack("N*").each do |k|
        k *= MAGIC
        k ^= k >> R
        k &= FIX_MASK
        k *= MAGIC
        k &= FIX_MASK

        # Fold the word into the running value. This used to restart from
        # @seed * MAGIC on every word, which made the result depend on the
        # last word only.
        h = (h * MAGIC) & FIX_MASK
        h ^= k
      end

      h ^= h >> 13
      h *= MAGIC
      h &= FIX_MASK
      h ^= h >> 15
      h
    end

    # Returns a random seed in 0...FIX_MASK.
    def MurMur.create_seed
      rand(FIX_MASK)
    end
  end


  # Returns a random seed suitable for HValueGenerator.new.
  def self.create_seed
    MurMur.create_seed
  end

  # Returns a MurMur generator initialized with +seed+.
  def self.new(seed)
    MurMur.new(seed)
  end

end
69
+ end
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ module Fairy
7
# Hash value generator that delegates to the key's own #hash.
module HValueGenerator
  module RB18
    # Returns key.hash.
    def self.value(key)
      key.hash
    end
  end

  class << self
    # This generator needs no seed; always returns nil.
    def create_seed; end

    # Returns the RB18 module itself; +seed+ is ignored.
    def new(seed)
      RB18
    end
  end
end
19
+ end
20
+