fairy 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
data/spec/README ADDED
@@ -0,0 +1,12 @@
1
+ Before running tests here,
2
+ you need to edit $FAIRY_HOME/tools/cap_recipe/cluster.yml
3
+ to match your cluster.
4
+
5
+ You can run all tests with the following command:
6
+
7
+ ./run_all.sh
8
+
9
+
10
+ Copyright (C) 2007-2010 Rakuten, Inc.
11
+
12
+
data/spec/fairy1_spec.rb ADDED
@@ -0,0 +1,31 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+
9
+ require 'fairy'
10
+ require 'fairy/controller'
11
+
12
+ describe Fairy do
13
+ before :all do
14
+ @fairy = Fairy::Fairy.new
15
+ end
16
+
17
+ # initialize
18
+ it 'should create new controller' do
19
+ @fairy.controller.kind_of?(Fairy::Controller).should be_true
20
+ @fairy.controller.deep_space.status.should == :SERVICING
21
+ end
22
+
23
+ # abort
24
+ it 'should destroy the controller' do
25
+ @fairy.abort
26
+ sleep(3)
27
+ @fairy.controller.deep_space.status.should == :SERVICE_STOP
28
+ end
29
+ end
30
+
31
+
data/spec/fairy2_spec.rb ADDED
@@ -0,0 +1,42 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+
9
+ require 'fairy'
10
+
11
+ describe Fairy do
12
+ before :all do
13
+ @source_multi = File.readlines("testdata_multi.txt")
14
+ @fairy = Fairy::Fairy.new
15
+ end
16
+
17
+ # input/here
18
+ it 'should open a vfile & print the contents' do
19
+ contents = []
20
+ @fairy.input("testdata_multi.vf").here.each{|ln|
21
+ contents << ln
22
+ }
23
+ contents.should == @source_multi
24
+ end
25
+
26
+ # output (local)
27
+ it 'should output to a local file' do
28
+ @fairy.input("testdata_multi.vf").output("/tmp/fairy_spec_testdata_multi.txt")
29
+ contents = File.readlines("/tmp/fairy_spec_testdata_multi.txt")
30
+ contents.should == @source_multi
31
+ end
32
+
33
+ # output (vfile)
34
+ it 'should output to remote files (a vfile)' do
35
+ @fairy.input("testdata_multi.vf").output("/tmp/fairy_spec_testdata_multi.vf")
36
+ system %{ fairy cp /tmp/fairy_spec_testdata_multi.vf /tmp/fairy_spec_testdata_multi.txt }
37
+ contents = File.readlines("/tmp/fairy_spec_testdata_multi.txt")
38
+ contents.should == @source_multi
39
+ end
40
+ end
41
+
42
+
data/spec/fairy3_spec.rb ADDED
@@ -0,0 +1,126 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+
9
+ require 'fairy'
10
+
11
+ describe Fairy do
12
+ before :all do
13
+ @source = File.readlines("testdata.txt")
14
+ @source_multi = File.readlines("testdata_multi.txt")
15
+ @fairy = Fairy::Fairy.new
16
+ end
17
+
18
+ # map (=collect)
19
+ it 'should strip LFs & convert values to integer & add one' do
20
+ answer = @source.map{|ln|
21
+ n = ln.chomp.to_i + 1
22
+ "#{n}\n"
23
+ }
24
+
25
+ @fairy.input("testdata.vf").map(%q{|ln|
26
+ n = ln.chomp.to_i + 1
27
+ "#{n}"
28
+ }).output("/tmp/fairy_spec_testdata.txt")
29
+
30
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
31
+
32
+ result.should == answer
33
+
34
+ @fairy.input("testdata.vf").collect(%q{|ln|
35
+ n = ln.chomp.to_i + 1
36
+ "#{n}"
37
+ }).output("/tmp/fairy_spec_testdata.txt")
38
+
39
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
40
+
41
+ result.should == answer
42
+ end
43
+
44
+ # map + map
45
+ it 'should add one + double values' do
46
+ answer = @source.map{|ln|
47
+ n = ln.chomp.to_i + 1
48
+ n *= 2
49
+ "#{n}\n"
50
+ }
51
+
52
+ @fairy.input("testdata.vf").map(%{|ln|
53
+ n = ln.chomp.to_i + 1
54
+ n
55
+ }).map(%q{|n|
56
+ n *= 2
57
+ "#{n}"
58
+ }).output("/tmp/fairy_spec_testdata.txt")
59
+
60
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
61
+
62
+ result.should == answer
63
+ end
64
+
65
+ # map + map + map
66
+ it 'should add one + double values + subtract three' do
67
+ answer = @source.map{|ln|
68
+ n = ln.chomp.to_i + 1
69
+ n *= 2
70
+ n -= 3
71
+ "#{n}\n"
72
+ }
73
+
74
+ @fairy.input("testdata.vf").map(%{|ln|
75
+ n = ln.chomp.to_i + 1
76
+ n
77
+ }).map(%{|n|
78
+ n *= 2
79
+ n
80
+ }).map(%q{|n|
81
+ n -= 3
82
+ "#{n}"
83
+ }).output("/tmp/fairy_spec_testdata.txt")
84
+
85
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
86
+
87
+ result.should == answer
88
+ end
89
+
90
+ # map_flatten (=mapf)
91
+ it 'should split lines and flatten values' do
92
+ answer = @source_multi.map{|ln|
93
+ nums = ln.split.map{|s| s.to_i}
94
+ nums
95
+ }.flatten.map{|n|
96
+ n + 1
97
+ "#{n}\n"
98
+ }
99
+
100
+ @fairy.input("testdata_multi.vf").map_flatten(%{|ln|
101
+ nums = ln.split.map{|s| s.to_i}
102
+ nums
103
+ }).map(%q{|n|
104
+ n + 1
105
+ "#{n}"
106
+ }).output("/tmp/fairy_spec_testdata_multi.txt")
107
+
108
+ result = File.readlines("/tmp/fairy_spec_testdata_multi.txt")
109
+
110
+ result.should == answer
111
+
112
+ @fairy.input("testdata_multi.vf").mapf(%{|ln|
113
+ nums = ln.split.map{|s| s.to_i}
114
+ nums
115
+ }).map(%q{|n|
116
+ n + 1
117
+ "#{n}"
118
+ }).output("/tmp/fairy_spec_testdata_multi.txt")
119
+
120
+ result = File.readlines("/tmp/fairy_spec_testdata_multi.txt")
121
+
122
+ result.should == answer
123
+ end
124
+ end
125
+
126
+
data/spec/fairy4_spec.rb ADDED
@@ -0,0 +1,63 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+
9
+ require 'fairy'
10
+
11
+ describe Fairy do
12
+ before :all do
13
+ @source = File.readlines("testdata.txt")
14
+ @fairy = Fairy::Fairy.new
15
+ end
16
+
17
+ # select
18
+ it 'should select multiples of 10' do
19
+ answer = @source.map{|ln|
20
+ n = ln.chomp.to_i
21
+ n
22
+ }.select{|n|
23
+ (n % 10).zero?
24
+ }.map{|n|
25
+ "#{n}\n"
26
+ }
27
+
28
+ result = []
29
+ @fairy.input("testdata.vf").map(%{|ln|
30
+ n = ln.chomp.to_i
31
+ n
32
+ }).select(%{|n|
33
+ (n % 10).zero?
34
+ }).map(%q{|n|
35
+ "#{n}"
36
+ }).output("/tmp/fairy_spec_testdata.txt")
37
+
38
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
39
+
40
+ result.should == answer
41
+ end
42
+
43
+ # grep
44
+ it 'should select 555' do
45
+ answer = @source.map{|ln|
46
+ n = ln.chomp
47
+ n
48
+ }.grep("555").map{|n|
49
+ "#{n}\n"
50
+ }
51
+
52
+ @fairy.input("testdata.vf").map(%{|ln|
53
+ n = ln.chomp
54
+ n
55
+ }).grep(/^555$/).output("/tmp/fairy_spec_testdata.txt")
56
+
57
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
58
+
59
+ result.should == answer
60
+ end
61
+ end
62
+
63
+
data/spec/fairy5_spec.rb ADDED
@@ -0,0 +1,45 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+
9
+ require 'fairy'
10
+
11
+ describe Fairy do
12
+ before :all do
13
+ @source_multi = File.readlines("testdata_multi.txt")
14
+ @fairy = Fairy::Fairy.new
15
+ end
16
+
17
+ # mapf + group_by + map (word count)
18
+ it 'should count unique values' do
19
+ answer = {}
20
+ @source_multi.each{|ln|
21
+ nums = ln.split.map{|s| s.to_i}
22
+ nums.each{|n|
23
+ answer[n] ||= 0
24
+ answer[n] += 1
25
+ }
26
+ }
27
+
28
+ @fairy.input("testdata_multi.vf").mapf(%{|ln|
29
+ nums = ln.split.map{|s| s.to_i}
30
+ nums
31
+ }).group_by(%{|n| n.to_s}).map(%q{|bag|
32
+ "#{bag.key}\t#{bag.size}"
33
+ }).output("/tmp/fairy_spec_testdata_multi.txt")
34
+
35
+ result = File.readlines("/tmp/fairy_spec_testdata_multi.txt").inject({}){|res,ln|
36
+ ent = ln.split.map{|s| s.to_i}
37
+ res[ent[0]] = ent[1]
38
+ res
39
+ }
40
+
41
+ result.should == answer
42
+ end
43
+ end
44
+
45
+
data/spec/fairy6_spec.rb ADDED
@@ -0,0 +1,52 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+
9
+ require 'fairy'
10
+
11
+ describe Fairy do
12
+ before :all do
13
+ @source = File.readlines("testdata.txt")
14
+ @fairy = Fairy::Fairy.new
15
+ end
16
+
17
+ # sort_by
18
+ it 'should sort data' do
19
+ answer = @source.map{|ln|
20
+ n = ln.chomp.to_i
21
+ n
22
+ }.sort{|a,b|
23
+ a <=> b
24
+ }.map{|n|
25
+ "#{n}\n"
26
+ }
27
+
28
+ @fairy.input("testdata.vf").map(%{|ln|
29
+ n = ln.chomp.to_i
30
+ n
31
+ }).sort_by(%{|n| n.to_i}).map(%q{|n|
32
+ "#{n}"
33
+ }).output("/tmp/fairy_spec_testdata.txt")
34
+
35
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
36
+
37
+ result.should == answer
38
+
39
+ @fairy.input("testdata.vf").map(%{|ln|
40
+ n = ln.chomp.to_i
41
+ n
42
+ }).sort_by(%{|n| -(n.to_i)}).map(%q{|n|
43
+ "#{n}"
44
+ }).output("/tmp/fairy_spec_testdata.txt")
45
+
46
+ result = File.readlines("/tmp/fairy_spec_testdata.txt")
47
+
48
+ result.should == answer.reverse
49
+ end
50
+ end
51
+
52
+
data/spec/fairy7_spec.rb ADDED
@@ -0,0 +1,58 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+
9
+ require 'fairy'
10
+
11
+ describe Fairy do
12
+ before :all do
13
+ @source_join_a = File.readlines("testdata_join_a.txt")
14
+ @source_join_b = File.readlines("testdata_join_b.txt")
15
+ @fairy = Fairy::Fairy.new
16
+ end
17
+
18
+ # equijoin
19
+ it 'should do inner join' do
20
+ a = File.readlines("testdata_join_a.txt").map{|ln| rec = ln.split; rec}
21
+ b = File.readlines("testdata_join_b.txt").map{|ln| rec = ln.split; rec}
22
+
23
+ index = {}
24
+ b.each{|id,val|
25
+ index[id] ||= []
26
+ index[id] << val
27
+ }
28
+
29
+ answer = []
30
+ a.each{|id,val|
31
+ next unless index[id]
32
+ index[id].each{|val_b|
33
+ answer << [id, val, val_b]
34
+ }
35
+ }
36
+
37
+ answer = answer.sort_by{|ent| "%05d-%s-%s" % ent}.map{|ent|
38
+ ent.join("\t") + "\n"
39
+ }
40
+
41
+ a = @fairy.input("testdata_join_a.vf").map(%{|ln| rec = ln.split; rec})
42
+ b = @fairy.input("testdata_join_b.vf").map(%{|ln| rec = ln.split; rec})
43
+
44
+ joined = a.equijoin(b, 0)
45
+
46
+ joined.map(%{|from_a, from_b|
47
+ [from_a[0], from_a[1], from_b[1]]
48
+ }).sort_by(%{|ent| "%05d-%s-%s" % ent}).map(%{|ent|
49
+ ent.join("\t")
50
+ }).output("/tmp/fairy_spec_testdata_join.txt")
51
+
52
+ result = File.readlines("/tmp/fairy_spec_testdata_join.txt")
53
+
54
+ result.should == answer
55
+ end
56
+ end
57
+
58
+
data/spec/fairy8_spec.rb ADDED
@@ -0,0 +1,48 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require 'rubygems'
7
+ require 'rspec'
8
+ require 'yaml'
9
+
10
+ require 'fairy'
11
+
12
+ describe Fairy do
13
+ before :all do
14
+ yml_path = File.expand_path(File.dirname(__FILE__) + "/../tools/cap_recipe/cluster.yml")
15
+ @cluster = YAML.load_file(yml_path)
16
+ @source = File.readlines("testdata.txt")
17
+ @fairy = Fairy::Fairy.new
18
+ end
19
+
20
+ # input (local) + split
21
+ it 'should split data & distribute them' do
22
+ answer = @source.sort
23
+
24
+ @fairy.input("testdata.txt").seg_split(10).output("/tmp/fairy_spec_testdata.vf")
25
+ system %{ fairy cat /tmp/fairy_spec_testdata.vf > /tmp/fairy_spec_testdata.txt }
26
+ result = File.readlines("/tmp/fairy_spec_testdata.txt").sort
27
+
28
+ result.should == answer
29
+ end
30
+
31
+ # exec
32
+ it 'should print all node-names' do
33
+ answer = @cluster["nodes"].sort
34
+
35
+ result = []
36
+ @fairy.exec(@cluster["nodes"].map{|n| "file://#{n}"}).map(%q{|uri|
37
+ `hostname`.chomp
38
+ }).here.each{|n|
39
+ result << n
40
+ }
41
+
42
+ result.sort!
43
+
44
+ result.should == answer
45
+ end
46
+ end
47
+
48
+