fairy 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
@@ -0,0 +1,8 @@
1
+ #
2
+ # Do not modify this file.
3
+ # This file is auto-generated.
4
+ #
5
+ module Fairy
6
+ Version = "0.6.0-001"
7
+ end
8
+
data/lib/fairy.rb ADDED
@@ -0,0 +1,206 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "thread"
7
+
8
+ require "deep-connect"
9
+
10
+ require "fairy/version"
11
+ require "fairy/share/conf"
12
+ require "fairy/share/log"
13
+ require "fairy/share/locale"
14
+ require "fairy/share/encoding"
15
+
16
+ module Fairy
17
+ Conf.configure_common_conf
18
+
19
+ @USER_LEVEL_FILTERS = {}
20
+
21
+ def Fairy::def_filter(name, opts={}, &definition)
22
+ name = name.intern if name.kind_of?(String)
23
+ @USER_LEVEL_FILTERS[name] = definition
24
+
25
+ interface_mod = Module.new
26
+
27
+ if !opts[:sub]
28
+ interface_mod.module_eval %{
29
+ def #{name}(*args)
30
+ p = ::Fairy::user_level_filter(:#{name})
31
+ ::Fairy::ERR.Raise ::Fairy::ERR::INTERNAL::NoSuchDefiledUserLevelFilter, name unless p
32
+ p.call(@fairy, self, *args)
33
+ end
34
+ }
35
+ else
36
+ interface_mod.module_eval %{
37
+ def #{name}(*args)
38
+ p = ::Fairy::user_level_filter(:#{name})
39
+ ::Fairy::ERR.Raise ::Fairy::ERR::INTERNAL::NoSuchDefiledUserLevelFilter, name unless p
40
+ sub{|subf, input| p.call(subf, input, *args)}
41
+ end
42
+ }
43
+ end
44
+ Fairy.def_filter_interface interface_mod
45
+ end
46
+
47
+ def Fairy::user_level_filter(name)
48
+ @USER_LEVEL_FILTERS[name]
49
+ end
50
+
51
+ class Fairy
52
+
53
+ def self.create_subfairy(fairy)
54
+ subfairy = Fairy.allocate
55
+ subfairy.initialize_subfairy(fairy)
56
+ subfairy
57
+ end
58
+
59
+ def initialize(master_host = CONF.MASTER_HOST,
60
+ master_port = CONF.MASTER_PORT,
61
+ opts = {})
62
+
63
+ if master_host.kind_of?(Hash)
64
+ opts = master_host
65
+ master_host = CONF.MASTER_HOST
66
+ master_port = CONF.MASTER_PORT
67
+ end
68
+
69
+ ::Fairy::REPLACE_CONF(Conf.new(CONF, opts))
70
+
71
+ Thread.abort_on_exception = CONF.DEBUG_THREAD_ABORT_ON_EXCEPTION
72
+
73
+ @name2backend_class = {}
74
+
75
+ @deep_connect = DeepConnect.start(0)
76
+ @master_deepspace = @deep_connect.open_deepspace(master_host, master_port)
77
+ @master = @master_deepspace.import("Master")
78
+
79
+ @controller = @master.assgin_controller
80
+ @controller.connect(self, CONF)
81
+
82
+ @logger = @master.logger
83
+ Log.type = "[c]"
84
+ Log.pid = @controller.id
85
+ Log.logger = @logger
86
+ Log.set_local_output_dev
87
+ Log::info self, "fairy connected!!"
88
+ Log::info self, "\tfairy version: #{Version}"
89
+ Log::info(self, "\t[Powered By #{RUBY_DESCRIPTION}]")
90
+
91
+ @stdout_mutex = Mutex.new
92
+
93
+ if CONF.DEBUG_MONITOR_ON
94
+ Log::info self, "MONITOR NODE: ON"
95
+ require "fairy/share/debug"
96
+ Debug::njob_status_monitor_on(self)
97
+ end
98
+ end
99
+
100
+ def initialize_subfairy(fairy)
101
+ @name2backend_class = {}
102
+ @deep_connect = fairy.instance_eval{@deep_connect}
103
+ @master_deepspace = fairy.instance_eval{@master_deepspace}
104
+ @master = fairy.instance_eval{@master}
105
+ # The connection objects above are taken from the parent fairy; only the controller is newly assigned.
106
+ @controller = @master.assgin_controller
107
+ @controller.connect(self, CONF)
108
+
109
+ # Log is shared with the parent,
110
+ # so the ID is the same as the parent's (naturally, since it is the process id).
111
+
112
+ @stdout_mutex = fairy.instance_eval{@stdout_mutex}
113
+ end
114
+
115
+ attr_reader :controller
116
+
117
+ def abort
118
+ @master.terminate_controller(@controller)
119
+ end
120
+
121
+ def name2backend_class(backend_class_name)
122
+ if klass = @name2backend_class[backend_class_name]
123
+ return klass
124
+ end
125
+
126
+ if klass = @controller.import(backend_class_name)
127
+ @name2backend_class[backend_class_name] = klass
128
+ end
129
+ klass
130
+ end
131
+
132
+ # pool variables
133
+ def def_pool_variable(vname, value = nil)
134
+ @controller.def_pool_variable(vname, value)
135
+ end
136
+
137
+ def pool_variable(vname, *value)
138
+ @controller.pool_variable(vname, *value)
139
+ end
140
+
141
+ # exception handling
142
+ def handle_exception(exp) # logs exp, then raises it (or a copy of it) in the main thread; returns nil
143
+ local_exp = nil
144
+ Log::debug(self, "exception raised: #{exp.class}")
145
+ Log::debug_exception(self, exp)
146
+ begin
147
+ local_exp = exp.dc_deep_copy # presumably a DeepConnect copy into this process -- TODO confirm
148
+ rescue Exception # the copy itself failed: fall back to the original exception object
149
+ Thread.main.raise exp
150
+ raise exp # also propagate in the calling thread, so the code below is not reached
151
+ end
152
+ Thread.main.raise local_exp
153
+ nil
154
+ end
155
+
156
+ # debug print
157
+ def stdout_write(str)
158
+ @stdout_mutex.synchronize do
159
+ $stdout.write(str)
160
+ end
161
+ end
162
+
163
+ # external module loading
164
+ def self.def_fairy_interface(mod)
165
+ include mod
166
+ end
167
+ end
168
+
169
+ class FilterChain # records the sequence of filters produced by chained calls, starting from input
170
+ def initialize(input)
171
+ @filters = [input]
172
+ end
173
+ # Returns the filter stored at position idx of the chain.
174
+ def [](idx)
175
+ @filters[idx]
176
+ end
177
+ # Prints one line per filter: its index and its class.
178
+ def show_chain
179
+ @filters.each_with_index{|f, idx|
180
+ puts "[#{idx}]\t#{f.class}"
181
+ }
182
+ end
183
+ # Forwards any unknown message to the last filter; a Job result is appended and the chain itself is returned.
184
+ def method_missing(msg, *args, &block)
185
+ #pp msg
186
+ ret = @filters.last.__send__(msg, *args, &block)
187
+ if ret.kind_of?(Job)
188
+ @filters << ret
189
+ self
190
+ else
191
+ ret # non-Job results are handed back to the caller unchanged
192
+ end
193
+ end
194
+ end
195
+
196
+ def def_fairy_interface(mod)
197
+ ::Fairy::Fairy.instance_eval{include mod}
198
+ end
199
+ module_function :def_fairy_interface
200
+ end
201
+
202
+ require "fairy/client/filter"
203
+ require "fairy/client/input"
204
+
205
+ require "fairy/client/addins"
206
+
data/sample/grep.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+
10
+
11
+ unless ARGV.size == 3
12
+ $stderr.puts "Usage: #{File.basename($0)} PATTERN INPUT OUTPUT"
13
+ exit(1)
14
+ end
15
+ # NOTE(review): eval runs ARGV[0] as the body of a double-quoted Ruby string; a pattern containing a double quote or an interpolation sequence breaks or executes code.
16
+ pattern = Regexp.new(eval %{ "#{ARGV[0]}" })
17
+
18
+ input_path = ARGV[1];
19
+ output_path = ARGV[2];
20
+
21
+ unless FileTest.exist?(input_path)
22
+ raise "Input doesn't exist."
23
+ end
24
+
25
+ if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
26
+ raise "Cannot override an existing VFile. Please confirm."
27
+ end
28
+
29
+
30
+ t0 = Time.now
31
+
32
+ puts "[#{$$}] START: #{t0}"
33
+ puts "[#{$$}] pattern: #{pattern}"
34
+ puts "[#{$$}] input: #{input_path}"
35
+ puts "[#{$$}] output: #{output_path}"
36
+
37
+ fairy = Fairy::Fairy.new
38
+
39
+ input = fairy.input input_path
40
+ greped = input.grep(pattern, :ignore_exception => true)
41
+ greped.output output_path
42
+
43
+ t1 = Time.now
44
+ puts "[#{$$}] DONE: #{t1} (#{t1-t0} sec)"
45
+
46
+
data/sample/ping.rb ADDED
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+ require 'yaml'
10
+
11
+ yml_path = File.expand_path(File.dirname(__FILE__) + "/../tools/cap_recipe/cluster.yml")
12
+ cluster = YAML.load_file(yml_path)
13
+
14
+ fairy = Fairy::Fairy.new
15
+ input = fairy.exec(cluster['nodes'].map{|n| "file://#{n}/"})
16
+ map = input.map(%q{|uri| "#{`hostname -f`.chomp} (#{`hostname -i`.chomp}) is alive."})
17
+ map.here.each{|responce| puts responce}
18
+
19
+
data/sample/sort.rb ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+ require 'optparse'
10
+
11
+
12
+ opt = {:k => 0, :t => /\s+/}
13
+
14
+ op = OptionParser.new
15
+ op.on('-k', '--key=POS', Integer){|v| opt[:k] = v}
16
+ op.on('-n', '--numeric-sort'){|v| opt[:n] = v}
17
+ op.on('-r', '--reverse', nil, "This must be used with -n."){|v| opt[:r] = v}
18
+ op.on('-t', '--separator=SEPARATOR'){|v| opt[:t] = v}
19
+ op.parse!(ARGV)
20
+
21
+
22
+ unless ARGV.size == 2
23
+ $stderr.puts op.to_s.sub(/ *\n/, "... INPUT OUTPUT\n")
24
+ exit(1)
25
+ end
26
+
27
+ input_path = ARGV[0];
28
+ output_path = ARGV[1];
29
+
30
+ unless FileTest.exist?(input_path)
31
+ raise "Input doesn't exist."
32
+ end
33
+
34
+ if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
35
+ raise "Cannot override an existing VFile. Please confirm."
36
+ end
37
+
38
+ if opt[:r] && !opt[:n]
39
+ raise "Cannot specify -r option without -n."
40
+ end
41
+
42
+ if opt[:t].is_a? Regexp
43
+ sep = opt[:t].to_s
44
+ else
45
+ str = eval %{ "#{opt[:t]}" }
46
+ if str.size == 1 && str != "\\"
47
+ sep = (/(?<!\\)#{str}/o).to_s
48
+ else
49
+ sep = str
50
+ end
51
+ end
52
+
53
+ t0 = Time.now
54
+
55
+ puts "[#{$$}] START: #{t0}"
56
+ puts "[#{$$}] input: #{input_path}"
57
+ puts "[#{$$}] output: #{output_path}"
58
+ puts "[#{$$}] key: #{opt[:k]}"
59
+ puts "[#{$$}] separator: #{sep}"
60
+ puts "[#{$$}] num-sort: ON" if opt[:n]
61
+ puts "[#{$$}] reverse: ON" if opt[:r]
62
+
63
+
64
+ fairy = Fairy::Fairy.new
65
+
66
+ fairy.def_pool_variable(:errors, :block => %{Array.new})
67
+
68
+ input = fairy.input input_path
69
+ maped = input.map(%{|ln|
70
+ @sep_re ||= Regexp.new(#{sep.inspect})
71
+ begin
72
+ sort_key = ln.split(@sep_re)[#{opt[:k]}]
73
+ [sort_key, ln]
74
+ rescue => e
75
+ @Pool.errors.push([e.message, ln])
76
+ Import::TOKEN_NULLVALUE
77
+ end
78
+ })
79
+
80
+ if opt[:n] && opt[:r]
81
+ sorted = maped.sort_by(%{|ary| -ary[0].to_i})
82
+ elsif opt[:n]
83
+ sorted = maped.sort_by(%{|ary| ary[0].to_i})
84
+ else
85
+ sorted = maped.sort_by(%{|ary| ary[0]})
86
+ end
87
+
88
+ formatted = sorted.map(%{|ary| ary[1]})
89
+ formatted.output output_path
90
+
91
+ unless fairy.pool_variable(:errors).size.zero?
92
+ err = fairy.pool_variable(:errors)
93
+ puts "[#{$$}] WARN: #{err.size} error(s) occurred."
94
+ err.each_with_index{|e,i|
95
+ p [i+1] + e
96
+ }
97
+ end
98
+
99
+ t1 = Time.now
100
+ puts "[#{$$}] DONE: #{t1} (#{t1-t0} sec)"
101
+
102
+
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+
10
+
11
+ unless ARGV.size == 2
12
+ $stderr.puts "Usage: #{File.basename($0)} INPUT OUTPUT"
13
+ end
14
+
15
+ input_path = ARGV[0];
16
+ output_path = ARGV[1];
17
+
18
+ unless FileTest.exist?(input_path)
19
+ raise "Input doesn't exist."
20
+ exit(1)
21
+ end
22
+
23
+ if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
24
+ raise "Cannot override an existing VFile. Please confirm."
25
+ end
26
+
27
+ t0 = Time.now
28
+
29
+ puts "[#{$$}] START: #{t0}"
30
+ puts "[#{$$}] input: #{input_path}"
31
+ puts "[#{$$}] output: #{output_path}"
32
+
33
+ fairy = Fairy::Fairy.new
34
+
35
+ fairy.def_pool_variable(:errors, :block => %{Array.new})
36
+
37
+ input = fairy.input input_path
38
+ maped = input.mapf(%{|ln|
39
+ begin
40
+ ln.split
41
+ rescue => e
42
+ @Pool.errors.push([e.message, ln])
43
+ Import::TOKEN_NULLVALUE
44
+ end
45
+ })
46
+ grouped = maped.group_by(%{|w| w})
47
+ counted = grouped.map(%q{|bag| "#{bag.key}\t#{bag.size}"})
48
+ counted.output output_path
49
+
50
+ unless fairy.pool_variable(:errors).size.zero?
51
+ err = fairy.pool_variable(:errors)
52
+ puts "[#{$$}] WARN: #{err.size} error(s) occurred."
53
+ err.each_with_index{|e,i|
54
+ p [i+1] + e
55
+ }
56
+ end
57
+
58
+ t1 = Time.now
59
+ puts "[#{$$}] DONE: #{t1} (#{t1-t0} sec)"
60
+
61
+