fairy 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. data/LICENSE +674 -0
  2. data/Makefile +116 -0
  3. data/README +15 -0
  4. data/bin/fairy +582 -0
  5. data/bin/fairy-cat +74 -0
  6. data/bin/fairy-cp +128 -0
  7. data/bin/fairy-rm +122 -0
  8. data/bin/subcmd/controller +41 -0
  9. data/bin/subcmd/inspector +81 -0
  10. data/bin/subcmd/master +43 -0
  11. data/bin/subcmd/node +47 -0
  12. data/bin/subcmd/processor +54 -0
  13. data/doc/programming-interface.html +240 -0
  14. data/doc/programming-interface.rd +300 -0
  15. data/etc/fairy.conf.tmpl +118 -0
  16. data/ext/simple_hash/extconf.rb +4 -0
  17. data/ext/simple_hash/simple_hash.c +42 -0
  18. data/fairy.gemspec +60 -0
  19. data/lib/fairy/client/addins.rb +20 -0
  20. data/lib/fairy/client/barrier.rb +29 -0
  21. data/lib/fairy/client/basic-group-by.rb +52 -0
  22. data/lib/fairy/client/cat.rb +41 -0
  23. data/lib/fairy/client/direct-product.rb +51 -0
  24. data/lib/fairy/client/equijoin.rb +79 -0
  25. data/lib/fairy/client/exec.rb +54 -0
  26. data/lib/fairy/client/filter.rb +62 -0
  27. data/lib/fairy/client/find.rb +35 -0
  28. data/lib/fairy/client/group-by.rb +194 -0
  29. data/lib/fairy/client/here.rb +84 -0
  30. data/lib/fairy/client/inject.rb +70 -0
  31. data/lib/fairy/client/input-file.rb +53 -0
  32. data/lib/fairy/client/input-iota.rb +49 -0
  33. data/lib/fairy/client/input-local-file.rb +188 -0
  34. data/lib/fairy/client/input-varray.rb +30 -0
  35. data/lib/fairy/client/input.rb +42 -0
  36. data/lib/fairy/client/io-filter.rb +26 -0
  37. data/lib/fairy/client/junction.rb +31 -0
  38. data/lib/fairy/client/map.rb +34 -0
  39. data/lib/fairy/client/merge-group-by.rb +71 -0
  40. data/lib/fairy/client/output-file.rb +64 -0
  41. data/lib/fairy/client/output-local-file.rb +60 -0
  42. data/lib/fairy/client/output-null.rb +47 -0
  43. data/lib/fairy/client/output-varray.rb +50 -0
  44. data/lib/fairy/client/output.rb +29 -0
  45. data/lib/fairy/client/roma-put.rb +62 -0
  46. data/lib/fairy/client/roma.rb +156 -0
  47. data/lib/fairy/client/seg-join.rb +61 -0
  48. data/lib/fairy/client/seg-map.rb +78 -0
  49. data/lib/fairy/client/seg-shuffle.rb +35 -0
  50. data/lib/fairy/client/seg-split.rb +27 -0
  51. data/lib/fairy/client/seg-zip.rb +60 -0
  52. data/lib/fairy/client/select.rb +38 -0
  53. data/lib/fairy/client/sort.rb +48 -0
  54. data/lib/fairy/client/sort18.rb +56 -0
  55. data/lib/fairy/client/sort19.rb +61 -0
  56. data/lib/fairy/client/there.rb +47 -0
  57. data/lib/fairy/client/top_n_into_roma.rb +34 -0
  58. data/lib/fairy/client/wc.rb +92 -0
  59. data/lib/fairy/controller.rb +1103 -0
  60. data/lib/fairy/logger.rb +107 -0
  61. data/lib/fairy/master/addins.rb +20 -0
  62. data/lib/fairy/master/atom.rb +17 -0
  63. data/lib/fairy/master/c-barrier.rb +283 -0
  64. data/lib/fairy/master/c-basic-group-by.rb +250 -0
  65. data/lib/fairy/master/c-cat.rb +159 -0
  66. data/lib/fairy/master/c-direct-product.rb +203 -0
  67. data/lib/fairy/master/c-exec.rb +68 -0
  68. data/lib/fairy/master/c-filter.rb +422 -0
  69. data/lib/fairy/master/c-find.rb +138 -0
  70. data/lib/fairy/master/c-group-by.rb +64 -0
  71. data/lib/fairy/master/c-here.rb +80 -0
  72. data/lib/fairy/master/c-inject.rb +119 -0
  73. data/lib/fairy/master/c-input-file.rb +46 -0
  74. data/lib/fairy/master/c-input-iota.rb +66 -0
  75. data/lib/fairy/master/c-input-local-file.rb +117 -0
  76. data/lib/fairy/master/c-input-varray.rb +53 -0
  77. data/lib/fairy/master/c-input.rb +24 -0
  78. data/lib/fairy/master/c-inputtable.rb +31 -0
  79. data/lib/fairy/master/c-inputtable18.rb +36 -0
  80. data/lib/fairy/master/c-inputtable19.rb +35 -0
  81. data/lib/fairy/master/c-io-filter.rb +28 -0
  82. data/lib/fairy/master/c-junction.rb +54 -0
  83. data/lib/fairy/master/c-map.rb +27 -0
  84. data/lib/fairy/master/c-merge-group-by.rb +241 -0
  85. data/lib/fairy/master/c-output-file.rb +84 -0
  86. data/lib/fairy/master/c-output-local-file.rb +19 -0
  87. data/lib/fairy/master/c-output-null.rb +45 -0
  88. data/lib/fairy/master/c-output-varray.rb +57 -0
  89. data/lib/fairy/master/c-output.rb +20 -0
  90. data/lib/fairy/master/c-seg-join.rb +141 -0
  91. data/lib/fairy/master/c-seg-map.rb +26 -0
  92. data/lib/fairy/master/c-seg-shuffle.rb +87 -0
  93. data/lib/fairy/master/c-seg-split.rb +110 -0
  94. data/lib/fairy/master/c-seg-zip.rb +132 -0
  95. data/lib/fairy/master/c-select.rb +27 -0
  96. data/lib/fairy/master/c-sort.rb +108 -0
  97. data/lib/fairy/master/c-there.rb +57 -0
  98. data/lib/fairy/master/c-wc.rb +232 -0
  99. data/lib/fairy/master/job-interpriter.rb +19 -0
  100. data/lib/fairy/master/scheduler.rb +24 -0
  101. data/lib/fairy/master.rb +329 -0
  102. data/lib/fairy/node/addins.rb +19 -0
  103. data/lib/fairy/node/p-barrier.rb +95 -0
  104. data/lib/fairy/node/p-basic-group-by.rb +252 -0
  105. data/lib/fairy/node/p-direct-product.rb +153 -0
  106. data/lib/fairy/node/p-exec.rb +30 -0
  107. data/lib/fairy/node/p-filter.rb +363 -0
  108. data/lib/fairy/node/p-find.rb +111 -0
  109. data/lib/fairy/node/p-group-by.rb +1534 -0
  110. data/lib/fairy/node/p-here.rb +21 -0
  111. data/lib/fairy/node/p-identity.rb +24 -0
  112. data/lib/fairy/node/p-inject.rb +127 -0
  113. data/lib/fairy/node/p-input-file.rb +108 -0
  114. data/lib/fairy/node/p-input-iota.rb +39 -0
  115. data/lib/fairy/node/p-input-local-file.rb +61 -0
  116. data/lib/fairy/node/p-input-varray.rb +26 -0
  117. data/lib/fairy/node/p-io-filter.rb +28 -0
  118. data/lib/fairy/node/p-map.rb +40 -0
  119. data/lib/fairy/node/p-merger-group-by.rb +48 -0
  120. data/lib/fairy/node/p-output-file.rb +104 -0
  121. data/lib/fairy/node/p-output-local-file.rb +14 -0
  122. data/lib/fairy/node/p-output-null.rb +32 -0
  123. data/lib/fairy/node/p-output-varray.rb +41 -0
  124. data/lib/fairy/node/p-seg-join.rb +82 -0
  125. data/lib/fairy/node/p-seg-map.rb +34 -0
  126. data/lib/fairy/node/p-seg-split.rb +61 -0
  127. data/lib/fairy/node/p-seg-zip.rb +79 -0
  128. data/lib/fairy/node/p-select.rb +40 -0
  129. data/lib/fairy/node/p-single-exportable.rb +90 -0
  130. data/lib/fairy/node/p-sort.rb +195 -0
  131. data/lib/fairy/node/p-task.rb +113 -0
  132. data/lib/fairy/node/p-there.rb +44 -0
  133. data/lib/fairy/node/p-wc.rb +266 -0
  134. data/lib/fairy/node.rb +187 -0
  135. data/lib/fairy/processor.rb +510 -0
  136. data/lib/fairy/share/base-app.rb +114 -0
  137. data/lib/fairy/share/block-source.rb +234 -0
  138. data/lib/fairy/share/conf.rb +396 -0
  139. data/lib/fairy/share/debug.rb +21 -0
  140. data/lib/fairy/share/encoding.rb +17 -0
  141. data/lib/fairy/share/fast-tempfile.rb +93 -0
  142. data/lib/fairy/share/file-place.rb +176 -0
  143. data/lib/fairy/share/hash-1.rb +20 -0
  144. data/lib/fairy/share/hash-md5.rb +28 -0
  145. data/lib/fairy/share/hash-murmur.rb +69 -0
  146. data/lib/fairy/share/hash-rb18.rb +20 -0
  147. data/lib/fairy/share/hash-simple-hash.rb +28 -0
  148. data/lib/fairy/share/inspector.rb +16 -0
  149. data/lib/fairy/share/lc/exceptions.rb +82 -0
  150. data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
  151. data/lib/fairy/share/locale.rb +17 -0
  152. data/lib/fairy/share/log.rb +215 -0
  153. data/lib/fairy/share/pool-dictionary.rb +53 -0
  154. data/lib/fairy/share/port-marshaled-queue.rb +347 -0
  155. data/lib/fairy/share/port.rb +1697 -0
  156. data/lib/fairy/share/reference.rb +45 -0
  157. data/lib/fairy/share/stdout.rb +56 -0
  158. data/lib/fairy/share/tr.rb +16 -0
  159. data/lib/fairy/share/varray.rb +147 -0
  160. data/lib/fairy/share/vfile.rb +183 -0
  161. data/lib/fairy/version.rb +8 -0
  162. data/lib/fairy.rb +206 -0
  163. data/sample/grep.rb +46 -0
  164. data/sample/ping.rb +19 -0
  165. data/sample/sort.rb +102 -0
  166. data/sample/wordcount.rb +61 -0
  167. data/spec/README +12 -0
  168. data/spec/fairy1_spec.rb +31 -0
  169. data/spec/fairy2_spec.rb +42 -0
  170. data/spec/fairy3_spec.rb +126 -0
  171. data/spec/fairy4_spec.rb +63 -0
  172. data/spec/fairy5_spec.rb +45 -0
  173. data/spec/fairy6_spec.rb +52 -0
  174. data/spec/fairy7_spec.rb +58 -0
  175. data/spec/fairy8_spec.rb +48 -0
  176. data/spec/mkdat.rb +148 -0
  177. data/spec/run_all.sh +65 -0
  178. data/test/testc.rb +7111 -0
  179. data/tools/cap_recipe/Capfile +144 -0
  180. data/tools/cap_recipe/cluster.yml.sample +14 -0
  181. data/tools/fairy_perf_graph.rb +444 -0
  182. data/tools/git-tag +44 -0
  183. data/tools/log-analysis.rb +62 -0
  184. data/tools/svn-ls-diff +38 -0
  185. data/tools/svn-tags +37 -0
  186. metadata +298 -0
@@ -0,0 +1,8 @@
1
+ #
2
+ # Do not modify this file.
3
+ # This file is auto-generated.
4
+ #
5
+ module Fairy # top-level namespace of the fairy library
6
+ Version = "0.6.0-001" # version string of this release; the client writes it to the log when it connects
7
+ end
8
+
data/lib/fairy.rb ADDED
@@ -0,0 +1,206 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright (C) 2007-2010 Rakuten, Inc.
4
+ #
5
+
6
+ require "thread"
7
+
8
+ require "deep-connect"
9
+
10
+ require "fairy/version"
11
+ require "fairy/share/conf"
12
+ require "fairy/share/log"
13
+ require "fairy/share/locale"
14
+ require "fairy/share/encoding"
15
+
16
+ module Fairy
17
+ Conf.configure_common_conf
18
+
19
+ @USER_LEVEL_FILTERS = {}
20
+
21
+ def Fairy::def_filter(name, opts={}, &definition)
22
+ name = name.intern if name.kind_of?(String)
23
+ @USER_LEVEL_FILTERS[name] = definition
24
+
25
+ interface_mod = Module.new
26
+
27
+ if !opts[:sub]
28
+ interface_mod.module_eval %{
29
+ def #{name}(*args)
30
+ p = ::Fairy::user_level_filter(:#{name})
31
+ ::Fairy::ERR.Raise ::Fairy::ERR::INTERNAL::NoSuchDefiledUserLevelFilter, name unless p
32
+ p.call(@fairy, self, *args)
33
+ end
34
+ }
35
+ else
36
+ interface_mod.module_eval %{
37
+ def #{name}(*args)
38
+ p = ::Fairy::user_level_filter(:#{name})
39
+ ::Fairy::ERR.Raise ::Fairy::ERR::INTERNAL::NoSuchDefiledUserLevelFilter, name unless p
40
+ sub{|subf, input| p.call(subf, input, *args)}
41
+ end
42
+ }
43
+ end
44
+ Fairy.def_filter_interface interface_mod
45
+ end
46
+
47
+ def Fairy::user_level_filter(name)
48
+ @USER_LEVEL_FILTERS[name]
49
+ end
50
+
51
+ class Fairy
52
+
53
+ def self.create_subfairy(fairy)
54
+ subfairy = Fairy.allocate
55
+ subfairy.initialize_subfairy(fairy)
56
+ subfairy
57
+ end
58
+
59
+ def initialize(master_host = CONF.MASTER_HOST,
60
+ master_port = CONF.MASTER_PORT,
61
+ opts = {})
62
+
63
+ if master_host.kind_of?(Hash)
64
+ opts = master_host
65
+ master_host = CONF.MASTER_HOST
66
+ master_port = CONF.MASTER_PORT
67
+ end
68
+
69
+ ::Fairy::REPLACE_CONF(Conf.new(CONF, opts))
70
+
71
+ Thread.abort_on_exception = CONF.DEBUG_THREAD_ABORT_ON_EXCEPTION
72
+
73
+ @name2backend_class = {}
74
+
75
+ @deep_connect = DeepConnect.start(0)
76
+ @master_deepspace = @deep_connect.open_deepspace(master_host, master_port)
77
+ @master = @master_deepspace.import("Master")
78
+
79
+ @controller = @master.assgin_controller
80
+ @controller.connect(self, CONF)
81
+
82
+ @logger = @master.logger
83
+ Log.type = "[c]"
84
+ Log.pid = @controller.id
85
+ Log.logger = @logger
86
+ Log.set_local_output_dev
87
+ Log::info self, "fairy connected!!"
88
+ Log::info self, "\tfairy version: #{Version}"
89
+ Log::info(self, "\t[Powered By #{RUBY_DESCRIPTION}]")
90
+
91
+ @stdout_mutex = Mutex.new
92
+
93
+ if CONF.DEBUG_MONITOR_ON
94
+ Log::info self, "MONITOR NODE: ON"
95
+ require "fairy/share/debug"
96
+ Debug::njob_status_monitor_on(self)
97
+ end
98
+ end
99
+
100
+ def initialize_subfairy(fairy)
101
+ @name2backend_class = {}
102
+ @deep_connect = fairy.instance_eval{@deep_connect}
103
+ @master_deepspace = fairy.instance_eval{@master_deepspace}
104
+ @master = fairy.instance_eval{@master}
105
+
106
+ @controller = @master.assgin_controller
107
+ @controller.connect(self, CONF)
108
+
109
+ # Logは親と共有される
110
+ # なので, IDは親と同じになる(process idなので当たり前)
111
+
112
+ @stdout_mutex = fairy.instance_eval{@stdout_mutex}
113
+ end
114
+
115
+ attr_reader :controller
116
+
117
+ def abort
118
+ @master.terminate_controller(@controller)
119
+ end
120
+
121
+ def name2backend_class(backend_class_name)
122
+ if klass = @name2backend_class[backend_class_name]
123
+ return klass
124
+ end
125
+
126
+ if klass = @controller.import(backend_class_name)
127
+ @name2backend_class[backend_class_name] = klass
128
+ end
129
+ klass
130
+ end
131
+
132
+ # pool variables
133
+ def def_pool_variable(vname, value = nil)
134
+ @controller.def_pool_variable(vname, value)
135
+ end
136
+
137
+ def pool_variable(vname, *value)
138
+ @controller.pool_variable(vname, *value)
139
+ end
140
+
141
+ # exception handling
142
+ def handle_exception(exp)
143
+ local_exp = nil
144
+ Log::debug(self, "exception raised: #{exp.class}")
145
+ Log::debug_exception(self, exp)
146
+ begin
147
+ local_exp = exp.dc_deep_copy
148
+ rescue Exception
149
+ Thread.main.raise exp
150
+ raise exp
151
+ end
152
+ Thread.main.raise local_exp
153
+ nil
154
+ end
155
+
156
+ # debug print
157
+ def stdout_write(str)
158
+ @stdout_mutex.synchronize do
159
+ $stdout.write(str)
160
+ end
161
+ end
162
+
163
+ # external module loading
164
+ def self.def_fairy_interface(mod)
165
+ include mod
166
+ end
167
+ end
168
+
169
+ class FilterChain
170
+ def initialize(input)
171
+ @filters = [input]
172
+ end
173
+
174
+ def [](idx)
175
+ @filters[idx]
176
+ end
177
+
178
+ def show_chain
179
+ @filters.each_with_index{|f, idx|
180
+ puts "[#{idx}]\t#{f.class}"
181
+ }
182
+ end
183
+
184
+ def method_missing(msg, *args, &block)
185
+ #pp msg
186
+ ret = @filters.last.__send__(msg, *args, &block)
187
+ if ret.kind_of?(Job)
188
+ @filters << ret
189
+ self
190
+ else
191
+ ret
192
+ end
193
+ end
194
+ end
195
+
196
+ def def_fairy_interface(mod)
197
+ ::Fairy::Fairy.instance_eval{include mod}
198
+ end
199
+ module_function :def_fairy_interface
200
+ end
201
+
202
+ require "fairy/client/filter"
203
+ require "fairy/client/input"
204
+
205
+ require "fairy/client/addins"
206
+
data/sample/grep.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+
10
+
11
# Sample: grep-like job. Reads INPUT through fairy, keeps the lines matching
# PATTERN and writes them to OUTPUT.
if ARGV.size != 3
  $stderr.puts "Usage: #{File.basename($0)} PATTERN INPUT OUTPUT"
  exit(1)
end

pattern_text, input_path, output_path = ARGV

# NOTE(review): the pattern argument is evaluated as the body of a
# double-quoted Ruby string literal, so backslash escapes and interpolation
# in it are processed and a double quote ends the literal -- i.e. the
# command line can execute arbitrary Ruby. Kept as is because replacing the
# eval changes how existing pattern arguments are interpreted.
pattern = Regexp.new(eval(%{ "#{pattern_text}" }))

raise "Input doesn't exist." unless FileTest.exist?(input_path)

# Refuse to write over an existing path whose name ends in ".vf".
if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
  raise "Cannot override an existing VFile. Please confirm."
end

started_at = Time.now

puts "[#{$$}] START: #{started_at}"
puts "[#{$$}] pattern: #{pattern}"
puts "[#{$$}] input: #{input_path}"
puts "[#{$$}] output: #{output_path}"

session = Fairy::Fairy.new

source = session.input(input_path)
matched = source.grep(pattern, :ignore_exception => true)
matched.output(output_path)

finished_at = Time.now
puts "[#{$$}] DONE: #{finished_at} (#{finished_at-started_at} sec)"
45
+
46
+
data/sample/ping.rb ADDED
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+ require 'yaml'
10
+
11
# Sample: asks every node listed under 'nodes' in the Capistrano cluster file
# to report its host name and address, and prints each answer locally.
recipe_dir = File.dirname(__FILE__) + "/../tools/cap_recipe"
cluster = YAML.load_file(File.expand_path(recipe_dir + "/cluster.yml"))

node_uris = cluster['nodes'].map{|node| "file://#{node}/"}

session = Fairy::Fairy.new
targets = session.exec(node_uris)
# The block source is passed as a string; the backticks in it are part of
# that source text and are not executed by this script itself.
replies = targets.map(%q{|uri| "#{`hostname -f`.chomp} (#{`hostname -i`.chomp}) is alive."})
replies.here.each{|reply| puts reply}
18
+
19
+
data/sample/sort.rb ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+ require 'optparse'
10
+
11
+
12
# Sample: sort-like job. Splits every input line with a separator, picks the
# field at the key position and writes the lines to OUTPUT ordered by it.

# Defaults: key position 0, fields separated by runs of whitespace.
opt = {:k => 0, :t => /\s+/}

op = OptionParser.new
op.on('-k', '--key=POS', Integer) {|pos| opt[:k] = pos}
op.on('-n', '--numeric-sort') {|flag| opt[:n] = flag}
op.on('-r', '--reverse', nil, "This must be used with -n.") {|flag| opt[:r] = flag}
op.on('-t', '--separator=SEPARATOR') {|separator| opt[:t] = separator}
op.parse!(ARGV)

if ARGV.size != 2
  $stderr.puts op.to_s.sub(/ *\n/, "... INPUT OUTPUT\n")
  exit(1)
end

input_path, output_path = ARGV

raise "Input doesn't exist." unless FileTest.exist?(input_path)

# Refuse to write over an existing path whose name ends in ".vf".
if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
  raise "Cannot override an existing VFile. Please confirm."
end

raise "Cannot specify -r option without -n." if opt[:r] && !opt[:n]

# Build the source text of the separator regexp.
sep =
  if opt[:t].is_a?(Regexp)
    # Default separator: no -t was given.
    opt[:t].to_s
  else
    # NOTE(review): the -t value is evaluated as the body of a double-quoted
    # Ruby string literal, so escapes and interpolation in it are processed
    # and the command line can execute arbitrary Ruby. Kept as is because
    # replacing the eval changes how existing separator arguments are read.
    str = eval(%{ "#{opt[:t]}" })
    if str.size == 1 && str != "\\"
      # Single character: match it only when not preceded by a backslash.
      (/(?<!\\)#{str}/o).to_s
    else
      str
    end
  end

started_at = Time.now

puts "[#{$$}] START: #{started_at}"
puts "[#{$$}] input: #{input_path}"
puts "[#{$$}] output: #{output_path}"
puts "[#{$$}] key: #{opt[:k]}"
puts "[#{$$}] separator: #{sep}"
puts "[#{$$}] num-sort: ON" if opt[:n]
puts "[#{$$}] reverse: ON" if opt[:r]


session = Fairy::Fairy.new

# Lines whose key extraction raises are collected here instead of aborting.
session.def_pool_variable(:errors, :block => %{Array.new})

source = session.input(input_path)
keyed = source.map(%{|ln|
  @sep_re ||= Regexp.new(#{sep.inspect})
  begin
    sort_key = ln.split(@sep_re)[#{opt[:k]}]
    [sort_key, ln]
  rescue => e
    @Pool.errors.push([e.message, ln])
    Import::TOKEN_NULLVALUE
  end
})

# Pick the sort-key block source: numeric descending, numeric, or plain.
key_block =
  if opt[:n] && opt[:r]
    %{|ary| -ary[0].to_i}
  elsif opt[:n]
    %{|ary| ary[0].to_i}
  else
    %{|ary| ary[0]}
  end
sorted = keyed.sort_by(key_block)

# Drop the key again and write the original lines.
lines_only = sorted.map(%{|ary| ary[1]})
lines_only.output(output_path)

unless session.pool_variable(:errors).size.zero?
  failures = session.pool_variable(:errors)
  puts "[#{$$}] WARN: #{failures.size} error(s) occurred."
  failures.each_with_index{|failure, idx|
    p [idx+1] + failure
  }
end

finished_at = Time.now
puts "[#{$$}] DONE: #{finished_at} (#{finished_at-started_at} sec)"
101
+
102
+
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ #
4
+ # Copyright (C) 2007-2010 Rakuten, Inc.
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'fairy'
9
+
10
+
11
# Sample: word count. Splits every input line into words, groups equal words
# and writes one "word<TAB>count" line per group to OUTPUT.
unless ARGV.size == 2
  $stderr.puts "Usage: #{File.basename($0)} INPUT OUTPUT"
  # Stop here: without this the script would carry on with missing paths and
  # fail later with an unrelated error.
  exit(1)
end

input_path = ARGV[0]
output_path = ARGV[1]

unless FileTest.exist?(input_path)
  raise "Input doesn't exist."
end

# Refuse to write over an existing path whose name ends in ".vf".
if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
  raise "Cannot override an existing VFile. Please confirm."
end

t0 = Time.now

puts "[#{$$}] START: #{t0}"
puts "[#{$$}] input: #{input_path}"
puts "[#{$$}] output: #{output_path}"

fairy = Fairy::Fairy.new

# Lines that cannot be split are collected here instead of aborting the job.
fairy.def_pool_variable(:errors, :block => %{Array.new})

input = fairy.input input_path
maped = input.mapf(%{|ln|
  begin
    ln.split
  rescue => e
    @Pool.errors.push([e.message, ln])
    Import::TOKEN_NULLVALUE
  end
})
grouped = maped.group_by(%{|w| w})
counted = grouped.map(%q{|bag| "#{bag.key}\t#{bag.size}"})
counted.output output_path

# Report collected failures, numbered from 1.
unless fairy.pool_variable(:errors).size.zero?
  err = fairy.pool_variable(:errors)
  puts "[#{$$}] WARN: #{err.size} error(s) occurred."
  err.each_with_index{|e,i|
    p [i+1] + e
  }
end

t1 = Time.now
puts "[#{$$}] DONE: #{t1} (#{t1-t0} sec)"
60
+
61
+