fairy 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +674 -0
- data/Makefile +116 -0
- data/README +15 -0
- data/bin/fairy +582 -0
- data/bin/fairy-cat +74 -0
- data/bin/fairy-cp +128 -0
- data/bin/fairy-rm +122 -0
- data/bin/subcmd/controller +41 -0
- data/bin/subcmd/inspector +81 -0
- data/bin/subcmd/master +43 -0
- data/bin/subcmd/node +47 -0
- data/bin/subcmd/processor +54 -0
- data/doc/programming-interface.html +240 -0
- data/doc/programming-interface.rd +300 -0
- data/etc/fairy.conf.tmpl +118 -0
- data/ext/simple_hash/extconf.rb +4 -0
- data/ext/simple_hash/simple_hash.c +42 -0
- data/fairy.gemspec +60 -0
- data/lib/fairy/client/addins.rb +20 -0
- data/lib/fairy/client/barrier.rb +29 -0
- data/lib/fairy/client/basic-group-by.rb +52 -0
- data/lib/fairy/client/cat.rb +41 -0
- data/lib/fairy/client/direct-product.rb +51 -0
- data/lib/fairy/client/equijoin.rb +79 -0
- data/lib/fairy/client/exec.rb +54 -0
- data/lib/fairy/client/filter.rb +62 -0
- data/lib/fairy/client/find.rb +35 -0
- data/lib/fairy/client/group-by.rb +194 -0
- data/lib/fairy/client/here.rb +84 -0
- data/lib/fairy/client/inject.rb +70 -0
- data/lib/fairy/client/input-file.rb +53 -0
- data/lib/fairy/client/input-iota.rb +49 -0
- data/lib/fairy/client/input-local-file.rb +188 -0
- data/lib/fairy/client/input-varray.rb +30 -0
- data/lib/fairy/client/input.rb +42 -0
- data/lib/fairy/client/io-filter.rb +26 -0
- data/lib/fairy/client/junction.rb +31 -0
- data/lib/fairy/client/map.rb +34 -0
- data/lib/fairy/client/merge-group-by.rb +71 -0
- data/lib/fairy/client/output-file.rb +64 -0
- data/lib/fairy/client/output-local-file.rb +60 -0
- data/lib/fairy/client/output-null.rb +47 -0
- data/lib/fairy/client/output-varray.rb +50 -0
- data/lib/fairy/client/output.rb +29 -0
- data/lib/fairy/client/roma-put.rb +62 -0
- data/lib/fairy/client/roma.rb +156 -0
- data/lib/fairy/client/seg-join.rb +61 -0
- data/lib/fairy/client/seg-map.rb +78 -0
- data/lib/fairy/client/seg-shuffle.rb +35 -0
- data/lib/fairy/client/seg-split.rb +27 -0
- data/lib/fairy/client/seg-zip.rb +60 -0
- data/lib/fairy/client/select.rb +38 -0
- data/lib/fairy/client/sort.rb +48 -0
- data/lib/fairy/client/sort18.rb +56 -0
- data/lib/fairy/client/sort19.rb +61 -0
- data/lib/fairy/client/there.rb +47 -0
- data/lib/fairy/client/top_n_into_roma.rb +34 -0
- data/lib/fairy/client/wc.rb +92 -0
- data/lib/fairy/controller.rb +1103 -0
- data/lib/fairy/logger.rb +107 -0
- data/lib/fairy/master/addins.rb +20 -0
- data/lib/fairy/master/atom.rb +17 -0
- data/lib/fairy/master/c-barrier.rb +283 -0
- data/lib/fairy/master/c-basic-group-by.rb +250 -0
- data/lib/fairy/master/c-cat.rb +159 -0
- data/lib/fairy/master/c-direct-product.rb +203 -0
- data/lib/fairy/master/c-exec.rb +68 -0
- data/lib/fairy/master/c-filter.rb +422 -0
- data/lib/fairy/master/c-find.rb +138 -0
- data/lib/fairy/master/c-group-by.rb +64 -0
- data/lib/fairy/master/c-here.rb +80 -0
- data/lib/fairy/master/c-inject.rb +119 -0
- data/lib/fairy/master/c-input-file.rb +46 -0
- data/lib/fairy/master/c-input-iota.rb +66 -0
- data/lib/fairy/master/c-input-local-file.rb +117 -0
- data/lib/fairy/master/c-input-varray.rb +53 -0
- data/lib/fairy/master/c-input.rb +24 -0
- data/lib/fairy/master/c-inputtable.rb +31 -0
- data/lib/fairy/master/c-inputtable18.rb +36 -0
- data/lib/fairy/master/c-inputtable19.rb +35 -0
- data/lib/fairy/master/c-io-filter.rb +28 -0
- data/lib/fairy/master/c-junction.rb +54 -0
- data/lib/fairy/master/c-map.rb +27 -0
- data/lib/fairy/master/c-merge-group-by.rb +241 -0
- data/lib/fairy/master/c-output-file.rb +84 -0
- data/lib/fairy/master/c-output-local-file.rb +19 -0
- data/lib/fairy/master/c-output-null.rb +45 -0
- data/lib/fairy/master/c-output-varray.rb +57 -0
- data/lib/fairy/master/c-output.rb +20 -0
- data/lib/fairy/master/c-seg-join.rb +141 -0
- data/lib/fairy/master/c-seg-map.rb +26 -0
- data/lib/fairy/master/c-seg-shuffle.rb +87 -0
- data/lib/fairy/master/c-seg-split.rb +110 -0
- data/lib/fairy/master/c-seg-zip.rb +132 -0
- data/lib/fairy/master/c-select.rb +27 -0
- data/lib/fairy/master/c-sort.rb +108 -0
- data/lib/fairy/master/c-there.rb +57 -0
- data/lib/fairy/master/c-wc.rb +232 -0
- data/lib/fairy/master/job-interpriter.rb +19 -0
- data/lib/fairy/master/scheduler.rb +24 -0
- data/lib/fairy/master.rb +329 -0
- data/lib/fairy/node/addins.rb +19 -0
- data/lib/fairy/node/p-barrier.rb +95 -0
- data/lib/fairy/node/p-basic-group-by.rb +252 -0
- data/lib/fairy/node/p-direct-product.rb +153 -0
- data/lib/fairy/node/p-exec.rb +30 -0
- data/lib/fairy/node/p-filter.rb +363 -0
- data/lib/fairy/node/p-find.rb +111 -0
- data/lib/fairy/node/p-group-by.rb +1534 -0
- data/lib/fairy/node/p-here.rb +21 -0
- data/lib/fairy/node/p-identity.rb +24 -0
- data/lib/fairy/node/p-inject.rb +127 -0
- data/lib/fairy/node/p-input-file.rb +108 -0
- data/lib/fairy/node/p-input-iota.rb +39 -0
- data/lib/fairy/node/p-input-local-file.rb +61 -0
- data/lib/fairy/node/p-input-varray.rb +26 -0
- data/lib/fairy/node/p-io-filter.rb +28 -0
- data/lib/fairy/node/p-map.rb +40 -0
- data/lib/fairy/node/p-merger-group-by.rb +48 -0
- data/lib/fairy/node/p-output-file.rb +104 -0
- data/lib/fairy/node/p-output-local-file.rb +14 -0
- data/lib/fairy/node/p-output-null.rb +32 -0
- data/lib/fairy/node/p-output-varray.rb +41 -0
- data/lib/fairy/node/p-seg-join.rb +82 -0
- data/lib/fairy/node/p-seg-map.rb +34 -0
- data/lib/fairy/node/p-seg-split.rb +61 -0
- data/lib/fairy/node/p-seg-zip.rb +79 -0
- data/lib/fairy/node/p-select.rb +40 -0
- data/lib/fairy/node/p-single-exportable.rb +90 -0
- data/lib/fairy/node/p-sort.rb +195 -0
- data/lib/fairy/node/p-task.rb +113 -0
- data/lib/fairy/node/p-there.rb +44 -0
- data/lib/fairy/node/p-wc.rb +266 -0
- data/lib/fairy/node.rb +187 -0
- data/lib/fairy/processor.rb +510 -0
- data/lib/fairy/share/base-app.rb +114 -0
- data/lib/fairy/share/block-source.rb +234 -0
- data/lib/fairy/share/conf.rb +396 -0
- data/lib/fairy/share/debug.rb +21 -0
- data/lib/fairy/share/encoding.rb +17 -0
- data/lib/fairy/share/fast-tempfile.rb +93 -0
- data/lib/fairy/share/file-place.rb +176 -0
- data/lib/fairy/share/hash-1.rb +20 -0
- data/lib/fairy/share/hash-md5.rb +28 -0
- data/lib/fairy/share/hash-murmur.rb +69 -0
- data/lib/fairy/share/hash-rb18.rb +20 -0
- data/lib/fairy/share/hash-simple-hash.rb +28 -0
- data/lib/fairy/share/inspector.rb +16 -0
- data/lib/fairy/share/lc/exceptions.rb +82 -0
- data/lib/fairy/share/lc/ja/exceptions.rb +81 -0
- data/lib/fairy/share/locale.rb +17 -0
- data/lib/fairy/share/log.rb +215 -0
- data/lib/fairy/share/pool-dictionary.rb +53 -0
- data/lib/fairy/share/port-marshaled-queue.rb +347 -0
- data/lib/fairy/share/port.rb +1697 -0
- data/lib/fairy/share/reference.rb +45 -0
- data/lib/fairy/share/stdout.rb +56 -0
- data/lib/fairy/share/tr.rb +16 -0
- data/lib/fairy/share/varray.rb +147 -0
- data/lib/fairy/share/vfile.rb +183 -0
- data/lib/fairy/version.rb +8 -0
- data/lib/fairy.rb +206 -0
- data/sample/grep.rb +46 -0
- data/sample/ping.rb +19 -0
- data/sample/sort.rb +102 -0
- data/sample/wordcount.rb +61 -0
- data/spec/README +12 -0
- data/spec/fairy1_spec.rb +31 -0
- data/spec/fairy2_spec.rb +42 -0
- data/spec/fairy3_spec.rb +126 -0
- data/spec/fairy4_spec.rb +63 -0
- data/spec/fairy5_spec.rb +45 -0
- data/spec/fairy6_spec.rb +52 -0
- data/spec/fairy7_spec.rb +58 -0
- data/spec/fairy8_spec.rb +48 -0
- data/spec/mkdat.rb +148 -0
- data/spec/run_all.sh +65 -0
- data/test/testc.rb +7111 -0
- data/tools/cap_recipe/Capfile +144 -0
- data/tools/cap_recipe/cluster.yml.sample +14 -0
- data/tools/fairy_perf_graph.rb +444 -0
- data/tools/git-tag +44 -0
- data/tools/log-analysis.rb +62 -0
- data/tools/svn-ls-diff +38 -0
- data/tools/svn-tags +37 -0
- metadata +298 -0
data/lib/fairy.rb
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "thread"
|
7
|
+
|
8
|
+
require "deep-connect"
|
9
|
+
|
10
|
+
require "fairy/version"
|
11
|
+
require "fairy/share/conf"
|
12
|
+
require "fairy/share/log"
|
13
|
+
require "fairy/share/locale"
|
14
|
+
require "fairy/share/encoding"
|
15
|
+
|
16
|
+
module Fairy
|
17
|
+
Conf.configure_common_conf
|
18
|
+
|
19
|
+
@USER_LEVEL_FILTERS = {}
|
20
|
+
|
21
|
+
# Registers a user-level filter and publishes a method of the same name
# through Fairy.def_filter_interface.
#
# name       - filter name; a String is interned to a Symbol.
# opts       - option Hash. When opts[:sub] is truthy the generated method
#              invokes the definition inside `sub{|subf, input| ...}`;
#              otherwise it calls the definition with (@fairy, self, *args).
# definition - block stored in @USER_LEVEL_FILTERS under +name+.
#
# Returns the result of Fairy.def_filter_interface.
def Fairy::def_filter(name, opts={}, &definition)
  name = name.intern if name.kind_of?(String)
  @USER_LEVEL_FILTERS[name] = definition

  # The two generated variants differ only in how the stored proc is invoked.
  invoke =
    if opts[:sub]
      "sub{|subf, input| p.call(subf, input, *args)}"
    else
      "p.call(@fairy, self, *args)"
    end

  interface_mod = Module.new
  # The filter name is interpolated into the error call as well: a bare
  # `name` inside the generated method would not refer to the filter name.
  interface_mod.module_eval %{
    def #{name}(*args)
      p = ::Fairy::user_level_filter(:#{name})
      ::Fairy::ERR.Raise ::Fairy::ERR::INTERNAL::NoSuchDefiledUserLevelFilter, :#{name} unless p
      #{invoke}
    end
  }
  Fairy.def_filter_interface interface_mod
end
|
46
|
+
|
47
|
+
# Looks up the block registered under +name+ by Fairy::def_filter.
#
# Returns the stored definition, or nil when nothing is registered.
def Fairy::user_level_filter(name)
  @USER_LEVEL_FILTERS[name]
end
|
50
|
+
|
51
|
+
# Client-side handle on a fairy session. An instance holds the DeepConnect
# connection to the master, the controller assigned by the master, and a
# cache of backend classes imported from that controller.
class Fairy

  # Builds a Fairy without running #initialize and attaches it to the
  # connection objects of an existing +fairy+ (see #initialize_subfairy).
  def self.create_subfairy(fairy)
    subfairy = Fairy.allocate
    subfairy.initialize_subfairy(fairy)
    subfairy
  end

  # Connects to the master and obtains a controller.
  #
  # master_host - master host name, or an options Hash (see below).
  # master_port - master port.
  # opts        - configuration overrides layered on top of CONF.
  def initialize(master_host = CONF.MASTER_HOST,
                 master_port = CONF.MASTER_PORT,
                 opts = {})

    # Fairy.new(opts) form: a Hash in first position is the option set and
    # host/port fall back to the configured defaults.
    if master_host.kind_of?(Hash)
      opts = master_host
      master_host = CONF.MASTER_HOST
      master_port = CONF.MASTER_PORT
    end

    ::Fairy::REPLACE_CONF(Conf.new(CONF, opts))

    Thread.abort_on_exception = CONF.DEBUG_THREAD_ABORT_ON_EXCEPTION

    @name2backend_class = {}

    @deep_connect = DeepConnect.start(0)
    @master_deepspace = @deep_connect.open_deepspace(master_host, master_port)
    @master = @master_deepspace.import("Master")

    # `assgin_controller` is the method name exposed by the master; the
    # spelling must be kept as is.
    @controller = @master.assgin_controller
    @controller.connect(self, CONF)

    @logger = @master.logger
    Log.type = "[c]"
    Log.pid = @controller.id
    Log.logger = @logger
    Log.set_local_output_dev
    Log::info self, "fairy connected!!"
    Log::info self, "\tfairy version: #{Version}"
    Log::info(self, "\t[Powered By #{RUBY_DESCRIPTION}]")

    @stdout_mutex = Mutex.new

    if CONF.DEBUG_MONITOR_ON
      Log::info self, "MONITOR NODE: ON"
      # Loaded lazily: only needed when monitoring is switched on.
      require "fairy/share/debug"
      Debug::njob_status_monitor_on(self)
    end
  end

  # Initializes a sub-fairy: reuses the parent's DeepConnect objects, master
  # reference and stdout mutex, but asks the master for a new controller.
  def initialize_subfairy(fairy)
    @name2backend_class = {}
    @deep_connect = fairy.instance_eval{@deep_connect}
    @master_deepspace = fairy.instance_eval{@master_deepspace}
    @master = fairy.instance_eval{@master}

    @controller = @master.assgin_controller
    @controller.connect(self, CONF)

    # Log is shared with the parent, so the ID is the same as the parent's
    # (naturally, since it is the process id).

    @stdout_mutex = fairy.instance_eval{@stdout_mutex}
  end

  attr_reader :controller

  # Asks the master to terminate this fairy's controller.
  def abort
    @master.terminate_controller(@controller)
  end

  # Returns the backend class for +backend_class_name+, importing it from the
  # controller on first use. Only truthy import results are cached.
  def name2backend_class(backend_class_name)
    if klass = @name2backend_class[backend_class_name]
      return klass
    end

    if klass = @controller.import(backend_class_name)
      @name2backend_class[backend_class_name] = klass
    end
    klass
  end

  # pool variables

  # Forwards the pool-variable definition to the controller.
  def def_pool_variable(vname, value = nil)
    @controller.def_pool_variable(vname, value)
  end

  # Forwards a pool-variable access to the controller; any extra arguments
  # are passed through unchanged.
  def pool_variable(vname, *value)
    @controller.pool_variable(vname, *value)
  end

  # exception handling

  # Logs +exp+ and raises it in the main thread. A deep copy obtained via
  # dc_deep_copy is raised when the copy succeeds; when copying fails the
  # original object is raised in the main thread and in the current one.
  def handle_exception(exp)
    Log::debug(self, "exception raised: #{exp.class}")
    Log::debug_exception(self, exp)
    begin
      local_exp = exp.dc_deep_copy
    rescue Exception
      # NOTE(review): rescuing Exception looks deliberate here (any failure
      # to copy falls back to the original exception) -- confirm.
      Thread.main.raise exp
      raise exp
    end
    Thread.main.raise local_exp
    nil
  end

  # debug print

  # Writes +str+ to $stdout while holding the shared stdout mutex.
  def stdout_write(str)
    @stdout_mutex.synchronize do
      $stdout.write(str)
    end
  end

  # external module loading

  # Mixes +mod+ into Fairy.
  def self.def_fairy_interface(mod)
    include mod
  end
end
|
168
|
+
|
169
|
+
# Records the sequence of filters produced by chained calls. Unknown methods
# are forwarded to the most recent filter; when the result is a Job it is
# appended to the chain and the chain itself is returned so calls can
# continue to be chained.
class FilterChain
  # input - the first element of the chain.
  def initialize(input)
    @filters = [input]
  end

  # Returns the chain element at +idx+.
  def [](idx)
    @filters[idx]
  end

  # Prints one "[index]<TAB>class" line per chain element.
  def show_chain
    @filters.each_with_index{|f, idx|
      puts "[#{idx}]\t#{f.class}"
    }
  end

  # Forwards +msg+ to the last filter. A Job result extends the chain and
  # returns self; any other result is returned as is.
  def method_missing(msg, *args, &block)
    ret = @filters.last.__send__(msg, *args, &block)
    if ret.kind_of?(Job)
      @filters << ret
      self
    else
      ret
    end
  end

  # Keeps respond_to? consistent with the forwarding done in method_missing.
  def respond_to_missing?(msg, include_private = false)
    @filters.last.respond_to?(msg, include_private) || super
  end
end
|
195
|
+
|
196
|
+
# Mixes +mod+ into ::Fairy::Fairy. Exposed as a module function so it can
# be called as Fairy.def_fairy_interface(mod).
def def_fairy_interface(mod)
  ::Fairy::Fairy.instance_eval{include mod}
end
module_function :def_fairy_interface
|
200
|
+
end
|
201
|
+
|
202
|
+
require "fairy/client/filter"
|
203
|
+
require "fairy/client/input"
|
204
|
+
|
205
|
+
require "fairy/client/addins"
|
206
|
+
|
data/sample/grep.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
#
|
4
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'fairy'
|
9
|
+
|
10
|
+
|
11
|
+
# Sample: writes the lines of INPUT that match PATTERN to OUTPUT.

unless ARGV.size == 3
  $stderr.puts "Usage: #{File.basename($0)} PATTERN INPUT OUTPUT"
  exit(1)
end

# SECURITY NOTE(review): the pattern argument is interpolated into a
# double-quoted literal and passed to eval. Any Ruby code embedded in the
# argument is executed, so run this sample with trusted arguments only.
pattern = Regexp.new(eval %{ "#{ARGV[0]}" })

input_path = ARGV[1]
output_path = ARGV[2]

unless FileTest.exist?(input_path)
  raise "Input doesn't exist."
end

# Refuse to overwrite an existing output whose name ends in ".vf".
if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
  raise "Cannot override an existing VFile. Please confirm."
end


t0 = Time.now

puts "[#{$$}] START: #{t0}"
puts "[#{$$}] pattern: #{pattern}"
puts "[#{$$}] input: #{input_path}"
puts "[#{$$}] output: #{output_path}"

fairy = Fairy::Fairy.new

input = fairy.input input_path
greped = input.grep(pattern, :ignore_exception => true)
greped.output output_path

t1 = Time.now
puts "[#{$$}] DONE: #{t1} (#{t1-t0} sec)"
|
45
|
+
|
46
|
+
|
data/sample/ping.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
#
|
4
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'fairy'
|
9
|
+
require 'yaml'
|
10
|
+
|
11
|
+
# Sample: prints an "is alive" line for the nodes listed in the cap_recipe
# cluster definition.

# cluster.yml is resolved relative to this script.
yml_path = File.expand_path(File.dirname(__FILE__) + "/../tools/cap_recipe/cluster.yml")
cluster = YAML.load_file(yml_path)

fairy = Fairy::Fairy.new
# One "file://NODE/" URI per entry under the 'nodes' key.
input = fairy.exec(cluster['nodes'].map{|n| "file://#{n}/"})
# The block source is %q-quoted so the backticks and interpolation are
# evaluated where the block runs, not in this script.
map = input.map(%q{|uri| "#{`hostname -f`.chomp} (#{`hostname -i`.chomp}) is alive."})
map.here.each{|response| puts response}
|
18
|
+
|
19
|
+
|
data/sample/sort.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
#
|
4
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'fairy'
|
9
|
+
require 'optparse'
|
10
|
+
|
11
|
+
|
12
|
+
# Sample: sorts the lines of INPUT by one field and writes them to OUTPUT.

# Defaults: sort on field 0, fields separated by runs of whitespace.
opt = {:k => 0, :t => /\s+/}

op = OptionParser.new
op.on('-k', '--key=POS', Integer){|v| opt[:k] = v}
op.on('-n', '--numeric-sort'){|v| opt[:n] = v}
op.on('-r', '--reverse', nil, "This must be used with -n."){|v| opt[:r] = v}
op.on('-t', '--separator=SEPARATOR'){|v| opt[:t] = v}
op.parse!(ARGV)


unless ARGV.size == 2
  $stderr.puts op.to_s.sub(/ *\n/, "... INPUT OUTPUT\n")
  exit(1)
end

input_path = ARGV[0]
output_path = ARGV[1]

unless FileTest.exist?(input_path)
  raise "Input doesn't exist."
end

# Refuse to overwrite an existing output whose name ends in ".vf".
if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
  raise "Cannot override an existing VFile. Please confirm."
end

if opt[:r] && !opt[:n]
  raise "Cannot specify -r option without -n."
end

# Build the regexp source used to split each line.
if opt[:t].is_a? Regexp
  sep = opt[:t].to_s
else
  # SECURITY NOTE(review): the separator argument is interpolated into a
  # double-quoted literal and passed to eval. Any Ruby code embedded in the
  # argument is executed, so run this sample with trusted arguments only.
  str = eval %{ "#{opt[:t]}" }
  if str.size == 1 && str != "\\"
    # A single character is a literal separator that must not be preceded by
    # a backslash. It is escaped so that characters such as "|" or "." are
    # not interpreted as regexp metacharacters.
    sep = (/(?<!\\)#{Regexp.escape(str)}/).to_s
  else
    # Anything longer is used as regexp source as given.
    sep = str
  end
end

t0 = Time.now

puts "[#{$$}] START: #{t0}"
puts "[#{$$}] input: #{input_path}"
puts "[#{$$}] output: #{output_path}"
puts "[#{$$}] key: #{opt[:k]}"
puts "[#{$$}] separator: #{sep}"
puts "[#{$$}] num-sort: ON" if opt[:n]
puts "[#{$$}] reverse: ON" if opt[:r]


fairy = Fairy::Fairy.new

fairy.def_pool_variable(:errors, :block => %{Array.new})

input = fairy.input input_path
# Map each line to [sort_key, line]. The separator source and the key index
# are interpolated into the block source here; lines that fail are recorded
# in the :errors pool variable.
maped = input.map(%{|ln|
  @sep_re ||= Regexp.new(#{sep.inspect})
  begin
    sort_key = ln.split(@sep_re)[#{opt[:k]}]
    [sort_key, ln]
  rescue => e
    @Pool.errors.push([e.message, ln])
    Import::TOKEN_NULLVALUE
  end
})

if opt[:n] && opt[:r]
  sorted = maped.sort_by(%{|ary| -ary[0].to_i})
elsif opt[:n]
  sorted = maped.sort_by(%{|ary| ary[0].to_i})
else
  sorted = maped.sort_by(%{|ary| ary[0]})
end

# Drop the sort key again and keep only the original line.
formatted = sorted.map(%{|ary| ary[1]})
formatted.output output_path

unless fairy.pool_variable(:errors).size.zero?
  err = fairy.pool_variable(:errors)
  puts "[#{$$}] WARN: #{err.size} error(s) occurred."
  err.each_with_index{|e,i|
    p [i+1] + e
  }
end

t1 = Time.now
puts "[#{$$}] DONE: #{t1} (#{t1-t0} sec)"
|
101
|
+
|
102
|
+
|
data/sample/wordcount.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
#
|
4
|
+
# Copyright (C) 2007-2010 Rakuten, Inc.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'fairy'
|
9
|
+
|
10
|
+
|
11
|
+
# Sample: counts the words of INPUT and writes "word<TAB>count" lines to
# OUTPUT.

unless ARGV.size == 2
  $stderr.puts "Usage: #{File.basename($0)} INPUT OUTPUT"
  # Stop here: without both arguments the path checks below would fail on nil.
  exit(1)
end

input_path = ARGV[0]
output_path = ARGV[1]

unless FileTest.exist?(input_path)
  raise "Input doesn't exist."
end

# Refuse to overwrite an existing output whose name ends in ".vf".
if FileTest.exist?(output_path) && output_path.match(/\.vf\z/)
  raise "Cannot override an existing VFile. Please confirm."
end

t0 = Time.now

puts "[#{$$}] START: #{t0}"
puts "[#{$$}] input: #{input_path}"
puts "[#{$$}] output: #{output_path}"

fairy = Fairy::Fairy.new

fairy.def_pool_variable(:errors, :block => %{Array.new})

input = fairy.input input_path
# Split each line into words; lines that fail are recorded in the :errors
# pool variable.
maped = input.mapf(%{|ln|
  begin
    ln.split
  rescue => e
    @Pool.errors.push([e.message, ln])
    Import::TOKEN_NULLVALUE
  end
})
grouped = maped.group_by(%{|w| w})
counted = grouped.map(%q{|bag| "#{bag.key}\t#{bag.size}"})
counted.output output_path

unless fairy.pool_variable(:errors).size.zero?
  err = fairy.pool_variable(:errors)
  puts "[#{$$}] WARN: #{err.size} error(s) occurred."
  err.each_with_index{|e,i|
    p [i+1] + e
  }
end

t1 = Time.now
puts "[#{$$}] DONE: #{t1} (#{t1-t0} sec)"
|
60
|
+
|
61
|
+
|