scout-gear 7.1.0 → 7.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +29 -0
- data/VERSION +1 -1
- data/bin/scout +5 -1
- data/lib/rbbt-scout.rb +5 -0
- data/lib/scout/concurrent_stream.rb +6 -2
- data/lib/scout/config.rb +168 -0
- data/lib/scout/exceptions.rb +4 -3
- data/lib/scout/indiferent_hash/options.rb +1 -0
- data/lib/scout/indiferent_hash.rb +4 -2
- data/lib/scout/log/color.rb +3 -1
- data/lib/scout/log/progress/report.rb +1 -0
- data/lib/scout/log/progress/util.rb +1 -1
- data/lib/scout/log/progress.rb +5 -3
- data/lib/scout/log.rb +3 -2
- data/lib/scout/misc/monitor.rb +3 -0
- data/lib/scout/misc/system.rb +15 -0
- data/lib/scout/misc.rb +1 -0
- data/lib/scout/named_array.rb +68 -0
- data/lib/scout/open/stream.rb +38 -7
- data/lib/scout/path/find.rb +27 -3
- data/lib/scout/path/util.rb +7 -4
- data/lib/scout/persist/serialize.rb +7 -14
- data/lib/scout/persist.rb +21 -1
- data/lib/scout/resource/produce.rb +7 -94
- data/lib/scout/resource/software.rb +176 -0
- data/lib/scout/tsv/dumper.rb +107 -0
- data/lib/scout/tsv/index.rb +49 -0
- data/lib/scout/tsv/parser.rb +203 -30
- data/lib/scout/tsv/path.rb +13 -0
- data/lib/scout/tsv/persist/adapter.rb +348 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +113 -0
- data/lib/scout/tsv/persist.rb +15 -0
- data/lib/scout/tsv/traverse.rb +48 -0
- data/lib/scout/tsv/util.rb +24 -0
- data/lib/scout/tsv.rb +16 -3
- data/lib/scout/work_queue/worker.rb +3 -3
- data/lib/scout/work_queue.rb +22 -7
- data/lib/scout/workflow/definition.rb +93 -4
- data/lib/scout/workflow/step/config.rb +18 -0
- data/lib/scout/workflow/step/dependencies.rb +40 -0
- data/lib/scout/workflow/step/file.rb +15 -0
- data/lib/scout/workflow/step/info.rb +31 -4
- data/lib/scout/workflow/step/provenance.rb +148 -0
- data/lib/scout/workflow/step.rb +68 -19
- data/lib/scout/workflow/task.rb +3 -2
- data/lib/scout/workflow/usage.rb +1 -1
- data/lib/scout/workflow.rb +11 -3
- data/lib/scout-gear.rb +1 -0
- data/lib/scout.rb +1 -0
- data/scout-gear.gemspec +34 -3
- data/scout_commands/find +1 -1
- data/scout_commands/workflow/task +16 -10
- data/share/software/install_helpers +523 -0
- data/test/scout/log/test_progress.rb +0 -2
- data/test/scout/misc/test_system.rb +21 -0
- data/test/scout/open/test_stream.rb +159 -0
- data/test/scout/path/test_find.rb +14 -7
- data/test/scout/resource/test_software.rb +24 -0
- data/test/scout/test_config.rb +66 -0
- data/test/scout/test_meta_extension.rb +10 -0
- data/test/scout/test_named_array.rb +19 -0
- data/test/scout/test_persist.rb +35 -0
- data/test/scout/test_tmpfile.rb +2 -2
- data/test/scout/test_tsv.rb +41 -1
- data/test/scout/test_work_queue.rb +40 -13
- data/test/scout/tsv/persist/test_adapter.rb +34 -0
- data/test/scout/tsv/persist/test_tokyocabinet.rb +92 -0
- data/test/scout/tsv/test_dumper.rb +44 -0
- data/test/scout/tsv/test_index.rb +64 -0
- data/test/scout/tsv/test_parser.rb +86 -0
- data/test/scout/tsv/test_persist.rb +36 -0
- data/test/scout/tsv/test_traverse.rb +9 -0
- data/test/scout/tsv/test_util.rb +0 -0
- data/test/scout/work_queue/test_worker.rb +3 -3
- data/test/scout/workflow/step/test_dependencies.rb +25 -0
- data/test/scout/workflow/step/test_info.rb +15 -17
- data/test/scout/workflow/step/test_load.rb +16 -18
- data/test/scout/workflow/step/test_provenance.rb +25 -0
- data/test/scout/workflow/test_step.rb +206 -10
- data/test/scout/workflow/test_task.rb +0 -3
- data/test/test_helper.rb +6 -0
- metadata +33 -2
data/lib/scout/path/util.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
module Path
|
2
|
+
def no_method_missing
|
3
|
+
class << self
|
4
|
+
undef_method :method_missing
|
5
|
+
end
|
6
|
+
end
|
2
7
|
|
3
8
|
def self.is_filename?(string, need_to_exists = true)
|
4
9
|
return false if string.nil?
|
@@ -56,9 +61,7 @@ module Path
|
|
56
61
|
end.flatten.uniq
|
57
62
|
end
|
58
63
|
|
59
|
-
def
|
60
|
-
|
61
|
-
undef_method :method_missing
|
62
|
-
end
|
64
|
+
def set_extension(extension)
|
65
|
+
self.annotate(self + ".#{extension}")
|
63
66
|
end
|
64
67
|
end
|
@@ -93,22 +93,15 @@ module Persist
|
|
93
93
|
|
94
94
|
Log.debug "Save #{Log.fingerprint type} on #{file}"
|
95
95
|
if save_drivers[type]
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
if IO === content || StringIO === content
|
101
|
-
main, copy = Open.tee_stream_thread content
|
102
|
-
t = Thread.new do
|
103
|
-
Thread.current["name"] = "file saver: " + file
|
104
|
-
Open.sensible_write(file, main)
|
96
|
+
if save_drivers[type].arity == 1
|
97
|
+
return Open.sensible_write(file, save_drivers[type].call(content))
|
98
|
+
else
|
99
|
+
return save_drivers[type].call(file, content)
|
105
100
|
end
|
106
|
-
ConcurrentStream.setup copy, :threads => t, :filename => file, :autojoin => true
|
107
|
-
else
|
108
|
-
serialized = serialize(content, type)
|
109
|
-
Open.sensible_write(file, serialized, :force => true)
|
110
|
-
content
|
111
101
|
end
|
102
|
+
serialized = serialize(content, type)
|
103
|
+
Open.sensible_write(file, serialized, :force => true)
|
104
|
+
return nil
|
112
105
|
end
|
113
106
|
|
114
107
|
def self.load(file, type = :serializer)
|
data/lib/scout/persist.rb
CHANGED
@@ -26,6 +26,7 @@ module Persist
|
|
26
26
|
|
27
27
|
def self.persist(name, type = :serializer, options = {}, &block)
|
28
28
|
persist_options = IndiferentHash.pull_keys options, :persist
|
29
|
+
return yield if FalseClass === persist_options[:persist]
|
29
30
|
file = persist_options[:path] || options[:path] || persistence_path(name, options)
|
30
31
|
|
31
32
|
update = options[:update] || persist_options[:update]
|
@@ -35,10 +36,29 @@ module Persist
|
|
35
36
|
if Open.exist?(file) && ! update
|
36
37
|
Persist.load(file, type)
|
37
38
|
else
|
39
|
+
return yield(file) if block.arity == 1
|
38
40
|
res = yield
|
39
41
|
begin
|
40
42
|
Open.rm(file)
|
41
|
-
|
43
|
+
|
44
|
+
if IO === res || StringIO === res
|
45
|
+
tee_copies = options[:tee_copies] || 1
|
46
|
+
main, *copies = Open.tee_stream_thread_multiple res, tee_copies + 1
|
47
|
+
t = Thread.new do
|
48
|
+
Thread.current.report_on_exception = false
|
49
|
+
Thread.current["name"] = "file saver: " + file
|
50
|
+
Open.sensible_write(file, main)
|
51
|
+
end
|
52
|
+
Thread.pass until t["name"]
|
53
|
+
copies.each_with_index do |copy,i|
|
54
|
+
next_stream = copies[i+1] if copies.length > i
|
55
|
+
ConcurrentStream.setup copy, :threads => t, :filename => file, :autojoin => true, :next => next_stream
|
56
|
+
end
|
57
|
+
res = copies.first
|
58
|
+
else
|
59
|
+
pres = Persist.save(res, file, type)
|
60
|
+
res = pres unless pres.nil?
|
61
|
+
end
|
42
62
|
rescue
|
43
63
|
raise $! unless options[:canfail]
|
44
64
|
Log.debug "Could not persist #{type} on #{file}"
|
@@ -39,7 +39,7 @@ module Resource
|
|
39
39
|
else
|
40
40
|
ScoutRake.run(rakefile, rake_dir, task)
|
41
41
|
end
|
42
|
-
rescue
|
42
|
+
rescue ScoutRake::TaskNotFound
|
43
43
|
if rake_dir.nil? or rake_dir.empty? or rake_dir == "/" or rake_dir == "./"
|
44
44
|
raise $!
|
45
45
|
end
|
@@ -51,9 +51,9 @@ module Resource
|
|
51
51
|
|
52
52
|
def produce(path, force = false)
|
53
53
|
case
|
54
|
-
when @resources.include?(path)
|
54
|
+
when (@resources && @resources.include?(path))
|
55
55
|
type, content = @resources[path]
|
56
|
-
when (Path === path && @resources.include?(path.original))
|
56
|
+
when (Path === path && @resources && @resources.include?(path.original))
|
57
57
|
type, content = @resources[path.original]
|
58
58
|
when has_rake(path)
|
59
59
|
type = :rake
|
@@ -126,97 +126,10 @@ module Resource
|
|
126
126
|
when :rake
|
127
127
|
run_rake(path, content, rake_dir)
|
128
128
|
when :install
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
software_dir = path.resource.root.software.find :user
|
134
|
-
helper_file = File.expand_path(Rbbt.share.install.software.lib.install_helpers.find(:lib, caller_lib_dir(__FILE__)))
|
135
|
-
#helper_file = File.expand_path(Rbbt.share.install.software.lib.install_helpers.find)
|
136
|
-
|
137
|
-
preamble = <<-EOF
|
138
|
-
#!/bin/bash
|
139
|
-
|
140
|
-
RBBT_SOFTWARE_DIR="#{software_dir}"
|
141
|
-
|
142
|
-
INSTALL_HELPER_FILE="#{helper_file}"
|
143
|
-
source "$INSTALL_HELPER_FILE"
|
144
|
-
EOF
|
145
|
-
|
146
|
-
content = content.call if Proc === content
|
147
|
-
|
148
|
-
content = if content =~ /git:|\.git$/
|
149
|
-
{:git => content}
|
150
|
-
else
|
151
|
-
{:src => content}
|
152
|
-
end if String === content and Open.remote?(content)
|
153
|
-
|
154
|
-
script_text = case content
|
155
|
-
when nil
|
156
|
-
raise "No way to install #{path}"
|
157
|
-
when Path
|
158
|
-
Open.read(content)
|
159
|
-
when String
|
160
|
-
if Path.is_filename?(content) and Open.exists?(content)
|
161
|
-
Open.read(content)
|
162
|
-
else
|
163
|
-
content
|
164
|
-
end
|
165
|
-
when Hash
|
166
|
-
name = content[:name] || File.basename(path)
|
167
|
-
git = content[:git]
|
168
|
-
src = content[:src]
|
169
|
-
url = content[:url]
|
170
|
-
jar = content[:jar]
|
171
|
-
extra = content[:extra]
|
172
|
-
commands = content[:commands]
|
173
|
-
if git
|
174
|
-
<<-EOF
|
175
|
-
|
176
|
-
name='#{name}'
|
177
|
-
url='#{git}'
|
178
|
-
|
179
|
-
install_git "$name" "$url" #{extra}
|
180
|
-
|
181
|
-
#{commands}
|
182
|
-
EOF
|
183
|
-
elsif src
|
184
|
-
<<-EOF
|
185
|
-
|
186
|
-
name='#{name}'
|
187
|
-
url='#{src}'
|
188
|
-
|
189
|
-
install_src "$name" "$url" #{extra}
|
190
|
-
|
191
|
-
#{commands}
|
192
|
-
EOF
|
193
|
-
elsif jar
|
194
|
-
<<-EOF
|
195
|
-
|
196
|
-
name='#{name}'
|
197
|
-
url='#{jar}'
|
198
|
-
|
199
|
-
install_jar "$name" "$url" #{extra}
|
200
|
-
|
201
|
-
#{commands}
|
202
|
-
EOF
|
203
|
-
else
|
204
|
-
<<-EOF
|
205
|
-
|
206
|
-
name='#{name}'
|
207
|
-
url='#{url}'
|
208
|
-
|
209
|
-
#{commands}
|
210
|
-
EOF
|
211
|
-
end
|
212
|
-
end
|
213
|
-
|
214
|
-
script = preamble + "\n" + script_text
|
215
|
-
Log.debug "Installing software with script:\n" << script
|
216
|
-
CMD.cmd_log('bash', :in => script)
|
217
|
-
|
218
|
-
set_software_env(software_dir) unless $set_software_env
|
219
|
-
$set_software_env = true
|
129
|
+
software_dir = self.root.software
|
130
|
+
name = File.basename(path)
|
131
|
+
Resource.install(content, name, software_dir)
|
132
|
+
set_software_env(software_dir)
|
220
133
|
else
|
221
134
|
raise "Could not produce #{ resource }. (#{ type }, #{ content })"
|
222
135
|
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
module Resource
|
2
|
+
|
3
|
+
def self.install_helpers
|
4
|
+
File.expand_path(Scout.share.software.install_helpers.find(:lib))
|
5
|
+
end
|
6
|
+
|
7
|
+
def self.install(content, name, software_dir = Path.setup('software'), &block)
|
8
|
+
software_dir ||= Path.setup('software')
|
9
|
+
software_dir = software_dir.find if Path === software_dir
|
10
|
+
|
11
|
+
content = block if block_given?
|
12
|
+
|
13
|
+
preamble = <<-EOF
|
14
|
+
#!/bin/bash
|
15
|
+
|
16
|
+
SOFTWARE_DIR="#{software_dir}"
|
17
|
+
|
18
|
+
INSTALL_HELPER_FILE="#{install_helpers}"
|
19
|
+
source "$INSTALL_HELPER_FILE"
|
20
|
+
EOF
|
21
|
+
|
22
|
+
content = content.call if Proc === content
|
23
|
+
|
24
|
+
name = content[:name] if Hash === content && content.include?(:name)
|
25
|
+
content =
|
26
|
+
if content =~ /git:|\.git$/
|
27
|
+
{:git => content}
|
28
|
+
else
|
29
|
+
{:src => content}
|
30
|
+
end if String === content and Open.remote?(content)
|
31
|
+
|
32
|
+
script_text =
|
33
|
+
case content
|
34
|
+
when nil
|
35
|
+
raise "No way to install #{name}"
|
36
|
+
when Path
|
37
|
+
Open.read(content)
|
38
|
+
when String
|
39
|
+
if Path.is_filename?(content) and Open.exists?(content)
|
40
|
+
Open.read(content)
|
41
|
+
else
|
42
|
+
content
|
43
|
+
end
|
44
|
+
when Hash
|
45
|
+
name = content[:name] || name
|
46
|
+
git = content[:git]
|
47
|
+
src = content[:src]
|
48
|
+
url = content[:url]
|
49
|
+
jar = content[:jar]
|
50
|
+
extra = content[:extra]
|
51
|
+
commands = content[:commands]
|
52
|
+
if git
|
53
|
+
<<-EOF
|
54
|
+
|
55
|
+
name='#{name}'
|
56
|
+
url='#{git}'
|
57
|
+
|
58
|
+
install_git "$name" "$url" #{extra}
|
59
|
+
|
60
|
+
#{commands}
|
61
|
+
EOF
|
62
|
+
elsif src
|
63
|
+
<<-EOF
|
64
|
+
|
65
|
+
name='#{name}'
|
66
|
+
url='#{src}'
|
67
|
+
|
68
|
+
install_src "$name" "$url" #{extra}
|
69
|
+
|
70
|
+
#{commands}
|
71
|
+
EOF
|
72
|
+
elsif jar
|
73
|
+
<<-EOF
|
74
|
+
|
75
|
+
name='#{name}'
|
76
|
+
url='#{jar}'
|
77
|
+
|
78
|
+
install_jar "$name" "$url" #{extra}
|
79
|
+
|
80
|
+
#{commands}
|
81
|
+
EOF
|
82
|
+
else
|
83
|
+
<<-EOF
|
84
|
+
|
85
|
+
name='#{name}'
|
86
|
+
url='#{url}'
|
87
|
+
|
88
|
+
#{commands}
|
89
|
+
EOF
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
script = preamble + "\n" + script_text
|
94
|
+
Log.debug "Installing software #{name} into #{software_dir} with script:\n" << script
|
95
|
+
CMD.cmd_log('bash', :in => script)
|
96
|
+
Resource.set_software_env(software_dir)
|
97
|
+
end
|
98
|
+
|
99
|
+
def self.set_software_env(software_dir = Path.setup('software'))
|
100
|
+
software_dir.opt.find_all.collect{|d| d.annotate(File.dirname(d)) }.reverse.each do |software_dir|
|
101
|
+
next unless software_dir.exists?
|
102
|
+
Log.medium "Preparing software env at #{software_dir}"
|
103
|
+
|
104
|
+
software_dir = File.expand_path(software_dir)
|
105
|
+
opt_dir = File.join(software_dir, 'opt')
|
106
|
+
bin_dir = File.join(opt_dir, 'bin')
|
107
|
+
|
108
|
+
Misc.env_add 'PATH', bin_dir
|
109
|
+
|
110
|
+
FileUtils.mkdir_p opt_dir unless File.exist? opt_dir
|
111
|
+
|
112
|
+
%w(.ld-paths .c-paths .pkgconfig-paths .aclocal-paths .java-classpaths).each do |file|
|
113
|
+
filename = File.join(opt_dir, file)
|
114
|
+
begin
|
115
|
+
FileUtils.touch filename unless File.exist? filename
|
116
|
+
rescue
|
117
|
+
Log.warn("Could not touch #{ filename }")
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
Open.read(File.join opt_dir, '.c-paths').split(/\n/).each do |line|
|
122
|
+
dir = line.chomp
|
123
|
+
dir = File.join(opt_dir, dir) unless dir[0] == "/"
|
124
|
+
Misc.env_add('CPLUS_INCLUDE_PATH',dir)
|
125
|
+
Misc.env_add('C_INCLUDE_PATH',dir)
|
126
|
+
end if File.exist? File.join(opt_dir, '.c-paths')
|
127
|
+
|
128
|
+
Open.read(File.join opt_dir, '.ld-paths').split(/\n/).each do |line|
|
129
|
+
dir = line.chomp
|
130
|
+
dir = File.join(opt_dir, dir) unless dir[0] == "/"
|
131
|
+
Misc.env_add('LIBRARY_PATH',dir)
|
132
|
+
Misc.env_add('LD_LIBRARY_PATH',dir)
|
133
|
+
Misc.env_add('LD_RUN_PATH',dir)
|
134
|
+
end if File.exist? File.join(opt_dir, '.ld-paths')
|
135
|
+
|
136
|
+
Open.read(File.join opt_dir, '.pkgconfig-paths').split(/\n/).each do |line|
|
137
|
+
dir = line.chomp
|
138
|
+
dir = File.join(opt_dir, dir) unless dir[0] == "/"
|
139
|
+
Misc.env_add('PKG_CONFIG_PATH',dir)
|
140
|
+
end if File.exist? File.join(opt_dir, '.pkgconfig-paths')
|
141
|
+
|
142
|
+
Open.read(File.join opt_dir, '.aclocal-paths').split(/\n/).each do |line|
|
143
|
+
dir = line.chomp
|
144
|
+
dir = File.join(opt_dir, dir) unless dir[0] == "/"
|
145
|
+
Misc.env_add('ACLOCAL_FLAGS', "-I #{dir}", ' ')
|
146
|
+
end if File.exist? File.join(opt_dir, '.aclocal-paths')
|
147
|
+
|
148
|
+
Open.read(File.join opt_dir, '.java-classpaths').split(/\n/).each do |line|
|
149
|
+
dir = line.chomp
|
150
|
+
dir = File.join(opt_dir, dir) unless dir[0] == "/"
|
151
|
+
Misc.env_add('CLASSPATH', "#{dir}")
|
152
|
+
end if File.exist? File.join(opt_dir, '.java-classpaths')
|
153
|
+
|
154
|
+
Dir.glob(File.join opt_dir, 'jars', '*.jar').each do |file|
|
155
|
+
Misc.env_add('CLASSPATH', "#{file}")
|
156
|
+
end
|
157
|
+
|
158
|
+
if File.exist?(File.join(opt_dir, '.post_install')) and File.directory?(File.join(opt_dir, '.post_install'))
|
159
|
+
Dir.glob(File.join(opt_dir, '.post_install','*')).each do |file|
|
160
|
+
|
161
|
+
# Load exports
|
162
|
+
Open.read(file).split("\n").each do |line|
|
163
|
+
next unless line =~ /^\s*export\s+([^=]+)=(.*)/
|
164
|
+
var = $1.strip
|
165
|
+
value = $2.strip
|
166
|
+
value.sub!(/^['"]/,'')
|
167
|
+
value.sub!(/['"]$/,'')
|
168
|
+
value.gsub!(/\$[a-z_0-9]+/i){|var| ENV[var[1..-1]] }
|
169
|
+
Log.debug "Set variable export from .post_install: #{Misc.fingerprint [var,value]*"="}"
|
170
|
+
ENV[var] = value
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module TSV
|
2
|
+
class Dumper
|
3
|
+
def self.header_lines(key_field, fields, entry_hash = nil)
|
4
|
+
if Hash === entry_hash
|
5
|
+
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
6
|
+
preamble = entry_hash[:preamble]
|
7
|
+
header_hash = entry_hash[:header_hash]
|
8
|
+
end
|
9
|
+
|
10
|
+
header_hash = "#" if header_hash.nil?
|
11
|
+
|
12
|
+
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
13
|
+
|
14
|
+
str = ""
|
15
|
+
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
16
|
+
if fields
|
17
|
+
if fields.empty?
|
18
|
+
str << header_hash << (key_field || "ID").to_s << "\n"
|
19
|
+
else
|
20
|
+
str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
str
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.header(options={})
|
28
|
+
key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
|
29
|
+
:key_field, :fields, :sep, :header_hash, :preamble,
|
30
|
+
:sep => "\t", :header_hash => "#", :preamble => true
|
31
|
+
|
32
|
+
if fields.nil? || key_field.nil?
|
33
|
+
fields_str = nil
|
34
|
+
else
|
35
|
+
fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}"
|
36
|
+
end
|
37
|
+
|
38
|
+
if preamble && options.values.compact.any?
|
39
|
+
preamble_str = "#: " << IndiferentHash.hash2string(options)
|
40
|
+
else
|
41
|
+
preamble_str = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
[preamble_str, fields_str].compact * "\n"
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
attr_accessor :options
|
49
|
+
def initialize(options = {})
|
50
|
+
@sep, @type = IndiferentHash.process_options options,
|
51
|
+
:sep, :type,
|
52
|
+
:sep => "\t", :type => :double
|
53
|
+
@options = options
|
54
|
+
@sout, @sin = Open.pipe
|
55
|
+
ConcurrentStream.setup(@sin, :pair => @sout)
|
56
|
+
ConcurrentStream.setup(@sout, :pair => @sin)
|
57
|
+
end
|
58
|
+
|
59
|
+
def init
|
60
|
+
header = Dumper.header(@options.merge(:type => @type, :sep => @sep))
|
61
|
+
@sin.puts header if header and ! header.empty?
|
62
|
+
end
|
63
|
+
|
64
|
+
def add(key, value)
|
65
|
+
|
66
|
+
case @type
|
67
|
+
when :single
|
68
|
+
@sin.puts key + @sep + value
|
69
|
+
when :list, :flat
|
70
|
+
@sin.puts key + @sep + value * @sep
|
71
|
+
when :double
|
72
|
+
@sin.puts key + @sep + value.collect{|v| v * "|" } * @sep
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def close
|
77
|
+
@sin.close
|
78
|
+
@sin.join
|
79
|
+
end
|
80
|
+
|
81
|
+
def stream
|
82
|
+
@sout
|
83
|
+
end
|
84
|
+
|
85
|
+
def abort(exception=nil)
|
86
|
+
@sin.abort(exception)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def stream
|
91
|
+
iii self.extension_attr_hash
|
92
|
+
dumper = TSV::Dumper.new self.extension_attr_hash
|
93
|
+
dumper.init
|
94
|
+
Thread.new do
|
95
|
+
Thread.current["name"] = "Dumper thread"
|
96
|
+
self.each do |k,v|
|
97
|
+
dumper.add k, v
|
98
|
+
end
|
99
|
+
dumper.close
|
100
|
+
end
|
101
|
+
dumper.stream
|
102
|
+
end
|
103
|
+
|
104
|
+
def to_s
|
105
|
+
stream.read
|
106
|
+
end
|
107
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require_relative 'parser'
|
2
|
+
module TSV
|
3
|
+
def self.index(tsv_file, target: 0, order: true, **kwargs)
|
4
|
+
persist, type = IndiferentHash.process_options kwargs,
|
5
|
+
:persist, :persist_type,
|
6
|
+
:persist => false, :persist_type => "HDB"
|
7
|
+
kwargs.delete :type
|
8
|
+
|
9
|
+
Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
|
10
|
+
if filename
|
11
|
+
index = ScoutCabinet.open(filename, true, type)
|
12
|
+
TSV.setup(index, :type => :single)
|
13
|
+
index.extend TSVAdapter
|
14
|
+
else
|
15
|
+
index = TSV.setup({}, :type => :single)
|
16
|
+
end
|
17
|
+
|
18
|
+
dummy_data = nil
|
19
|
+
if order
|
20
|
+
tmp_index = {}
|
21
|
+
dummy_data = Open.open(tsv_file) do |file|
|
22
|
+
TSV.parse file, key_field: target, type: :double, **kwargs do |k,values|
|
23
|
+
values.each_with_index do |list,i|
|
24
|
+
list.each do |e|
|
25
|
+
tmp_index[e] ||= []
|
26
|
+
tmp_index[e][i] ||= []
|
27
|
+
tmp_index[e][i] << k
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
tmp_index.each do |e,list|
|
33
|
+
index[e] = list.flatten.compact.uniq.first
|
34
|
+
end
|
35
|
+
else
|
36
|
+
dummy_data = Open.open(tsv_file) do |file|
|
37
|
+
TSV.parse file, key_field: target, type: :flat, **kwargs do |k,values|
|
38
|
+
values.each do |e|
|
39
|
+
index[e] = k unless index.include?(e)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
index.key_field = dummy_data.fields * ", "
|
45
|
+
index.fields = [dummy_data.key_field]
|
46
|
+
index
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|