filbunke 1.13.5 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/.ruby-version +1 -1
- data/Rakefile +5 -4
- data/VERSION +1 -1
- data/filbunke.gemspec +19 -17
- data/lib/filbunke/callbacks.rb +1 -16
- data/lib/filbunke/client.rb +82 -105
- data/lib/filbunke/daemon.rb +2 -18
- data/lib/filbunke/logger.rb +10 -10
- data/lib/filbunke/repository.rb +28 -15
- data/lib/filbunke.rb +1 -1
- metadata +32 -19
- data/lib/filbunke/thread_pool.rb +0 -104
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
NzlmYmMzN2YwYjI1ZTA5MzhkNThkNWQ0ZDk4YTZiMmQxMWZhNGY4OA==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9ebdbdfb5aa1fc9a53ffe60b2b79d8cec6fe920c
|
4
|
+
data.tar.gz: d320cceb5cd74e3c443495fe2305e08a7d385af8
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
YzkzMzAxOTc2ZmUxMGZlYmRhM2VhYTg1MGI2ZDY4YzAyMmFjNTY1OWNmZDMx
|
11
|
-
ODczMzhhZjVhOWRmYWM5YTMxZTVlOTdkZTRhNmIwOWVkNjJhYzc=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
YTg2Y2NmNzdjYjAxZTRlOTg2NDdlMGIxMTUyOTUwZDdlOGY1ZGRlY2U0NDM2
|
14
|
-
NTljMTc4ZGFlYjIzNWFkMDFmNzJiZmFiYTJmNTY5YzFlZWRkYzNkMmZkNjQ2
|
15
|
-
YWMzYTc0MjY3OTk2ZjkyYTQ4YzAzMDhmOGM3ZTU2OTVjNjBkM2E=
|
6
|
+
metadata.gz: dbd84ac518738229f3333557d68fa8c39dbe9153358ba2da521b575b2003bc814d595bc0d8bc256ddc3dee8965b7eefdb8c72117e0fd244d7847d425126e9fcb
|
7
|
+
data.tar.gz: 815b0982c1c9fab0fdb5c08fd34140796bd89e8cf1cf4c9ed00d474a71dec52ca301f88d60105038ceced0a41d31346a589a95c048ec79cc62c7dc8d01c25990
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.2
|
data/Rakefile
CHANGED
@@ -13,10 +13,11 @@ begin
|
|
13
13
|
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
14
14
|
gem.files.exclude 'pkg'
|
15
15
|
gem.executables = ['filbunked']
|
16
|
-
gem.add_dependency 'json', '= 1.
|
17
|
-
gem.add_dependency 'typhoeus', '=
|
18
|
-
gem.add_dependency 'open4', '= 1.3.
|
19
|
-
gem.add_dependency 'mime-types', '=
|
16
|
+
gem.add_dependency 'json', '= 1.8.3'
|
17
|
+
gem.add_dependency 'typhoeus', '= 0.7.3'
|
18
|
+
gem.add_dependency 'open4', '= 1.3.4'
|
19
|
+
gem.add_dependency 'mime-types', '= 2.6.2'
|
20
|
+
gem.add_dependency 'parallel', '= 1.6.1'
|
20
21
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
21
22
|
end
|
22
23
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.2
|
data/filbunke.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: filbunke
|
5
|
+
# stub: filbunke 2.0.2 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "filbunke"
|
9
|
-
s.version = "
|
9
|
+
s.version = "2.0.2"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
|
14
|
-
s.date = "
|
14
|
+
s.date = "2015-09-19"
|
15
15
|
s.description = "Filbunke client and library"
|
16
16
|
s.email = "technical@deltaprojects.com"
|
17
17
|
s.executables = ["filbunked"]
|
@@ -38,12 +38,11 @@ Gem::Specification.new do |s|
|
|
38
38
|
"lib/filbunke/file.rb",
|
39
39
|
"lib/filbunke/logger.rb",
|
40
40
|
"lib/filbunke/repository.rb",
|
41
|
-
"lib/filbunke/thread_pool.rb",
|
42
41
|
"test/helper.rb",
|
43
42
|
"test/test_filbunke.rb"
|
44
43
|
]
|
45
44
|
s.homepage = "https://rubygems.org/gems/filbunke"
|
46
|
-
s.rubygems_version = "2.4.
|
45
|
+
s.rubygems_version = "2.4.5.1"
|
47
46
|
s.summary = "Filbunke client"
|
48
47
|
|
49
48
|
if s.respond_to? :specification_version then
|
@@ -51,23 +50,26 @@ Gem::Specification.new do |s|
|
|
51
50
|
|
52
51
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
52
|
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
54
|
-
s.add_runtime_dependency(%q<json>, ["= 1.
|
55
|
-
s.add_runtime_dependency(%q<typhoeus>, ["=
|
56
|
-
s.add_runtime_dependency(%q<open4>, ["= 1.3.
|
57
|
-
s.add_runtime_dependency(%q<mime-types>, ["=
|
53
|
+
s.add_runtime_dependency(%q<json>, ["= 1.8.3"])
|
54
|
+
s.add_runtime_dependency(%q<typhoeus>, ["= 0.7.3"])
|
55
|
+
s.add_runtime_dependency(%q<open4>, ["= 1.3.4"])
|
56
|
+
s.add_runtime_dependency(%q<mime-types>, ["= 2.6.2"])
|
57
|
+
s.add_runtime_dependency(%q<parallel>, ["= 1.6.1"])
|
58
58
|
else
|
59
59
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
60
|
-
s.add_dependency(%q<json>, ["= 1.
|
61
|
-
s.add_dependency(%q<typhoeus>, ["=
|
62
|
-
s.add_dependency(%q<open4>, ["= 1.3.
|
63
|
-
s.add_dependency(%q<mime-types>, ["=
|
60
|
+
s.add_dependency(%q<json>, ["= 1.8.3"])
|
61
|
+
s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
|
62
|
+
s.add_dependency(%q<open4>, ["= 1.3.4"])
|
63
|
+
s.add_dependency(%q<mime-types>, ["= 2.6.2"])
|
64
|
+
s.add_dependency(%q<parallel>, ["= 1.6.1"])
|
64
65
|
end
|
65
66
|
else
|
66
67
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
67
|
-
s.add_dependency(%q<json>, ["= 1.
|
68
|
-
s.add_dependency(%q<typhoeus>, ["=
|
69
|
-
s.add_dependency(%q<open4>, ["= 1.3.
|
70
|
-
s.add_dependency(%q<mime-types>, ["=
|
68
|
+
s.add_dependency(%q<json>, ["= 1.8.3"])
|
69
|
+
s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
|
70
|
+
s.add_dependency(%q<open4>, ["= 1.3.4"])
|
71
|
+
s.add_dependency(%q<mime-types>, ["= 2.6.2"])
|
72
|
+
s.add_dependency(%q<parallel>, ["= 1.6.1"])
|
71
73
|
end
|
72
74
|
end
|
73
75
|
|
data/lib/filbunke/callbacks.rb
CHANGED
@@ -6,27 +6,12 @@ module Filbunke
|
|
6
6
|
@logger = logger
|
7
7
|
end
|
8
8
|
|
9
|
-
def on_update_batch(files)
|
10
|
-
files.each do |item|
|
11
|
-
on_update(item.file, item.local_file_path)
|
12
|
-
end
|
13
|
-
end
|
14
9
|
def on_update(file, local_file_path)
|
15
10
|
end
|
16
11
|
|
17
|
-
def on_no_change_batch(files)
|
18
|
-
files.each do |item|
|
19
|
-
on_no_change(item.file, item.local_file_path)
|
20
|
-
end
|
21
|
-
end
|
22
12
|
def on_no_change(file, local_file_path)
|
23
13
|
end
|
24
|
-
|
25
|
-
def on_delete_batch(files)
|
26
|
-
files.each do |item|
|
27
|
-
on_delete(item.file, item.local_file_path)
|
28
|
-
end
|
29
|
-
end
|
14
|
+
|
30
15
|
def on_delete(file, local_file_path)
|
31
16
|
end
|
32
17
|
|
data/lib/filbunke/client.rb
CHANGED
@@ -16,6 +16,7 @@ module Filbunke
|
|
16
16
|
@repository = repository
|
17
17
|
@logger = logger
|
18
18
|
@callbacks = callbacks
|
19
|
+
@parallel_callback_opts = (@repository.num_callback_threads > 0 ? {:in_threads => @repository.num_callback_threads} : {:in_processes => repository.num_callback_processes} )
|
19
20
|
@failed_request_log_file_name = failed_request_log_file_name
|
20
21
|
@hydra = Typhoeus::Hydra.new(:max_concurrency => @repository.hydra_concurrency)
|
21
22
|
|
@@ -25,26 +26,18 @@ module Filbunke
|
|
25
26
|
def with_updated_files(last_checkpoint)
|
26
27
|
updates = get_updated_file_list(last_checkpoint)
|
27
28
|
updated_files = updates["files"] || []
|
29
|
+
failure = false
|
30
|
+
|
28
31
|
new_checkpoint = updates["checkpoint"]
|
29
|
-
|
30
|
-
|
31
|
-
return begin
|
32
|
-
fetch_remote_last_checkpoint
|
33
|
-
rescue => e
|
34
|
-
@logger.warn "Failed to fetch remote last_checkpoint #{@repository.name} will fall back to local last_checkpoint=#{last_checkpoint}"
|
35
|
-
last_checkpoint
|
36
|
-
end
|
37
|
-
else
|
38
|
-
return last_checkpoint
|
39
|
-
end
|
40
|
-
end
|
32
|
+
|
33
|
+
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
|
41
34
|
|
42
|
-
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
|
43
35
|
@async_requests = []
|
36
|
+
|
44
37
|
callbacks_on_update = []
|
45
38
|
callbacks_on_no_change = []
|
46
39
|
callbacks_on_delete = []
|
47
|
-
|
40
|
+
|
48
41
|
updated_files.each do |raw_file|
|
49
42
|
file = File.new(raw_file)
|
50
43
|
local_file_path = ::File.join(@repository.local_path, file.path)
|
@@ -58,56 +51,41 @@ module Filbunke
|
|
58
51
|
callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
|
59
52
|
else
|
60
53
|
@logger.error "Unable to get file #{file.url} ==> #{file.path}!"
|
61
|
-
|
62
|
-
break
|
54
|
+
failure = true
|
63
55
|
end
|
56
|
+
|
64
57
|
else
|
65
58
|
@logger.debug "File exists with correct hash: #{local_file_path}"
|
66
59
|
callbacks_on_no_change << OpenStruct.new({:file => file, :local_file_path => local_file_path})
|
67
60
|
end
|
68
61
|
end
|
69
62
|
end
|
63
|
+
@hydra.run
|
70
64
|
|
71
|
-
|
72
|
-
@logger.
|
73
|
-
|
74
|
-
end
|
75
|
-
@logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
|
76
|
-
has_fetch_failures = begin
|
77
|
-
@hydra.run
|
78
|
-
# Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
|
79
|
-
# I think we can remove the following request validation
|
80
|
-
@async_requests.any? do |request|
|
81
|
-
@logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
|
82
|
-
request.response.nil? || request.response.code != 200
|
83
|
-
end
|
84
|
-
rescue RuntimeError, SystemCallError, StandardError => e
|
85
|
-
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
86
|
-
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
87
|
-
true
|
88
|
-
end
|
89
|
-
|
90
|
-
if has_fetch_failures
|
91
|
-
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
92
|
-
return last_checkpoint
|
65
|
+
pfailure = failure || @async_requests.any? do |request|
|
66
|
+
@logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
|
67
|
+
request.response.nil? || request.response.code != 200
|
93
68
|
end
|
94
69
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
70
|
+
if pfailure == false
|
71
|
+
@logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
|
72
|
+
begin
|
73
|
+
run_callbacks_delete(callbacks_on_delete)
|
74
|
+
run_callbacks(callbacks_on_update)
|
75
|
+
run_callbacks_no_change(callbacks_on_no_change)
|
76
|
+
|
77
|
+
new_checkpoint || last_checkpoint
|
78
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
79
|
+
msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
80
|
+
@logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
81
|
+
last_checkpoint
|
82
|
+
end
|
83
|
+
else
|
84
|
+
@logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
105
85
|
last_checkpoint
|
106
86
|
end
|
107
|
-
|
108
|
-
new_or_last_checkpoint
|
109
87
|
end
|
110
|
-
|
88
|
+
|
111
89
|
def update_files!(last_checkpoint)
|
112
90
|
with_updated_files(last_checkpoint) {}
|
113
91
|
end
|
@@ -159,7 +137,7 @@ module Filbunke
|
|
159
137
|
end
|
160
138
|
end
|
161
139
|
|
162
|
-
def
|
140
|
+
def last_checkpoint
|
163
141
|
last_checkpoint_http = Net::HTTP.new(@repository.host, @repository.port)
|
164
142
|
last_checkpoint_http.start do |http|
|
165
143
|
last_checkpoint_path = "/#{UPDATES_ACTION}/#{@repository.name}/#{LAST_CHECKPOINT_ACTION}"
|
@@ -171,7 +149,7 @@ module Filbunke
|
|
171
149
|
return response.body.chomp.to_i
|
172
150
|
end
|
173
151
|
end
|
174
|
-
|
152
|
+
|
175
153
|
private
|
176
154
|
|
177
155
|
def log_failed_request(failed_request_command, e)
|
@@ -183,11 +161,11 @@ module Filbunke
|
|
183
161
|
end
|
184
162
|
|
185
163
|
def update_file!(file, local_file_path)
|
186
|
-
|
164
|
+
|
187
165
|
if file.url =~ /^http:\/\//
|
188
166
|
update_http_file!(file, local_file_path)
|
189
167
|
elsif (file.url =~ /^hdfs:\/\//)
|
190
|
-
update_hdfs_file!(file, local_file_path)
|
168
|
+
success = update_hdfs_file!(file, local_file_path)
|
191
169
|
else
|
192
170
|
raise "Unsupported protocol for file: #{file.inspect}"
|
193
171
|
end
|
@@ -195,19 +173,25 @@ module Filbunke
|
|
195
173
|
|
196
174
|
def run_callbacks(files)
|
197
175
|
@callbacks.each do |callback|
|
198
|
-
|
176
|
+
Parallel.map(files, @parallel_callback_opts) do |item|
|
177
|
+
callback.on_update(item.file, item.local_file_path)
|
178
|
+
end
|
199
179
|
end
|
200
180
|
end
|
201
181
|
|
202
182
|
def run_callbacks_no_change(files)
|
203
183
|
@callbacks.each do |callback|
|
204
|
-
|
184
|
+
Parallel.map(files, @parallel_callback_opts) do |item|
|
185
|
+
callback.on_no_change(item.file, item.local_file_path)
|
186
|
+
end
|
205
187
|
end
|
206
188
|
end
|
207
189
|
|
208
190
|
def run_callbacks_delete(files)
|
209
191
|
@callbacks.each do |callback|
|
210
|
-
|
192
|
+
Parallel.map(files, @parallel_callback_opts) do |item|
|
193
|
+
callback.on_delete(item.file, item.local_file_path)
|
194
|
+
end
|
211
195
|
end
|
212
196
|
end
|
213
197
|
|
@@ -226,7 +210,6 @@ module Filbunke
|
|
226
210
|
updates_http.read_timeout = 300 # default is 60 seconds
|
227
211
|
updates_http.start do |http|
|
228
212
|
updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
|
229
|
-
updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
|
230
213
|
begin
|
231
214
|
@logger.info "Fetching updated file list from #{updates_path}"
|
232
215
|
request = Net::HTTP::Get.new(updates_path)
|
@@ -245,67 +228,44 @@ module Filbunke
|
|
245
228
|
rescue StandardError => e
|
246
229
|
@logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
|
247
230
|
return {}
|
248
|
-
end
|
231
|
+
end
|
249
232
|
end
|
250
233
|
|
251
234
|
def update_http_file!(file, local_file_path)
|
252
|
-
downloaded_file = nil
|
253
|
-
tmp_filename = "#{local_file_path}.tmp"
|
254
235
|
begin
|
255
236
|
async_request = if @repository.user
|
256
|
-
Typhoeus::Request.new(
|
257
|
-
URI.escape(file.url),
|
258
|
-
:followlocation => true,
|
259
|
-
:username => @repository.user,
|
260
|
-
:password => @repository.pass
|
261
|
-
)
|
237
|
+
Typhoeus::Request.new(URI.encode(file.url), :followlocation => true, :username => @repository.user, :password => @repository.pass)
|
262
238
|
else
|
263
|
-
Typhoeus::Request.new(
|
264
|
-
URI.escape(file.url),
|
265
|
-
:followlocation => true
|
266
|
-
)
|
267
|
-
end
|
268
|
-
|
269
|
-
|
270
|
-
async_request.on_headers do |response|
|
271
|
-
if response.code != 200
|
272
|
-
raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
|
273
|
-
end
|
274
|
-
@logger.debug("Updating: #{local_file_path}")
|
275
|
-
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
276
|
-
downloaded_file = ::File.new(tmp_filename, "wb")
|
277
|
-
end
|
278
|
-
|
279
|
-
async_request.on_body do |chunk, response|
|
280
|
-
downloaded_file.write(chunk) if response.code == 200
|
239
|
+
Typhoeus::Request.new(URI.encode(file.url), :followlocation => true)
|
281
240
|
end
|
282
|
-
|
283
241
|
async_request.on_complete do |response|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
::FileUtils.mv(tmp_filename, local_file_path)
|
242
|
+
success = false
|
243
|
+
begin
|
244
|
+
success = response.code.to_i == 200
|
245
|
+
if success
|
246
|
+
write_file!(local_file_path, response.body)
|
290
247
|
else
|
291
|
-
|
248
|
+
body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
|
249
|
+
@logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
|
292
250
|
end
|
251
|
+
rescue SystemCallError, StandardError => e
|
252
|
+
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
253
|
+
@logger.error "Failed to update file #{file.url}: #{msg}"
|
293
254
|
end
|
294
|
-
|
255
|
+
# return the async_request.handled_response value here
|
256
|
+
success
|
295
257
|
end
|
296
258
|
@hydra.queue async_request
|
297
259
|
@async_requests << async_request
|
298
|
-
|
299
|
-
rescue RuntimeError, SystemCallError, StandardError => e
|
260
|
+
rescue StandardError => e
|
300
261
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
301
262
|
@logger.error "Failed to update file #{file.url}: #{msg}"
|
302
|
-
|
303
|
-
::FileUtils.rm(tmp_filename)
|
304
|
-
end
|
305
|
-
false
|
263
|
+
return false
|
306
264
|
end
|
307
|
-
end
|
308
265
|
|
266
|
+
return true
|
267
|
+
end
|
268
|
+
|
309
269
|
def update_hdfs_file!(file, local_file_path)
|
310
270
|
begin
|
311
271
|
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
@@ -314,10 +274,10 @@ module Filbunke
|
|
314
274
|
url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
|
315
275
|
hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
|
316
276
|
#@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
|
317
|
-
|
277
|
+
|
318
278
|
pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
|
319
279
|
ignored, status = Process::waitpid2 pid
|
320
|
-
|
280
|
+
|
321
281
|
if status.exitstatus == 0 then
|
322
282
|
begin
|
323
283
|
::FileUtils.mv "#{local_file_path}.tmp", local_file_path
|
@@ -326,7 +286,7 @@ module Filbunke
|
|
326
286
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
327
287
|
@logger.error "Failed to move hdfs file #{file.url}: #{msg}"
|
328
288
|
return false
|
329
|
-
end
|
289
|
+
end
|
330
290
|
else
|
331
291
|
@logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
|
332
292
|
return false
|
@@ -338,6 +298,23 @@ module Filbunke
|
|
338
298
|
end
|
339
299
|
end
|
340
300
|
|
301
|
+
def write_file!(file_path, contents)
|
302
|
+
::FileUtils.mkdir_p(::File.dirname(file_path))
|
303
|
+
@logger.debug("Updating: #{file_path}")
|
304
|
+
begin
|
305
|
+
::File.open("#{file_path}.tmp", 'w') do |file|
|
306
|
+
file.write(contents)
|
307
|
+
file.close
|
308
|
+
end
|
309
|
+
::FileUtils.mv "#{file_path}.tmp", file_path
|
310
|
+
return true
|
311
|
+
rescue StandardError => e
|
312
|
+
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
313
|
+
@logger.error "Failed to move file #{file_path}: #{msg}"
|
314
|
+
return false
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
341
318
|
def delete_file!(file_path)
|
342
319
|
if ::File.exists?(file_path) then
|
343
320
|
@logger.debug("Deleting: #{file_path}")
|
data/lib/filbunke/daemon.rb
CHANGED
@@ -15,7 +15,6 @@ module Filbunke
|
|
15
15
|
@logger.log("Initializing repository: #{repository_name}")
|
16
16
|
@clients << begin
|
17
17
|
repository_config["run_every"] = repository_config.fetch("run_every", @config.fetch("run_every", 10))
|
18
|
-
repository_config["batch_size"] = repository_config.fetch("batch_size", @config.fetch("batch_size", 0))
|
19
18
|
repository = Repository.new(repository_config)
|
20
19
|
callbacks = []
|
21
20
|
repository_config["callbacks"].each do |callback_name, callback_config|
|
@@ -26,10 +25,6 @@ module Filbunke
|
|
26
25
|
failed_request_log_file_name = repository_config["failed_request_log_file_name"]||nil
|
27
26
|
|
28
27
|
Client.new(repository, @logger, callbacks, failed_request_log_file_name)
|
29
|
-
rescue => e
|
30
|
-
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
31
|
-
@logger.error("Failed to initialize #{repository_name}; #{msg}")
|
32
|
-
raise e
|
33
28
|
end
|
34
29
|
end
|
35
30
|
end
|
@@ -48,13 +43,14 @@ module Filbunke
|
|
48
43
|
update_checkpoint_for_repository(client.repository, new_checkpoint)
|
49
44
|
sleep client.repository.run_every
|
50
45
|
end
|
51
|
-
rescue => e
|
46
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
52
47
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
53
48
|
@logger.error("#{client.repository.name} Died.. #{msg}")
|
54
49
|
Process.kill("KILL", @parent_pid)
|
55
50
|
exit 1
|
56
51
|
end
|
57
52
|
end
|
53
|
+
|
58
54
|
end
|
59
55
|
client_pids.each { |pid| Process.wait(pid) }
|
60
56
|
end
|
@@ -83,18 +79,6 @@ module Filbunke
|
|
83
79
|
end
|
84
80
|
|
85
81
|
def write_pid!(pid_file_path)
|
86
|
-
|
87
|
-
begin
|
88
|
-
existing_process = ::File.read(pid_file_path).to_i if ::File.readable?(pid_file_path)
|
89
|
-
if existing_process != nil and existing_process > 0
|
90
|
-
@logger.info("killing existing process #{existing_process} from #{pid_file_path}")
|
91
|
-
Process.kill("KILL", existing_process)
|
92
|
-
|
93
|
-
end
|
94
|
-
rescue => e
|
95
|
-
@logger.warn("failed to kill existing pid from #{pid_file_path}: #{e}\n\twill ignore and continue...")
|
96
|
-
end
|
97
|
-
|
98
82
|
::File.open(pid_file_path, 'w') do |f|
|
99
83
|
f.write(Process.pid.to_i)
|
100
84
|
f.close
|
data/lib/filbunke/logger.rb
CHANGED
@@ -4,41 +4,41 @@ module Filbunke
|
|
4
4
|
|
5
5
|
def initialize(log_file_name, local, level)
|
6
6
|
@local = local
|
7
|
-
@
|
7
|
+
@logger = if @local or log_file_name.nil?
|
8
8
|
Logger.new(STDOUT)
|
9
9
|
else
|
10
10
|
Logger.new(log_file_name)
|
11
11
|
end
|
12
12
|
|
13
|
-
@
|
13
|
+
@logger.level = parse_level(level)
|
14
14
|
end
|
15
15
|
|
16
16
|
def puts(msg)
|
17
|
-
info(msg)
|
17
|
+
@logger.info(msg)
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def log(msg)
|
21
|
-
@
|
21
|
+
@logger.info msg
|
22
22
|
end
|
23
23
|
|
24
24
|
def info(msg)
|
25
|
-
@
|
25
|
+
@logger.info msg
|
26
26
|
end
|
27
27
|
|
28
28
|
def error(msg)
|
29
|
-
@
|
29
|
+
@logger.error msg
|
30
30
|
end
|
31
31
|
|
32
32
|
def warn(msg)
|
33
|
-
@
|
33
|
+
@logger.warn msg
|
34
34
|
end
|
35
35
|
|
36
36
|
def debug(msg)
|
37
|
-
@
|
37
|
+
@logger.debug msg
|
38
38
|
end
|
39
39
|
|
40
40
|
def fatal(msg)
|
41
|
-
@
|
41
|
+
@logger.error msg
|
42
42
|
end
|
43
43
|
|
44
44
|
def parse_level(constantOrString)
|
data/lib/filbunke/repository.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
module Filbunke
|
2
2
|
class Repository
|
3
|
-
|
4
|
-
|
5
|
-
:
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
3
|
+
|
4
|
+
attr_accessor :name,
|
5
|
+
:host,
|
6
|
+
:port,
|
7
|
+
:local_path,
|
8
|
+
:file_umask,
|
9
|
+
:directory_umask,
|
10
|
+
:user,
|
11
|
+
:pass,
|
12
|
+
:hadoop_binary,
|
12
13
|
:run_every,
|
13
14
|
:hydra_concurrency,
|
14
|
-
:
|
15
|
+
:num_callback_processes,
|
16
|
+
:num_callback_threads
|
15
17
|
|
16
18
|
def initialize(repository_config)
|
17
19
|
@name = repository_config["filbunke_server_repository"]
|
@@ -25,8 +27,19 @@ module Filbunke
|
|
25
27
|
@hadoop_binary = repository_config["hadoop_binary"]
|
26
28
|
@run_every = repository_config.fetch("run_every", 10).to_i
|
27
29
|
@hydra_concurrency = repository_config.fetch("hydra_concurrency", 100).to_i
|
28
|
-
|
29
|
-
@
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
@num_callback_processes = repository_config["num_callback_processes"].to_i
|
31
|
+
@num_callback_threads = repository_config["num_callback_threads"].to_i
|
32
|
+
|
33
|
+
if @num_callback_threads == 0 and @num_callback_processes == 0
|
34
|
+
@num_callback_threads = Parallel.processor_count
|
35
|
+
end
|
36
|
+
|
37
|
+
raise ConfigurationError.new("callbacks cant use both processes and threads (#{@name})") if @num_callback_threads > 0 && @num_callback_processes > 0
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
class ConfigurationError < StandardError
|
44
|
+
end
|
45
|
+
end
|
data/lib/filbunke.rb
CHANGED
@@ -7,8 +7,8 @@ require 'typhoeus'
|
|
7
7
|
require 'open4'
|
8
8
|
require 'uri'
|
9
9
|
require 'logger'
|
10
|
+
require 'parallel'
|
10
11
|
|
11
|
-
require File.expand_path(File.dirname(__FILE__) + '/filbunke/thread_pool.rb')
|
12
12
|
require File.expand_path(File.dirname(__FILE__) + '/filbunke/client.rb')
|
13
13
|
require File.expand_path(File.dirname(__FILE__) + '/filbunke/file.rb')
|
14
14
|
require File.expand_path(File.dirname(__FILE__) + '/filbunke/repository.rb')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filbunke
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter de Bie
|
@@ -11,20 +11,20 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2015-09-19 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: thoughtbot-shoulda
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
requirements:
|
20
|
-
- -
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: '0'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
@@ -33,56 +33,70 @@ dependencies:
|
|
33
33
|
requirements:
|
34
34
|
- - '='
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: 1.
|
36
|
+
version: 1.8.3
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
40
40
|
requirements:
|
41
41
|
- - '='
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 1.
|
43
|
+
version: 1.8.3
|
44
44
|
- !ruby/object:Gem::Dependency
|
45
45
|
name: typhoeus
|
46
46
|
requirement: !ruby/object:Gem::Requirement
|
47
47
|
requirements:
|
48
48
|
- - '='
|
49
49
|
- !ruby/object:Gem::Version
|
50
|
-
version:
|
50
|
+
version: 0.7.3
|
51
51
|
type: :runtime
|
52
52
|
prerelease: false
|
53
53
|
version_requirements: !ruby/object:Gem::Requirement
|
54
54
|
requirements:
|
55
55
|
- - '='
|
56
56
|
- !ruby/object:Gem::Version
|
57
|
-
version:
|
57
|
+
version: 0.7.3
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: open4
|
60
60
|
requirement: !ruby/object:Gem::Requirement
|
61
61
|
requirements:
|
62
62
|
- - '='
|
63
63
|
- !ruby/object:Gem::Version
|
64
|
-
version: 1.3.
|
64
|
+
version: 1.3.4
|
65
65
|
type: :runtime
|
66
66
|
prerelease: false
|
67
67
|
version_requirements: !ruby/object:Gem::Requirement
|
68
68
|
requirements:
|
69
69
|
- - '='
|
70
70
|
- !ruby/object:Gem::Version
|
71
|
-
version: 1.3.
|
71
|
+
version: 1.3.4
|
72
72
|
- !ruby/object:Gem::Dependency
|
73
73
|
name: mime-types
|
74
74
|
requirement: !ruby/object:Gem::Requirement
|
75
75
|
requirements:
|
76
76
|
- - '='
|
77
77
|
- !ruby/object:Gem::Version
|
78
|
-
version:
|
78
|
+
version: 2.6.2
|
79
79
|
type: :runtime
|
80
80
|
prerelease: false
|
81
81
|
version_requirements: !ruby/object:Gem::Requirement
|
82
82
|
requirements:
|
83
83
|
- - '='
|
84
84
|
- !ruby/object:Gem::Version
|
85
|
-
version:
|
85
|
+
version: 2.6.2
|
86
|
+
- !ruby/object:Gem::Dependency
|
87
|
+
name: parallel
|
88
|
+
requirement: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - '='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.6.1
|
93
|
+
type: :runtime
|
94
|
+
prerelease: false
|
95
|
+
version_requirements: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - '='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 1.6.1
|
86
100
|
description: Filbunke client and library
|
87
101
|
email: technical@deltaprojects.com
|
88
102
|
executables:
|
@@ -90,9 +104,9 @@ executables:
|
|
90
104
|
extensions: []
|
91
105
|
extra_rdoc_files: []
|
92
106
|
files:
|
93
|
-
- .gitignore
|
94
|
-
- .ruby-gemset
|
95
|
-
- .ruby-version
|
107
|
+
- ".gitignore"
|
108
|
+
- ".ruby-gemset"
|
109
|
+
- ".ruby-version"
|
96
110
|
- Gemfile
|
97
111
|
- LICENSE
|
98
112
|
- README.rdoc
|
@@ -112,7 +126,6 @@ files:
|
|
112
126
|
- lib/filbunke/file.rb
|
113
127
|
- lib/filbunke/logger.rb
|
114
128
|
- lib/filbunke/repository.rb
|
115
|
-
- lib/filbunke/thread_pool.rb
|
116
129
|
- test/helper.rb
|
117
130
|
- test/test_filbunke.rb
|
118
131
|
homepage: https://rubygems.org/gems/filbunke
|
@@ -124,17 +137,17 @@ require_paths:
|
|
124
137
|
- lib
|
125
138
|
required_ruby_version: !ruby/object:Gem::Requirement
|
126
139
|
requirements:
|
127
|
-
- -
|
140
|
+
- - ">="
|
128
141
|
- !ruby/object:Gem::Version
|
129
142
|
version: '0'
|
130
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
144
|
requirements:
|
132
|
-
- -
|
145
|
+
- - ">="
|
133
146
|
- !ruby/object:Gem::Version
|
134
147
|
version: '0'
|
135
148
|
requirements: []
|
136
149
|
rubyforge_project:
|
137
|
-
rubygems_version: 2.4.
|
150
|
+
rubygems_version: 2.4.5.1
|
138
151
|
signing_key:
|
139
152
|
specification_version: 4
|
140
153
|
summary: Filbunke client
|
data/lib/filbunke/thread_pool.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
# Inspired by https://github.com/meh/ruby-threadpool
|
2
|
-
require 'thread'
|
3
|
-
|
4
|
-
class ThreadPool
|
5
|
-
|
6
|
-
class Job < Struct.new(:args, :block); end
|
7
|
-
|
8
|
-
def initialize(min, max = nil)
|
9
|
-
|
10
|
-
trap("INT") { shutdown }
|
11
|
-
|
12
|
-
@min = min
|
13
|
-
@max = max || min
|
14
|
-
|
15
|
-
@cv = ConditionVariable.new
|
16
|
-
@mutex = Mutex.new
|
17
|
-
|
18
|
-
@queue = []
|
19
|
-
@workers = []
|
20
|
-
|
21
|
-
@spawned = 0
|
22
|
-
@waiting = 0
|
23
|
-
@shutdown = false
|
24
|
-
@queue_locked = false
|
25
|
-
|
26
|
-
@mutex.synchronize do
|
27
|
-
min.times { spawn_thread }
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def execute(*args, &block)
|
32
|
-
@mutex.synchronize do
|
33
|
-
raise "Thread pool is about to shutdown" if @shutdown || @queue_locked
|
34
|
-
|
35
|
-
@queue << Job.new(args, block)
|
36
|
-
|
37
|
-
spawn_thread if @waiting == 0 && @spawned < @max
|
38
|
-
|
39
|
-
@cv.signal
|
40
|
-
end
|
41
|
-
end
|
42
|
-
alias :<< :execute
|
43
|
-
|
44
|
-
def shutdown
|
45
|
-
@mutex.synchronize do
|
46
|
-
@shutdown = true
|
47
|
-
@cv.broadcast
|
48
|
-
end
|
49
|
-
|
50
|
-
@workers.first.join until @workers.empty?
|
51
|
-
end
|
52
|
-
|
53
|
-
def join
|
54
|
-
@mutex.synchronize do
|
55
|
-
@queue_locked = true
|
56
|
-
@cv.broadcast
|
57
|
-
sleep 0.01 until @queue.empty?
|
58
|
-
end
|
59
|
-
shutdown
|
60
|
-
end
|
61
|
-
|
62
|
-
protected
|
63
|
-
|
64
|
-
def spawn_thread
|
65
|
-
thread = Thread.new do
|
66
|
-
continue = true
|
67
|
-
|
68
|
-
while continue do
|
69
|
-
job = nil
|
70
|
-
|
71
|
-
@mutex.synchronize do
|
72
|
-
while @queue.empty? && continue
|
73
|
-
if @shutdown || @queue_locked
|
74
|
-
continue = false
|
75
|
-
break
|
76
|
-
end
|
77
|
-
|
78
|
-
@waiting += 1
|
79
|
-
@cv.wait @mutex
|
80
|
-
@waiting -= 1
|
81
|
-
|
82
|
-
if @shutdown || @queue_locked
|
83
|
-
continue = false
|
84
|
-
break
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
if continue
|
89
|
-
job = @queue.shift
|
90
|
-
job.block.call(*job.args) if job
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
@mutex.synchronize do
|
96
|
-
@spawned -= 1
|
97
|
-
@workers.delete thread
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
@workers << thread
|
102
|
-
thread
|
103
|
-
end
|
104
|
-
end
|