filbunke 1.13.5 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.ruby-version +1 -1
- data/Rakefile +5 -4
- data/VERSION +1 -1
- data/filbunke.gemspec +19 -17
- data/lib/filbunke/callbacks.rb +1 -16
- data/lib/filbunke/client.rb +82 -105
- data/lib/filbunke/daemon.rb +2 -18
- data/lib/filbunke/logger.rb +10 -10
- data/lib/filbunke/repository.rb +28 -15
- data/lib/filbunke.rb +1 -1
- metadata +32 -19
- data/lib/filbunke/thread_pool.rb +0 -104
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
NzlmYmMzN2YwYjI1ZTA5MzhkNThkNWQ0ZDk4YTZiMmQxMWZhNGY4OA==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9ebdbdfb5aa1fc9a53ffe60b2b79d8cec6fe920c
|
4
|
+
data.tar.gz: d320cceb5cd74e3c443495fe2305e08a7d385af8
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
YzkzMzAxOTc2ZmUxMGZlYmRhM2VhYTg1MGI2ZDY4YzAyMmFjNTY1OWNmZDMx
|
11
|
-
ODczMzhhZjVhOWRmYWM5YTMxZTVlOTdkZTRhNmIwOWVkNjJhYzc=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
YTg2Y2NmNzdjYjAxZTRlOTg2NDdlMGIxMTUyOTUwZDdlOGY1ZGRlY2U0NDM2
|
14
|
-
NTljMTc4ZGFlYjIzNWFkMDFmNzJiZmFiYTJmNTY5YzFlZWRkYzNkMmZkNjQ2
|
15
|
-
YWMzYTc0MjY3OTk2ZjkyYTQ4YzAzMDhmOGM3ZTU2OTVjNjBkM2E=
|
6
|
+
metadata.gz: dbd84ac518738229f3333557d68fa8c39dbe9153358ba2da521b575b2003bc814d595bc0d8bc256ddc3dee8965b7eefdb8c72117e0fd244d7847d425126e9fcb
|
7
|
+
data.tar.gz: 815b0982c1c9fab0fdb5c08fd34140796bd89e8cf1cf4c9ed00d474a71dec52ca301f88d60105038ceced0a41d31346a589a95c048ec79cc62c7dc8d01c25990
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.2
|
data/Rakefile
CHANGED
@@ -13,10 +13,11 @@ begin
|
|
13
13
|
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
14
14
|
gem.files.exclude 'pkg'
|
15
15
|
gem.executables = ['filbunked']
|
16
|
-
gem.add_dependency 'json', '= 1.
|
17
|
-
gem.add_dependency 'typhoeus', '=
|
18
|
-
gem.add_dependency 'open4', '= 1.3.
|
19
|
-
gem.add_dependency 'mime-types', '=
|
16
|
+
gem.add_dependency 'json', '= 1.8.3'
|
17
|
+
gem.add_dependency 'typhoeus', '= 0.7.3'
|
18
|
+
gem.add_dependency 'open4', '= 1.3.4'
|
19
|
+
gem.add_dependency 'mime-types', '= 2.6.2'
|
20
|
+
gem.add_dependency 'parallel', '= 1.6.1'
|
20
21
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
21
22
|
end
|
22
23
|
Jeweler::GemcutterTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.2
|
data/filbunke.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: filbunke
|
5
|
+
# stub: filbunke 2.0.2 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "filbunke"
|
9
|
-
s.version = "
|
9
|
+
s.version = "2.0.2"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
|
14
|
-
s.date = "
|
14
|
+
s.date = "2015-09-19"
|
15
15
|
s.description = "Filbunke client and library"
|
16
16
|
s.email = "technical@deltaprojects.com"
|
17
17
|
s.executables = ["filbunked"]
|
@@ -38,12 +38,11 @@ Gem::Specification.new do |s|
|
|
38
38
|
"lib/filbunke/file.rb",
|
39
39
|
"lib/filbunke/logger.rb",
|
40
40
|
"lib/filbunke/repository.rb",
|
41
|
-
"lib/filbunke/thread_pool.rb",
|
42
41
|
"test/helper.rb",
|
43
42
|
"test/test_filbunke.rb"
|
44
43
|
]
|
45
44
|
s.homepage = "https://rubygems.org/gems/filbunke"
|
46
|
-
s.rubygems_version = "2.4.
|
45
|
+
s.rubygems_version = "2.4.5.1"
|
47
46
|
s.summary = "Filbunke client"
|
48
47
|
|
49
48
|
if s.respond_to? :specification_version then
|
@@ -51,23 +50,26 @@ Gem::Specification.new do |s|
|
|
51
50
|
|
52
51
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
52
|
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
54
|
-
s.add_runtime_dependency(%q<json>, ["= 1.
|
55
|
-
s.add_runtime_dependency(%q<typhoeus>, ["=
|
56
|
-
s.add_runtime_dependency(%q<open4>, ["= 1.3.
|
57
|
-
s.add_runtime_dependency(%q<mime-types>, ["=
|
53
|
+
s.add_runtime_dependency(%q<json>, ["= 1.8.3"])
|
54
|
+
s.add_runtime_dependency(%q<typhoeus>, ["= 0.7.3"])
|
55
|
+
s.add_runtime_dependency(%q<open4>, ["= 1.3.4"])
|
56
|
+
s.add_runtime_dependency(%q<mime-types>, ["= 2.6.2"])
|
57
|
+
s.add_runtime_dependency(%q<parallel>, ["= 1.6.1"])
|
58
58
|
else
|
59
59
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
60
|
-
s.add_dependency(%q<json>, ["= 1.
|
61
|
-
s.add_dependency(%q<typhoeus>, ["=
|
62
|
-
s.add_dependency(%q<open4>, ["= 1.3.
|
63
|
-
s.add_dependency(%q<mime-types>, ["=
|
60
|
+
s.add_dependency(%q<json>, ["= 1.8.3"])
|
61
|
+
s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
|
62
|
+
s.add_dependency(%q<open4>, ["= 1.3.4"])
|
63
|
+
s.add_dependency(%q<mime-types>, ["= 2.6.2"])
|
64
|
+
s.add_dependency(%q<parallel>, ["= 1.6.1"])
|
64
65
|
end
|
65
66
|
else
|
66
67
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
67
|
-
s.add_dependency(%q<json>, ["= 1.
|
68
|
-
s.add_dependency(%q<typhoeus>, ["=
|
69
|
-
s.add_dependency(%q<open4>, ["= 1.3.
|
70
|
-
s.add_dependency(%q<mime-types>, ["=
|
68
|
+
s.add_dependency(%q<json>, ["= 1.8.3"])
|
69
|
+
s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
|
70
|
+
s.add_dependency(%q<open4>, ["= 1.3.4"])
|
71
|
+
s.add_dependency(%q<mime-types>, ["= 2.6.2"])
|
72
|
+
s.add_dependency(%q<parallel>, ["= 1.6.1"])
|
71
73
|
end
|
72
74
|
end
|
73
75
|
|
data/lib/filbunke/callbacks.rb
CHANGED
@@ -6,27 +6,12 @@ module Filbunke
|
|
6
6
|
@logger = logger
|
7
7
|
end
|
8
8
|
|
9
|
-
def on_update_batch(files)
|
10
|
-
files.each do |item|
|
11
|
-
on_update(item.file, item.local_file_path)
|
12
|
-
end
|
13
|
-
end
|
14
9
|
def on_update(file, local_file_path)
|
15
10
|
end
|
16
11
|
|
17
|
-
def on_no_change_batch(files)
|
18
|
-
files.each do |item|
|
19
|
-
on_no_change(item.file, item.local_file_path)
|
20
|
-
end
|
21
|
-
end
|
22
12
|
def on_no_change(file, local_file_path)
|
23
13
|
end
|
24
|
-
|
25
|
-
def on_delete_batch(files)
|
26
|
-
files.each do |item|
|
27
|
-
on_delete(item.file, item.local_file_path)
|
28
|
-
end
|
29
|
-
end
|
14
|
+
|
30
15
|
def on_delete(file, local_file_path)
|
31
16
|
end
|
32
17
|
|
data/lib/filbunke/client.rb
CHANGED
@@ -16,6 +16,7 @@ module Filbunke
|
|
16
16
|
@repository = repository
|
17
17
|
@logger = logger
|
18
18
|
@callbacks = callbacks
|
19
|
+
@parallel_callback_opts = (@repository.num_callback_threads > 0 ? {:in_threads => @repository.num_callback_threads} : {:in_processes => repository.num_callback_processes} )
|
19
20
|
@failed_request_log_file_name = failed_request_log_file_name
|
20
21
|
@hydra = Typhoeus::Hydra.new(:max_concurrency => @repository.hydra_concurrency)
|
21
22
|
|
@@ -25,26 +26,18 @@ module Filbunke
|
|
25
26
|
def with_updated_files(last_checkpoint)
|
26
27
|
updates = get_updated_file_list(last_checkpoint)
|
27
28
|
updated_files = updates["files"] || []
|
29
|
+
failure = false
|
30
|
+
|
28
31
|
new_checkpoint = updates["checkpoint"]
|
29
|
-
|
30
|
-
|
31
|
-
return begin
|
32
|
-
fetch_remote_last_checkpoint
|
33
|
-
rescue => e
|
34
|
-
@logger.warn "Failed to fetch remote last_checkpoint #{@repository.name} will fall back to local last_checkpoint=#{last_checkpoint}"
|
35
|
-
last_checkpoint
|
36
|
-
end
|
37
|
-
else
|
38
|
-
return last_checkpoint
|
39
|
-
end
|
40
|
-
end
|
32
|
+
|
33
|
+
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
|
41
34
|
|
42
|
-
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
|
43
35
|
@async_requests = []
|
36
|
+
|
44
37
|
callbacks_on_update = []
|
45
38
|
callbacks_on_no_change = []
|
46
39
|
callbacks_on_delete = []
|
47
|
-
|
40
|
+
|
48
41
|
updated_files.each do |raw_file|
|
49
42
|
file = File.new(raw_file)
|
50
43
|
local_file_path = ::File.join(@repository.local_path, file.path)
|
@@ -58,56 +51,41 @@ module Filbunke
|
|
58
51
|
callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
|
59
52
|
else
|
60
53
|
@logger.error "Unable to get file #{file.url} ==> #{file.path}!"
|
61
|
-
|
62
|
-
break
|
54
|
+
failure = true
|
63
55
|
end
|
56
|
+
|
64
57
|
else
|
65
58
|
@logger.debug "File exists with correct hash: #{local_file_path}"
|
66
59
|
callbacks_on_no_change << OpenStruct.new({:file => file, :local_file_path => local_file_path})
|
67
60
|
end
|
68
61
|
end
|
69
62
|
end
|
63
|
+
@hydra.run
|
70
64
|
|
71
|
-
|
72
|
-
@logger.
|
73
|
-
|
74
|
-
end
|
75
|
-
@logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
|
76
|
-
has_fetch_failures = begin
|
77
|
-
@hydra.run
|
78
|
-
# Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
|
79
|
-
# I think we can remove the following request validation
|
80
|
-
@async_requests.any? do |request|
|
81
|
-
@logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
|
82
|
-
request.response.nil? || request.response.code != 200
|
83
|
-
end
|
84
|
-
rescue RuntimeError, SystemCallError, StandardError => e
|
85
|
-
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
86
|
-
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
87
|
-
true
|
88
|
-
end
|
89
|
-
|
90
|
-
if has_fetch_failures
|
91
|
-
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
92
|
-
return last_checkpoint
|
65
|
+
pfailure = failure || @async_requests.any? do |request|
|
66
|
+
@logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
|
67
|
+
request.response.nil? || request.response.code != 200
|
93
68
|
end
|
94
69
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
70
|
+
if pfailure == false
|
71
|
+
@logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
|
72
|
+
begin
|
73
|
+
run_callbacks_delete(callbacks_on_delete)
|
74
|
+
run_callbacks(callbacks_on_update)
|
75
|
+
run_callbacks_no_change(callbacks_on_no_change)
|
76
|
+
|
77
|
+
new_checkpoint || last_checkpoint
|
78
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
79
|
+
msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
80
|
+
@logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
81
|
+
last_checkpoint
|
82
|
+
end
|
83
|
+
else
|
84
|
+
@logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
105
85
|
last_checkpoint
|
106
86
|
end
|
107
|
-
|
108
|
-
new_or_last_checkpoint
|
109
87
|
end
|
110
|
-
|
88
|
+
|
111
89
|
def update_files!(last_checkpoint)
|
112
90
|
with_updated_files(last_checkpoint) {}
|
113
91
|
end
|
@@ -159,7 +137,7 @@ module Filbunke
|
|
159
137
|
end
|
160
138
|
end
|
161
139
|
|
162
|
-
def
|
140
|
+
def last_checkpoint
|
163
141
|
last_checkpoint_http = Net::HTTP.new(@repository.host, @repository.port)
|
164
142
|
last_checkpoint_http.start do |http|
|
165
143
|
last_checkpoint_path = "/#{UPDATES_ACTION}/#{@repository.name}/#{LAST_CHECKPOINT_ACTION}"
|
@@ -171,7 +149,7 @@ module Filbunke
|
|
171
149
|
return response.body.chomp.to_i
|
172
150
|
end
|
173
151
|
end
|
174
|
-
|
152
|
+
|
175
153
|
private
|
176
154
|
|
177
155
|
def log_failed_request(failed_request_command, e)
|
@@ -183,11 +161,11 @@ module Filbunke
|
|
183
161
|
end
|
184
162
|
|
185
163
|
def update_file!(file, local_file_path)
|
186
|
-
|
164
|
+
|
187
165
|
if file.url =~ /^http:\/\//
|
188
166
|
update_http_file!(file, local_file_path)
|
189
167
|
elsif (file.url =~ /^hdfs:\/\//)
|
190
|
-
update_hdfs_file!(file, local_file_path)
|
168
|
+
success = update_hdfs_file!(file, local_file_path)
|
191
169
|
else
|
192
170
|
raise "Unsupported protocol for file: #{file.inspect}"
|
193
171
|
end
|
@@ -195,19 +173,25 @@ module Filbunke
|
|
195
173
|
|
196
174
|
def run_callbacks(files)
|
197
175
|
@callbacks.each do |callback|
|
198
|
-
|
176
|
+
Parallel.map(files, @parallel_callback_opts) do |item|
|
177
|
+
callback.on_update(item.file, item.local_file_path)
|
178
|
+
end
|
199
179
|
end
|
200
180
|
end
|
201
181
|
|
202
182
|
def run_callbacks_no_change(files)
|
203
183
|
@callbacks.each do |callback|
|
204
|
-
|
184
|
+
Parallel.map(files, @parallel_callback_opts) do |item|
|
185
|
+
callback.on_no_change(item.file, item.local_file_path)
|
186
|
+
end
|
205
187
|
end
|
206
188
|
end
|
207
189
|
|
208
190
|
def run_callbacks_delete(files)
|
209
191
|
@callbacks.each do |callback|
|
210
|
-
|
192
|
+
Parallel.map(files, @parallel_callback_opts) do |item|
|
193
|
+
callback.on_delete(item.file, item.local_file_path)
|
194
|
+
end
|
211
195
|
end
|
212
196
|
end
|
213
197
|
|
@@ -226,7 +210,6 @@ module Filbunke
|
|
226
210
|
updates_http.read_timeout = 300 # default is 60 seconds
|
227
211
|
updates_http.start do |http|
|
228
212
|
updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
|
229
|
-
updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
|
230
213
|
begin
|
231
214
|
@logger.info "Fetching updated file list from #{updates_path}"
|
232
215
|
request = Net::HTTP::Get.new(updates_path)
|
@@ -245,67 +228,44 @@ module Filbunke
|
|
245
228
|
rescue StandardError => e
|
246
229
|
@logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
|
247
230
|
return {}
|
248
|
-
end
|
231
|
+
end
|
249
232
|
end
|
250
233
|
|
251
234
|
def update_http_file!(file, local_file_path)
|
252
|
-
downloaded_file = nil
|
253
|
-
tmp_filename = "#{local_file_path}.tmp"
|
254
235
|
begin
|
255
236
|
async_request = if @repository.user
|
256
|
-
Typhoeus::Request.new(
|
257
|
-
URI.escape(file.url),
|
258
|
-
:followlocation => true,
|
259
|
-
:username => @repository.user,
|
260
|
-
:password => @repository.pass
|
261
|
-
)
|
237
|
+
Typhoeus::Request.new(URI.encode(file.url), :followlocation => true, :username => @repository.user, :password => @repository.pass)
|
262
238
|
else
|
263
|
-
Typhoeus::Request.new(
|
264
|
-
URI.escape(file.url),
|
265
|
-
:followlocation => true
|
266
|
-
)
|
267
|
-
end
|
268
|
-
|
269
|
-
|
270
|
-
async_request.on_headers do |response|
|
271
|
-
if response.code != 200
|
272
|
-
raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
|
273
|
-
end
|
274
|
-
@logger.debug("Updating: #{local_file_path}")
|
275
|
-
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
276
|
-
downloaded_file = ::File.new(tmp_filename, "wb")
|
277
|
-
end
|
278
|
-
|
279
|
-
async_request.on_body do |chunk, response|
|
280
|
-
downloaded_file.write(chunk) if response.code == 200
|
239
|
+
Typhoeus::Request.new(URI.encode(file.url), :followlocation => true)
|
281
240
|
end
|
282
|
-
|
283
241
|
async_request.on_complete do |response|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
::FileUtils.mv(tmp_filename, local_file_path)
|
242
|
+
success = false
|
243
|
+
begin
|
244
|
+
success = response.code.to_i == 200
|
245
|
+
if success
|
246
|
+
write_file!(local_file_path, response.body)
|
290
247
|
else
|
291
|
-
|
248
|
+
body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
|
249
|
+
@logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
|
292
250
|
end
|
251
|
+
rescue SystemCallError, StandardError => e
|
252
|
+
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
253
|
+
@logger.error "Failed to update file #{file.url}: #{msg}"
|
293
254
|
end
|
294
|
-
|
255
|
+
# return the async_request.handled_response value here
|
256
|
+
success
|
295
257
|
end
|
296
258
|
@hydra.queue async_request
|
297
259
|
@async_requests << async_request
|
298
|
-
|
299
|
-
rescue RuntimeError, SystemCallError, StandardError => e
|
260
|
+
rescue StandardError => e
|
300
261
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
301
262
|
@logger.error "Failed to update file #{file.url}: #{msg}"
|
302
|
-
|
303
|
-
::FileUtils.rm(tmp_filename)
|
304
|
-
end
|
305
|
-
false
|
263
|
+
return false
|
306
264
|
end
|
307
|
-
end
|
308
265
|
|
266
|
+
return true
|
267
|
+
end
|
268
|
+
|
309
269
|
def update_hdfs_file!(file, local_file_path)
|
310
270
|
begin
|
311
271
|
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
@@ -314,10 +274,10 @@ module Filbunke
|
|
314
274
|
url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
|
315
275
|
hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
|
316
276
|
#@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
|
317
|
-
|
277
|
+
|
318
278
|
pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
|
319
279
|
ignored, status = Process::waitpid2 pid
|
320
|
-
|
280
|
+
|
321
281
|
if status.exitstatus == 0 then
|
322
282
|
begin
|
323
283
|
::FileUtils.mv "#{local_file_path}.tmp", local_file_path
|
@@ -326,7 +286,7 @@ module Filbunke
|
|
326
286
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
327
287
|
@logger.error "Failed to move hdfs file #{file.url}: #{msg}"
|
328
288
|
return false
|
329
|
-
end
|
289
|
+
end
|
330
290
|
else
|
331
291
|
@logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
|
332
292
|
return false
|
@@ -338,6 +298,23 @@ module Filbunke
|
|
338
298
|
end
|
339
299
|
end
|
340
300
|
|
301
|
+
def write_file!(file_path, contents)
|
302
|
+
::FileUtils.mkdir_p(::File.dirname(file_path))
|
303
|
+
@logger.debug("Updating: #{file_path}")
|
304
|
+
begin
|
305
|
+
::File.open("#{file_path}.tmp", 'w') do |file|
|
306
|
+
file.write(contents)
|
307
|
+
file.close
|
308
|
+
end
|
309
|
+
::FileUtils.mv "#{file_path}.tmp", file_path
|
310
|
+
return true
|
311
|
+
rescue StandardError => e
|
312
|
+
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
313
|
+
@logger.error "Failed to move file #{file_path}: #{msg}"
|
314
|
+
return false
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
341
318
|
def delete_file!(file_path)
|
342
319
|
if ::File.exists?(file_path) then
|
343
320
|
@logger.debug("Deleting: #{file_path}")
|
data/lib/filbunke/daemon.rb
CHANGED
@@ -15,7 +15,6 @@ module Filbunke
|
|
15
15
|
@logger.log("Initializing repository: #{repository_name}")
|
16
16
|
@clients << begin
|
17
17
|
repository_config["run_every"] = repository_config.fetch("run_every", @config.fetch("run_every", 10))
|
18
|
-
repository_config["batch_size"] = repository_config.fetch("batch_size", @config.fetch("batch_size", 0))
|
19
18
|
repository = Repository.new(repository_config)
|
20
19
|
callbacks = []
|
21
20
|
repository_config["callbacks"].each do |callback_name, callback_config|
|
@@ -26,10 +25,6 @@ module Filbunke
|
|
26
25
|
failed_request_log_file_name = repository_config["failed_request_log_file_name"]||nil
|
27
26
|
|
28
27
|
Client.new(repository, @logger, callbacks, failed_request_log_file_name)
|
29
|
-
rescue => e
|
30
|
-
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
31
|
-
@logger.error("Failed to initialize #{repository_name}; #{msg}")
|
32
|
-
raise e
|
33
28
|
end
|
34
29
|
end
|
35
30
|
end
|
@@ -48,13 +43,14 @@ module Filbunke
|
|
48
43
|
update_checkpoint_for_repository(client.repository, new_checkpoint)
|
49
44
|
sleep client.repository.run_every
|
50
45
|
end
|
51
|
-
rescue => e
|
46
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
52
47
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
53
48
|
@logger.error("#{client.repository.name} Died.. #{msg}")
|
54
49
|
Process.kill("KILL", @parent_pid)
|
55
50
|
exit 1
|
56
51
|
end
|
57
52
|
end
|
53
|
+
|
58
54
|
end
|
59
55
|
client_pids.each { |pid| Process.wait(pid) }
|
60
56
|
end
|
@@ -83,18 +79,6 @@ module Filbunke
|
|
83
79
|
end
|
84
80
|
|
85
81
|
def write_pid!(pid_file_path)
|
86
|
-
|
87
|
-
begin
|
88
|
-
existing_process = ::File.read(pid_file_path).to_i if ::File.readable?(pid_file_path)
|
89
|
-
if existing_process != nil and existing_process > 0
|
90
|
-
@logger.info("killing existing process #{existing_process} from #{pid_file_path}")
|
91
|
-
Process.kill("KILL", existing_process)
|
92
|
-
|
93
|
-
end
|
94
|
-
rescue => e
|
95
|
-
@logger.warn("failed to kill existing pid from #{pid_file_path}: #{e}\n\twill ignore and continue...")
|
96
|
-
end
|
97
|
-
|
98
82
|
::File.open(pid_file_path, 'w') do |f|
|
99
83
|
f.write(Process.pid.to_i)
|
100
84
|
f.close
|
data/lib/filbunke/logger.rb
CHANGED
@@ -4,41 +4,41 @@ module Filbunke
|
|
4
4
|
|
5
5
|
def initialize(log_file_name, local, level)
|
6
6
|
@local = local
|
7
|
-
@
|
7
|
+
@logger = if @local or log_file_name.nil?
|
8
8
|
Logger.new(STDOUT)
|
9
9
|
else
|
10
10
|
Logger.new(log_file_name)
|
11
11
|
end
|
12
12
|
|
13
|
-
@
|
13
|
+
@logger.level = parse_level(level)
|
14
14
|
end
|
15
15
|
|
16
16
|
def puts(msg)
|
17
|
-
info(msg)
|
17
|
+
@logger.info(msg)
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def log(msg)
|
21
|
-
@
|
21
|
+
@logger.info msg
|
22
22
|
end
|
23
23
|
|
24
24
|
def info(msg)
|
25
|
-
@
|
25
|
+
@logger.info msg
|
26
26
|
end
|
27
27
|
|
28
28
|
def error(msg)
|
29
|
-
@
|
29
|
+
@logger.error msg
|
30
30
|
end
|
31
31
|
|
32
32
|
def warn(msg)
|
33
|
-
@
|
33
|
+
@logger.warn msg
|
34
34
|
end
|
35
35
|
|
36
36
|
def debug(msg)
|
37
|
-
@
|
37
|
+
@logger.debug msg
|
38
38
|
end
|
39
39
|
|
40
40
|
def fatal(msg)
|
41
|
-
@
|
41
|
+
@logger.error msg
|
42
42
|
end
|
43
43
|
|
44
44
|
def parse_level(constantOrString)
|
data/lib/filbunke/repository.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
module Filbunke
|
2
2
|
class Repository
|
3
|
-
|
4
|
-
|
5
|
-
:
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
3
|
+
|
4
|
+
attr_accessor :name,
|
5
|
+
:host,
|
6
|
+
:port,
|
7
|
+
:local_path,
|
8
|
+
:file_umask,
|
9
|
+
:directory_umask,
|
10
|
+
:user,
|
11
|
+
:pass,
|
12
|
+
:hadoop_binary,
|
12
13
|
:run_every,
|
13
14
|
:hydra_concurrency,
|
14
|
-
:
|
15
|
+
:num_callback_processes,
|
16
|
+
:num_callback_threads
|
15
17
|
|
16
18
|
def initialize(repository_config)
|
17
19
|
@name = repository_config["filbunke_server_repository"]
|
@@ -25,8 +27,19 @@ module Filbunke
|
|
25
27
|
@hadoop_binary = repository_config["hadoop_binary"]
|
26
28
|
@run_every = repository_config.fetch("run_every", 10).to_i
|
27
29
|
@hydra_concurrency = repository_config.fetch("hydra_concurrency", 100).to_i
|
28
|
-
|
29
|
-
@
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
@num_callback_processes = repository_config["num_callback_processes"].to_i
|
31
|
+
@num_callback_threads = repository_config["num_callback_threads"].to_i
|
32
|
+
|
33
|
+
if @num_callback_threads == 0 and @num_callback_processes == 0
|
34
|
+
@num_callback_threads = Parallel.processor_count
|
35
|
+
end
|
36
|
+
|
37
|
+
raise ConfigurationError.new("callbacks cant use both processes and threads (#{@name})") if @num_callback_threads > 0 && @num_callback_processes > 0
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
class ConfigurationError < StandardError
|
44
|
+
end
|
45
|
+
end
|
data/lib/filbunke.rb
CHANGED
@@ -7,8 +7,8 @@ require 'typhoeus'
|
|
7
7
|
require 'open4'
|
8
8
|
require 'uri'
|
9
9
|
require 'logger'
|
10
|
+
require 'parallel'
|
10
11
|
|
11
|
-
require File.expand_path(File.dirname(__FILE__) + '/filbunke/thread_pool.rb')
|
12
12
|
require File.expand_path(File.dirname(__FILE__) + '/filbunke/client.rb')
|
13
13
|
require File.expand_path(File.dirname(__FILE__) + '/filbunke/file.rb')
|
14
14
|
require File.expand_path(File.dirname(__FILE__) + '/filbunke/repository.rb')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filbunke
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter de Bie
|
@@ -11,20 +11,20 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2015-09-19 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: thoughtbot-shoulda
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
requirements:
|
20
|
-
- -
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: '0'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
@@ -33,56 +33,70 @@ dependencies:
|
|
33
33
|
requirements:
|
34
34
|
- - '='
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: 1.
|
36
|
+
version: 1.8.3
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
40
40
|
requirements:
|
41
41
|
- - '='
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 1.
|
43
|
+
version: 1.8.3
|
44
44
|
- !ruby/object:Gem::Dependency
|
45
45
|
name: typhoeus
|
46
46
|
requirement: !ruby/object:Gem::Requirement
|
47
47
|
requirements:
|
48
48
|
- - '='
|
49
49
|
- !ruby/object:Gem::Version
|
50
|
-
version:
|
50
|
+
version: 0.7.3
|
51
51
|
type: :runtime
|
52
52
|
prerelease: false
|
53
53
|
version_requirements: !ruby/object:Gem::Requirement
|
54
54
|
requirements:
|
55
55
|
- - '='
|
56
56
|
- !ruby/object:Gem::Version
|
57
|
-
version:
|
57
|
+
version: 0.7.3
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: open4
|
60
60
|
requirement: !ruby/object:Gem::Requirement
|
61
61
|
requirements:
|
62
62
|
- - '='
|
63
63
|
- !ruby/object:Gem::Version
|
64
|
-
version: 1.3.
|
64
|
+
version: 1.3.4
|
65
65
|
type: :runtime
|
66
66
|
prerelease: false
|
67
67
|
version_requirements: !ruby/object:Gem::Requirement
|
68
68
|
requirements:
|
69
69
|
- - '='
|
70
70
|
- !ruby/object:Gem::Version
|
71
|
-
version: 1.3.
|
71
|
+
version: 1.3.4
|
72
72
|
- !ruby/object:Gem::Dependency
|
73
73
|
name: mime-types
|
74
74
|
requirement: !ruby/object:Gem::Requirement
|
75
75
|
requirements:
|
76
76
|
- - '='
|
77
77
|
- !ruby/object:Gem::Version
|
78
|
-
version:
|
78
|
+
version: 2.6.2
|
79
79
|
type: :runtime
|
80
80
|
prerelease: false
|
81
81
|
version_requirements: !ruby/object:Gem::Requirement
|
82
82
|
requirements:
|
83
83
|
- - '='
|
84
84
|
- !ruby/object:Gem::Version
|
85
|
-
version:
|
85
|
+
version: 2.6.2
|
86
|
+
- !ruby/object:Gem::Dependency
|
87
|
+
name: parallel
|
88
|
+
requirement: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - '='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.6.1
|
93
|
+
type: :runtime
|
94
|
+
prerelease: false
|
95
|
+
version_requirements: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - '='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 1.6.1
|
86
100
|
description: Filbunke client and library
|
87
101
|
email: technical@deltaprojects.com
|
88
102
|
executables:
|
@@ -90,9 +104,9 @@ executables:
|
|
90
104
|
extensions: []
|
91
105
|
extra_rdoc_files: []
|
92
106
|
files:
|
93
|
-
- .gitignore
|
94
|
-
- .ruby-gemset
|
95
|
-
- .ruby-version
|
107
|
+
- ".gitignore"
|
108
|
+
- ".ruby-gemset"
|
109
|
+
- ".ruby-version"
|
96
110
|
- Gemfile
|
97
111
|
- LICENSE
|
98
112
|
- README.rdoc
|
@@ -112,7 +126,6 @@ files:
|
|
112
126
|
- lib/filbunke/file.rb
|
113
127
|
- lib/filbunke/logger.rb
|
114
128
|
- lib/filbunke/repository.rb
|
115
|
-
- lib/filbunke/thread_pool.rb
|
116
129
|
- test/helper.rb
|
117
130
|
- test/test_filbunke.rb
|
118
131
|
homepage: https://rubygems.org/gems/filbunke
|
@@ -124,17 +137,17 @@ require_paths:
|
|
124
137
|
- lib
|
125
138
|
required_ruby_version: !ruby/object:Gem::Requirement
|
126
139
|
requirements:
|
127
|
-
- -
|
140
|
+
- - ">="
|
128
141
|
- !ruby/object:Gem::Version
|
129
142
|
version: '0'
|
130
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
144
|
requirements:
|
132
|
-
- -
|
145
|
+
- - ">="
|
133
146
|
- !ruby/object:Gem::Version
|
134
147
|
version: '0'
|
135
148
|
requirements: []
|
136
149
|
rubyforge_project:
|
137
|
-
rubygems_version: 2.4.
|
150
|
+
rubygems_version: 2.4.5.1
|
138
151
|
signing_key:
|
139
152
|
specification_version: 4
|
140
153
|
summary: Filbunke client
|
data/lib/filbunke/thread_pool.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
# Inspired by https://github.com/meh/ruby-threadpool
|
2
|
-
require 'thread'
|
3
|
-
|
4
|
-
class ThreadPool
|
5
|
-
|
6
|
-
class Job < Struct.new(:args, :block); end
|
7
|
-
|
8
|
-
def initialize(min, max = nil)
|
9
|
-
|
10
|
-
trap("INT") { shutdown }
|
11
|
-
|
12
|
-
@min = min
|
13
|
-
@max = max || min
|
14
|
-
|
15
|
-
@cv = ConditionVariable.new
|
16
|
-
@mutex = Mutex.new
|
17
|
-
|
18
|
-
@queue = []
|
19
|
-
@workers = []
|
20
|
-
|
21
|
-
@spawned = 0
|
22
|
-
@waiting = 0
|
23
|
-
@shutdown = false
|
24
|
-
@queue_locked = false
|
25
|
-
|
26
|
-
@mutex.synchronize do
|
27
|
-
min.times { spawn_thread }
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def execute(*args, &block)
|
32
|
-
@mutex.synchronize do
|
33
|
-
raise "Thread pool is about to shutdown" if @shutdown || @queue_locked
|
34
|
-
|
35
|
-
@queue << Job.new(args, block)
|
36
|
-
|
37
|
-
spawn_thread if @waiting == 0 && @spawned < @max
|
38
|
-
|
39
|
-
@cv.signal
|
40
|
-
end
|
41
|
-
end
|
42
|
-
alias :<< :execute
|
43
|
-
|
44
|
-
def shutdown
|
45
|
-
@mutex.synchronize do
|
46
|
-
@shutdown = true
|
47
|
-
@cv.broadcast
|
48
|
-
end
|
49
|
-
|
50
|
-
@workers.first.join until @workers.empty?
|
51
|
-
end
|
52
|
-
|
53
|
-
def join
|
54
|
-
@mutex.synchronize do
|
55
|
-
@queue_locked = true
|
56
|
-
@cv.broadcast
|
57
|
-
sleep 0.01 until @queue.empty?
|
58
|
-
end
|
59
|
-
shutdown
|
60
|
-
end
|
61
|
-
|
62
|
-
protected
|
63
|
-
|
64
|
-
def spawn_thread
|
65
|
-
thread = Thread.new do
|
66
|
-
continue = true
|
67
|
-
|
68
|
-
while continue do
|
69
|
-
job = nil
|
70
|
-
|
71
|
-
@mutex.synchronize do
|
72
|
-
while @queue.empty? && continue
|
73
|
-
if @shutdown || @queue_locked
|
74
|
-
continue = false
|
75
|
-
break
|
76
|
-
end
|
77
|
-
|
78
|
-
@waiting += 1
|
79
|
-
@cv.wait @mutex
|
80
|
-
@waiting -= 1
|
81
|
-
|
82
|
-
if @shutdown || @queue_locked
|
83
|
-
continue = false
|
84
|
-
break
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
if continue
|
89
|
-
job = @queue.shift
|
90
|
-
job.block.call(*job.args) if job
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
@mutex.synchronize do
|
96
|
-
@spawned -= 1
|
97
|
-
@workers.delete thread
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
@workers << thread
|
102
|
-
thread
|
103
|
-
end
|
104
|
-
end
|