filbunke 1.12.0 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/filbunke.gemspec +6 -6
- data/lib/filbunke/client.rb +88 -63
- data/lib/filbunke/repository.rb +12 -12
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MTMwOWRhNTMzYzNjOTM5YTNlNzM4MDg1MmZiNzk1NDY4Yjk2ZmU0ZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
Mzc2NWQ3YjAxOGJkZWQxYmI1ZDI2MmY2MmMzZjcwZjk5MDhlNWU4OA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Y2Q3MTA0NjJkZGY5Zjg2ZTZmYjY0MjAxMmQ5NTEwNmIxMTUwNmVjNjMwMWNi
|
10
|
+
YTg4MDA0MjgzMjU1MDM5N2U3ZGYyMDg4MDc5NDA2NDlhNWE5OTk3NDlmNzgw
|
11
|
+
MWI2MDMxMjAwYjNhYTVlNzRjOTY4NWYxZjQ0YTI4MzAzZmMwNDc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDBjNWZkNGVhOGU1ZGI3ZDBjMmEzMzA3OTVkODExZTAyNzMyOWE1NmZlMTFh
|
14
|
+
YjZkZGZmNTlhZWNlYzY1NTg2NjY1MjhkNDlmNzY5ZTNkMDAxZTY3MzE0YmI3
|
15
|
+
YTdjNTE2OWNjMWE2ZjJmNmZjMTY2ZjIwOGVmYTRjNWI3NjgzNTk=
|
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ begin
|
|
14
14
|
gem.files.exclude 'pkg'
|
15
15
|
gem.executables = ['filbunked']
|
16
16
|
gem.add_dependency 'json', '= 1.7.5'
|
17
|
-
gem.add_dependency 'typhoeus', '= 0.
|
17
|
+
gem.add_dependency 'typhoeus', '= 1.0.1'
|
18
18
|
gem.add_dependency 'open4', '= 1.3.0'
|
19
19
|
gem.add_dependency 'mime-types', '= 1.19'
|
20
20
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.13.0
|
data/filbunke.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: filbunke 1.
|
5
|
+
# stub: filbunke 1.13.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "filbunke"
|
9
|
-
s.version = "1.
|
9
|
+
s.version = "1.13.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
|
14
|
-
s.date = "2016-03-
|
14
|
+
s.date = "2016-03-05"
|
15
15
|
s.description = "Filbunke client and library"
|
16
16
|
s.email = "technical@deltaprojects.com"
|
17
17
|
s.executables = ["filbunked"]
|
@@ -52,20 +52,20 @@ Gem::Specification.new do |s|
|
|
52
52
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
53
|
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
54
54
|
s.add_runtime_dependency(%q<json>, ["= 1.7.5"])
|
55
|
-
s.add_runtime_dependency(%q<typhoeus>, ["= 0.
|
55
|
+
s.add_runtime_dependency(%q<typhoeus>, ["= 1.0.1"])
|
56
56
|
s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
|
57
57
|
s.add_runtime_dependency(%q<mime-types>, ["= 1.19"])
|
58
58
|
else
|
59
59
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
60
60
|
s.add_dependency(%q<json>, ["= 1.7.5"])
|
61
|
-
s.add_dependency(%q<typhoeus>, ["= 0.
|
61
|
+
s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
|
62
62
|
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
63
63
|
s.add_dependency(%q<mime-types>, ["= 1.19"])
|
64
64
|
end
|
65
65
|
else
|
66
66
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
67
67
|
s.add_dependency(%q<json>, ["= 1.7.5"])
|
68
|
-
s.add_dependency(%q<typhoeus>, ["= 0.
|
68
|
+
s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
|
69
69
|
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
70
70
|
s.add_dependency(%q<mime-types>, ["= 1.19"])
|
71
71
|
end
|
data/lib/filbunke/client.rb
CHANGED
@@ -25,18 +25,17 @@ module Filbunke
|
|
25
25
|
def with_updated_files(last_checkpoint)
|
26
26
|
updates = get_updated_file_list(last_checkpoint)
|
27
27
|
updated_files = updates["files"] || []
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
|
28
|
+
new_checkpoint = updates["checkpoint"] || 0
|
29
|
+
if updated_files.empty?
|
30
|
+
return new_checkpoint
|
31
|
+
end
|
33
32
|
|
33
|
+
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
|
34
34
|
@async_requests = []
|
35
|
-
|
36
35
|
callbacks_on_update = []
|
37
36
|
callbacks_on_no_change = []
|
38
37
|
callbacks_on_delete = []
|
39
|
-
|
38
|
+
has_update_file_failure = false
|
40
39
|
updated_files.each do |raw_file|
|
41
40
|
file = File.new(raw_file)
|
42
41
|
local_file_path = ::File.join(@repository.local_path, file.path)
|
@@ -50,7 +49,8 @@ module Filbunke
|
|
50
49
|
callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
|
51
50
|
else
|
52
51
|
@logger.error "Unable to get file #{file.url} ==> #{file.path}!"
|
53
|
-
|
52
|
+
has_update_file_failure = true
|
53
|
+
break
|
54
54
|
end
|
55
55
|
else
|
56
56
|
@logger.debug "File exists with correct hash: #{local_file_path}"
|
@@ -58,35 +58,47 @@ module Filbunke
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
end
|
61
|
-
@hydra.run
|
62
61
|
|
63
|
-
|
64
|
-
@logger.
|
65
|
-
|
62
|
+
if has_update_file_failure
|
63
|
+
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
64
|
+
return last_checkpoint
|
66
65
|
end
|
67
|
-
|
68
|
-
|
69
|
-
@
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
run_callbacks_no_change(callbacks_on_no_change)
|
77
|
-
|
78
|
-
new_checkpoint || last_checkpoint
|
79
|
-
rescue RuntimeError, SystemCallError, StandardError => e
|
80
|
-
msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
81
|
-
@logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
82
|
-
last_checkpoint
|
66
|
+
@logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
|
67
|
+
has_fetch_failures = begin
|
68
|
+
@hydra.run
|
69
|
+
# Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
|
70
|
+
# I think we can remove the following request validation
|
71
|
+
@async_requests.any? do |request|
|
72
|
+
@logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
|
73
|
+
request.response.nil? || request.response.code != 200
|
83
74
|
end
|
84
|
-
|
85
|
-
|
75
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
76
|
+
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
77
|
+
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
78
|
+
true
|
79
|
+
end
|
80
|
+
|
81
|
+
if has_fetch_failures
|
82
|
+
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
83
|
+
return last_checkpoint
|
84
|
+
end
|
85
|
+
|
86
|
+
@logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
|
87
|
+
new_or_last_checkpoint = begin
|
88
|
+
run_callbacks_delete(callbacks_on_delete)
|
89
|
+
run_callbacks(callbacks_on_update)
|
90
|
+
run_callbacks_no_change(callbacks_on_no_change)
|
91
|
+
|
92
|
+
new_checkpoint || last_checkpoint
|
93
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
94
|
+
msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
95
|
+
@logger.error "FAILED to process callbacks for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
86
96
|
last_checkpoint
|
87
97
|
end
|
98
|
+
|
99
|
+
new_or_last_checkpoint
|
88
100
|
end
|
89
|
-
|
101
|
+
|
90
102
|
def update_files!(last_checkpoint)
|
91
103
|
with_updated_files(last_checkpoint) {}
|
92
104
|
end
|
@@ -150,7 +162,7 @@ module Filbunke
|
|
150
162
|
return response.body.chomp.to_i
|
151
163
|
end
|
152
164
|
end
|
153
|
-
|
165
|
+
|
154
166
|
private
|
155
167
|
|
156
168
|
def log_failed_request(failed_request_command, e)
|
@@ -162,11 +174,11 @@ module Filbunke
|
|
162
174
|
end
|
163
175
|
|
164
176
|
def update_file!(file, local_file_path)
|
165
|
-
|
177
|
+
|
166
178
|
if file.url =~ /^http:\/\//
|
167
179
|
update_http_file!(file, local_file_path)
|
168
180
|
elsif (file.url =~ /^hdfs:\/\//)
|
169
|
-
|
181
|
+
update_hdfs_file!(file, local_file_path)
|
170
182
|
else
|
171
183
|
raise "Unsupported protocol for file: #{file.inspect}"
|
172
184
|
end
|
@@ -205,7 +217,7 @@ module Filbunke
|
|
205
217
|
updates_http.read_timeout = 300 # default is 60 seconds
|
206
218
|
updates_http.start do |http|
|
207
219
|
updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
|
208
|
-
updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size
|
220
|
+
updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
|
209
221
|
begin
|
210
222
|
@logger.info "Fetching updated file list from #{updates_path}"
|
211
223
|
request = Net::HTTP::Get.new(updates_path)
|
@@ -224,44 +236,58 @@ module Filbunke
|
|
224
236
|
rescue StandardError => e
|
225
237
|
@logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
|
226
238
|
return {}
|
227
|
-
end
|
239
|
+
end
|
228
240
|
end
|
229
241
|
|
230
242
|
def update_http_file!(file, local_file_path)
|
231
243
|
begin
|
232
244
|
async_request = if @repository.user
|
233
|
-
Typhoeus::Request.new(
|
245
|
+
Typhoeus::Request.new(
|
246
|
+
URI.escape(file.url),
|
247
|
+
:followlocation => true,
|
248
|
+
:username => @repository.user,
|
249
|
+
:password => @repository.pass
|
250
|
+
)
|
234
251
|
else
|
235
|
-
Typhoeus::Request.new(
|
252
|
+
Typhoeus::Request.new(
|
253
|
+
URI.escape(file.url),
|
254
|
+
:followlocation => true
|
255
|
+
)
|
256
|
+
end
|
257
|
+
|
258
|
+
downloaded_file = nil
|
259
|
+
async_request.on_headers do |response|
|
260
|
+
if response.code != 200
|
261
|
+
raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
|
262
|
+
end
|
263
|
+
@logger.debug("Updating: #{local_file_path}")
|
264
|
+
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
265
|
+
downloaded_file = ::File.new("#{local_file_path}.tmp", "wb")
|
236
266
|
end
|
267
|
+
|
268
|
+
async_request.on_body do |chunk, response|
|
269
|
+
downloaded_file.write(chunk) if response.code == 200
|
270
|
+
end
|
271
|
+
|
237
272
|
async_request.on_complete do |response|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
else
|
244
|
-
body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
|
245
|
-
@logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
|
246
|
-
end
|
247
|
-
rescue SystemCallError, StandardError => e
|
248
|
-
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
249
|
-
@logger.error "Failed to update file #{file.url}: #{msg}"
|
273
|
+
unless downloaded_file.nil?
|
274
|
+
downloaded_file.close
|
275
|
+
::FileUtils.mv("#{local_file_path}.tmp", local_file_path)
|
276
|
+
else
|
277
|
+
::FileUtils.rm("#{local_file_path}.tmp") if ::File.exist?("#{local_file_path}.tmp")
|
250
278
|
end
|
251
|
-
|
252
|
-
success
|
279
|
+
true
|
253
280
|
end
|
254
281
|
@hydra.queue async_request
|
255
282
|
@async_requests << async_request
|
256
|
-
|
283
|
+
true
|
284
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
257
285
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
258
286
|
@logger.error "Failed to update file #{file.url}: #{msg}"
|
259
|
-
|
287
|
+
false
|
260
288
|
end
|
261
|
-
|
262
|
-
return true
|
263
289
|
end
|
264
|
-
|
290
|
+
|
265
291
|
def update_hdfs_file!(file, local_file_path)
|
266
292
|
begin
|
267
293
|
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
@@ -270,10 +296,10 @@ module Filbunke
|
|
270
296
|
url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
|
271
297
|
hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
|
272
298
|
#@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
|
273
|
-
|
299
|
+
|
274
300
|
pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
|
275
301
|
ignored, status = Process::waitpid2 pid
|
276
|
-
|
302
|
+
|
277
303
|
if status.exitstatus == 0 then
|
278
304
|
begin
|
279
305
|
::FileUtils.mv "#{local_file_path}.tmp", local_file_path
|
@@ -282,7 +308,7 @@ module Filbunke
|
|
282
308
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
283
309
|
@logger.error "Failed to move hdfs file #{file.url}: #{msg}"
|
284
310
|
return false
|
285
|
-
end
|
311
|
+
end
|
286
312
|
else
|
287
313
|
@logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
|
288
314
|
return false
|
@@ -295,8 +321,7 @@ module Filbunke
|
|
295
321
|
end
|
296
322
|
|
297
323
|
def write_file!(file_path, contents)
|
298
|
-
|
299
|
-
@logger.debug("Updating: #{file_path}")
|
324
|
+
|
300
325
|
begin
|
301
326
|
::File.open("#{file_path}.tmp", 'w') do |file|
|
302
327
|
file.write(contents)
|
@@ -308,7 +333,7 @@ module Filbunke
|
|
308
333
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
309
334
|
@logger.error "Failed to move file #{file_path}: #{msg}"
|
310
335
|
return false
|
311
|
-
end
|
336
|
+
end
|
312
337
|
end
|
313
338
|
|
314
339
|
def delete_file!(file_path)
|
data/lib/filbunke/repository.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
module Filbunke
|
2
2
|
class Repository
|
3
|
-
attr_accessor :name,
|
4
|
-
:host,
|
5
|
-
:port,
|
6
|
-
:local_path,
|
7
|
-
:file_umask,
|
8
|
-
:directory_umask,
|
9
|
-
:user,
|
10
|
-
:pass,
|
11
|
-
:hadoop_binary,
|
3
|
+
attr_accessor :name,
|
4
|
+
:host,
|
5
|
+
:port,
|
6
|
+
:local_path,
|
7
|
+
:file_umask,
|
8
|
+
:directory_umask,
|
9
|
+
:user,
|
10
|
+
:pass,
|
11
|
+
:hadoop_binary,
|
12
12
|
:run_every,
|
13
13
|
:hydra_concurrency,
|
14
14
|
:batch_size
|
15
|
-
|
15
|
+
|
16
16
|
def initialize(repository_config)
|
17
17
|
@name = repository_config["filbunke_server_repository"]
|
18
18
|
@host = repository_config["filbunke_server_host"]
|
@@ -28,5 +28,5 @@ module Filbunke
|
|
28
28
|
# batch_size == 0 means use default configured in filbunke-server
|
29
29
|
@batch_size = repository_config.fetch("batch_size", 0).to_i
|
30
30
|
end
|
31
|
-
|
32
|
-
end
|
31
|
+
end
|
32
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filbunke
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter de Bie
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2016-03-
|
14
|
+
date: 2016-03-05 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: thoughtbot-shoulda
|
@@ -47,14 +47,14 @@ dependencies:
|
|
47
47
|
requirements:
|
48
48
|
- - '='
|
49
49
|
- !ruby/object:Gem::Version
|
50
|
-
version: 0.
|
50
|
+
version: 1.0.1
|
51
51
|
type: :runtime
|
52
52
|
prerelease: false
|
53
53
|
version_requirements: !ruby/object:Gem::Requirement
|
54
54
|
requirements:
|
55
55
|
- - '='
|
56
56
|
- !ruby/object:Gem::Version
|
57
|
-
version: 0.
|
57
|
+
version: 1.0.1
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: open4
|
60
60
|
requirement: !ruby/object:Gem::Requirement
|