filbunke 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/filbunke.gemspec +6 -6
- data/lib/filbunke/client.rb +88 -63
- data/lib/filbunke/repository.rb +12 -12
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MTMwOWRhNTMzYzNjOTM5YTNlNzM4MDg1MmZiNzk1NDY4Yjk2ZmU0ZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
Mzc2NWQ3YjAxOGJkZWQxYmI1ZDI2MmY2MmMzZjcwZjk5MDhlNWU4OA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Y2Q3MTA0NjJkZGY5Zjg2ZTZmYjY0MjAxMmQ5NTEwNmIxMTUwNmVjNjMwMWNi
|
10
|
+
YTg4MDA0MjgzMjU1MDM5N2U3ZGYyMDg4MDc5NDA2NDlhNWE5OTk3NDlmNzgw
|
11
|
+
MWI2MDMxMjAwYjNhYTVlNzRjOTY4NWYxZjQ0YTI4MzAzZmMwNDc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDBjNWZkNGVhOGU1ZGI3ZDBjMmEzMzA3OTVkODExZTAyNzMyOWE1NmZlMTFh
|
14
|
+
YjZkZGZmNTlhZWNlYzY1NTg2NjY1MjhkNDlmNzY5ZTNkMDAxZTY3MzE0YmI3
|
15
|
+
YTdjNTE2OWNjMWE2ZjJmNmZjMTY2ZjIwOGVmYTRjNWI3NjgzNTk=
|
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ begin
|
|
14
14
|
gem.files.exclude 'pkg'
|
15
15
|
gem.executables = ['filbunked']
|
16
16
|
gem.add_dependency 'json', '= 1.7.5'
|
17
|
-
gem.add_dependency 'typhoeus', '= 0.
|
17
|
+
gem.add_dependency 'typhoeus', '= 1.0.1'
|
18
18
|
gem.add_dependency 'open4', '= 1.3.0'
|
19
19
|
gem.add_dependency 'mime-types', '= 1.19'
|
20
20
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.13.0
|
data/filbunke.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: filbunke 1.
|
5
|
+
# stub: filbunke 1.13.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "filbunke"
|
9
|
-
s.version = "1.
|
9
|
+
s.version = "1.13.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
|
14
|
-
s.date = "2016-03-
|
14
|
+
s.date = "2016-03-05"
|
15
15
|
s.description = "Filbunke client and library"
|
16
16
|
s.email = "technical@deltaprojects.com"
|
17
17
|
s.executables = ["filbunked"]
|
@@ -52,20 +52,20 @@ Gem::Specification.new do |s|
|
|
52
52
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
53
|
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
54
54
|
s.add_runtime_dependency(%q<json>, ["= 1.7.5"])
|
55
|
-
s.add_runtime_dependency(%q<typhoeus>, ["= 0.
|
55
|
+
s.add_runtime_dependency(%q<typhoeus>, ["= 1.0.1"])
|
56
56
|
s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
|
57
57
|
s.add_runtime_dependency(%q<mime-types>, ["= 1.19"])
|
58
58
|
else
|
59
59
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
60
60
|
s.add_dependency(%q<json>, ["= 1.7.5"])
|
61
|
-
s.add_dependency(%q<typhoeus>, ["= 0.
|
61
|
+
s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
|
62
62
|
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
63
63
|
s.add_dependency(%q<mime-types>, ["= 1.19"])
|
64
64
|
end
|
65
65
|
else
|
66
66
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
67
67
|
s.add_dependency(%q<json>, ["= 1.7.5"])
|
68
|
-
s.add_dependency(%q<typhoeus>, ["= 0.
|
68
|
+
s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
|
69
69
|
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
70
70
|
s.add_dependency(%q<mime-types>, ["= 1.19"])
|
71
71
|
end
|
data/lib/filbunke/client.rb
CHANGED
@@ -25,18 +25,17 @@ module Filbunke
|
|
25
25
|
def with_updated_files(last_checkpoint)
|
26
26
|
updates = get_updated_file_list(last_checkpoint)
|
27
27
|
updated_files = updates["files"] || []
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
|
28
|
+
new_checkpoint = updates["checkpoint"] || 0
|
29
|
+
if updated_files.empty?
|
30
|
+
return new_checkpoint
|
31
|
+
end
|
33
32
|
|
33
|
+
@logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
|
34
34
|
@async_requests = []
|
35
|
-
|
36
35
|
callbacks_on_update = []
|
37
36
|
callbacks_on_no_change = []
|
38
37
|
callbacks_on_delete = []
|
39
|
-
|
38
|
+
has_update_file_failure = false
|
40
39
|
updated_files.each do |raw_file|
|
41
40
|
file = File.new(raw_file)
|
42
41
|
local_file_path = ::File.join(@repository.local_path, file.path)
|
@@ -50,7 +49,8 @@ module Filbunke
|
|
50
49
|
callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
|
51
50
|
else
|
52
51
|
@logger.error "Unable to get file #{file.url} ==> #{file.path}!"
|
53
|
-
|
52
|
+
has_update_file_failure = true
|
53
|
+
break
|
54
54
|
end
|
55
55
|
else
|
56
56
|
@logger.debug "File exists with correct hash: #{local_file_path}"
|
@@ -58,35 +58,47 @@ module Filbunke
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
end
|
61
|
-
@hydra.run
|
62
61
|
|
63
|
-
|
64
|
-
@logger.
|
65
|
-
|
62
|
+
if has_update_file_failure
|
63
|
+
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
64
|
+
return last_checkpoint
|
66
65
|
end
|
67
|
-
|
68
|
-
|
69
|
-
@
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
run_callbacks_no_change(callbacks_on_no_change)
|
77
|
-
|
78
|
-
new_checkpoint || last_checkpoint
|
79
|
-
rescue RuntimeError, SystemCallError, StandardError => e
|
80
|
-
msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
81
|
-
@logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
82
|
-
last_checkpoint
|
66
|
+
@logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
|
67
|
+
has_fetch_failures = begin
|
68
|
+
@hydra.run
|
69
|
+
# Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
|
70
|
+
# I think we can remove the following request validation
|
71
|
+
@async_requests.any? do |request|
|
72
|
+
@logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
|
73
|
+
request.response.nil? || request.response.code != 200
|
83
74
|
end
|
84
|
-
|
85
|
-
|
75
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
76
|
+
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
77
|
+
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
78
|
+
true
|
79
|
+
end
|
80
|
+
|
81
|
+
if has_fetch_failures
|
82
|
+
@logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
|
83
|
+
return last_checkpoint
|
84
|
+
end
|
85
|
+
|
86
|
+
@logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
|
87
|
+
new_or_last_checkpoint = begin
|
88
|
+
run_callbacks_delete(callbacks_on_delete)
|
89
|
+
run_callbacks(callbacks_on_update)
|
90
|
+
run_callbacks_no_change(callbacks_on_no_change)
|
91
|
+
|
92
|
+
new_checkpoint || last_checkpoint
|
93
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
94
|
+
msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
95
|
+
@logger.error "FAILED to process callbacks for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
|
86
96
|
last_checkpoint
|
87
97
|
end
|
98
|
+
|
99
|
+
new_or_last_checkpoint
|
88
100
|
end
|
89
|
-
|
101
|
+
|
90
102
|
def update_files!(last_checkpoint)
|
91
103
|
with_updated_files(last_checkpoint) {}
|
92
104
|
end
|
@@ -150,7 +162,7 @@ module Filbunke
|
|
150
162
|
return response.body.chomp.to_i
|
151
163
|
end
|
152
164
|
end
|
153
|
-
|
165
|
+
|
154
166
|
private
|
155
167
|
|
156
168
|
def log_failed_request(failed_request_command, e)
|
@@ -162,11 +174,11 @@ module Filbunke
|
|
162
174
|
end
|
163
175
|
|
164
176
|
def update_file!(file, local_file_path)
|
165
|
-
|
177
|
+
|
166
178
|
if file.url =~ /^http:\/\//
|
167
179
|
update_http_file!(file, local_file_path)
|
168
180
|
elsif (file.url =~ /^hdfs:\/\//)
|
169
|
-
|
181
|
+
update_hdfs_file!(file, local_file_path)
|
170
182
|
else
|
171
183
|
raise "Unsupported protocol for file: #{file.inspect}"
|
172
184
|
end
|
@@ -205,7 +217,7 @@ module Filbunke
|
|
205
217
|
updates_http.read_timeout = 300 # default is 60 seconds
|
206
218
|
updates_http.start do |http|
|
207
219
|
updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
|
208
|
-
updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size
|
220
|
+
updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
|
209
221
|
begin
|
210
222
|
@logger.info "Fetching updated file list from #{updates_path}"
|
211
223
|
request = Net::HTTP::Get.new(updates_path)
|
@@ -224,44 +236,58 @@ module Filbunke
|
|
224
236
|
rescue StandardError => e
|
225
237
|
@logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
|
226
238
|
return {}
|
227
|
-
end
|
239
|
+
end
|
228
240
|
end
|
229
241
|
|
230
242
|
def update_http_file!(file, local_file_path)
|
231
243
|
begin
|
232
244
|
async_request = if @repository.user
|
233
|
-
Typhoeus::Request.new(
|
245
|
+
Typhoeus::Request.new(
|
246
|
+
URI.escape(file.url),
|
247
|
+
:followlocation => true,
|
248
|
+
:username => @repository.user,
|
249
|
+
:password => @repository.pass
|
250
|
+
)
|
234
251
|
else
|
235
|
-
Typhoeus::Request.new(
|
252
|
+
Typhoeus::Request.new(
|
253
|
+
URI.escape(file.url),
|
254
|
+
:followlocation => true
|
255
|
+
)
|
256
|
+
end
|
257
|
+
|
258
|
+
downloaded_file = nil
|
259
|
+
async_request.on_headers do |response|
|
260
|
+
if response.code != 200
|
261
|
+
raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
|
262
|
+
end
|
263
|
+
@logger.debug("Updating: #{local_file_path}")
|
264
|
+
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
265
|
+
downloaded_file = ::File.new("#{local_file_path}.tmp", "wb")
|
236
266
|
end
|
267
|
+
|
268
|
+
async_request.on_body do |chunk, response|
|
269
|
+
downloaded_file.write(chunk) if response.code == 200
|
270
|
+
end
|
271
|
+
|
237
272
|
async_request.on_complete do |response|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
else
|
244
|
-
body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
|
245
|
-
@logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
|
246
|
-
end
|
247
|
-
rescue SystemCallError, StandardError => e
|
248
|
-
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
249
|
-
@logger.error "Failed to update file #{file.url}: #{msg}"
|
273
|
+
unless downloaded_file.nil?
|
274
|
+
downloaded_file.close
|
275
|
+
::FileUtils.mv("#{local_file_path}.tmp", local_file_path)
|
276
|
+
else
|
277
|
+
::FileUtils.rm("#{local_file_path}.tmp") if ::File.exist?("#{local_file_path}.tmp")
|
250
278
|
end
|
251
|
-
|
252
|
-
success
|
279
|
+
true
|
253
280
|
end
|
254
281
|
@hydra.queue async_request
|
255
282
|
@async_requests << async_request
|
256
|
-
|
283
|
+
true
|
284
|
+
rescue RuntimeError, SystemCallError, StandardError => e
|
257
285
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
258
286
|
@logger.error "Failed to update file #{file.url}: #{msg}"
|
259
|
-
|
287
|
+
false
|
260
288
|
end
|
261
|
-
|
262
|
-
return true
|
263
289
|
end
|
264
|
-
|
290
|
+
|
265
291
|
def update_hdfs_file!(file, local_file_path)
|
266
292
|
begin
|
267
293
|
::FileUtils.mkdir_p(::File.dirname(local_file_path))
|
@@ -270,10 +296,10 @@ module Filbunke
|
|
270
296
|
url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
|
271
297
|
hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
|
272
298
|
#@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
|
273
|
-
|
299
|
+
|
274
300
|
pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
|
275
301
|
ignored, status = Process::waitpid2 pid
|
276
|
-
|
302
|
+
|
277
303
|
if status.exitstatus == 0 then
|
278
304
|
begin
|
279
305
|
::FileUtils.mv "#{local_file_path}.tmp", local_file_path
|
@@ -282,7 +308,7 @@ module Filbunke
|
|
282
308
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
283
309
|
@logger.error "Failed to move hdfs file #{file.url}: #{msg}"
|
284
310
|
return false
|
285
|
-
end
|
311
|
+
end
|
286
312
|
else
|
287
313
|
@logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
|
288
314
|
return false
|
@@ -295,8 +321,7 @@ module Filbunke
|
|
295
321
|
end
|
296
322
|
|
297
323
|
def write_file!(file_path, contents)
|
298
|
-
|
299
|
-
@logger.debug("Updating: #{file_path}")
|
324
|
+
|
300
325
|
begin
|
301
326
|
::File.open("#{file_path}.tmp", 'w') do |file|
|
302
327
|
file.write(contents)
|
@@ -308,7 +333,7 @@ module Filbunke
|
|
308
333
|
msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
|
309
334
|
@logger.error "Failed to move file #{file_path}: #{msg}"
|
310
335
|
return false
|
311
|
-
end
|
336
|
+
end
|
312
337
|
end
|
313
338
|
|
314
339
|
def delete_file!(file_path)
|
data/lib/filbunke/repository.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
module Filbunke
|
2
2
|
class Repository
|
3
|
-
attr_accessor :name,
|
4
|
-
:host,
|
5
|
-
:port,
|
6
|
-
:local_path,
|
7
|
-
:file_umask,
|
8
|
-
:directory_umask,
|
9
|
-
:user,
|
10
|
-
:pass,
|
11
|
-
:hadoop_binary,
|
3
|
+
attr_accessor :name,
|
4
|
+
:host,
|
5
|
+
:port,
|
6
|
+
:local_path,
|
7
|
+
:file_umask,
|
8
|
+
:directory_umask,
|
9
|
+
:user,
|
10
|
+
:pass,
|
11
|
+
:hadoop_binary,
|
12
12
|
:run_every,
|
13
13
|
:hydra_concurrency,
|
14
14
|
:batch_size
|
15
|
-
|
15
|
+
|
16
16
|
def initialize(repository_config)
|
17
17
|
@name = repository_config["filbunke_server_repository"]
|
18
18
|
@host = repository_config["filbunke_server_host"]
|
@@ -28,5 +28,5 @@ module Filbunke
|
|
28
28
|
# batch_size == 0 means use default configured in filbunke-server
|
29
29
|
@batch_size = repository_config.fetch("batch_size", 0).to_i
|
30
30
|
end
|
31
|
-
|
32
|
-
end
|
31
|
+
end
|
32
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filbunke
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter de Bie
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2016-03-
|
14
|
+
date: 2016-03-05 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: thoughtbot-shoulda
|
@@ -47,14 +47,14 @@ dependencies:
|
|
47
47
|
requirements:
|
48
48
|
- - '='
|
49
49
|
- !ruby/object:Gem::Version
|
50
|
-
version: 0.
|
50
|
+
version: 1.0.1
|
51
51
|
type: :runtime
|
52
52
|
prerelease: false
|
53
53
|
version_requirements: !ruby/object:Gem::Requirement
|
54
54
|
requirements:
|
55
55
|
- - '='
|
56
56
|
- !ruby/object:Gem::Version
|
57
|
-
version: 0.
|
57
|
+
version: 1.0.1
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: open4
|
60
60
|
requirement: !ruby/object:Gem::Requirement
|