filbunke 1.12.0 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzE3ODU4ZGQzMmIyYzA1MzI0MmVkZTY4YWNkNTE2NDk3MGQ4Y2RmYw==
4
+ MTMwOWRhNTMzYzNjOTM5YTNlNzM4MDg1MmZiNzk1NDY4Yjk2ZmU0ZQ==
5
5
  data.tar.gz: !binary |-
6
- YTQ1ZGNlYTVmYTNhY2RmY2UyNmFiZWVkYzIxZThmYTQ4NTlhZTVmMw==
6
+ Mzc2NWQ3YjAxOGJkZWQxYmI1ZDI2MmY2MmMzZjcwZjk5MDhlNWU4OA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- Mzc2NmU2YzRhNDJhNzNkNDFhZTI2MDg2NzU3ZWU3MDkyZmM5ODU5ZTFlMDBi
10
- Y2FhM2QxZGQ2ZjkyNmRmZGY0ODNlMzFmMmI1MzZlNGYzNTRjN2E0ODllNDhk
11
- MWEzOTQ2OTFlZGQ1MDk3NTcwNDk5YzUzYzUyODliZjNiMmQxMGQ=
9
+ Y2Q3MTA0NjJkZGY5Zjg2ZTZmYjY0MjAxMmQ5NTEwNmIxMTUwNmVjNjMwMWNi
10
+ YTg4MDA0MjgzMjU1MDM5N2U3ZGYyMDg4MDc5NDA2NDlhNWE5OTk3NDlmNzgw
11
+ MWI2MDMxMjAwYjNhYTVlNzRjOTY4NWYxZjQ0YTI4MzAzZmMwNDc=
12
12
  data.tar.gz: !binary |-
13
- NDRjNzVjN2FmZDgzODYzMjNiYzM4NGQ3YzkzNTJjNGVhMmU4M2NjN2Y3YTky
14
- YzU2MmIzNDE4ZWNmZGJkM2NiZmQwNDk1MDQ5ZTkwNDE3M2JjNWY4ZTFhYmIz
15
- N2IwMTE1MjQwM2M2M2RiOTM2YmJhOTQzNjcwNmUxYTkzNWMwODI=
13
+ ZDBjNWZkNGVhOGU1ZGI3ZDBjMmEzMzA3OTVkODExZTAyNzMyOWE1NmZlMTFh
14
+ YjZkZGZmNTlhZWNlYzY1NTg2NjY1MjhkNDlmNzY5ZTNkMDAxZTY3MzE0YmI3
15
+ YTdjNTE2OWNjMWE2ZjJmNmZjMTY2ZjIwOGVmYTRjNWI3NjgzNTk=
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ begin
14
14
  gem.files.exclude 'pkg'
15
15
  gem.executables = ['filbunked']
16
16
  gem.add_dependency 'json', '= 1.7.5'
17
- gem.add_dependency 'typhoeus', '= 0.3.3'
17
+ gem.add_dependency 'typhoeus', '= 1.0.1'
18
18
  gem.add_dependency 'open4', '= 1.3.0'
19
19
  gem.add_dependency 'mime-types', '= 1.19'
20
20
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.12.0
1
+ 1.13.0
data/filbunke.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: filbunke 1.12.0 ruby lib
5
+ # stub: filbunke 1.13.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "filbunke"
9
- s.version = "1.12.0"
9
+ s.version = "1.13.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
14
- s.date = "2016-03-04"
14
+ s.date = "2016-03-05"
15
15
  s.description = "Filbunke client and library"
16
16
  s.email = "technical@deltaprojects.com"
17
17
  s.executables = ["filbunked"]
@@ -52,20 +52,20 @@ Gem::Specification.new do |s|
52
52
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
53
  s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
54
  s.add_runtime_dependency(%q<json>, ["= 1.7.5"])
55
- s.add_runtime_dependency(%q<typhoeus>, ["= 0.3.3"])
55
+ s.add_runtime_dependency(%q<typhoeus>, ["= 1.0.1"])
56
56
  s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
57
57
  s.add_runtime_dependency(%q<mime-types>, ["= 1.19"])
58
58
  else
59
59
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
60
60
  s.add_dependency(%q<json>, ["= 1.7.5"])
61
- s.add_dependency(%q<typhoeus>, ["= 0.3.3"])
61
+ s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
62
62
  s.add_dependency(%q<open4>, ["= 1.3.0"])
63
63
  s.add_dependency(%q<mime-types>, ["= 1.19"])
64
64
  end
65
65
  else
66
66
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
67
67
  s.add_dependency(%q<json>, ["= 1.7.5"])
68
- s.add_dependency(%q<typhoeus>, ["= 0.3.3"])
68
+ s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
69
69
  s.add_dependency(%q<open4>, ["= 1.3.0"])
70
70
  s.add_dependency(%q<mime-types>, ["= 1.19"])
71
71
  end
@@ -25,18 +25,17 @@ module Filbunke
25
25
  def with_updated_files(last_checkpoint)
26
26
  updates = get_updated_file_list(last_checkpoint)
27
27
  updated_files = updates["files"] || []
28
- failure = false
29
-
30
- new_checkpoint = updates["checkpoint"]
31
-
32
- @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
28
+ new_checkpoint = updates["checkpoint"] || 0
29
+ if updated_files.empty?
30
+ return new_checkpoint
31
+ end
33
32
 
33
+ @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
34
34
  @async_requests = []
35
-
36
35
  callbacks_on_update = []
37
36
  callbacks_on_no_change = []
38
37
  callbacks_on_delete = []
39
-
38
+ has_update_file_failure = false
40
39
  updated_files.each do |raw_file|
41
40
  file = File.new(raw_file)
42
41
  local_file_path = ::File.join(@repository.local_path, file.path)
@@ -50,7 +49,8 @@ module Filbunke
50
49
  callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
51
50
  else
52
51
  @logger.error "Unable to get file #{file.url} ==> #{file.path}!"
53
- failure = true
52
+ has_update_file_failure = true
53
+ break
54
54
  end
55
55
  else
56
56
  @logger.debug "File exists with correct hash: #{local_file_path}"
@@ -58,35 +58,47 @@ module Filbunke
58
58
  end
59
59
  end
60
60
  end
61
- @hydra.run
62
61
 
63
- pfailure = failure || @async_requests.any? do |request|
64
- @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
65
- request.response.nil? || request.response.code != 200
62
+ if has_update_file_failure
63
+ @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
64
+ return last_checkpoint
66
65
  end
67
-
68
- if pfailure == false
69
- @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
70
- begin
71
-
72
- run_callbacks_delete(callbacks_on_delete)
73
-
74
- run_callbacks(callbacks_on_update)
75
-
76
- run_callbacks_no_change(callbacks_on_no_change)
77
-
78
- new_checkpoint || last_checkpoint
79
- rescue RuntimeError, SystemCallError, StandardError => e
80
- msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
81
- @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
82
- last_checkpoint
66
+ @logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
67
+ has_fetch_failures = begin
68
+ @hydra.run
69
+ # Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
70
+ # I think we can remove the following request validation
71
+ @async_requests.any? do |request|
72
+ @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
73
+ request.response.nil? || request.response.code != 200
83
74
  end
84
- else
85
- @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
75
+ rescue RuntimeError, SystemCallError, StandardError => e
76
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
77
+ @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
78
+ true
79
+ end
80
+
81
+ if has_fetch_failures
82
+ @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
83
+ return last_checkpoint
84
+ end
85
+
86
+ @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
87
+ new_or_last_checkpoint = begin
88
+ run_callbacks_delete(callbacks_on_delete)
89
+ run_callbacks(callbacks_on_update)
90
+ run_callbacks_no_change(callbacks_on_no_change)
91
+
92
+ new_checkpoint || last_checkpoint
93
+ rescue RuntimeError, SystemCallError, StandardError => e
94
+ msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
95
+ @logger.error "FAILED to process callbacks for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
86
96
  last_checkpoint
87
97
  end
98
+
99
+ new_or_last_checkpoint
88
100
  end
89
-
101
+
90
102
  def update_files!(last_checkpoint)
91
103
  with_updated_files(last_checkpoint) {}
92
104
  end
@@ -150,7 +162,7 @@ module Filbunke
150
162
  return response.body.chomp.to_i
151
163
  end
152
164
  end
153
-
165
+
154
166
  private
155
167
 
156
168
  def log_failed_request(failed_request_command, e)
@@ -162,11 +174,11 @@ module Filbunke
162
174
  end
163
175
 
164
176
  def update_file!(file, local_file_path)
165
-
177
+
166
178
  if file.url =~ /^http:\/\//
167
179
  update_http_file!(file, local_file_path)
168
180
  elsif (file.url =~ /^hdfs:\/\//)
169
- success = update_hdfs_file!(file, local_file_path)
181
+ update_hdfs_file!(file, local_file_path)
170
182
  else
171
183
  raise "Unsupported protocol for file: #{file.inspect}"
172
184
  end
@@ -205,7 +217,7 @@ module Filbunke
205
217
  updates_http.read_timeout = 300 # default is 60 seconds
206
218
  updates_http.start do |http|
207
219
  updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
208
- updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size
220
+ updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
209
221
  begin
210
222
  @logger.info "Fetching updated file list from #{updates_path}"
211
223
  request = Net::HTTP::Get.new(updates_path)
@@ -224,44 +236,58 @@ module Filbunke
224
236
  rescue StandardError => e
225
237
  @logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
226
238
  return {}
227
- end
239
+ end
228
240
  end
229
241
 
230
242
  def update_http_file!(file, local_file_path)
231
243
  begin
232
244
  async_request = if @repository.user
233
- Typhoeus::Request.new(URI.encode(file.url), :follow_location => true, :username => @repository.user, :password => @repository.pass)
245
+ Typhoeus::Request.new(
246
+ URI.escape(file.url),
247
+ :followlocation => true,
248
+ :username => @repository.user,
249
+ :password => @repository.pass
250
+ )
234
251
  else
235
- Typhoeus::Request.new(URI.encode(file.url), :follow_location => true)
252
+ Typhoeus::Request.new(
253
+ URI.escape(file.url),
254
+ :followlocation => true
255
+ )
256
+ end
257
+
258
+ downloaded_file = nil
259
+ async_request.on_headers do |response|
260
+ if response.code != 200
261
+ raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
262
+ end
263
+ @logger.debug("Updating: #{local_file_path}")
264
+ ::FileUtils.mkdir_p(::File.dirname(local_file_path))
265
+ downloaded_file = ::File.new("#{local_file_path}.tmp", "wb")
236
266
  end
267
+
268
+ async_request.on_body do |chunk, response|
269
+ downloaded_file.write(chunk) if response.code == 200
270
+ end
271
+
237
272
  async_request.on_complete do |response|
238
- success = false
239
- begin
240
- success = response.code.to_i == 200
241
- if success
242
- write_file!(local_file_path, response.body)
243
- else
244
- body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
245
- @logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
246
- end
247
- rescue SystemCallError, StandardError => e
248
- msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
249
- @logger.error "Failed to update file #{file.url}: #{msg}"
273
+ unless downloaded_file.nil?
274
+ downloaded_file.close
275
+ ::FileUtils.mv("#{local_file_path}.tmp", local_file_path)
276
+ else
277
+ ::FileUtils.rm("#{local_file_path}.tmp") if ::File.exist?("#{local_file_path}.tmp")
250
278
  end
251
- # return the async_request.handled_response value here
252
- success
279
+ true
253
280
  end
254
281
  @hydra.queue async_request
255
282
  @async_requests << async_request
256
- rescue StandardError => e
283
+ true
284
+ rescue RuntimeError, SystemCallError, StandardError => e
257
285
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
258
286
  @logger.error "Failed to update file #{file.url}: #{msg}"
259
- return false
287
+ false
260
288
  end
261
-
262
- return true
263
289
  end
264
-
290
+
265
291
  def update_hdfs_file!(file, local_file_path)
266
292
  begin
267
293
  ::FileUtils.mkdir_p(::File.dirname(local_file_path))
@@ -270,10 +296,10 @@ module Filbunke
270
296
  url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
271
297
  hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
272
298
  #@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
273
-
299
+
274
300
  pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
275
301
  ignored, status = Process::waitpid2 pid
276
-
302
+
277
303
  if status.exitstatus == 0 then
278
304
  begin
279
305
  ::FileUtils.mv "#{local_file_path}.tmp", local_file_path
@@ -282,7 +308,7 @@ module Filbunke
282
308
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
283
309
  @logger.error "Failed to move hdfs file #{file.url}: #{msg}"
284
310
  return false
285
- end
311
+ end
286
312
  else
287
313
  @logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
288
314
  return false
@@ -295,8 +321,7 @@ module Filbunke
295
321
  end
296
322
 
297
323
  def write_file!(file_path, contents)
298
- ::FileUtils.mkdir_p(::File.dirname(file_path))
299
- @logger.debug("Updating: #{file_path}")
324
+
300
325
  begin
301
326
  ::File.open("#{file_path}.tmp", 'w') do |file|
302
327
  file.write(contents)
@@ -308,7 +333,7 @@ module Filbunke
308
333
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
309
334
  @logger.error "Failed to move file #{file_path}: #{msg}"
310
335
  return false
311
- end
336
+ end
312
337
  end
313
338
 
314
339
  def delete_file!(file_path)
@@ -1,18 +1,18 @@
1
1
  module Filbunke
2
2
  class Repository
3
- attr_accessor :name,
4
- :host,
5
- :port,
6
- :local_path,
7
- :file_umask,
8
- :directory_umask,
9
- :user,
10
- :pass,
11
- :hadoop_binary,
3
+ attr_accessor :name,
4
+ :host,
5
+ :port,
6
+ :local_path,
7
+ :file_umask,
8
+ :directory_umask,
9
+ :user,
10
+ :pass,
11
+ :hadoop_binary,
12
12
  :run_every,
13
13
  :hydra_concurrency,
14
14
  :batch_size
15
-
15
+
16
16
  def initialize(repository_config)
17
17
  @name = repository_config["filbunke_server_repository"]
18
18
  @host = repository_config["filbunke_server_host"]
@@ -28,5 +28,5 @@ module Filbunke
28
28
  # batch_size == 0 means use default configured in filbunke-server
29
29
  @batch_size = repository_config.fetch("batch_size", 0).to_i
30
30
  end
31
-
32
- end
31
+ end
32
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filbunke
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.12.0
4
+ version: 1.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter de Bie
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2016-03-04 00:00:00.000000000 Z
14
+ date: 2016-03-05 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: thoughtbot-shoulda
@@ -47,14 +47,14 @@ dependencies:
47
47
  requirements:
48
48
  - - '='
49
49
  - !ruby/object:Gem::Version
50
- version: 0.3.3
50
+ version: 1.0.1
51
51
  type: :runtime
52
52
  prerelease: false
53
53
  version_requirements: !ruby/object:Gem::Requirement
54
54
  requirements:
55
55
  - - '='
56
56
  - !ruby/object:Gem::Version
57
- version: 0.3.3
57
+ version: 1.0.1
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: open4
60
60
  requirement: !ruby/object:Gem::Requirement