filbunke 1.12.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzE3ODU4ZGQzMmIyYzA1MzI0MmVkZTY4YWNkNTE2NDk3MGQ4Y2RmYw==
4
+ MTMwOWRhNTMzYzNjOTM5YTNlNzM4MDg1MmZiNzk1NDY4Yjk2ZmU0ZQ==
5
5
  data.tar.gz: !binary |-
6
- YTQ1ZGNlYTVmYTNhY2RmY2UyNmFiZWVkYzIxZThmYTQ4NTlhZTVmMw==
6
+ Mzc2NWQ3YjAxOGJkZWQxYmI1ZDI2MmY2MmMzZjcwZjk5MDhlNWU4OA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- Mzc2NmU2YzRhNDJhNzNkNDFhZTI2MDg2NzU3ZWU3MDkyZmM5ODU5ZTFlMDBi
10
- Y2FhM2QxZGQ2ZjkyNmRmZGY0ODNlMzFmMmI1MzZlNGYzNTRjN2E0ODllNDhk
11
- MWEzOTQ2OTFlZGQ1MDk3NTcwNDk5YzUzYzUyODliZjNiMmQxMGQ=
9
+ Y2Q3MTA0NjJkZGY5Zjg2ZTZmYjY0MjAxMmQ5NTEwNmIxMTUwNmVjNjMwMWNi
10
+ YTg4MDA0MjgzMjU1MDM5N2U3ZGYyMDg4MDc5NDA2NDlhNWE5OTk3NDlmNzgw
11
+ MWI2MDMxMjAwYjNhYTVlNzRjOTY4NWYxZjQ0YTI4MzAzZmMwNDc=
12
12
  data.tar.gz: !binary |-
13
- NDRjNzVjN2FmZDgzODYzMjNiYzM4NGQ3YzkzNTJjNGVhMmU4M2NjN2Y3YTky
14
- YzU2MmIzNDE4ZWNmZGJkM2NiZmQwNDk1MDQ5ZTkwNDE3M2JjNWY4ZTFhYmIz
15
- N2IwMTE1MjQwM2M2M2RiOTM2YmJhOTQzNjcwNmUxYTkzNWMwODI=
13
+ ZDBjNWZkNGVhOGU1ZGI3ZDBjMmEzMzA3OTVkODExZTAyNzMyOWE1NmZlMTFh
14
+ YjZkZGZmNTlhZWNlYzY1NTg2NjY1MjhkNDlmNzY5ZTNkMDAxZTY3MzE0YmI3
15
+ YTdjNTE2OWNjMWE2ZjJmNmZjMTY2ZjIwOGVmYTRjNWI3NjgzNTk=
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ begin
14
14
  gem.files.exclude 'pkg'
15
15
  gem.executables = ['filbunked']
16
16
  gem.add_dependency 'json', '= 1.7.5'
17
- gem.add_dependency 'typhoeus', '= 0.3.3'
17
+ gem.add_dependency 'typhoeus', '= 1.0.1'
18
18
  gem.add_dependency 'open4', '= 1.3.0'
19
19
  gem.add_dependency 'mime-types', '= 1.19'
20
20
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.12.0
1
+ 1.13.0
data/filbunke.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: filbunke 1.12.0 ruby lib
5
+ # stub: filbunke 1.13.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "filbunke"
9
- s.version = "1.12.0"
9
+ s.version = "1.13.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
14
- s.date = "2016-03-04"
14
+ s.date = "2016-03-05"
15
15
  s.description = "Filbunke client and library"
16
16
  s.email = "technical@deltaprojects.com"
17
17
  s.executables = ["filbunked"]
@@ -52,20 +52,20 @@ Gem::Specification.new do |s|
52
52
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
53
  s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
54
  s.add_runtime_dependency(%q<json>, ["= 1.7.5"])
55
- s.add_runtime_dependency(%q<typhoeus>, ["= 0.3.3"])
55
+ s.add_runtime_dependency(%q<typhoeus>, ["= 1.0.1"])
56
56
  s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
57
57
  s.add_runtime_dependency(%q<mime-types>, ["= 1.19"])
58
58
  else
59
59
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
60
60
  s.add_dependency(%q<json>, ["= 1.7.5"])
61
- s.add_dependency(%q<typhoeus>, ["= 0.3.3"])
61
+ s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
62
62
  s.add_dependency(%q<open4>, ["= 1.3.0"])
63
63
  s.add_dependency(%q<mime-types>, ["= 1.19"])
64
64
  end
65
65
  else
66
66
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
67
67
  s.add_dependency(%q<json>, ["= 1.7.5"])
68
- s.add_dependency(%q<typhoeus>, ["= 0.3.3"])
68
+ s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
69
69
  s.add_dependency(%q<open4>, ["= 1.3.0"])
70
70
  s.add_dependency(%q<mime-types>, ["= 1.19"])
71
71
  end
@@ -25,18 +25,17 @@ module Filbunke
25
25
  def with_updated_files(last_checkpoint)
26
26
  updates = get_updated_file_list(last_checkpoint)
27
27
  updated_files = updates["files"] || []
28
- failure = false
29
-
30
- new_checkpoint = updates["checkpoint"]
31
-
32
- @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
28
+ new_checkpoint = updates["checkpoint"] || 0
29
+ if updated_files.empty?
30
+ return new_checkpoint
31
+ end
33
32
 
33
+ @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
34
34
  @async_requests = []
35
-
36
35
  callbacks_on_update = []
37
36
  callbacks_on_no_change = []
38
37
  callbacks_on_delete = []
39
-
38
+ has_update_file_failure = false
40
39
  updated_files.each do |raw_file|
41
40
  file = File.new(raw_file)
42
41
  local_file_path = ::File.join(@repository.local_path, file.path)
@@ -50,7 +49,8 @@ module Filbunke
50
49
  callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
51
50
  else
52
51
  @logger.error "Unable to get file #{file.url} ==> #{file.path}!"
53
- failure = true
52
+ has_update_file_failure = true
53
+ break
54
54
  end
55
55
  else
56
56
  @logger.debug "File exists with correct hash: #{local_file_path}"
@@ -58,35 +58,47 @@ module Filbunke
58
58
  end
59
59
  end
60
60
  end
61
- @hydra.run
62
61
 
63
- pfailure = failure || @async_requests.any? do |request|
64
- @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
65
- request.response.nil? || request.response.code != 200
62
+ if has_update_file_failure
63
+ @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
64
+ return last_checkpoint
66
65
  end
67
-
68
- if pfailure == false
69
- @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
70
- begin
71
-
72
- run_callbacks_delete(callbacks_on_delete)
73
-
74
- run_callbacks(callbacks_on_update)
75
-
76
- run_callbacks_no_change(callbacks_on_no_change)
77
-
78
- new_checkpoint || last_checkpoint
79
- rescue RuntimeError, SystemCallError, StandardError => e
80
- msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
81
- @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
82
- last_checkpoint
66
+ @logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
67
+ has_fetch_failures = begin
68
+ @hydra.run
69
+ # Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
70
+ # I think we can remove the following request validation
71
+ @async_requests.any? do |request|
72
+ @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
73
+ request.response.nil? || request.response.code != 200
83
74
  end
84
- else
85
- @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
75
+ rescue RuntimeError, SystemCallError, StandardError => e
76
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
77
+ @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
78
+ true
79
+ end
80
+
81
+ if has_fetch_failures
82
+ @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
83
+ return last_checkpoint
84
+ end
85
+
86
+ @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
87
+ new_or_last_checkpoint = begin
88
+ run_callbacks_delete(callbacks_on_delete)
89
+ run_callbacks(callbacks_on_update)
90
+ run_callbacks_no_change(callbacks_on_no_change)
91
+
92
+ new_checkpoint || last_checkpoint
93
+ rescue RuntimeError, SystemCallError, StandardError => e
94
+ msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
95
+ @logger.error "FAILED to process callbacks for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
86
96
  last_checkpoint
87
97
  end
98
+
99
+ new_or_last_checkpoint
88
100
  end
89
-
101
+
90
102
  def update_files!(last_checkpoint)
91
103
  with_updated_files(last_checkpoint) {}
92
104
  end
@@ -150,7 +162,7 @@ module Filbunke
150
162
  return response.body.chomp.to_i
151
163
  end
152
164
  end
153
-
165
+
154
166
  private
155
167
 
156
168
  def log_failed_request(failed_request_command, e)
@@ -162,11 +174,11 @@ module Filbunke
162
174
  end
163
175
 
164
176
  def update_file!(file, local_file_path)
165
-
177
+
166
178
  if file.url =~ /^http:\/\//
167
179
  update_http_file!(file, local_file_path)
168
180
  elsif (file.url =~ /^hdfs:\/\//)
169
- success = update_hdfs_file!(file, local_file_path)
181
+ update_hdfs_file!(file, local_file_path)
170
182
  else
171
183
  raise "Unsupported protocol for file: #{file.inspect}"
172
184
  end
@@ -205,7 +217,7 @@ module Filbunke
205
217
  updates_http.read_timeout = 300 # default is 60 seconds
206
218
  updates_http.start do |http|
207
219
  updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
208
- updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size
220
+ updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
209
221
  begin
210
222
  @logger.info "Fetching updated file list from #{updates_path}"
211
223
  request = Net::HTTP::Get.new(updates_path)
@@ -224,44 +236,58 @@ module Filbunke
224
236
  rescue StandardError => e
225
237
  @logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
226
238
  return {}
227
- end
239
+ end
228
240
  end
229
241
 
230
242
  def update_http_file!(file, local_file_path)
231
243
  begin
232
244
  async_request = if @repository.user
233
- Typhoeus::Request.new(URI.encode(file.url), :follow_location => true, :username => @repository.user, :password => @repository.pass)
245
+ Typhoeus::Request.new(
246
+ URI.escape(file.url),
247
+ :followlocation => true,
248
+ :username => @repository.user,
249
+ :password => @repository.pass
250
+ )
234
251
  else
235
- Typhoeus::Request.new(URI.encode(file.url), :follow_location => true)
252
+ Typhoeus::Request.new(
253
+ URI.escape(file.url),
254
+ :followlocation => true
255
+ )
256
+ end
257
+
258
+ downloaded_file = nil
259
+ async_request.on_headers do |response|
260
+ if response.code != 200
261
+ raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
262
+ end
263
+ @logger.debug("Updating: #{local_file_path}")
264
+ ::FileUtils.mkdir_p(::File.dirname(local_file_path))
265
+ downloaded_file = ::File.new("#{local_file_path}.tmp", "wb")
236
266
  end
267
+
268
+ async_request.on_body do |chunk, response|
269
+ downloaded_file.write(chunk) if response.code == 200
270
+ end
271
+
237
272
  async_request.on_complete do |response|
238
- success = false
239
- begin
240
- success = response.code.to_i == 200
241
- if success
242
- write_file!(local_file_path, response.body)
243
- else
244
- body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
245
- @logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
246
- end
247
- rescue SystemCallError, StandardError => e
248
- msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
249
- @logger.error "Failed to update file #{file.url}: #{msg}"
273
+ unless downloaded_file.nil?
274
+ downloaded_file.close
275
+ ::FileUtils.mv("#{local_file_path}.tmp", local_file_path)
276
+ else
277
+ ::FileUtils.rm("#{local_file_path}.tmp") if ::File.exist?("#{local_file_path}.tmp")
250
278
  end
251
- # return the async_request.handled_response value here
252
- success
279
+ true
253
280
  end
254
281
  @hydra.queue async_request
255
282
  @async_requests << async_request
256
- rescue StandardError => e
283
+ true
284
+ rescue RuntimeError, SystemCallError, StandardError => e
257
285
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
258
286
  @logger.error "Failed to update file #{file.url}: #{msg}"
259
- return false
287
+ false
260
288
  end
261
-
262
- return true
263
289
  end
264
-
290
+
265
291
  def update_hdfs_file!(file, local_file_path)
266
292
  begin
267
293
  ::FileUtils.mkdir_p(::File.dirname(local_file_path))
@@ -270,10 +296,10 @@ module Filbunke
270
296
  url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
271
297
  hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
272
298
  #@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
273
-
299
+
274
300
  pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
275
301
  ignored, status = Process::waitpid2 pid
276
-
302
+
277
303
  if status.exitstatus == 0 then
278
304
  begin
279
305
  ::FileUtils.mv "#{local_file_path}.tmp", local_file_path
@@ -282,7 +308,7 @@ module Filbunke
282
308
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
283
309
  @logger.error "Failed to move hdfs file #{file.url}: #{msg}"
284
310
  return false
285
- end
311
+ end
286
312
  else
287
313
  @logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
288
314
  return false
@@ -295,8 +321,7 @@ module Filbunke
295
321
  end
296
322
 
297
323
  def write_file!(file_path, contents)
298
- ::FileUtils.mkdir_p(::File.dirname(file_path))
299
- @logger.debug("Updating: #{file_path}")
324
+
300
325
  begin
301
326
  ::File.open("#{file_path}.tmp", 'w') do |file|
302
327
  file.write(contents)
@@ -308,7 +333,7 @@ module Filbunke
308
333
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
309
334
  @logger.error "Failed to move file #{file_path}: #{msg}"
310
335
  return false
311
- end
336
+ end
312
337
  end
313
338
 
314
339
  def delete_file!(file_path)
@@ -1,18 +1,18 @@
1
1
  module Filbunke
2
2
  class Repository
3
- attr_accessor :name,
4
- :host,
5
- :port,
6
- :local_path,
7
- :file_umask,
8
- :directory_umask,
9
- :user,
10
- :pass,
11
- :hadoop_binary,
3
+ attr_accessor :name,
4
+ :host,
5
+ :port,
6
+ :local_path,
7
+ :file_umask,
8
+ :directory_umask,
9
+ :user,
10
+ :pass,
11
+ :hadoop_binary,
12
12
  :run_every,
13
13
  :hydra_concurrency,
14
14
  :batch_size
15
-
15
+
16
16
  def initialize(repository_config)
17
17
  @name = repository_config["filbunke_server_repository"]
18
18
  @host = repository_config["filbunke_server_host"]
@@ -28,5 +28,5 @@ module Filbunke
28
28
  # batch_size == 0 means use default configured in filbunke-server
29
29
  @batch_size = repository_config.fetch("batch_size", 0).to_i
30
30
  end
31
-
32
- end
31
+ end
32
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filbunke
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.12.0
4
+ version: 1.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter de Bie
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2016-03-04 00:00:00.000000000 Z
14
+ date: 2016-03-05 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: thoughtbot-shoulda
@@ -47,14 +47,14 @@ dependencies:
47
47
  requirements:
48
48
  - - '='
49
49
  - !ruby/object:Gem::Version
50
- version: 0.3.3
50
+ version: 1.0.1
51
51
  type: :runtime
52
52
  prerelease: false
53
53
  version_requirements: !ruby/object:Gem::Requirement
54
54
  requirements:
55
55
  - - '='
56
56
  - !ruby/object:Gem::Version
57
- version: 0.3.3
57
+ version: 1.0.1
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: open4
60
60
  requirement: !ruby/object:Gem::Requirement