filbunke 1.13.5 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- ZTY5MmMwZWFjM2U3YThjYzA1ZjkzMmRjNjVmNzVkZDI4Yzk0N2EzZA==
5
- data.tar.gz: !binary |-
6
- NzlmYmMzN2YwYjI1ZTA5MzhkNThkNWQ0ZDk4YTZiMmQxMWZhNGY4OA==
2
+ SHA1:
3
+ metadata.gz: 9ebdbdfb5aa1fc9a53ffe60b2b79d8cec6fe920c
4
+ data.tar.gz: d320cceb5cd74e3c443495fe2305e08a7d385af8
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- MGY2Mjg0OWM4NDk1YTRjNDMzNDhhOGJlMTA1ZGI2NjAyM2Q0MzA3MDNlMjY0
10
- YzkzMzAxOTc2ZmUxMGZlYmRhM2VhYTg1MGI2ZDY4YzAyMmFjNTY1OWNmZDMx
11
- ODczMzhhZjVhOWRmYWM5YTMxZTVlOTdkZTRhNmIwOWVkNjJhYzc=
12
- data.tar.gz: !binary |-
13
- YTg2Y2NmNzdjYjAxZTRlOTg2NDdlMGIxMTUyOTUwZDdlOGY1ZGRlY2U0NDM2
14
- NTljMTc4ZGFlYjIzNWFkMDFmNzJiZmFiYTJmNTY5YzFlZWRkYzNkMmZkNjQ2
15
- YWMzYTc0MjY3OTk2ZjkyYTQ4YzAzMDhmOGM3ZTU2OTVjNjBkM2E=
6
+ metadata.gz: dbd84ac518738229f3333557d68fa8c39dbe9153358ba2da521b575b2003bc814d595bc0d8bc256ddc3dee8965b7eefdb8c72117e0fd244d7847d425126e9fcb
7
+ data.tar.gz: 815b0982c1c9fab0fdb5c08fd34140796bd89e8cf1cf4c9ed00d474a71dec52ca301f88d60105038ceced0a41d31346a589a95c048ec79cc62c7dc8d01c25990
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 1.9.3
1
+ 2.2
data/Rakefile CHANGED
@@ -13,10 +13,11 @@ begin
13
13
  gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
14
14
  gem.files.exclude 'pkg'
15
15
  gem.executables = ['filbunked']
16
- gem.add_dependency 'json', '= 1.7.5'
17
- gem.add_dependency 'typhoeus', '= 1.0.1'
18
- gem.add_dependency 'open4', '= 1.3.0'
19
- gem.add_dependency 'mime-types', '= 1.19'
16
+ gem.add_dependency 'json', '= 1.8.3'
17
+ gem.add_dependency 'typhoeus', '= 0.7.3'
18
+ gem.add_dependency 'open4', '= 1.3.4'
19
+ gem.add_dependency 'mime-types', '= 2.6.2'
20
+ gem.add_dependency 'parallel', '= 1.6.1'
20
21
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
21
22
  end
22
23
  Jeweler::GemcutterTasks.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.13.5
1
+ 2.0.2
data/filbunke.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: filbunke 1.13.5 ruby lib
5
+ # stub: filbunke 2.0.2 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "filbunke"
9
- s.version = "1.13.5"
9
+ s.version = "2.0.2"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
14
- s.date = "2016-03-11"
14
+ s.date = "2015-09-19"
15
15
  s.description = "Filbunke client and library"
16
16
  s.email = "technical@deltaprojects.com"
17
17
  s.executables = ["filbunked"]
@@ -38,12 +38,11 @@ Gem::Specification.new do |s|
38
38
  "lib/filbunke/file.rb",
39
39
  "lib/filbunke/logger.rb",
40
40
  "lib/filbunke/repository.rb",
41
- "lib/filbunke/thread_pool.rb",
42
41
  "test/helper.rb",
43
42
  "test/test_filbunke.rb"
44
43
  ]
45
44
  s.homepage = "https://rubygems.org/gems/filbunke"
46
- s.rubygems_version = "2.4.8"
45
+ s.rubygems_version = "2.4.5.1"
47
46
  s.summary = "Filbunke client"
48
47
 
49
48
  if s.respond_to? :specification_version then
@@ -51,23 +50,26 @@ Gem::Specification.new do |s|
51
50
 
52
51
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
52
  s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
- s.add_runtime_dependency(%q<json>, ["= 1.7.5"])
55
- s.add_runtime_dependency(%q<typhoeus>, ["= 1.0.1"])
56
- s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
57
- s.add_runtime_dependency(%q<mime-types>, ["= 1.19"])
53
+ s.add_runtime_dependency(%q<json>, ["= 1.8.3"])
54
+ s.add_runtime_dependency(%q<typhoeus>, ["= 0.7.3"])
55
+ s.add_runtime_dependency(%q<open4>, ["= 1.3.4"])
56
+ s.add_runtime_dependency(%q<mime-types>, ["= 2.6.2"])
57
+ s.add_runtime_dependency(%q<parallel>, ["= 1.6.1"])
58
58
  else
59
59
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
60
- s.add_dependency(%q<json>, ["= 1.7.5"])
61
- s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
62
- s.add_dependency(%q<open4>, ["= 1.3.0"])
63
- s.add_dependency(%q<mime-types>, ["= 1.19"])
60
+ s.add_dependency(%q<json>, ["= 1.8.3"])
61
+ s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
62
+ s.add_dependency(%q<open4>, ["= 1.3.4"])
63
+ s.add_dependency(%q<mime-types>, ["= 2.6.2"])
64
+ s.add_dependency(%q<parallel>, ["= 1.6.1"])
64
65
  end
65
66
  else
66
67
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
67
- s.add_dependency(%q<json>, ["= 1.7.5"])
68
- s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
69
- s.add_dependency(%q<open4>, ["= 1.3.0"])
70
- s.add_dependency(%q<mime-types>, ["= 1.19"])
68
+ s.add_dependency(%q<json>, ["= 1.8.3"])
69
+ s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
70
+ s.add_dependency(%q<open4>, ["= 1.3.4"])
71
+ s.add_dependency(%q<mime-types>, ["= 2.6.2"])
72
+ s.add_dependency(%q<parallel>, ["= 1.6.1"])
71
73
  end
72
74
  end
73
75
 
@@ -6,27 +6,12 @@ module Filbunke
6
6
  @logger = logger
7
7
  end
8
8
 
9
- def on_update_batch(files)
10
- files.each do |item|
11
- on_update(item.file, item.local_file_path)
12
- end
13
- end
14
9
  def on_update(file, local_file_path)
15
10
  end
16
11
 
17
- def on_no_change_batch(files)
18
- files.each do |item|
19
- on_no_change(item.file, item.local_file_path)
20
- end
21
- end
22
12
  def on_no_change(file, local_file_path)
23
13
  end
24
-
25
- def on_delete_batch(files)
26
- files.each do |item|
27
- on_delete(item.file, item.local_file_path)
28
- end
29
- end
14
+
30
15
  def on_delete(file, local_file_path)
31
16
  end
32
17
 
@@ -16,6 +16,7 @@ module Filbunke
16
16
  @repository = repository
17
17
  @logger = logger
18
18
  @callbacks = callbacks
19
+ @parallel_callback_opts = (@repository.num_callback_threads > 0 ? {:in_threads => @repository.num_callback_threads} : {:in_processes => repository.num_callback_processes} )
19
20
  @failed_request_log_file_name = failed_request_log_file_name
20
21
  @hydra = Typhoeus::Hydra.new(:max_concurrency => @repository.hydra_concurrency)
21
22
 
@@ -25,26 +26,18 @@ module Filbunke
25
26
  def with_updated_files(last_checkpoint)
26
27
  updates = get_updated_file_list(last_checkpoint)
27
28
  updated_files = updates["files"] || []
29
+ failure = false
30
+
28
31
  new_checkpoint = updates["checkpoint"]
29
- if updated_files.empty?
30
- if updates.key?("files")
31
- return begin
32
- fetch_remote_last_checkpoint
33
- rescue => e
34
- @logger.warn "Failed to fetch remote last_checkpoint #{@repository.name} will fall back to local last_checkpoint=#{last_checkpoint}"
35
- last_checkpoint
36
- end
37
- else
38
- return last_checkpoint
39
- end
40
- end
32
+
33
+ @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
41
34
 
42
- @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
43
35
  @async_requests = []
36
+
44
37
  callbacks_on_update = []
45
38
  callbacks_on_no_change = []
46
39
  callbacks_on_delete = []
47
- has_update_file_failure = false
40
+
48
41
  updated_files.each do |raw_file|
49
42
  file = File.new(raw_file)
50
43
  local_file_path = ::File.join(@repository.local_path, file.path)
@@ -58,56 +51,41 @@ module Filbunke
58
51
  callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
59
52
  else
60
53
  @logger.error "Unable to get file #{file.url} ==> #{file.path}!"
61
- has_update_file_failure = true
62
- break
54
+ failure = true
63
55
  end
56
+
64
57
  else
65
58
  @logger.debug "File exists with correct hash: #{local_file_path}"
66
59
  callbacks_on_no_change << OpenStruct.new({:file => file, :local_file_path => local_file_path})
67
60
  end
68
61
  end
69
62
  end
63
+ @hydra.run
70
64
 
71
- if has_update_file_failure
72
- @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
73
- return last_checkpoint
74
- end
75
- @logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
76
- has_fetch_failures = begin
77
- @hydra.run
78
- # Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
79
- # I think we can remove the following request validation
80
- @async_requests.any? do |request|
81
- @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
82
- request.response.nil? || request.response.code != 200
83
- end
84
- rescue RuntimeError, SystemCallError, StandardError => e
85
- msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
86
- @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
87
- true
88
- end
89
-
90
- if has_fetch_failures
91
- @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
92
- return last_checkpoint
65
+ pfailure = failure || @async_requests.any? do |request|
66
+ @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
67
+ request.response.nil? || request.response.code != 200
93
68
  end
94
69
 
95
- @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
96
- new_or_last_checkpoint = begin
97
- run_callbacks_delete(callbacks_on_delete)
98
- run_callbacks(callbacks_on_update)
99
- run_callbacks_no_change(callbacks_on_no_change)
100
-
101
- new_checkpoint || last_checkpoint
102
- rescue RuntimeError, SystemCallError, StandardError => e
103
- msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
104
- @logger.error "FAILED to process callbacks for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
70
+ if pfailure == false
71
+ @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
72
+ begin
73
+ run_callbacks_delete(callbacks_on_delete)
74
+ run_callbacks(callbacks_on_update)
75
+ run_callbacks_no_change(callbacks_on_no_change)
76
+
77
+ new_checkpoint || last_checkpoint
78
+ rescue RuntimeError, SystemCallError, StandardError => e
79
+ msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
80
+ @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
81
+ last_checkpoint
82
+ end
83
+ else
84
+ @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
105
85
  last_checkpoint
106
86
  end
107
-
108
- new_or_last_checkpoint
109
87
  end
110
-
88
+
111
89
  def update_files!(last_checkpoint)
112
90
  with_updated_files(last_checkpoint) {}
113
91
  end
@@ -159,7 +137,7 @@ module Filbunke
159
137
  end
160
138
  end
161
139
 
162
- def fetch_remote_last_checkpoint
140
+ def last_checkpoint
163
141
  last_checkpoint_http = Net::HTTP.new(@repository.host, @repository.port)
164
142
  last_checkpoint_http.start do |http|
165
143
  last_checkpoint_path = "/#{UPDATES_ACTION}/#{@repository.name}/#{LAST_CHECKPOINT_ACTION}"
@@ -171,7 +149,7 @@ module Filbunke
171
149
  return response.body.chomp.to_i
172
150
  end
173
151
  end
174
-
152
+
175
153
  private
176
154
 
177
155
  def log_failed_request(failed_request_command, e)
@@ -183,11 +161,11 @@ module Filbunke
183
161
  end
184
162
 
185
163
  def update_file!(file, local_file_path)
186
-
164
+
187
165
  if file.url =~ /^http:\/\//
188
166
  update_http_file!(file, local_file_path)
189
167
  elsif (file.url =~ /^hdfs:\/\//)
190
- update_hdfs_file!(file, local_file_path)
168
+ success = update_hdfs_file!(file, local_file_path)
191
169
  else
192
170
  raise "Unsupported protocol for file: #{file.inspect}"
193
171
  end
@@ -195,19 +173,25 @@ module Filbunke
195
173
 
196
174
  def run_callbacks(files)
197
175
  @callbacks.each do |callback|
198
- callback.on_update_batch(files)
176
+ Parallel.map(files, @parallel_callback_opts) do |item|
177
+ callback.on_update(item.file, item.local_file_path)
178
+ end
199
179
  end
200
180
  end
201
181
 
202
182
  def run_callbacks_no_change(files)
203
183
  @callbacks.each do |callback|
204
- callback.on_no_change_batch(files)
184
+ Parallel.map(files, @parallel_callback_opts) do |item|
185
+ callback.on_no_change(item.file, item.local_file_path)
186
+ end
205
187
  end
206
188
  end
207
189
 
208
190
  def run_callbacks_delete(files)
209
191
  @callbacks.each do |callback|
210
- callback.on_delete_batch(files)
192
+ Parallel.map(files, @parallel_callback_opts) do |item|
193
+ callback.on_delete(item.file, item.local_file_path)
194
+ end
211
195
  end
212
196
  end
213
197
 
@@ -226,7 +210,6 @@ module Filbunke
226
210
  updates_http.read_timeout = 300 # default is 60 seconds
227
211
  updates_http.start do |http|
228
212
  updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
229
- updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
230
213
  begin
231
214
  @logger.info "Fetching updated file list from #{updates_path}"
232
215
  request = Net::HTTP::Get.new(updates_path)
@@ -245,67 +228,44 @@ module Filbunke
245
228
  rescue StandardError => e
246
229
  @logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
247
230
  return {}
248
- end
231
+ end
249
232
  end
250
233
 
251
234
  def update_http_file!(file, local_file_path)
252
- downloaded_file = nil
253
- tmp_filename = "#{local_file_path}.tmp"
254
235
  begin
255
236
  async_request = if @repository.user
256
- Typhoeus::Request.new(
257
- URI.escape(file.url),
258
- :followlocation => true,
259
- :username => @repository.user,
260
- :password => @repository.pass
261
- )
237
+ Typhoeus::Request.new(URI.encode(file.url), :followlocation => true, :username => @repository.user, :password => @repository.pass)
262
238
  else
263
- Typhoeus::Request.new(
264
- URI.escape(file.url),
265
- :followlocation => true
266
- )
267
- end
268
-
269
-
270
- async_request.on_headers do |response|
271
- if response.code != 200
272
- raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
273
- end
274
- @logger.debug("Updating: #{local_file_path}")
275
- ::FileUtils.mkdir_p(::File.dirname(local_file_path))
276
- downloaded_file = ::File.new(tmp_filename, "wb")
277
- end
278
-
279
- async_request.on_body do |chunk, response|
280
- downloaded_file.write(chunk) if response.code == 200
239
+ Typhoeus::Request.new(URI.encode(file.url), :followlocation => true)
281
240
  end
282
-
283
241
  async_request.on_complete do |response|
284
- unless downloaded_file.nil?
285
- downloaded_file.close
286
- end
287
- if ::File.exist?(tmp_filename)
288
- if response.code == 200
289
- ::FileUtils.mv(tmp_filename, local_file_path)
242
+ success = false
243
+ begin
244
+ success = response.code.to_i == 200
245
+ if success
246
+ write_file!(local_file_path, response.body)
290
247
  else
291
- ::FileUtils.rm(tmp_filename)
248
+ body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
249
+ @logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
292
250
  end
251
+ rescue SystemCallError, StandardError => e
252
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
253
+ @logger.error "Failed to update file #{file.url}: #{msg}"
293
254
  end
294
- true
255
+ # return the async_request.handled_response value here
256
+ success
295
257
  end
296
258
  @hydra.queue async_request
297
259
  @async_requests << async_request
298
- true
299
- rescue RuntimeError, SystemCallError, StandardError => e
260
+ rescue StandardError => e
300
261
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
301
262
  @logger.error "Failed to update file #{file.url}: #{msg}"
302
- if ::File.exist?(tmp_filename)
303
- ::FileUtils.rm(tmp_filename)
304
- end
305
- false
263
+ return false
306
264
  end
307
- end
308
265
 
266
+ return true
267
+ end
268
+
309
269
  def update_hdfs_file!(file, local_file_path)
310
270
  begin
311
271
  ::FileUtils.mkdir_p(::File.dirname(local_file_path))
@@ -314,10 +274,10 @@ module Filbunke
314
274
  url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
315
275
  hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
316
276
  #@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
317
-
277
+
318
278
  pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
319
279
  ignored, status = Process::waitpid2 pid
320
-
280
+
321
281
  if status.exitstatus == 0 then
322
282
  begin
323
283
  ::FileUtils.mv "#{local_file_path}.tmp", local_file_path
@@ -326,7 +286,7 @@ module Filbunke
326
286
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
327
287
  @logger.error "Failed to move hdfs file #{file.url}: #{msg}"
328
288
  return false
329
- end
289
+ end
330
290
  else
331
291
  @logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
332
292
  return false
@@ -338,6 +298,23 @@ module Filbunke
338
298
  end
339
299
  end
340
300
 
301
+ def write_file!(file_path, contents)
302
+ ::FileUtils.mkdir_p(::File.dirname(file_path))
303
+ @logger.debug("Updating: #{file_path}")
304
+ begin
305
+ ::File.open("#{file_path}.tmp", 'w') do |file|
306
+ file.write(contents)
307
+ file.close
308
+ end
309
+ ::FileUtils.mv "#{file_path}.tmp", file_path
310
+ return true
311
+ rescue StandardError => e
312
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
313
+ @logger.error "Failed to move file #{file_path}: #{msg}"
314
+ return false
315
+ end
316
+ end
317
+
341
318
  def delete_file!(file_path)
342
319
  if ::File.exists?(file_path) then
343
320
  @logger.debug("Deleting: #{file_path}")
@@ -15,7 +15,6 @@ module Filbunke
15
15
  @logger.log("Initializing repository: #{repository_name}")
16
16
  @clients << begin
17
17
  repository_config["run_every"] = repository_config.fetch("run_every", @config.fetch("run_every", 10))
18
- repository_config["batch_size"] = repository_config.fetch("batch_size", @config.fetch("batch_size", 0))
19
18
  repository = Repository.new(repository_config)
20
19
  callbacks = []
21
20
  repository_config["callbacks"].each do |callback_name, callback_config|
@@ -26,10 +25,6 @@ module Filbunke
26
25
  failed_request_log_file_name = repository_config["failed_request_log_file_name"]||nil
27
26
 
28
27
  Client.new(repository, @logger, callbacks, failed_request_log_file_name)
29
- rescue => e
30
- msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
31
- @logger.error("Failed to initialize #{repository_name}; #{msg}")
32
- raise e
33
28
  end
34
29
  end
35
30
  end
@@ -48,13 +43,14 @@ module Filbunke
48
43
  update_checkpoint_for_repository(client.repository, new_checkpoint)
49
44
  sleep client.repository.run_every
50
45
  end
51
- rescue => e
46
+ rescue RuntimeError, SystemCallError, StandardError => e
52
47
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
53
48
  @logger.error("#{client.repository.name} Died.. #{msg}")
54
49
  Process.kill("KILL", @parent_pid)
55
50
  exit 1
56
51
  end
57
52
  end
53
+
58
54
  end
59
55
  client_pids.each { |pid| Process.wait(pid) }
60
56
  end
@@ -83,18 +79,6 @@ module Filbunke
83
79
  end
84
80
 
85
81
  def write_pid!(pid_file_path)
86
-
87
- begin
88
- existing_process = ::File.read(pid_file_path).to_i if ::File.readable?(pid_file_path)
89
- if existing_process != nil and existing_process > 0
90
- @logger.info("killing existing process #{existing_process} from #{pid_file_path}")
91
- Process.kill("KILL", existing_process)
92
-
93
- end
94
- rescue => e
95
- @logger.warn("failed to kill existing pid from #{pid_file_path}: #{e}\n\twill ignore and continue...")
96
- end
97
-
98
82
  ::File.open(pid_file_path, 'w') do |f|
99
83
  f.write(Process.pid.to_i)
100
84
  f.close
@@ -4,41 +4,41 @@ module Filbunke
4
4
 
5
5
  def initialize(log_file_name, local, level)
6
6
  @local = local
7
- @log = if @local or log_file_name.nil?
7
+ @logger = if @local or log_file_name.nil?
8
8
  Logger.new(STDOUT)
9
9
  else
10
10
  Logger.new(log_file_name)
11
11
  end
12
12
 
13
- @log.level = parse_level(level)
13
+ @logger.level = parse_level(level)
14
14
  end
15
15
 
16
16
  def puts(msg)
17
- info(msg)
17
+ @logger.info(msg)
18
18
  end
19
-
19
+
20
20
  def log(msg)
21
- @log.info msg
21
+ @logger.info msg
22
22
  end
23
23
 
24
24
  def info(msg)
25
- @log.info msg
25
+ @logger.info msg
26
26
  end
27
27
 
28
28
  def error(msg)
29
- @log.error msg
29
+ @logger.error msg
30
30
  end
31
31
 
32
32
  def warn(msg)
33
- @log.warn msg
33
+ @logger.warn msg
34
34
  end
35
35
 
36
36
  def debug(msg)
37
- @log.debug msg
37
+ @logger.debug msg
38
38
  end
39
39
 
40
40
  def fatal(msg)
41
- @log.error msg
41
+ @logger.error msg
42
42
  end
43
43
 
44
44
  def parse_level(constantOrString)
@@ -1,17 +1,19 @@
1
1
  module Filbunke
2
2
  class Repository
3
- attr_accessor :name,
4
- :host,
5
- :port,
6
- :local_path,
7
- :file_umask,
8
- :directory_umask,
9
- :user,
10
- :pass,
11
- :hadoop_binary,
3
+
4
+ attr_accessor :name,
5
+ :host,
6
+ :port,
7
+ :local_path,
8
+ :file_umask,
9
+ :directory_umask,
10
+ :user,
11
+ :pass,
12
+ :hadoop_binary,
12
13
  :run_every,
13
14
  :hydra_concurrency,
14
- :batch_size
15
+ :num_callback_processes,
16
+ :num_callback_threads
15
17
 
16
18
  def initialize(repository_config)
17
19
  @name = repository_config["filbunke_server_repository"]
@@ -25,8 +27,19 @@ module Filbunke
25
27
  @hadoop_binary = repository_config["hadoop_binary"]
26
28
  @run_every = repository_config.fetch("run_every", 10).to_i
27
29
  @hydra_concurrency = repository_config.fetch("hydra_concurrency", 100).to_i
28
- # batch_size == 0 means use default configured in filbunke-server
29
- @batch_size = repository_config.fetch("batch_size", 0).to_i
30
- end
31
- end
32
- end
30
+ @num_callback_processes = repository_config["num_callback_processes"].to_i
31
+ @num_callback_threads = repository_config["num_callback_threads"].to_i
32
+
33
+ if @num_callback_threads == 0 and @num_callback_processes == 0
34
+ @num_callback_threads = Parallel.processor_count
35
+ end
36
+
37
+ raise ConfigurationError.new("callbacks cant use both processes and threads (#{@name})") if @num_callback_threads > 0 && @num_callback_processes > 0
38
+
39
+ end
40
+
41
+ end
42
+
43
+ class ConfigurationError < StandardError
44
+ end
45
+ end
data/lib/filbunke.rb CHANGED
@@ -7,8 +7,8 @@ require 'typhoeus'
7
7
  require 'open4'
8
8
  require 'uri'
9
9
  require 'logger'
10
+ require 'parallel'
10
11
 
11
- require File.expand_path(File.dirname(__FILE__) + '/filbunke/thread_pool.rb')
12
12
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/client.rb')
13
13
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/file.rb')
14
14
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/repository.rb')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filbunke
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.13.5
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter de Bie
@@ -11,20 +11,20 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2016-03-11 00:00:00.000000000 Z
14
+ date: 2015-09-19 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: thoughtbot-shoulda
18
18
  requirement: !ruby/object:Gem::Requirement
19
19
  requirements:
20
- - - ! '>='
20
+ - - ">="
21
21
  - !ruby/object:Gem::Version
22
22
  version: '0'
23
23
  type: :development
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - ! '>='
27
+ - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
30
  - !ruby/object:Gem::Dependency
@@ -33,56 +33,70 @@ dependencies:
33
33
  requirements:
34
34
  - - '='
35
35
  - !ruby/object:Gem::Version
36
- version: 1.7.5
36
+ version: 1.8.3
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
40
40
  requirements:
41
41
  - - '='
42
42
  - !ruby/object:Gem::Version
43
- version: 1.7.5
43
+ version: 1.8.3
44
44
  - !ruby/object:Gem::Dependency
45
45
  name: typhoeus
46
46
  requirement: !ruby/object:Gem::Requirement
47
47
  requirements:
48
48
  - - '='
49
49
  - !ruby/object:Gem::Version
50
- version: 1.0.1
50
+ version: 0.7.3
51
51
  type: :runtime
52
52
  prerelease: false
53
53
  version_requirements: !ruby/object:Gem::Requirement
54
54
  requirements:
55
55
  - - '='
56
56
  - !ruby/object:Gem::Version
57
- version: 1.0.1
57
+ version: 0.7.3
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: open4
60
60
  requirement: !ruby/object:Gem::Requirement
61
61
  requirements:
62
62
  - - '='
63
63
  - !ruby/object:Gem::Version
64
- version: 1.3.0
64
+ version: 1.3.4
65
65
  type: :runtime
66
66
  prerelease: false
67
67
  version_requirements: !ruby/object:Gem::Requirement
68
68
  requirements:
69
69
  - - '='
70
70
  - !ruby/object:Gem::Version
71
- version: 1.3.0
71
+ version: 1.3.4
72
72
  - !ruby/object:Gem::Dependency
73
73
  name: mime-types
74
74
  requirement: !ruby/object:Gem::Requirement
75
75
  requirements:
76
76
  - - '='
77
77
  - !ruby/object:Gem::Version
78
- version: '1.19'
78
+ version: 2.6.2
79
79
  type: :runtime
80
80
  prerelease: false
81
81
  version_requirements: !ruby/object:Gem::Requirement
82
82
  requirements:
83
83
  - - '='
84
84
  - !ruby/object:Gem::Version
85
- version: '1.19'
85
+ version: 2.6.2
86
+ - !ruby/object:Gem::Dependency
87
+ name: parallel
88
+ requirement: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - '='
91
+ - !ruby/object:Gem::Version
92
+ version: 1.6.1
93
+ type: :runtime
94
+ prerelease: false
95
+ version_requirements: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - '='
98
+ - !ruby/object:Gem::Version
99
+ version: 1.6.1
86
100
  description: Filbunke client and library
87
101
  email: technical@deltaprojects.com
88
102
  executables:
@@ -90,9 +104,9 @@ executables:
90
104
  extensions: []
91
105
  extra_rdoc_files: []
92
106
  files:
93
- - .gitignore
94
- - .ruby-gemset
95
- - .ruby-version
107
+ - ".gitignore"
108
+ - ".ruby-gemset"
109
+ - ".ruby-version"
96
110
  - Gemfile
97
111
  - LICENSE
98
112
  - README.rdoc
@@ -112,7 +126,6 @@ files:
112
126
  - lib/filbunke/file.rb
113
127
  - lib/filbunke/logger.rb
114
128
  - lib/filbunke/repository.rb
115
- - lib/filbunke/thread_pool.rb
116
129
  - test/helper.rb
117
130
  - test/test_filbunke.rb
118
131
  homepage: https://rubygems.org/gems/filbunke
@@ -124,17 +137,17 @@ require_paths:
124
137
  - lib
125
138
  required_ruby_version: !ruby/object:Gem::Requirement
126
139
  requirements:
127
- - - ! '>='
140
+ - - ">="
128
141
  - !ruby/object:Gem::Version
129
142
  version: '0'
130
143
  required_rubygems_version: !ruby/object:Gem::Requirement
131
144
  requirements:
132
- - - ! '>='
145
+ - - ">="
133
146
  - !ruby/object:Gem::Version
134
147
  version: '0'
135
148
  requirements: []
136
149
  rubyforge_project:
137
- rubygems_version: 2.4.8
150
+ rubygems_version: 2.4.5.1
138
151
  signing_key:
139
152
  specification_version: 4
140
153
  summary: Filbunke client
@@ -1,104 +0,0 @@
1
- # Inspired by https://github.com/meh/ruby-threadpool
2
- require 'thread'
3
-
4
- class ThreadPool
5
-
6
- class Job < Struct.new(:args, :block); end
7
-
8
- def initialize(min, max = nil)
9
-
10
- trap("INT") { shutdown }
11
-
12
- @min = min
13
- @max = max || min
14
-
15
- @cv = ConditionVariable.new
16
- @mutex = Mutex.new
17
-
18
- @queue = []
19
- @workers = []
20
-
21
- @spawned = 0
22
- @waiting = 0
23
- @shutdown = false
24
- @queue_locked = false
25
-
26
- @mutex.synchronize do
27
- min.times { spawn_thread }
28
- end
29
- end
30
-
31
- def execute(*args, &block)
32
- @mutex.synchronize do
33
- raise "Thread pool is about to shutdown" if @shutdown || @queue_locked
34
-
35
- @queue << Job.new(args, block)
36
-
37
- spawn_thread if @waiting == 0 && @spawned < @max
38
-
39
- @cv.signal
40
- end
41
- end
42
- alias :<< :execute
43
-
44
- def shutdown
45
- @mutex.synchronize do
46
- @shutdown = true
47
- @cv.broadcast
48
- end
49
-
50
- @workers.first.join until @workers.empty?
51
- end
52
-
53
- def join
54
- @mutex.synchronize do
55
- @queue_locked = true
56
- @cv.broadcast
57
- sleep 0.01 until @queue.empty?
58
- end
59
- shutdown
60
- end
61
-
62
- protected
63
-
64
- def spawn_thread
65
- thread = Thread.new do
66
- continue = true
67
-
68
- while continue do
69
- job = nil
70
-
71
- @mutex.synchronize do
72
- while @queue.empty? && continue
73
- if @shutdown || @queue_locked
74
- continue = false
75
- break
76
- end
77
-
78
- @waiting += 1
79
- @cv.wait @mutex
80
- @waiting -= 1
81
-
82
- if @shutdown || @queue_locked
83
- continue = false
84
- break
85
- end
86
- end
87
-
88
- if continue
89
- job = @queue.shift
90
- job.block.call(*job.args) if job
91
- end
92
- end
93
- end
94
-
95
- @mutex.synchronize do
96
- @spawned -= 1
97
- @workers.delete thread
98
- end
99
- end
100
-
101
- @workers << thread
102
- thread
103
- end
104
- end