filbunke 1.13.5 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- ZTY5MmMwZWFjM2U3YThjYzA1ZjkzMmRjNjVmNzVkZDI4Yzk0N2EzZA==
5
- data.tar.gz: !binary |-
6
- NzlmYmMzN2YwYjI1ZTA5MzhkNThkNWQ0ZDk4YTZiMmQxMWZhNGY4OA==
2
+ SHA1:
3
+ metadata.gz: 9ebdbdfb5aa1fc9a53ffe60b2b79d8cec6fe920c
4
+ data.tar.gz: d320cceb5cd74e3c443495fe2305e08a7d385af8
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- MGY2Mjg0OWM4NDk1YTRjNDMzNDhhOGJlMTA1ZGI2NjAyM2Q0MzA3MDNlMjY0
10
- YzkzMzAxOTc2ZmUxMGZlYmRhM2VhYTg1MGI2ZDY4YzAyMmFjNTY1OWNmZDMx
11
- ODczMzhhZjVhOWRmYWM5YTMxZTVlOTdkZTRhNmIwOWVkNjJhYzc=
12
- data.tar.gz: !binary |-
13
- YTg2Y2NmNzdjYjAxZTRlOTg2NDdlMGIxMTUyOTUwZDdlOGY1ZGRlY2U0NDM2
14
- NTljMTc4ZGFlYjIzNWFkMDFmNzJiZmFiYTJmNTY5YzFlZWRkYzNkMmZkNjQ2
15
- YWMzYTc0MjY3OTk2ZjkyYTQ4YzAzMDhmOGM3ZTU2OTVjNjBkM2E=
6
+ metadata.gz: dbd84ac518738229f3333557d68fa8c39dbe9153358ba2da521b575b2003bc814d595bc0d8bc256ddc3dee8965b7eefdb8c72117e0fd244d7847d425126e9fcb
7
+ data.tar.gz: 815b0982c1c9fab0fdb5c08fd34140796bd89e8cf1cf4c9ed00d474a71dec52ca301f88d60105038ceced0a41d31346a589a95c048ec79cc62c7dc8d01c25990
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 1.9.3
1
+ 2.2
data/Rakefile CHANGED
@@ -13,10 +13,11 @@ begin
13
13
  gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
14
14
  gem.files.exclude 'pkg'
15
15
  gem.executables = ['filbunked']
16
- gem.add_dependency 'json', '= 1.7.5'
17
- gem.add_dependency 'typhoeus', '= 1.0.1'
18
- gem.add_dependency 'open4', '= 1.3.0'
19
- gem.add_dependency 'mime-types', '= 1.19'
16
+ gem.add_dependency 'json', '= 1.8.3'
17
+ gem.add_dependency 'typhoeus', '= 0.7.3'
18
+ gem.add_dependency 'open4', '= 1.3.4'
19
+ gem.add_dependency 'mime-types', '= 2.6.2'
20
+ gem.add_dependency 'parallel', '= 1.6.1'
20
21
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
21
22
  end
22
23
  Jeweler::GemcutterTasks.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.13.5
1
+ 2.0.2
data/filbunke.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: filbunke 1.13.5 ruby lib
5
+ # stub: filbunke 2.0.2 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "filbunke"
9
- s.version = "1.13.5"
9
+ s.version = "2.0.2"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
14
- s.date = "2016-03-11"
14
+ s.date = "2015-09-19"
15
15
  s.description = "Filbunke client and library"
16
16
  s.email = "technical@deltaprojects.com"
17
17
  s.executables = ["filbunked"]
@@ -38,12 +38,11 @@ Gem::Specification.new do |s|
38
38
  "lib/filbunke/file.rb",
39
39
  "lib/filbunke/logger.rb",
40
40
  "lib/filbunke/repository.rb",
41
- "lib/filbunke/thread_pool.rb",
42
41
  "test/helper.rb",
43
42
  "test/test_filbunke.rb"
44
43
  ]
45
44
  s.homepage = "https://rubygems.org/gems/filbunke"
46
- s.rubygems_version = "2.4.8"
45
+ s.rubygems_version = "2.4.5.1"
47
46
  s.summary = "Filbunke client"
48
47
 
49
48
  if s.respond_to? :specification_version then
@@ -51,23 +50,26 @@ Gem::Specification.new do |s|
51
50
 
52
51
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
52
  s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
- s.add_runtime_dependency(%q<json>, ["= 1.7.5"])
55
- s.add_runtime_dependency(%q<typhoeus>, ["= 1.0.1"])
56
- s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
57
- s.add_runtime_dependency(%q<mime-types>, ["= 1.19"])
53
+ s.add_runtime_dependency(%q<json>, ["= 1.8.3"])
54
+ s.add_runtime_dependency(%q<typhoeus>, ["= 0.7.3"])
55
+ s.add_runtime_dependency(%q<open4>, ["= 1.3.4"])
56
+ s.add_runtime_dependency(%q<mime-types>, ["= 2.6.2"])
57
+ s.add_runtime_dependency(%q<parallel>, ["= 1.6.1"])
58
58
  else
59
59
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
60
- s.add_dependency(%q<json>, ["= 1.7.5"])
61
- s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
62
- s.add_dependency(%q<open4>, ["= 1.3.0"])
63
- s.add_dependency(%q<mime-types>, ["= 1.19"])
60
+ s.add_dependency(%q<json>, ["= 1.8.3"])
61
+ s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
62
+ s.add_dependency(%q<open4>, ["= 1.3.4"])
63
+ s.add_dependency(%q<mime-types>, ["= 2.6.2"])
64
+ s.add_dependency(%q<parallel>, ["= 1.6.1"])
64
65
  end
65
66
  else
66
67
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
67
- s.add_dependency(%q<json>, ["= 1.7.5"])
68
- s.add_dependency(%q<typhoeus>, ["= 1.0.1"])
69
- s.add_dependency(%q<open4>, ["= 1.3.0"])
70
- s.add_dependency(%q<mime-types>, ["= 1.19"])
68
+ s.add_dependency(%q<json>, ["= 1.8.3"])
69
+ s.add_dependency(%q<typhoeus>, ["= 0.7.3"])
70
+ s.add_dependency(%q<open4>, ["= 1.3.4"])
71
+ s.add_dependency(%q<mime-types>, ["= 2.6.2"])
72
+ s.add_dependency(%q<parallel>, ["= 1.6.1"])
71
73
  end
72
74
  end
73
75
 
@@ -6,27 +6,12 @@ module Filbunke
6
6
  @logger = logger
7
7
  end
8
8
 
9
- def on_update_batch(files)
10
- files.each do |item|
11
- on_update(item.file, item.local_file_path)
12
- end
13
- end
14
9
  def on_update(file, local_file_path)
15
10
  end
16
11
 
17
- def on_no_change_batch(files)
18
- files.each do |item|
19
- on_no_change(item.file, item.local_file_path)
20
- end
21
- end
22
12
  def on_no_change(file, local_file_path)
23
13
  end
24
-
25
- def on_delete_batch(files)
26
- files.each do |item|
27
- on_delete(item.file, item.local_file_path)
28
- end
29
- end
14
+
30
15
  def on_delete(file, local_file_path)
31
16
  end
32
17
 
@@ -16,6 +16,7 @@ module Filbunke
16
16
  @repository = repository
17
17
  @logger = logger
18
18
  @callbacks = callbacks
19
+ @parallel_callback_opts = (@repository.num_callback_threads > 0 ? {:in_threads => @repository.num_callback_threads} : {:in_processes => repository.num_callback_processes} )
19
20
  @failed_request_log_file_name = failed_request_log_file_name
20
21
  @hydra = Typhoeus::Hydra.new(:max_concurrency => @repository.hydra_concurrency)
21
22
 
@@ -25,26 +26,18 @@ module Filbunke
25
26
  def with_updated_files(last_checkpoint)
26
27
  updates = get_updated_file_list(last_checkpoint)
27
28
  updated_files = updates["files"] || []
29
+ failure = false
30
+
28
31
  new_checkpoint = updates["checkpoint"]
29
- if updated_files.empty?
30
- if updates.key?("files")
31
- return begin
32
- fetch_remote_last_checkpoint
33
- rescue => e
34
- @logger.warn "Failed to fetch remote last_checkpoint #{@repository.name} will fall back to local last_checkpoint=#{last_checkpoint}"
35
- last_checkpoint
36
- end
37
- else
38
- return last_checkpoint
39
- end
40
- end
32
+
33
+ @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
41
34
 
42
- @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}"
43
35
  @async_requests = []
36
+
44
37
  callbacks_on_update = []
45
38
  callbacks_on_no_change = []
46
39
  callbacks_on_delete = []
47
- has_update_file_failure = false
40
+
48
41
  updated_files.each do |raw_file|
49
42
  file = File.new(raw_file)
50
43
  local_file_path = ::File.join(@repository.local_path, file.path)
@@ -58,56 +51,41 @@ module Filbunke
58
51
  callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
59
52
  else
60
53
  @logger.error "Unable to get file #{file.url} ==> #{file.path}!"
61
- has_update_file_failure = true
62
- break
54
+ failure = true
63
55
  end
56
+
64
57
  else
65
58
  @logger.debug "File exists with correct hash: #{local_file_path}"
66
59
  callbacks_on_no_change << OpenStruct.new({:file => file, :local_file_path => local_file_path})
67
60
  end
68
61
  end
69
62
  end
63
+ @hydra.run
70
64
 
71
- if has_update_file_failure
72
- @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
73
- return last_checkpoint
74
- end
75
- @logger.info "Done setting up async requests for #{@repository.name}, starting fetch..."
76
- has_fetch_failures = begin
77
- @hydra.run
78
- # Magnus 20160305 - since we now fail fast by raising a RuntimeError on response.code != 200
79
- # I think we can remove the following request validation
80
- @async_requests.any? do |request|
81
- @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
82
- request.response.nil? || request.response.code != 200
83
- end
84
- rescue RuntimeError, SystemCallError, StandardError => e
85
- msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
86
- @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
87
- true
88
- end
89
-
90
- if has_fetch_failures
91
- @logger.error "FAILED to fetch files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
92
- return last_checkpoint
65
+ pfailure = failure || @async_requests.any? do |request|
66
+ @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
67
+ request.response.nil? || request.response.code != 200
93
68
  end
94
69
 
95
- @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
96
- new_or_last_checkpoint = begin
97
- run_callbacks_delete(callbacks_on_delete)
98
- run_callbacks(callbacks_on_update)
99
- run_callbacks_no_change(callbacks_on_no_change)
100
-
101
- new_checkpoint || last_checkpoint
102
- rescue RuntimeError, SystemCallError, StandardError => e
103
- msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
104
- @logger.error "FAILED to process callbacks for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
70
+ if pfailure == false
71
+ @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
72
+ begin
73
+ run_callbacks_delete(callbacks_on_delete)
74
+ run_callbacks(callbacks_on_update)
75
+ run_callbacks_no_change(callbacks_on_no_change)
76
+
77
+ new_checkpoint || last_checkpoint
78
+ rescue RuntimeError, SystemCallError, StandardError => e
79
+ msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
80
+ @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
81
+ last_checkpoint
82
+ end
83
+ else
84
+ @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
105
85
  last_checkpoint
106
86
  end
107
-
108
- new_or_last_checkpoint
109
87
  end
110
-
88
+
111
89
  def update_files!(last_checkpoint)
112
90
  with_updated_files(last_checkpoint) {}
113
91
  end
@@ -159,7 +137,7 @@ module Filbunke
159
137
  end
160
138
  end
161
139
 
162
- def fetch_remote_last_checkpoint
140
+ def last_checkpoint
163
141
  last_checkpoint_http = Net::HTTP.new(@repository.host, @repository.port)
164
142
  last_checkpoint_http.start do |http|
165
143
  last_checkpoint_path = "/#{UPDATES_ACTION}/#{@repository.name}/#{LAST_CHECKPOINT_ACTION}"
@@ -171,7 +149,7 @@ module Filbunke
171
149
  return response.body.chomp.to_i
172
150
  end
173
151
  end
174
-
152
+
175
153
  private
176
154
 
177
155
  def log_failed_request(failed_request_command, e)
@@ -183,11 +161,11 @@ module Filbunke
183
161
  end
184
162
 
185
163
  def update_file!(file, local_file_path)
186
-
164
+
187
165
  if file.url =~ /^http:\/\//
188
166
  update_http_file!(file, local_file_path)
189
167
  elsif (file.url =~ /^hdfs:\/\//)
190
- update_hdfs_file!(file, local_file_path)
168
+ success = update_hdfs_file!(file, local_file_path)
191
169
  else
192
170
  raise "Unsupported protocol for file: #{file.inspect}"
193
171
  end
@@ -195,19 +173,25 @@ module Filbunke
195
173
 
196
174
  def run_callbacks(files)
197
175
  @callbacks.each do |callback|
198
- callback.on_update_batch(files)
176
+ Parallel.map(files, @parallel_callback_opts) do |item|
177
+ callback.on_update(item.file, item.local_file_path)
178
+ end
199
179
  end
200
180
  end
201
181
 
202
182
  def run_callbacks_no_change(files)
203
183
  @callbacks.each do |callback|
204
- callback.on_no_change_batch(files)
184
+ Parallel.map(files, @parallel_callback_opts) do |item|
185
+ callback.on_no_change(item.file, item.local_file_path)
186
+ end
205
187
  end
206
188
  end
207
189
 
208
190
  def run_callbacks_delete(files)
209
191
  @callbacks.each do |callback|
210
- callback.on_delete_batch(files)
192
+ Parallel.map(files, @parallel_callback_opts) do |item|
193
+ callback.on_delete(item.file, item.local_file_path)
194
+ end
211
195
  end
212
196
  end
213
197
 
@@ -226,7 +210,6 @@ module Filbunke
226
210
  updates_http.read_timeout = 300 # default is 60 seconds
227
211
  updates_http.start do |http|
228
212
  updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
229
- updates_path = "#{updates_path}&batch_size=#{@repository.batch_size}" if @repository.batch_size > 0
230
213
  begin
231
214
  @logger.info "Fetching updated file list from #{updates_path}"
232
215
  request = Net::HTTP::Get.new(updates_path)
@@ -245,67 +228,44 @@ module Filbunke
245
228
  rescue StandardError => e
246
229
  @logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
247
230
  return {}
248
- end
231
+ end
249
232
  end
250
233
 
251
234
  def update_http_file!(file, local_file_path)
252
- downloaded_file = nil
253
- tmp_filename = "#{local_file_path}.tmp"
254
235
  begin
255
236
  async_request = if @repository.user
256
- Typhoeus::Request.new(
257
- URI.escape(file.url),
258
- :followlocation => true,
259
- :username => @repository.user,
260
- :password => @repository.pass
261
- )
237
+ Typhoeus::Request.new(URI.encode(file.url), :followlocation => true, :username => @repository.user, :password => @repository.pass)
262
238
  else
263
- Typhoeus::Request.new(
264
- URI.escape(file.url),
265
- :followlocation => true
266
- )
267
- end
268
-
269
-
270
- async_request.on_headers do |response|
271
- if response.code != 200
272
- raise "Failed to fetch response(#{response.code}) for url '#{response.effective_url}' ---\n\t #{response.inspect}"
273
- end
274
- @logger.debug("Updating: #{local_file_path}")
275
- ::FileUtils.mkdir_p(::File.dirname(local_file_path))
276
- downloaded_file = ::File.new(tmp_filename, "wb")
277
- end
278
-
279
- async_request.on_body do |chunk, response|
280
- downloaded_file.write(chunk) if response.code == 200
239
+ Typhoeus::Request.new(URI.encode(file.url), :followlocation => true)
281
240
  end
282
-
283
241
  async_request.on_complete do |response|
284
- unless downloaded_file.nil?
285
- downloaded_file.close
286
- end
287
- if ::File.exist?(tmp_filename)
288
- if response.code == 200
289
- ::FileUtils.mv(tmp_filename, local_file_path)
242
+ success = false
243
+ begin
244
+ success = response.code.to_i == 200
245
+ if success
246
+ write_file!(local_file_path, response.body)
290
247
  else
291
- ::FileUtils.rm(tmp_filename)
248
+ body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
249
+ @logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
292
250
  end
251
+ rescue SystemCallError, StandardError => e
252
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
253
+ @logger.error "Failed to update file #{file.url}: #{msg}"
293
254
  end
294
- true
255
+ # return the async_request.handled_response value here
256
+ success
295
257
  end
296
258
  @hydra.queue async_request
297
259
  @async_requests << async_request
298
- true
299
- rescue RuntimeError, SystemCallError, StandardError => e
260
+ rescue StandardError => e
300
261
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
301
262
  @logger.error "Failed to update file #{file.url}: #{msg}"
302
- if ::File.exist?(tmp_filename)
303
- ::FileUtils.rm(tmp_filename)
304
- end
305
- false
263
+ return false
306
264
  end
307
- end
308
265
 
266
+ return true
267
+ end
268
+
309
269
  def update_hdfs_file!(file, local_file_path)
310
270
  begin
311
271
  ::FileUtils.mkdir_p(::File.dirname(local_file_path))
@@ -314,10 +274,10 @@ module Filbunke
314
274
  url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
315
275
  hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
316
276
  #@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
317
-
277
+
318
278
  pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
319
279
  ignored, status = Process::waitpid2 pid
320
-
280
+
321
281
  if status.exitstatus == 0 then
322
282
  begin
323
283
  ::FileUtils.mv "#{local_file_path}.tmp", local_file_path
@@ -326,7 +286,7 @@ module Filbunke
326
286
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
327
287
  @logger.error "Failed to move hdfs file #{file.url}: #{msg}"
328
288
  return false
329
- end
289
+ end
330
290
  else
331
291
  @logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
332
292
  return false
@@ -338,6 +298,23 @@ module Filbunke
338
298
  end
339
299
  end
340
300
 
301
+ def write_file!(file_path, contents)
302
+ ::FileUtils.mkdir_p(::File.dirname(file_path))
303
+ @logger.debug("Updating: #{file_path}")
304
+ begin
305
+ ::File.open("#{file_path}.tmp", 'w') do |file|
306
+ file.write(contents)
307
+ file.close
308
+ end
309
+ ::FileUtils.mv "#{file_path}.tmp", file_path
310
+ return true
311
+ rescue StandardError => e
312
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
313
+ @logger.error "Failed to move file #{file_path}: #{msg}"
314
+ return false
315
+ end
316
+ end
317
+
341
318
  def delete_file!(file_path)
342
319
  if ::File.exists?(file_path) then
343
320
  @logger.debug("Deleting: #{file_path}")
@@ -15,7 +15,6 @@ module Filbunke
15
15
  @logger.log("Initializing repository: #{repository_name}")
16
16
  @clients << begin
17
17
  repository_config["run_every"] = repository_config.fetch("run_every", @config.fetch("run_every", 10))
18
- repository_config["batch_size"] = repository_config.fetch("batch_size", @config.fetch("batch_size", 0))
19
18
  repository = Repository.new(repository_config)
20
19
  callbacks = []
21
20
  repository_config["callbacks"].each do |callback_name, callback_config|
@@ -26,10 +25,6 @@ module Filbunke
26
25
  failed_request_log_file_name = repository_config["failed_request_log_file_name"]||nil
27
26
 
28
27
  Client.new(repository, @logger, callbacks, failed_request_log_file_name)
29
- rescue => e
30
- msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
31
- @logger.error("Failed to initialize #{repository_name}; #{msg}")
32
- raise e
33
28
  end
34
29
  end
35
30
  end
@@ -48,13 +43,14 @@ module Filbunke
48
43
  update_checkpoint_for_repository(client.repository, new_checkpoint)
49
44
  sleep client.repository.run_every
50
45
  end
51
- rescue => e
46
+ rescue RuntimeError, SystemCallError, StandardError => e
52
47
  msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
53
48
  @logger.error("#{client.repository.name} Died.. #{msg}")
54
49
  Process.kill("KILL", @parent_pid)
55
50
  exit 1
56
51
  end
57
52
  end
53
+
58
54
  end
59
55
  client_pids.each { |pid| Process.wait(pid) }
60
56
  end
@@ -83,18 +79,6 @@ module Filbunke
83
79
  end
84
80
 
85
81
  def write_pid!(pid_file_path)
86
-
87
- begin
88
- existing_process = ::File.read(pid_file_path).to_i if ::File.readable?(pid_file_path)
89
- if existing_process != nil and existing_process > 0
90
- @logger.info("killing existing process #{existing_process} from #{pid_file_path}")
91
- Process.kill("KILL", existing_process)
92
-
93
- end
94
- rescue => e
95
- @logger.warn("failed to kill existing pid from #{pid_file_path}: #{e}\n\twill ignore and continue...")
96
- end
97
-
98
82
  ::File.open(pid_file_path, 'w') do |f|
99
83
  f.write(Process.pid.to_i)
100
84
  f.close
@@ -4,41 +4,41 @@ module Filbunke
4
4
 
5
5
  def initialize(log_file_name, local, level)
6
6
  @local = local
7
- @log = if @local or log_file_name.nil?
7
+ @logger = if @local or log_file_name.nil?
8
8
  Logger.new(STDOUT)
9
9
  else
10
10
  Logger.new(log_file_name)
11
11
  end
12
12
 
13
- @log.level = parse_level(level)
13
+ @logger.level = parse_level(level)
14
14
  end
15
15
 
16
16
  def puts(msg)
17
- info(msg)
17
+ @logger.info(msg)
18
18
  end
19
-
19
+
20
20
  def log(msg)
21
- @log.info msg
21
+ @logger.info msg
22
22
  end
23
23
 
24
24
  def info(msg)
25
- @log.info msg
25
+ @logger.info msg
26
26
  end
27
27
 
28
28
  def error(msg)
29
- @log.error msg
29
+ @logger.error msg
30
30
  end
31
31
 
32
32
  def warn(msg)
33
- @log.warn msg
33
+ @logger.warn msg
34
34
  end
35
35
 
36
36
  def debug(msg)
37
- @log.debug msg
37
+ @logger.debug msg
38
38
  end
39
39
 
40
40
  def fatal(msg)
41
- @log.error msg
41
+ @logger.error msg
42
42
  end
43
43
 
44
44
  def parse_level(constantOrString)
@@ -1,17 +1,19 @@
1
1
  module Filbunke
2
2
  class Repository
3
- attr_accessor :name,
4
- :host,
5
- :port,
6
- :local_path,
7
- :file_umask,
8
- :directory_umask,
9
- :user,
10
- :pass,
11
- :hadoop_binary,
3
+
4
+ attr_accessor :name,
5
+ :host,
6
+ :port,
7
+ :local_path,
8
+ :file_umask,
9
+ :directory_umask,
10
+ :user,
11
+ :pass,
12
+ :hadoop_binary,
12
13
  :run_every,
13
14
  :hydra_concurrency,
14
- :batch_size
15
+ :num_callback_processes,
16
+ :num_callback_threads
15
17
 
16
18
  def initialize(repository_config)
17
19
  @name = repository_config["filbunke_server_repository"]
@@ -25,8 +27,19 @@ module Filbunke
25
27
  @hadoop_binary = repository_config["hadoop_binary"]
26
28
  @run_every = repository_config.fetch("run_every", 10).to_i
27
29
  @hydra_concurrency = repository_config.fetch("hydra_concurrency", 100).to_i
28
- # batch_size == 0 means use default configured in filbunke-server
29
- @batch_size = repository_config.fetch("batch_size", 0).to_i
30
- end
31
- end
32
- end
30
+ @num_callback_processes = repository_config["num_callback_processes"].to_i
31
+ @num_callback_threads = repository_config["num_callback_threads"].to_i
32
+
33
+ if @num_callback_threads == 0 and @num_callback_processes == 0
34
+ @num_callback_threads = Parallel.processor_count
35
+ end
36
+
37
+ raise ConfigurationError.new("callbacks cant use both processes and threads (#{@name})") if @num_callback_threads > 0 && @num_callback_processes > 0
38
+
39
+ end
40
+
41
+ end
42
+
43
+ class ConfigurationError < StandardError
44
+ end
45
+ end
data/lib/filbunke.rb CHANGED
@@ -7,8 +7,8 @@ require 'typhoeus'
7
7
  require 'open4'
8
8
  require 'uri'
9
9
  require 'logger'
10
+ require 'parallel'
10
11
 
11
- require File.expand_path(File.dirname(__FILE__) + '/filbunke/thread_pool.rb')
12
12
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/client.rb')
13
13
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/file.rb')
14
14
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/repository.rb')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filbunke
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.13.5
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter de Bie
@@ -11,20 +11,20 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2016-03-11 00:00:00.000000000 Z
14
+ date: 2015-09-19 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: thoughtbot-shoulda
18
18
  requirement: !ruby/object:Gem::Requirement
19
19
  requirements:
20
- - - ! '>='
20
+ - - ">="
21
21
  - !ruby/object:Gem::Version
22
22
  version: '0'
23
23
  type: :development
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - ! '>='
27
+ - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
30
  - !ruby/object:Gem::Dependency
@@ -33,56 +33,70 @@ dependencies:
33
33
  requirements:
34
34
  - - '='
35
35
  - !ruby/object:Gem::Version
36
- version: 1.7.5
36
+ version: 1.8.3
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
40
40
  requirements:
41
41
  - - '='
42
42
  - !ruby/object:Gem::Version
43
- version: 1.7.5
43
+ version: 1.8.3
44
44
  - !ruby/object:Gem::Dependency
45
45
  name: typhoeus
46
46
  requirement: !ruby/object:Gem::Requirement
47
47
  requirements:
48
48
  - - '='
49
49
  - !ruby/object:Gem::Version
50
- version: 1.0.1
50
+ version: 0.7.3
51
51
  type: :runtime
52
52
  prerelease: false
53
53
  version_requirements: !ruby/object:Gem::Requirement
54
54
  requirements:
55
55
  - - '='
56
56
  - !ruby/object:Gem::Version
57
- version: 1.0.1
57
+ version: 0.7.3
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: open4
60
60
  requirement: !ruby/object:Gem::Requirement
61
61
  requirements:
62
62
  - - '='
63
63
  - !ruby/object:Gem::Version
64
- version: 1.3.0
64
+ version: 1.3.4
65
65
  type: :runtime
66
66
  prerelease: false
67
67
  version_requirements: !ruby/object:Gem::Requirement
68
68
  requirements:
69
69
  - - '='
70
70
  - !ruby/object:Gem::Version
71
- version: 1.3.0
71
+ version: 1.3.4
72
72
  - !ruby/object:Gem::Dependency
73
73
  name: mime-types
74
74
  requirement: !ruby/object:Gem::Requirement
75
75
  requirements:
76
76
  - - '='
77
77
  - !ruby/object:Gem::Version
78
- version: '1.19'
78
+ version: 2.6.2
79
79
  type: :runtime
80
80
  prerelease: false
81
81
  version_requirements: !ruby/object:Gem::Requirement
82
82
  requirements:
83
83
  - - '='
84
84
  - !ruby/object:Gem::Version
85
- version: '1.19'
85
+ version: 2.6.2
86
+ - !ruby/object:Gem::Dependency
87
+ name: parallel
88
+ requirement: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - '='
91
+ - !ruby/object:Gem::Version
92
+ version: 1.6.1
93
+ type: :runtime
94
+ prerelease: false
95
+ version_requirements: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - '='
98
+ - !ruby/object:Gem::Version
99
+ version: 1.6.1
86
100
  description: Filbunke client and library
87
101
  email: technical@deltaprojects.com
88
102
  executables:
@@ -90,9 +104,9 @@ executables:
90
104
  extensions: []
91
105
  extra_rdoc_files: []
92
106
  files:
93
- - .gitignore
94
- - .ruby-gemset
95
- - .ruby-version
107
+ - ".gitignore"
108
+ - ".ruby-gemset"
109
+ - ".ruby-version"
96
110
  - Gemfile
97
111
  - LICENSE
98
112
  - README.rdoc
@@ -112,7 +126,6 @@ files:
112
126
  - lib/filbunke/file.rb
113
127
  - lib/filbunke/logger.rb
114
128
  - lib/filbunke/repository.rb
115
- - lib/filbunke/thread_pool.rb
116
129
  - test/helper.rb
117
130
  - test/test_filbunke.rb
118
131
  homepage: https://rubygems.org/gems/filbunke
@@ -124,17 +137,17 @@ require_paths:
124
137
  - lib
125
138
  required_ruby_version: !ruby/object:Gem::Requirement
126
139
  requirements:
127
- - - ! '>='
140
+ - - ">="
128
141
  - !ruby/object:Gem::Version
129
142
  version: '0'
130
143
  required_rubygems_version: !ruby/object:Gem::Requirement
131
144
  requirements:
132
- - - ! '>='
145
+ - - ">="
133
146
  - !ruby/object:Gem::Version
134
147
  version: '0'
135
148
  requirements: []
136
149
  rubyforge_project:
137
- rubygems_version: 2.4.8
150
+ rubygems_version: 2.4.5.1
138
151
  signing_key:
139
152
  specification_version: 4
140
153
  summary: Filbunke client
@@ -1,104 +0,0 @@
1
- # Inspired by https://github.com/meh/ruby-threadpool
2
- require 'thread'
3
-
4
- class ThreadPool
5
-
6
- class Job < Struct.new(:args, :block); end
7
-
8
- def initialize(min, max = nil)
9
-
10
- trap("INT") { shutdown }
11
-
12
- @min = min
13
- @max = max || min
14
-
15
- @cv = ConditionVariable.new
16
- @mutex = Mutex.new
17
-
18
- @queue = []
19
- @workers = []
20
-
21
- @spawned = 0
22
- @waiting = 0
23
- @shutdown = false
24
- @queue_locked = false
25
-
26
- @mutex.synchronize do
27
- min.times { spawn_thread }
28
- end
29
- end
30
-
31
- def execute(*args, &block)
32
- @mutex.synchronize do
33
- raise "Thread pool is about to shutdown" if @shutdown || @queue_locked
34
-
35
- @queue << Job.new(args, block)
36
-
37
- spawn_thread if @waiting == 0 && @spawned < @max
38
-
39
- @cv.signal
40
- end
41
- end
42
- alias :<< :execute
43
-
44
- def shutdown
45
- @mutex.synchronize do
46
- @shutdown = true
47
- @cv.broadcast
48
- end
49
-
50
- @workers.first.join until @workers.empty?
51
- end
52
-
53
- def join
54
- @mutex.synchronize do
55
- @queue_locked = true
56
- @cv.broadcast
57
- sleep 0.01 until @queue.empty?
58
- end
59
- shutdown
60
- end
61
-
62
- protected
63
-
64
- def spawn_thread
65
- thread = Thread.new do
66
- continue = true
67
-
68
- while continue do
69
- job = nil
70
-
71
- @mutex.synchronize do
72
- while @queue.empty? && continue
73
- if @shutdown || @queue_locked
74
- continue = false
75
- break
76
- end
77
-
78
- @waiting += 1
79
- @cv.wait @mutex
80
- @waiting -= 1
81
-
82
- if @shutdown || @queue_locked
83
- continue = false
84
- break
85
- end
86
- end
87
-
88
- if continue
89
- job = @queue.shift
90
- job.block.call(*job.args) if job
91
- end
92
- end
93
- end
94
-
95
- @mutex.synchronize do
96
- @spawned -= 1
97
- @workers.delete thread
98
- end
99
- end
100
-
101
- @workers << thread
102
- thread
103
- end
104
- end