filbunke 1.10.3 → 1.11.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c037040d9ad780d44b920b3a6dbe1df82463b250
4
- data.tar.gz: 08d359f508446e24881402b5be25de5ea9897ce3
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ M2NkMTBmMzNhMTkwZjg1OWJjN2NkMzQyZWNjY2U2ZmVhMTU5YTY0Nw==
5
+ data.tar.gz: !binary |-
6
+ NzZkODc3ZTAxYWQxOTY4YjczOTdlZTM4ZTAzOTljY2YwZmQ0NWYzZA==
5
7
  SHA512:
6
- metadata.gz: c1f3a1556b9a09dd8f334794636646abaf0814d3308ce1be1f47fe248ce94f99b3989bd5ab5226ff52f9dc9f35fa68d31250bb11f9b8fc57398f71730341effb
7
- data.tar.gz: e58c070c818db39a1273699f13184ae208f292bde5f4c603e3f715aa1f1bbe19e69833e7e3dd952bf7acd1cac1988274c907b003e818faded4df04dc4e726ed0
8
+ metadata.gz: !binary |-
9
+ YWE5YTVhMzE0MTEzODU3OTZkMjdlZDgyODVjZTJhYmViNTY2OWUyYTJjM2E2
10
+ MTdmM2Y4YjY4OTM4MjNjZDE0YTVkMGVkOWI5ODM2NTcwYzczZDBlMzBmOWY0
11
+ Njg4NzRiODAxYWNmNzM2OTU0MmZiZmYwMDM4MGY0ZDExMzc4OWE=
12
+ data.tar.gz: !binary |-
13
+ YjA4NjZjZTcxMzVjZjkwY2MzNmI4MzQ0MDdlOWVkYmFkMmU3NjdmOGM1NjFm
14
+ NDA2MzlhMThhMDgxZjQwYWEwNDI2ZDM5MjZhMjg3MDFkMjFkODBhMWQ3NjY0
15
+ MWViMTgwN2NkYmQ1NDg1MDNjN2Q0NDdiZmJkOTc4YmEzZmU1ZDk=
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ Gemfile.lock
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ filbunkeclient
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'http://rubygems.org'
2
+
3
+ gem 'jeweler'
data/Rakefile CHANGED
@@ -7,9 +7,9 @@ begin
7
7
  gem.name = "filbunke"
8
8
  gem.summary = %Q{Filbunke client}
9
9
  gem.description = %Q{Filbunke client and library}
10
- gem.email = "info@deltaprojects.com"
11
- gem.homepage = "http://github.com/deltaprojects/filbunke"
12
- gem.authors = ["Bjorn Sperber", "Karl Ravn"]
10
+ gem.email = "technical@deltaprojects.com"
11
+ gem.homepage = "https://rubygems.org/gems/filbunke"
12
+ gem.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
13
13
  gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
14
14
  gem.files.exclude 'pkg'
15
15
  gem.executables = ['filbunked']
@@ -20,8 +20,9 @@ begin
20
20
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
21
21
  end
22
22
  Jeweler::GemcutterTasks.new
23
- rescue LoadError
23
+ rescue LoadError => e
24
24
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
25
+ raise e
25
26
  end
26
27
 
27
28
  require 'rake/testtask'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.10.3
1
+ 1.11.6
@@ -2,6 +2,7 @@
2
2
  pid_file: '/var/run/filbunked.pid'
3
3
  user: 'wouter'
4
4
  log_file: '/tmp/filbunke.log'
5
+ log_level: 'debug'
5
6
  checkpoint_path: '/var/tmp/filbunke_checkpoints/'
6
7
  run_every: 5
7
8
  callback_path: '/Users/wouter/prjs/filbunke-cdn/client/doc/examples'
@@ -16,6 +17,7 @@ repositories:
16
17
  file_umask: 0644
17
18
  directory_umask: 0755
18
19
  hadoop_binary: '/usr/bin/hadoop'
20
+ hydra_concurrency: 10
19
21
  callbacks:
20
22
  http_evict_cache:
21
23
  host: 'localhost'
data/filbunke.gemspec CHANGED
@@ -2,18 +2,24 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
+ # stub: filbunke 1.11.6 ruby lib
5
6
 
6
7
  Gem::Specification.new do |s|
7
8
  s.name = "filbunke"
8
- s.version = "1.10.3"
9
+ s.version = "1.11.6"
9
10
 
10
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Bjorn Sperber", "Karl Ravn"]
12
- s.date = "2014-11-12"
12
+ s.require_paths = ["lib"]
13
+ s.authors = ["Wouter de Bie", "Bjorn Sperber", "Karl Ravn", "Magnus Spangdal"]
14
+ s.date = "2015-09-22"
13
15
  s.description = "Filbunke client and library"
14
- s.email = "info@deltaprojects.com"
16
+ s.email = "technical@deltaprojects.com"
15
17
  s.executables = ["filbunked"]
16
18
  s.files = [
19
+ ".gitignore",
20
+ ".ruby-gemset",
21
+ ".ruby-version",
22
+ "Gemfile",
17
23
  "LICENSE",
18
24
  "README.rdoc",
19
25
  "Rakefile",
@@ -32,12 +38,12 @@ Gem::Specification.new do |s|
32
38
  "lib/filbunke/file.rb",
33
39
  "lib/filbunke/logger.rb",
34
40
  "lib/filbunke/repository.rb",
41
+ "lib/filbunke/thread_pool.rb",
35
42
  "test/helper.rb",
36
43
  "test/test_filbunke.rb"
37
44
  ]
38
- s.homepage = "http://github.com/deltaprojects/filbunke"
39
- s.require_paths = ["lib"]
40
- s.rubygems_version = "2.0.14"
45
+ s.homepage = "https://rubygems.org/gems/filbunke"
46
+ s.rubygems_version = "2.4.8"
41
47
  s.summary = "Filbunke client"
42
48
 
43
49
  if s.respond_to? :specification_version then
data/lib/filbunke.rb CHANGED
@@ -6,7 +6,9 @@ require 'digest/md5'
6
6
  require 'typhoeus'
7
7
  require 'open4'
8
8
  require 'uri'
9
+ require 'logger'
9
10
 
11
+ require File.expand_path(File.dirname(__FILE__) + '/filbunke/thread_pool.rb')
10
12
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/client.rb')
11
13
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/file.rb')
12
14
  require File.expand_path(File.dirname(__FILE__) + '/filbunke/repository.rb')
@@ -6,12 +6,27 @@ module Filbunke
6
6
  @logger = logger
7
7
  end
8
8
 
9
+ def on_update_batch(files)
10
+ files.each do |item|
11
+ on_update(item.file, item.local_file_path)
12
+ end
13
+ end
9
14
  def on_update(file, local_file_path)
10
15
  end
11
16
 
17
+ def on_no_change_batch(files)
18
+ files.each do |item|
19
+ on_no_change(item.file, item.local_file_path)
20
+ end
21
+ end
12
22
  def on_no_change(file, local_file_path)
13
23
  end
14
-
24
+
25
+ def on_delete_batch(files)
26
+ files.each do |item|
27
+ on_delete(item.file, item.local_file_path)
28
+ end
29
+ end
15
30
  def on_delete(file, local_file_path)
16
31
  end
17
32
 
@@ -17,6 +17,9 @@ module Filbunke
17
17
  @logger = logger
18
18
  @callbacks = callbacks
19
19
  @failed_request_log_file_name = failed_request_log_file_name
20
+ @hydra = Typhoeus::Hydra.new(:max_concurrency => @repository.hydra_concurrency)
21
+
22
+ @logger.info "initialized client for repository '#{@repository.name}'; #{@repository.inspect}"
20
23
  end
21
24
 
22
25
  def with_updated_files(last_checkpoint)
@@ -26,41 +29,62 @@ module Filbunke
26
29
 
27
30
  new_checkpoint = updates["checkpoint"]
28
31
 
29
- @logger.log "Updating repository: #{repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
32
+ @logger.info "Updating repository: #{@repository.name}: #{updated_files.size} files. Checkpoint: #{last_checkpoint} ==> #{new_checkpoint}" if updated_files.size > 0
30
33
 
31
- @hydra = Typhoeus::Hydra.new(:max_concurrency => 10)
32
34
  @async_requests = []
35
+
36
+ callbacks_on_update = []
37
+ callbacks_on_no_change = []
38
+ callbacks_on_delete = []
33
39
 
34
40
  updated_files.each do |raw_file|
35
41
  file = File.new(raw_file)
36
-
37
- local_file_path = ::File.join(repository.local_path, file.path)
42
+ local_file_path = ::File.join(@repository.local_path, file.path)
38
43
  if file.state == "DELETED" then
39
44
  delete_file!(local_file_path)
40
- run_callbacks_delete(file, local_file_path)
45
+ callbacks_on_delete << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
41
46
  else
42
-
43
47
  if file_needs_update?(file, local_file_path)
44
-
45
48
  if update_file!(file, local_file_path) then
46
-
47
49
  yield file
50
+ callbacks_on_update << OpenStruct.new({ :file => file, :local_file_path => local_file_path })
48
51
  else
49
- @logger.log "Unable to get file #{file.url} ==> #{file.path}!"
52
+ @logger.error "Unable to get file #{file.url} ==> #{file.path}!"
50
53
  failure = true
51
54
  end
52
-
53
55
  else
54
- @logger.log "File exists with correct hash: #{local_file_path}"
55
- run_callbacks_no_change(file, local_file_path)
56
+ @logger.debug "File exists with correct hash: #{local_file_path}"
57
+ callbacks_on_no_change << OpenStruct.new({:file => file, :local_file_path => local_file_path})
56
58
  end
57
59
  end
58
60
  end
59
-
60
61
  @hydra.run
61
- failure = failure || @async_requests.any?{|request| request.handled_response == false }
62
62
 
63
- failure ? last_checkpoint : (new_checkpoint || last_checkpoint)
63
+ pfailure = failure || @async_requests.any? do |request|
64
+ @logger.warn "request did not handle response: #{request.inspect}" if request.response.nil? || request.response.code != 200
65
+ request.response.nil? || request.response.code != 200
66
+ end
67
+
68
+ if pfailure == false
69
+ @logger.info "Done fetching files for #{@repository.name}, processing callbacks..."
70
+ begin
71
+
72
+ run_callbacks_delete(callbacks_on_delete)
73
+
74
+ run_callbacks(callbacks_on_update)
75
+
76
+ run_callbacks_no_change(callbacks_on_no_change)
77
+
78
+ new_checkpoint || last_checkpoint
79
+ rescue RuntimeError, SystemCallError, StandardError => e
80
+ msg = ["Callbacks failed to run; #{e.class} - #{e.message}", *e.backtrace].join("\n\t")
81
+ @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}; #{msg}"
82
+ last_checkpoint
83
+ end
84
+ else
85
+ @logger.error "FAILED to update files for #{@repository.name} last_checkpoint = #{last_checkpoint}"
86
+ last_checkpoint
87
+ end
64
88
  end
65
89
 
66
90
  def update_files!(last_checkpoint)
@@ -143,27 +167,26 @@ module Filbunke
143
167
  update_http_file!(file, local_file_path)
144
168
  elsif (file.url =~ /^hdfs:\/\//)
145
169
  success = update_hdfs_file!(file, local_file_path)
146
- run_callbacks(file, local_file_path) if success
147
170
  else
148
171
  raise "Unsupported protocol for file: #{file.inspect}"
149
172
  end
150
173
  end
151
174
 
152
- def run_callbacks(file, local_file_path)
175
+ def run_callbacks(files)
153
176
  @callbacks.each do |callback|
154
- callback.on_update(file, local_file_path)
177
+ callback.on_update_batch(files)
155
178
  end
156
179
  end
157
180
 
158
- def run_callbacks_no_change(file, local_file_path)
181
+ def run_callbacks_no_change(files)
159
182
  @callbacks.each do |callback|
160
- callback.on_no_change(file, local_file_path)
183
+ callback.on_no_change_batch(files)
161
184
  end
162
185
  end
163
186
 
164
- def run_callbacks_delete(file, local_file_path)
187
+ def run_callbacks_delete(files)
165
188
  @callbacks.each do |callback|
166
- callback.on_delete(file, local_file_path)
189
+ callback.on_delete_batch(files)
167
190
  end
168
191
  end
169
192
 
@@ -177,27 +200,28 @@ module Filbunke
177
200
  end
178
201
 
179
202
  def get_updated_file_list(last_checkpoint)
180
-
181
203
  begin
182
204
  updates_http = Net::HTTP.new(@repository.host, @repository.port)
205
+ updates_http.read_timeout = 300 # default is 60 seconds
183
206
  updates_http.start do |http|
207
+ updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
184
208
  begin
185
- updates_path = "/#{UPDATES_ACTION}/#{@repository.name}?#{FROM_CHECKPOINT_KEY}=#{last_checkpoint}"
209
+ @logger.info "Fetching updated file list from #{updates_path}"
186
210
  request = Net::HTTP::Get.new(updates_path)
187
211
  response = http.request(request)
188
212
  if response.code.to_i == 200
189
213
  JSON.parse(response.body)
190
214
  else
191
- @logger.log "Failed to download updates for #{@repository.name}, error code = #{response.code}"
215
+ @logger.error "Failed to download updates for #{@repository.name}, error code = #{response.code}"
192
216
  {}
193
217
  end
194
218
  rescue StandardError => e
195
- @logger.log "Error getting file list: #{e.message}! Retrying later.."
219
+ @logger.error "Error getting file list from http://#{@repository.host}:#{@repository.port}#{updates_path}: #{e.message}! Retrying later.."
196
220
  {}
197
221
  end
198
222
  end
199
223
  rescue StandardError => e
200
- @logger.log "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
224
+ @logger.error "Unable to create HTTP connection to #{@repository.host}:#{@repository.port} (#{e.message})!"
201
225
  return {}
202
226
  end
203
227
  end
@@ -205,32 +229,35 @@ module Filbunke
205
229
  def update_http_file!(file, local_file_path)
206
230
  begin
207
231
  async_request = if @repository.user
208
- Typhoeus::Request.new(URI.encode(file.url), :username => @repository.user, :password => @repository.pass)
232
+ Typhoeus::Request.new(URI.encode(file.url), :follow_location => true, :username => @repository.user, :password => @repository.pass)
209
233
  else
210
- Typhoeus::Request.new(URI.encode(file.url))
234
+ Typhoeus::Request.new(URI.encode(file.url), :follow_location => true)
211
235
  end
212
236
  async_request.on_complete do |response|
213
- success = true
237
+ success = false
214
238
  begin
215
- if response.code.to_i == 200
239
+ success = response.code.to_i == 200
240
+ if success
216
241
  write_file!(local_file_path, response.body)
217
242
  else
218
- @logger.log "Failed to update file #{file.url}, error code = #{response.code}"
219
- success = false
243
+ body_if_error = response.code >= 500 ? ", body = #{response.body}" : ""
244
+ @logger.warn "Failed to update file #{file.url}, got status code = #{response.code}#{body_if_error}"
220
245
  end
221
- run_callbacks(file, local_file_path) if success
222
- rescue StandardError => e
223
- @logger.log "Failed to update file #{file.url}: #{e.message}"
224
- success = false
246
+ rescue SystemCallError, StandardError => e
247
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
248
+ @logger.error "Failed to update file #{file.url}: #{msg}"
225
249
  end
250
+ # return the async_request.handled_response value here
226
251
  success
227
252
  end
228
253
  @hydra.queue async_request
229
254
  @async_requests << async_request
230
255
  rescue StandardError => e
231
- @logger.log "Failed to update file #{file.url}: #{e.message}"
256
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
257
+ @logger.error "Failed to update file #{file.url}: #{msg}"
232
258
  return false
233
259
  end
260
+
234
261
  return true
235
262
  end
236
263
 
@@ -241,7 +268,7 @@ module Filbunke
241
268
  url = file.url
242
269
  url.gsub!(/hdfs:\/\/([^\/]*)(.*)/, "hdfs://\\2")
243
270
  hdfs_cmd = "#{@repository.hadoop_binary} dfs -copyToLocal #{url} #{local_file_path}.tmp"
244
- @logger.log "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
271
+ #@logger.debug "Trying to update #{local_file_path} with '#{hdfs_cmd}'"
245
272
 
246
273
  pid, stdin, stdout, stderr = Open4::popen4 hdfs_cmd
247
274
  ignored, status = Process::waitpid2 pid
@@ -251,38 +278,41 @@ module Filbunke
251
278
  ::FileUtils.mv "#{local_file_path}.tmp", local_file_path
252
279
  return true
253
280
  rescue StandardError => e
254
- @logger.log "Failed to move hdfs file #{file.url}: #{e.message}"
281
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
282
+ @logger.error "Failed to move hdfs file #{file.url}: #{msg}"
255
283
  return false
256
284
  end
257
285
  else
258
- @logger.log "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
286
+ @logger.error "Failed to update hdfs file #{file.url}! Unable to execute #{hdfs_cmd}"
259
287
  return false
260
288
  end
261
- rescue StandardError => e
262
- @logger.log "Failed to update hdfs file #{file.url}: #{e.message}"
289
+ rescue SystemCallError, StandardError => e
290
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
291
+ @logger.error "Failed to update hdfs file #{file.url}: #{msg}"
263
292
  return false
264
293
  end
265
294
  end
266
295
 
267
296
  def write_file!(file_path, contents)
268
297
  ::FileUtils.mkdir_p(::File.dirname(file_path))
269
- @logger.log("Updating: #{file_path}")
298
+ @logger.debug("Updating: #{file_path}")
270
299
  begin
271
300
  ::File.open("#{file_path}.tmp", 'w') do |file|
272
- file.write(contents);
301
+ file.write(contents)
273
302
  file.close
274
303
  end
275
304
  ::FileUtils.mv "#{file_path}.tmp", file_path
276
305
  return true
277
306
  rescue StandardError => e
278
- @logger.log "Failed to move file #{file.url}: #{e.message}"
307
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
308
+ @logger.error "Failed to move file #{file_path}: #{msg}"
279
309
  return false
280
310
  end
281
311
  end
282
312
 
283
313
  def delete_file!(file_path)
284
314
  if ::File.exists?(file_path) then
285
- @logger.log("Deleting: #{file_path}")
315
+ @logger.debug("Deleting: #{file_path}")
286
316
  ::File.delete(file_path)
287
317
  end
288
318
  end
@@ -9,27 +9,26 @@ module Filbunke
9
9
  end
10
10
 
11
11
  def setup_clients!(local = false)
12
- @logger = Logger.new(@config["log_file"], local)
12
+ @logger = FilbunkeLogger.new(@config["log_file"], local, @config["log_level"])
13
13
  @logger.log("Initializing filbunked")
14
14
  @config["repositories"].each do |repository_name, repository_config|
15
15
  @logger.log("Initializing repository: #{repository_name}")
16
16
  @clients << begin
17
- repository = Repository.new(repository_config["filbunke_server_repository"],
18
- repository_config["filbunke_server_host"],
19
- repository_config["filbunke_server_port"],
20
- repository_config["local_path"],
21
- repository_config["file_umask"].to_i,
22
- repository_config["directory_umask"].to_i,
23
- repository_config["file_url_username"],
24
- repository_config["file_url_password"],
25
- repository_config["hadoop_binary"])
17
+ repository_config["run_every"] = repository_config.fetch("run_every", @config.fetch("run_every", 10))
18
+ repository = Repository.new(repository_config)
26
19
  callbacks = []
27
20
  repository_config["callbacks"].each do |callback_name, callback_config|
28
21
  require ::File.join(@config["callback_path"], callback_name.to_s)
29
22
  callback_class = Module.const_get(callback_name.split("_").map(&:capitalize).join)
30
23
  callbacks << callback_class.new(@logger, callback_config)
31
24
  end
32
- Client.new(repository, @logger, callbacks)
25
+ failed_request_log_file_name = repository_config["failed_request_log_file_name"]||nil
26
+
27
+ Client.new(repository, @logger, callbacks, failed_request_log_file_name)
28
+ rescue Exception => e
29
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
30
+ @logger.error("Failed to initialize #{repository_name}; #{msg}")
31
+ raise e
33
32
  end
34
33
  end
35
34
  end
@@ -37,31 +36,44 @@ module Filbunke
37
36
  def run!
38
37
  version = ::File.read(::File.expand_path(::File.join(::File.dirname(__FILE__), "../../VERSION"))).chomp
39
38
  @logger.log("Starting filbunked version #{version}")
40
- while true
41
- begin
42
- @clients.each do |client|
43
- new_checkpoint = client.update_files!(checkpoint_for_repository(client.repository))
44
- update_checkpoint_for_repository(client.repository, new_checkpoint)
39
+ client_pids = []
40
+ @parent_pid = Process.pid
41
+ @clients.each do |client|
42
+ client_pids << fork do
43
+ begin
44
+ while process_is_alive(@parent_pid)
45
+ new_checkpoint = client.update_files!(checkpoint_for_repository(client.repository))
46
+ @logger.info "Update finished for #{client.repository.name} new checkpoint => #{new_checkpoint}"
47
+ update_checkpoint_for_repository(client.repository, new_checkpoint)
48
+ sleep client.repository.run_every
49
+ end
50
+ rescue RuntimeError, SystemCallError, StandardError => e
51
+ msg = ["#{e.class} - #{e.message}", *e.backtrace].join("\n\t")
52
+ @logger.error("#{client.repository.name} Died.. #{msg}")
53
+ Process.kill("KILL", @parent_pid)
54
+ exit 1
45
55
  end
46
- rescue StandardError => e
47
- @logger.log("Died.. #{e.message}")
48
- @logger.log(e.backtrace.join("\n"))
49
- exit 1
50
56
  end
51
- sleep @config["run_every"]
52
57
  end
58
+ client_pids.each { |pid| Process.wait(pid) }
53
59
  end
54
60
 
61
+ def process_is_alive(pid)
62
+ !!Process.kill(0, pid) rescue false
63
+ end
64
+
55
65
  private
56
-
57
66
  def checkpoint_for_repository(repository)
58
67
  ::File.open(checkpoint_file_name_for_repository(repository), 'r') {|f| f.readline.to_i } rescue 0
59
68
  end
60
69
 
61
70
  def update_checkpoint_for_repository(repository, checkpoint)
62
71
  f = ::File.new(checkpoint_file_name_for_repository(repository), 'w', repository.file_umask)
63
- f.write(checkpoint)
64
- f.close
72
+ begin
73
+ f.write(checkpoint)
74
+ ensure
75
+ f.close
76
+ end
65
77
  end
66
78
 
67
79
  def checkpoint_file_name_for_repository(repository)
@@ -70,6 +82,13 @@ module Filbunke
70
82
  end
71
83
 
72
84
  def write_pid!(pid_file_path)
85
+
86
+ existing_process = ::File.read(pid_file_path)
87
+ unless existing_process.nil?
88
+ @logger.info("killing existing process #{existing_process} from #{pid_file_path}")
89
+ Process.kill("KILL", existing_process.to_i) rescue nil
90
+ end
91
+
73
92
  ::File.open(pid_file_path, 'w') do |f|
74
93
  f.write(Process.pid.to_i)
75
94
  f.close
@@ -1,19 +1,58 @@
1
+ require 'logger'
1
2
  module Filbunke
2
- class Logger
3
+ class FilbunkeLogger
3
4
 
4
- def initialize(log_file_name, local)
5
+ def initialize(log_file_name, local, level)
5
6
  @local = local
6
- @log_file = ::File.open(log_file_name, "a") unless local
7
+ @log = if @local or log_file_name.nil?
8
+ Logger.new(STDOUT)
9
+ else
10
+ Logger.new(log_file_name)
11
+ end
12
+
13
+ @log.level = parse_level(level)
7
14
  end
8
15
 
16
+ def puts(msg)
17
+ info(msg)
18
+ end
19
+
9
20
  def log(msg)
10
- if @local then
11
- puts "#{Time.now}: #{msg}"
12
- else
13
- @log_file.write("#{Time.now}: #{msg}\n")
14
- @log_file.flush
15
- end
21
+ @log.info msg
22
+ end
23
+
24
+ def info(msg)
25
+ @log.info msg
26
+ end
27
+
28
+ def error(msg)
29
+ @log.error msg
16
30
  end
17
31
 
32
+ def warn(msg)
33
+ @log.warn msg
34
+ end
35
+
36
+ def debug(msg)
37
+ @log.debug msg
38
+ end
39
+
40
+ def fatal(msg)
41
+ @log.error msg
42
+ end
43
+
44
+ def parse_level(constantOrString)
45
+ case constantOrString
46
+ when 'debug' then Logger::DEBUG
47
+ when 'info' then Logger::INFO
48
+ when 'warn' then Logger::WARN
49
+ when 'error' then Logger::ERROR
50
+ when 'fatal' then Logger::ERROR
51
+ when 'unknown' then Logger::UNKNOWN
52
+ when nil then Logger::INFO
53
+ else
54
+ constantOrString
55
+ end
56
+ end
18
57
  end
19
58
  end
@@ -1,19 +1,31 @@
1
1
  module Filbunke
2
2
  class Repository
3
+ attr_accessor :name,
4
+ :host,
5
+ :port,
6
+ :local_path,
7
+ :file_umask,
8
+ :directory_umask,
9
+ :user,
10
+ :pass,
11
+ :hadoop_binary,
12
+ :run_every,
13
+ :hydra_concurrency
14
+
15
+ def initialize(repository_config)
16
+ @name = repository_config["filbunke_server_repository"]
17
+ @host = repository_config["filbunke_server_host"]
18
+ @port = repository_config["filbunke_server_port"]
19
+ @local_path = repository_config["local_path"]
20
+ @file_umask = repository_config["file_umask"].to_i
21
+ @directory_umask = repository_config["directory_umask"].to_i
22
+ @user = repository_config["file_url_username"]
23
+ @pass = repository_config["file_url_password"]
24
+ @hadoop_binary = repository_config["hadoop_binary"]
25
+ @run_every = repository_config.fetch("run_every", 10).to_i
26
+ @hydra_concurrency = repository_config.fetch("hydra_concurrency", 100).to_i
27
+ end
28
+
29
+ end
3
30
 
4
- attr_accessor :name, :host, :port, :local_path, :file_umask, :directory_umask, :user, :pass, :hadoop_binary
5
-
6
- def initialize(name, host, port, local_path, file_umask, directory_umask, user = nil, pass = nil, hadoop_binary = nil)
7
- @name = name
8
- @host = host
9
- @port = port
10
- @local_path = local_path
11
- @file_umask = file_umask
12
- @directory_umask = directory_umask
13
- @user = user
14
- @pass = pass
15
- @hadoop_binary = hadoop_binary
16
- end
17
-
18
- end
19
31
  end
@@ -0,0 +1,104 @@
1
+ # Inspired by https://github.com/meh/ruby-threadpool
2
+ require 'thread'
3
+
4
+ class ThreadPool
5
+
6
+ class Job < Struct.new(:args, :block); end
7
+
8
+ def initialize(min, max = nil)
9
+
10
+ trap("INT") { shutdown }
11
+
12
+ @min = min
13
+ @max = max || min
14
+
15
+ @cv = ConditionVariable.new
16
+ @mutex = Mutex.new
17
+
18
+ @queue = []
19
+ @workers = []
20
+
21
+ @spawned = 0
22
+ @waiting = 0
23
+ @shutdown = false
24
+ @queue_locked = false
25
+
26
+ @mutex.synchronize do
27
+ min.times { spawn_thread }
28
+ end
29
+ end
30
+
31
+ def execute(*args, &block)
32
+ @mutex.synchronize do
33
+ raise "Thread pool is about to shutdown" if @shutdown || @queue_locked
34
+
35
+ @queue << Job.new(args, block)
36
+
37
+ spawn_thread if @waiting == 0 && @spawned < @max
38
+
39
+ @cv.signal
40
+ end
41
+ end
42
+ alias :<< :execute
43
+
44
+ def shutdown
45
+ @mutex.synchronize do
46
+ @shutdown = true
47
+ @cv.broadcast
48
+ end
49
+
50
+ @workers.first.join until @workers.empty?
51
+ end
52
+
53
+ def join
54
+ @mutex.synchronize do
55
+ @queue_locked = true
56
+ @cv.broadcast
57
+ sleep 0.01 until @queue.empty?
58
+ end
59
+ shutdown
60
+ end
61
+
62
+ protected
63
+
64
+ def spawn_thread
65
+ thread = Thread.new do
66
+ continue = true
67
+
68
+ while continue do
69
+ job = nil
70
+
71
+ @mutex.synchronize do
72
+ while @queue.empty? && continue
73
+ if @shutdown || @queue_locked
74
+ continue = false
75
+ break
76
+ end
77
+
78
+ @waiting += 1
79
+ @cv.wait @mutex
80
+ @waiting -= 1
81
+
82
+ if @shutdown || @queue_locked
83
+ continue = false
84
+ break
85
+ end
86
+ end
87
+
88
+ if continue
89
+ job = @queue.shift
90
+ job.block.call(*job.args) if job
91
+ end
92
+ end
93
+ end
94
+
95
+ @mutex.synchronize do
96
+ @spawned -= 1
97
+ @workers.delete thread
98
+ end
99
+ end
100
+
101
+ @workers << thread
102
+ thread
103
+ end
104
+ end
metadata CHANGED
@@ -1,28 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filbunke
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.3
4
+ version: 1.11.6
5
5
  platform: ruby
6
6
  authors:
7
+ - Wouter de Bie
7
8
  - Bjorn Sperber
8
9
  - Karl Ravn
10
+ - Magnus Spangdal
9
11
  autorequire:
10
12
  bindir: bin
11
13
  cert_chain: []
12
- date: 2014-11-12 00:00:00.000000000 Z
14
+ date: 2015-09-22 00:00:00.000000000 Z
13
15
  dependencies:
14
16
  - !ruby/object:Gem::Dependency
15
17
  name: thoughtbot-shoulda
16
18
  requirement: !ruby/object:Gem::Requirement
17
19
  requirements:
18
- - - '>='
20
+ - - ! '>='
19
21
  - !ruby/object:Gem::Version
20
22
  version: '0'
21
23
  type: :development
22
24
  prerelease: false
23
25
  version_requirements: !ruby/object:Gem::Requirement
24
26
  requirements:
25
- - - '>='
27
+ - - ! '>='
26
28
  - !ruby/object:Gem::Version
27
29
  version: '0'
28
30
  - !ruby/object:Gem::Dependency
@@ -82,12 +84,16 @@ dependencies:
82
84
  - !ruby/object:Gem::Version
83
85
  version: '1.19'
84
86
  description: Filbunke client and library
85
- email: info@deltaprojects.com
87
+ email: technical@deltaprojects.com
86
88
  executables:
87
89
  - filbunked
88
90
  extensions: []
89
91
  extra_rdoc_files: []
90
92
  files:
93
+ - .gitignore
94
+ - .ruby-gemset
95
+ - .ruby-version
96
+ - Gemfile
91
97
  - LICENSE
92
98
  - README.rdoc
93
99
  - Rakefile
@@ -106,9 +112,10 @@ files:
106
112
  - lib/filbunke/file.rb
107
113
  - lib/filbunke/logger.rb
108
114
  - lib/filbunke/repository.rb
115
+ - lib/filbunke/thread_pool.rb
109
116
  - test/helper.rb
110
117
  - test/test_filbunke.rb
111
- homepage: http://github.com/deltaprojects/filbunke
118
+ homepage: https://rubygems.org/gems/filbunke
112
119
  licenses: []
113
120
  metadata: {}
114
121
  post_install_message:
@@ -117,17 +124,17 @@ require_paths:
117
124
  - lib
118
125
  required_ruby_version: !ruby/object:Gem::Requirement
119
126
  requirements:
120
- - - '>='
127
+ - - ! '>='
121
128
  - !ruby/object:Gem::Version
122
129
  version: '0'
123
130
  required_rubygems_version: !ruby/object:Gem::Requirement
124
131
  requirements:
125
- - - '>='
132
+ - - ! '>='
126
133
  - !ruby/object:Gem::Version
127
134
  version: '0'
128
135
  requirements: []
129
136
  rubyforge_project:
130
- rubygems_version: 2.0.14
137
+ rubygems_version: 2.4.8
131
138
  signing_key:
132
139
  specification_version: 4
133
140
  summary: Filbunke client