britebox 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 933b24652e4d8e01a6cba8e94c78826441a6833b
4
- data.tar.gz: 71d0fe270ab25989b2157fbdb5f40bd78b70bdd3
3
+ metadata.gz: 9e800dec5d7cc7526497042fdd135dfeefb49b08
4
+ data.tar.gz: caef336a484073e5b13fdbe31ab1c72d92155281
5
5
  SHA512:
6
- metadata.gz: 7014db8bbe827d9030e69f9e377d2b04fcb43550a64742af704277d6c221204fbe8ff006cd8342ace8960bcd9f97fa84fd3215a1d1a44a735cd049ae3001673d
7
- data.tar.gz: 664811ca1f66bb90f5dbc3de04317deebedaebf1dad076c7c5821674d622a0a2a17db5aeaeba8ff5016775dd18d4dcfd29a2854dfdffb29b65732976608f3703
6
+ metadata.gz: d8cf57385c286d5a807c0a3e04fc5c4ed0ec73754ebe1c0072d73951461b1d3285a63b93fe0f4dc94b5cd887e5209e5fe8fc2bdc80cd95257998e146005a87b6
7
+ data.tar.gz: a887c75d7e6423e5e17fccb2b226f16a13b97c91839676cab6f11d6856aaa84312a34796247dffffa3f73e1f53c1840d2efba86340320ef457a69fadd5b5c4ac
@@ -1,4 +1,6 @@
1
1
 
2
2
  require 'britebox/version'
3
+ require 'britebox/helpers'
3
4
  require 'britebox/file_job'
4
- require 'britebox/file_job_pool'
5
+ require 'britebox/file_job_pool'
6
+ require 'britebox/export_buffer'
@@ -0,0 +1,87 @@
1
require 'csv'

module Britebox
  # Collects processed CSV rows coming from several worker threads and writes
  # them to the output file in ascending order of their line index.
  #
  # Rows arrive as hashes of the form {n: Integer, line: Array}. A row whose
  # :n equals the next expected index goes to the write buffer; any other row
  # is parked in the backlog until the rows before it have arrived (or until
  # #flush_backlog is called).
  class ExportBuffer
    # Number of rows that may pile up in memory before they are written out.
    BUFFER_SIZE = 100

    # @param out_file_name [String] path of the file to create (truncated if it exists)
    # @param header_line [Array, nil] header row written first; skipped when nil
    # @param col_separator [String] column separator passed to CSV.generate_line
    def initialize(out_file_name, header_line, col_separator)
      @file = File.new out_file_name, 'w+'
      @col_separator = col_separator
      # Index of the next row expected in order; also the count of rows exported so far.
      @last_exported_line_idx = 0

      @buffer = []
      # Out-of-order rows keyed by their line index, so lookups are O(1).
      @backlog = {}
      @semaphore = Mutex.new

      write_header_row(header_line)
    end

    # @return [Integer] buffered row count above which the buffer is written to disk
    def buffer_size
      BUFFER_SIZE
    end

    # Accepts one processed row and exports every row that is now contiguous
    # with the rows already exported.
    #
    # @param item [Hash] {n: N, line: [...]}
    # @return [ExportBuffer] self, so that `<<` calls can be chained
    def push(item)
      # Ruby's Mutex is not reentrant, so all work happens under a single lock
      # and the private helpers below never lock by themselves.
      @semaphore.synchronize do
        @backlog[item[:n]] = item[:line]
        drain_backlog
        write_buffer if @buffer.size > buffer_size
      end
      self
    end
    alias_method :<<, :push

    # Writes out everything that is still pending, including backlog rows that
    # never became contiguous (gaps are skipped). Backlog rows are emitted in
    # ascending :n order and removed, so calling this twice does not duplicate
    # any row.
    def flush_backlog
      @semaphore.synchronize do
        @backlog.keys.sort.each do |n|
          @buffer << @backlog.delete(n)
          @last_exported_line_idx += 1
        end
        write_buffer
      end
    end

    # Writes the buffered rows to the file.
    #
    # @param force [Boolean] write even when the buffer is below #buffer_size
    def flush_buffer(force = false)
      @semaphore.synchronize do
        write_buffer if force || @buffer.size > buffer_size
      end
    end

    # Writes any rows still sitting in the in-order buffer and closes the file.
    # Rows left in the backlog are NOT written; call #flush_backlog first.
    # Safe to call more than once.
    def close
      @semaphore.synchronize do
        return if @file.closed?

        write_buffer
        @file.close
      end
    end

    # @return [Hash] current sizes of the buffer and backlog plus the export counter
    def status
      @semaphore.synchronize do
        {
          buffer: @buffer.size,
          backlog: @backlog.size,
          last_exported_line_idx: @last_exported_line_idx,
        }
      end
    end

    private

    # Moves rows from the backlog to the buffer for as long as the next
    # expected index is available. Caller must hold @semaphore.
    def drain_backlog
      while @backlog.key?(@last_exported_line_idx)
        @buffer << @backlog.delete(@last_exported_line_idx)
        @last_exported_line_idx += 1
      end
    end

    # Serializes the buffered rows and appends them to the file.
    # Caller must hold @semaphore.
    def write_buffer
      return if @buffer.empty?

      @file.write @buffer.map { |line| CSV.generate_line(line, col_sep: @col_separator) }.join
      @buffer = []
    end

    def write_header_row(row)
      @file.write(CSV.generate_line(row, col_sep: @col_separator)) if row
    end
  end
end
@@ -1,45 +1,59 @@
1
- require 'thread_storm'
2
1
  require 'brite-api'
3
2
  require 'csv'
4
3
 
5
4
  module Britebox
6
5
  class FileJob
7
6
 
8
- attr_reader :file_name, :lines_total, :error, :status, :processed_lines
7
+ attr_reader :file_name, :threads_count, :error, :status, :size_total, :size_processed
9
8
 
10
9
  EMAIL_PATTERN = /(\S+)@(\S+)/
11
10
  COL_SEPARATORS = [";", "|", "\t"]
12
11
 
13
- def initialize(file_name, brite_client, thread_pool, options = {})
12
+ def initialize(file_name, brite_client, options = {})
14
13
  @file_name = file_name
15
14
  @brite_client = brite_client
16
- @thread_pool = thread_pool
17
- @processed_lines = []
18
- @status = 'pending'
19
- end
20
15
 
21
- def lines_processed
22
- @processed_lines.compact.count
23
- end
16
+ @threads_count = options[:threads] || Britebox::FileJobPool::THREAD_NUM_DEFAULT
17
+ @queue = options[:queue]
24
18
 
25
- def verify!
26
- @status = 'verifying'
19
+ @status = 'pending'
20
+
21
+ @size_processed = 0
27
22
 
28
23
  unless File.exist?(file_name)
29
24
  report_error!("File #{file_name} not found") and return
30
25
  end
31
- file_data = File.read file_name
32
- if file_data.size == 0
26
+ if (@size_total = File.size(file_name)) == 0
33
27
  report_error!("File #{file_name} is empty") and return
34
28
  end
35
29
 
30
+ @semaphore = Mutex.new
31
+ end
32
+
33
+ def buffer_size
34
+ @threads_count * 4
35
+ end
36
+
37
+ def percent_complete
38
+ if @size_total.to_i > 0
39
+ (100.0 * @size_processed / @size_total).round(1)
40
+ else
41
+ 0.0
42
+ end
43
+ end
44
+
45
+ def verify!(file_name_to)
46
+ return if @status == 'error'
47
+
36
48
  test_lines = []
37
49
  begin
38
50
  line_n = 0
39
- CSV.parse(file_data) do |line|
51
+ CSV.foreach(file_name) do |line|
40
52
  if line && line.size > 0
41
53
  if line_n < 5
42
54
  test_lines << line
55
+ else
56
+ break
43
57
  end
44
58
 
45
59
  line_n += 1
@@ -51,56 +65,66 @@ module Britebox
51
65
 
52
66
  autoconfigure(test_lines) || return
53
67
 
54
- parsed_lines = CSV.parse(file_data, col_sep: @col_separator)
55
- file_data = nil # Free up resources
56
-
57
- @lines_total = parsed_lines.count
58
- @processed_lines = Array.new(@lines_total)
59
- @lines_total -= 1 if is_header_row?(parsed_lines.first)
68
+ # Block here until no other FileJob is processing lines
69
+ @queue.pop if @queue
60
70
 
71
+ @status = 'verifying'
61
72
 
62
- # spin up multiple processing threads
63
- jobs = []
64
- parsed_lines.each_with_index do |line, idx|
65
- next if idx == 0 && is_header_row?(line)
73
+ @in_buffer = Queue.new
74
+ @out_buffer = ExportBuffer.new(file_name_to, @header_row, @col_separator)
75
+
76
+ @threads = []
77
+ @threads_count.times do
78
+ @threads << Thread.new do
79
+ loop do
80
+ item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
81
+ break if item.nil?
82
+
83
+ email = item[:line][@email_index]
84
+ if email.to_s.match(EMAIL_PATTERN)
85
+ begin
86
+ contact = @brite_client.contacts.create(email: email)
87
+ contact.verify!
88
+ contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
89
+ rescue
90
+ contact_status = ['error', false, false]
91
+ end
92
+ else
93
+ contact_status = ['invalid', false, false]
94
+ end
66
95
 
67
- jobs << @thread_pool.execute do
68
- email = line[@email_index]
69
- begin
70
- contact = @brite_client.contacts.create(email: email)
71
- contact.verify!
72
- # ['email_status', 'disposable', 'role_account']
73
- contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
74
- rescue Exception => ex
75
- contact_status = ['error', false, false]
96
+ @semaphore.synchronize do
97
+ @out_buffer << {n: item[:n], line: item[:line] + contact_status}
98
+ @size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
99
+ end
76
100
  end
77
- # Store processed file
78
- @processed_lines[idx] = line + contact_status
79
101
  end
80
102
  end
81
103
 
82
- # Wait for all threads
83
- jobs.each{ |j| j.join }
104
+ idx = 0
105
+ CSV.foreach(file_name, col_sep: @col_separator) do |line|
106
+ if idx == 0 && is_header_row?(line)
107
+ @size_processed += CSV.generate_line(line, col_sep: @col_separator).size
108
+ next
109
+ end
110
+ next if line.nil? || line.size == 0
111
+ # Throttle file reading
112
+ sleep(0.1) while @in_buffer.size > buffer_size
84
113
 
85
- # Free up resources
86
- parsed_lines = nil
114
+ @in_buffer << {n: idx, line: line}
115
+ idx += 1
116
+ end
87
117
 
88
- @status = 'exporting'
118
+ @threads.each{ |m| m.join }
89
119
 
90
- true
91
- end
120
+ @out_buffer.flush_backlog
121
+ @out_buffer.close
92
122
 
93
- def export!(file_name)
94
- file = File.new(file_name, "w+")
123
+ @status = 'complete'
95
124
 
96
- file.write(CSV.generate_line(@header_row, col_sep: @col_separator)) if @header_row
97
- @processed_lines.each do |line|
98
- next if line.nil?
99
- file.write CSV.generate_line(line, col_sep: @col_separator)
100
- end
125
+ # Release the lock
126
+ @queue.push(:flag) if @queue
101
127
 
102
- file.close
103
- @status = 'complete'
104
128
  true
105
129
  end
106
130
 
@@ -128,11 +152,16 @@ module Britebox
128
152
 
129
153
 
130
154
  def autoconfigure(test_lines)
155
+ if test_lines.nil? || test_lines.size == 0
156
+ report_error! 'Column separator could not be determined'
157
+ return
158
+ end
159
+
131
160
  @col_separator = nil
132
161
  @header_row = nil
133
162
  @email_index = nil
134
163
 
135
- if test_lines.first.size > 1
164
+ if test_lines[0].size > 1
136
165
  @col_separator = ','
137
166
  else
138
167
  COL_SEPARATORS.each do |sep|
@@ -1,6 +1,8 @@
1
1
  module Britebox
2
2
  # Keep current status for all processing FileJobs
3
3
  class FileJobPool
4
+ include Britebox::Helpers
5
+
4
6
  attr_reader :file_jobs
5
7
 
6
8
  SPINNERS = ["|", "/", "—", "\\"]
@@ -8,10 +10,14 @@ module Britebox
8
10
 
9
11
  def initialize(num_threads = nil)
10
12
  num_threads ||= THREAD_NUM_DEFAULT
13
+ @num_threads = num_threads
14
+
15
+ # Only a single FileJob at a time can own :flag and process its lines
16
+ @queue = Queue.new
17
+ @queue << :flag
18
+
11
19
  @file_jobs = []
12
20
  @refresh_number = 0
13
- @fj_pool = ThreadStorm.new size: num_threads
14
- @lines_pool = ThreadStorm.new size: num_threads
15
21
  end
16
22
 
17
23
  def add(file_job)
@@ -23,16 +29,19 @@ module Britebox
23
29
  @file_jobs.map{ |fj| fj.file_name }.include? file_name
24
30
  end
25
31
 
26
- def process_file!(file, dir, out_dir, brite_client, fj_options)
27
- file_name = File.expand_path(file, dir)
32
+ def process_file!(file, in_dir, out_dir, brite_client, fj_options = {})
33
+ file_name = File.expand_path(file, in_dir)
28
34
 
29
35
  # Do not process same files twice
30
36
  return if self.include? file_name
31
37
 
32
- fj = FileJob.new(file_name, brite_client, @lines_pool, fj_options)
38
+ fj_options[:queue] = @queue
39
+ fj_options[:threads] = @num_threads
40
+
41
+ fj = FileJob.new(file_name, brite_client, fj_options)
33
42
  self.add(fj)
34
- @fj_pool.execute do
35
- fj.verify!
43
+ Thread.new do
44
+ fj.verify!(File.expand_path(file, out_dir))
36
45
 
37
46
  case fj.status
38
47
  when 'error'
@@ -41,10 +50,8 @@ module Britebox
41
50
  File.open(File.expand_path(err_name + '.log', out_dir), 'w+') do |f|
42
51
  f.write fj.error # TODO: add extended error log
43
52
  end
44
- when 'exporting'
45
- if fj.export! File.expand_path(file, out_dir)
46
- File.delete file_name
47
- end
53
+ when 'complete'
54
+ File.delete file_name
48
55
  else
49
56
  raise "unexpected error, status: #{fj.status}"
50
57
  end
@@ -61,20 +68,23 @@ module Britebox
61
68
  print "\r#{clear}#{up}" * @prev_buffer_height
62
69
  end
63
70
 
64
- buffer = "\nFiles processing: #{@file_jobs.count}"
71
+ buffer = "\nFiles in list: #{@file_jobs.count}"
65
72
 
66
73
  @file_jobs.each do |fj|
67
74
  fname = File.basename(fj.file_name)
68
75
  if fname.length > 20
69
76
  fname = fname[0..16] + '...'
70
77
  end
78
+
71
79
  if fj.status == 'error'
72
80
  status_str = fj.error
81
+ elsif fj.status == 'complete'
82
+ status_str = "100.0 %"
73
83
  else
74
- status_str = "#{(fj.lines_processed || '-')} / #{fj.lines_total || '-'}"
84
+ status_str = "#{fj.percent_complete} %".rjust(7)
75
85
  end
76
86
 
77
- buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{status_str}"
87
+ buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{human_size(fj.size_total).rjust(8)} | #{status_str}"
78
88
  end
79
89
 
80
90
  buffer << "\n #{SPINNERS[@refresh_number % 4]}\n"
@@ -0,0 +1,16 @@
1
module Britebox
  # Formatting helpers meant to be mixed into other classes.
  module Helpers
    # Unit suffixes in ascending order; each step is a factor of 1024.
    SIZE_UNITS = %w(B KB MB GB TB).freeze

    # Formats a byte count as a short human-readable string.
    #
    # Values below 1024 are printed as-is with a "B" suffix; larger values are
    # scaled by 1024 per unit and printed with two decimals. Values beyond the
    # largest unit stay expressed in that unit (e.g. "1024.00TB").
    #
    # @param n [Numeric, nil] size in bytes; nil means the size is unknown
    #   (e.g. a job whose file size was never determined)
    # @return [String] e.g. "512B", "1.50KB", or "-" when n is nil
    def human_size(n)
      return '-' if n.nil?

      unit = 0
      while n >= 1024 && unit < SIZE_UNITS.size - 1
        n /= 1024.0
        unit += 1
      end

      return "#{n}B" if unit.zero?

      format('%.2f', n) + SIZE_UNITS[unit]
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Britebox
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: britebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Shapiotko
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-07 00:00:00.000000000 Z
12
+ date: 2013-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: brite-api
@@ -25,20 +25,6 @@ dependencies:
25
25
  - - '>='
26
26
  - !ruby/object:Gem::Version
27
27
  version: 0.0.1
28
- - !ruby/object:Gem::Dependency
29
- name: thread_storm
30
- requirement: !ruby/object:Gem::Requirement
31
- requirements:
32
- - - ~>
33
- - !ruby/object:Gem::Version
34
- version: 0.7.1
35
- type: :runtime
36
- prerelease: false
37
- version_requirements: !ruby/object:Gem::Requirement
38
- requirements:
39
- - - ~>
40
- - !ruby/object:Gem::Version
41
- version: 0.7.1
42
28
  - !ruby/object:Gem::Dependency
43
29
  name: listen
44
30
  requirement: !ruby/object:Gem::Requirement
@@ -62,8 +48,10 @@ extensions: []
62
48
  extra_rdoc_files: []
63
49
  files:
64
50
  - lib/britebox/cli.rb
51
+ - lib/britebox/export_buffer.rb
65
52
  - lib/britebox/file_job.rb
66
53
  - lib/britebox/file_job_pool.rb
54
+ - lib/britebox/helpers.rb
67
55
  - lib/britebox/version.rb
68
56
  - lib/britebox.rb
69
57
  - README.md