britebox 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 933b24652e4d8e01a6cba8e94c78826441a6833b
4
- data.tar.gz: 71d0fe270ab25989b2157fbdb5f40bd78b70bdd3
3
+ metadata.gz: 9e800dec5d7cc7526497042fdd135dfeefb49b08
4
+ data.tar.gz: caef336a484073e5b13fdbe31ab1c72d92155281
5
5
  SHA512:
6
- metadata.gz: 7014db8bbe827d9030e69f9e377d2b04fcb43550a64742af704277d6c221204fbe8ff006cd8342ace8960bcd9f97fa84fd3215a1d1a44a735cd049ae3001673d
7
- data.tar.gz: 664811ca1f66bb90f5dbc3de04317deebedaebf1dad076c7c5821674d622a0a2a17db5aeaeba8ff5016775dd18d4dcfd29a2854dfdffb29b65732976608f3703
6
+ metadata.gz: d8cf57385c286d5a807c0a3e04fc5c4ed0ec73754ebe1c0072d73951461b1d3285a63b93fe0f4dc94b5cd887e5209e5fe8fc2bdc80cd95257998e146005a87b6
7
+ data.tar.gz: a887c75d7e6423e5e17fccb2b226f16a13b97c91839676cab6f11d6856aaa84312a34796247dffffa3f73e1f53c1840d2efba86340320ef457a69fadd5b5c4ac
@@ -1,4 +1,6 @@
1
1
 
2
2
  require 'britebox/version'
3
+ require 'britebox/helpers'
3
4
  require 'britebox/file_job'
4
- require 'britebox/file_job_pool'
5
+ require 'britebox/file_job_pool'
6
+ require 'britebox/export_buffer'
@@ -0,0 +1,87 @@
1
+ module Britebox
2
+ class ExportBuffer
3
+ def initialize(out_file_name, header_line, col_separator)
4
+ @file = File.new out_file_name, 'w+'
5
+ @col_separator = col_separator
6
+ write_header_row(header_line)
7
+ @last_exported_line_idx = 0
8
+
9
+ @buffer = []
10
+ @backlog = []
11
+ @semaphore = Mutex.new
12
+ end
13
+
14
+ def buffer_size
15
+ 100
16
+ end
17
+
18
+ # {n: N, line: [...]}
19
+ def push(item)
20
+ check_backlog
21
+
22
+ @semaphore.synchronize do
23
+ if item[:n] == @last_exported_line_idx
24
+ @buffer << item[:line]
25
+ @last_exported_line_idx += 1
26
+ else
27
+ @backlog << item
28
+ end
29
+ end
30
+
31
+ flush_buffer
32
+ end
33
+
34
+ def flush_backlog
35
+ @semaphore.synchronize do
36
+ @backlog.each do |item|
37
+ @buffer << item[:line]
38
+ @last_exported_line_idx += 1
39
+ end
40
+ end
41
+ flush_buffer(true)
42
+ end
43
+
44
+ def flush_buffer(force = false)
45
+ if force || @buffer.size > buffer_size
46
+ @semaphore.synchronize do
47
+ @file.write @buffer.map{ |line| CSV.generate_line(line, col_sep: @col_separator) }.join
48
+ @buffer = []
49
+ end
50
+ end
51
+ end
52
+
53
+ def close
54
+ @file.close
55
+ end
56
+
57
+ def status
58
+ {
59
+ buffer: @buffer.size,
60
+ backlog: @backlog.size,
61
+ last_exported_line_idx: @last_exported_line_idx,
62
+ }
63
+ end
64
+
65
+ private
66
+
67
+ def check_backlog
68
+ loop do
69
+ if (itm = @backlog.detect { |q| q[:n] == @last_exported_line_idx })
70
+ @semaphore.synchronize do
71
+ @buffer << itm[:line]
72
+ @backlog.delete(itm)
73
+ @last_exported_line_idx += 1
74
+ end
75
+ else
76
+ break
77
+ end
78
+ end
79
+ end
80
+
81
+ def write_header_row(row)
82
+ @file.write(CSV.generate_line(row, col_sep: @col_separator)) if row
83
+ end
84
+
85
+ alias_method :<<, :push
86
+ end
87
+ end
@@ -1,45 +1,59 @@
1
- require 'thread_storm'
2
1
  require 'brite-api'
3
2
  require 'csv'
4
3
 
5
4
  module Britebox
6
5
  class FileJob
7
6
 
8
- attr_reader :file_name, :lines_total, :error, :status, :processed_lines
7
+ attr_reader :file_name, :threads_count, :error, :status, :size_total, :size_processed
9
8
 
10
9
  EMAIL_PATTERN = /(\S+)@(\S+)/
11
10
  COL_SEPARATORS = [";", "|", "\t"]
12
11
 
13
- def initialize(file_name, brite_client, thread_pool, options = {})
12
+ def initialize(file_name, brite_client, options = {})
14
13
  @file_name = file_name
15
14
  @brite_client = brite_client
16
- @thread_pool = thread_pool
17
- @processed_lines = []
18
- @status = 'pending'
19
- end
20
15
 
21
- def lines_processed
22
- @processed_lines.compact.count
23
- end
16
+ @threads_count = options[:threads] || Britebox::FileJobPool::THREAD_NUM_DEFAULT
17
+ @queue = options[:queue]
24
18
 
25
- def verify!
26
- @status = 'verifying'
19
+ @status = 'pending'
20
+
21
+ @size_processed = 0
27
22
 
28
23
  unless File.exist?(file_name)
29
24
  report_error!("File #{file_name} not found") and return
30
25
  end
31
- file_data = File.read file_name
32
- if file_data.size == 0
26
+ if (@size_total = File.size(file_name)) == 0
33
27
  report_error!("File #{file_name} is empty") and return
34
28
  end
35
29
 
30
+ @semaphore = Mutex.new
31
+ end
32
+
33
+ def buffer_size
34
+ @threads_count * 4
35
+ end
36
+
37
+ def percent_complete
38
+ if @size_total.to_i > 0
39
+ (100.0 * @size_processed / @size_total).round(1)
40
+ else
41
+ 0.0
42
+ end
43
+ end
44
+
45
+ def verify!(file_name_to)
46
+ return if @status == 'error'
47
+
36
48
  test_lines = []
37
49
  begin
38
50
  line_n = 0
39
- CSV.parse(file_data) do |line|
51
+ CSV.foreach(file_name) do |line|
40
52
  if line && line.size > 0
41
53
  if line_n < 5
42
54
  test_lines << line
55
+ else
56
+ break
43
57
  end
44
58
 
45
59
  line_n += 1
@@ -51,56 +65,66 @@ module Britebox
51
65
 
52
66
  autoconfigure(test_lines) || return
53
67
 
54
- parsed_lines = CSV.parse(file_data, col_sep: @col_separator)
55
- file_data = nil # Free up resources
56
-
57
- @lines_total = parsed_lines.count
58
- @processed_lines = Array.new(@lines_total)
59
- @lines_total -= 1 if is_header_row?(parsed_lines.first)
68
+ # Block processing if some another FileJob processing lines
69
+ @queue.pop if @queue
60
70
 
71
+ @status = 'verifying'
61
72
 
62
- # spin up multiple processing threads
63
- jobs = []
64
- parsed_lines.each_with_index do |line, idx|
65
- next if idx == 0 && is_header_row?(line)
73
+ @in_buffer = Queue.new
74
+ @out_buffer = ExportBuffer.new(file_name_to, @header_row, @col_separator)
75
+
76
+ @threads = []
77
+ @threads_count.times do
78
+ @threads << Thread.new do
79
+ loop do
80
+ item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
81
+ break if item.nil?
82
+
83
+ email = item[:line][@email_index]
84
+ if email.to_s.match(EMAIL_PATTERN)
85
+ begin
86
+ contact = @brite_client.contacts.create(email: email)
87
+ contact.verify!
88
+ contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
89
+ rescue
90
+ contact_status = ['error', false, false]
91
+ end
92
+ else
93
+ contact_status = ['invalid', false, false]
94
+ end
66
95
 
67
- jobs << @thread_pool.execute do
68
- email = line[@email_index]
69
- begin
70
- contact = @brite_client.contacts.create(email: email)
71
- contact.verify!
72
- # ['email_status', 'disposable', 'role_account']
73
- contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
74
- rescue Exception => ex
75
- contact_status = ['error', false, false]
96
+ @semaphore.synchronize do
97
+ @out_buffer << {n: item[:n], line: item[:line] + contact_status}
98
+ @size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
99
+ end
76
100
  end
77
- # Store processed file
78
- @processed_lines[idx] = line + contact_status
79
101
  end
80
102
  end
81
103
 
82
- # Wait for all threads
83
- jobs.each{ |j| j.join }
104
+ idx = 0
105
+ CSV.foreach(file_name, col_sep: @col_separator) do |line|
106
+ if idx == 0 && is_header_row?(line)
107
+ @size_processed += CSV.generate_line(line, col_sep: @col_separator).size
108
+ next
109
+ end
110
+ next if line.nil? || line.size == 0
111
+ # Throttle file reading
112
+ sleep(0.1) while @in_buffer.size > buffer_size
84
113
 
85
- # Free up resources
86
- parsed_lines = nil
114
+ @in_buffer << {n: idx, line: line}
115
+ idx += 1
116
+ end
87
117
 
88
- @status = 'exporting'
118
+ @threads.each{ |m| m.join }
89
119
 
90
- true
91
- end
120
+ @out_buffer.flush_backlog
121
+ @out_buffer.close
92
122
 
93
- def export!(file_name)
94
- file = File.new(file_name, "w+")
123
+ @status = 'complete'
95
124
 
96
- file.write(CSV.generate_line(@header_row, col_sep: @col_separator)) if @header_row
97
- @processed_lines.each do |line|
98
- next if line.nil?
99
- file.write CSV.generate_line(line, col_sep: @col_separator)
100
- end
125
+ # Release the lock
126
+ @queue.push(:flag) if @queue
101
127
 
102
- file.close
103
- @status = 'complete'
104
128
  true
105
129
  end
106
130
 
@@ -128,11 +152,16 @@ module Britebox
128
152
 
129
153
 
130
154
  def autoconfigure(test_lines)
155
+ if test_lines.nil? || test_lines.size == 0
156
+ report_error! 'Column separator could not be determined'
157
+ return
158
+ end
159
+
131
160
  @col_separator = nil
132
161
  @header_row = nil
133
162
  @email_index = nil
134
163
 
135
- if test_lines.first.size > 1
164
+ if test_lines[0].size > 1
136
165
  @col_separator = ','
137
166
  else
138
167
  COL_SEPARATORS.each do |sep|
@@ -1,6 +1,8 @@
1
1
  module Britebox
2
2
  # Keep current status for all processing FileJobs
3
3
  class FileJobPool
4
+ include Britebox::Helpers
5
+
4
6
  attr_reader :file_jobs
5
7
 
6
8
  SPINNERS = ["|", "/", "—", "\\"]
@@ -8,10 +10,14 @@ module Britebox
8
10
 
9
11
  def initialize(num_threads = nil)
10
12
  num_threads ||= THREAD_NUM_DEFAULT
13
+ @num_threads = num_threads
14
+
15
+ # Only single FileJob can own :flag & process own lines
16
+ @queue = Queue.new
17
+ @queue << :flag
18
+
11
19
  @file_jobs = []
12
20
  @refresh_number = 0
13
- @fj_pool = ThreadStorm.new size: num_threads
14
- @lines_pool = ThreadStorm.new size: num_threads
15
21
  end
16
22
 
17
23
  def add(file_job)
@@ -23,16 +29,19 @@ module Britebox
23
29
  @file_jobs.map{ |fj| fj.file_name }.include? file_name
24
30
  end
25
31
 
26
- def process_file!(file, dir, out_dir, brite_client, fj_options)
27
- file_name = File.expand_path(file, dir)
32
+ def process_file!(file, in_dir, out_dir, brite_client, fj_options = {})
33
+ file_name = File.expand_path(file, in_dir)
28
34
 
29
35
  # Do not process same files twice
30
36
  return if self.include? file_name
31
37
 
32
- fj = FileJob.new(file_name, brite_client, @lines_pool, fj_options)
38
+ fj_options[:queue] = @queue
39
+ fj_options[:threads] = @num_threads
40
+
41
+ fj = FileJob.new(file_name, brite_client, fj_options)
33
42
  self.add(fj)
34
- @fj_pool.execute do
35
- fj.verify!
43
+ Thread.new do
44
+ fj.verify!(File.expand_path(file, out_dir))
36
45
 
37
46
  case fj.status
38
47
  when 'error'
@@ -41,10 +50,8 @@ module Britebox
41
50
  File.open(File.expand_path(err_name + '.log', out_dir), 'w+') do |f|
42
51
  f.write fj.error # TODO: add extended error log
43
52
  end
44
- when 'exporting'
45
- if fj.export! File.expand_path(file, out_dir)
46
- File.delete file_name
47
- end
53
+ when 'complete'
54
+ File.delete file_name
48
55
  else
49
56
  raise "unexpected error, status: #{fj.status}"
50
57
  end
@@ -61,20 +68,23 @@ module Britebox
61
68
  print "\r#{clear}#{up}" * @prev_buffer_height
62
69
  end
63
70
 
64
- buffer = "\nFiles processing: #{@file_jobs.count}"
71
+ buffer = "\nFiles in list: #{@file_jobs.count}"
65
72
 
66
73
  @file_jobs.each do |fj|
67
74
  fname = File.basename(fj.file_name)
68
75
  if fname.length > 20
69
76
  fname = fname[0..16] + '...'
70
77
  end
78
+
71
79
  if fj.status == 'error'
72
80
  status_str = fj.error
81
+ elsif fj.status == 'complete'
82
+ status_str = "100.0 %"
73
83
  else
74
- status_str = "#{(fj.lines_processed || '-')} / #{fj.lines_total || '-'}"
84
+ status_str = "#{fj.percent_complete} %".rjust(7)
75
85
  end
76
86
 
77
- buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{status_str}"
87
+ buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{human_size(fj.size_total).rjust(8)} | #{status_str}"
78
88
  end
79
89
 
80
90
  buffer << "\n #{SPINNERS[@refresh_number % 4]}\n"
@@ -0,0 +1,16 @@
1
+ module Britebox
2
+ module Helpers
3
+ def human_size(n)
4
+ count = 0
5
+ while n >= 1024 and count < 4
6
+ n /= 1024.0
7
+ count += 1
8
+ end
9
+ if count == 0
10
+ n.to_s + 'B'
11
+ else
12
+ format("%.2f", n) + %w(B KB MB GB TB)[count]
13
+ end
14
+ end
15
+ end
16
+ end
@@ -1,3 +1,3 @@
1
1
  module Britebox
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: britebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Shapiotko
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-07 00:00:00.000000000 Z
12
+ date: 2013-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: brite-api
@@ -25,20 +25,6 @@ dependencies:
25
25
  - - '>='
26
26
  - !ruby/object:Gem::Version
27
27
  version: 0.0.1
28
- - !ruby/object:Gem::Dependency
29
- name: thread_storm
30
- requirement: !ruby/object:Gem::Requirement
31
- requirements:
32
- - - ~>
33
- - !ruby/object:Gem::Version
34
- version: 0.7.1
35
- type: :runtime
36
- prerelease: false
37
- version_requirements: !ruby/object:Gem::Requirement
38
- requirements:
39
- - - ~>
40
- - !ruby/object:Gem::Version
41
- version: 0.7.1
42
28
  - !ruby/object:Gem::Dependency
43
29
  name: listen
44
30
  requirement: !ruby/object:Gem::Requirement
@@ -62,8 +48,10 @@ extensions: []
62
48
  extra_rdoc_files: []
63
49
  files:
64
50
  - lib/britebox/cli.rb
51
+ - lib/britebox/export_buffer.rb
65
52
  - lib/britebox/file_job.rb
66
53
  - lib/britebox/file_job_pool.rb
54
+ - lib/britebox/helpers.rb
67
55
  - lib/britebox/version.rb
68
56
  - lib/britebox.rb
69
57
  - README.md