britebox 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/britebox.rb +3 -1
- data/lib/britebox/export_buffer.rb +87 -0
- data/lib/britebox/file_job.rb +82 -53
- data/lib/britebox/file_job_pool.rb +24 -14
- data/lib/britebox/helpers.rb +16 -0
- data/lib/britebox/version.rb +1 -1
- metadata +4 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e800dec5d7cc7526497042fdd135dfeefb49b08
|
4
|
+
data.tar.gz: caef336a484073e5b13fdbe31ab1c72d92155281
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8cf57385c286d5a807c0a3e04fc5c4ed0ec73754ebe1c0072d73951461b1d3285a63b93fe0f4dc94b5cd887e5209e5fe8fc2bdc80cd95257998e146005a87b6
|
7
|
+
data.tar.gz: a887c75d7e6423e5e17fccb2b226f16a13b97c91839676cab6f11d6856aaa84312a34796247dffffa3f73e1f53c1840d2efba86340320ef457a69fadd5b5c4ac
|
data/lib/britebox.rb
CHANGED
data/lib/britebox/export_buffer.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
module Britebox
|
2
|
+
class ExportBuffer
|
3
|
+
def initialize(out_file_name, header_line, col_separator)
|
4
|
+
@file = File.new out_file_name, 'w+'
|
5
|
+
@col_separator = col_separator
|
6
|
+
write_header_row(header_line)
|
7
|
+
@last_exported_line_idx = 0
|
8
|
+
|
9
|
+
@buffer = []
|
10
|
+
@backlog = []
|
11
|
+
@semaphore = Mutex.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def buffer_size
|
15
|
+
100
|
16
|
+
end
|
17
|
+
|
18
|
+
# {n: N, line: [...]}
|
19
|
+
def push(item)
|
20
|
+
check_backlog
|
21
|
+
|
22
|
+
@semaphore.synchronize do
|
23
|
+
if item[:n] == @last_exported_line_idx
|
24
|
+
@buffer << item[:line]
|
25
|
+
@last_exported_line_idx += 1
|
26
|
+
else
|
27
|
+
@backlog << item
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
flush_buffer
|
32
|
+
end
|
33
|
+
|
34
|
+
def flush_backlog
|
35
|
+
@semaphore.synchronize do
|
36
|
+
@backlog.each do |item|
|
37
|
+
@buffer << item[:line]
|
38
|
+
@last_exported_line_idx += 1
|
39
|
+
end
|
40
|
+
end
|
41
|
+
flush_buffer(true)
|
42
|
+
end
|
43
|
+
|
44
|
+
def flush_buffer(force = false)
|
45
|
+
if force || @buffer.size > buffer_size
|
46
|
+
@semaphore.synchronize do
|
47
|
+
@file.write @buffer.map{ |line| CSV.generate_line(line, col_sep: @col_separator) }.join
|
48
|
+
@buffer = []
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def close
|
54
|
+
@file.close
|
55
|
+
end
|
56
|
+
|
57
|
+
def status
|
58
|
+
{
|
59
|
+
buffer: @buffer.size,
|
60
|
+
backlog: @backlog.size,
|
61
|
+
last_exported_line_idx: @last_exported_line_idx,
|
62
|
+
}
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
def check_backlog
|
68
|
+
loop do
|
69
|
+
if (itm = @backlog.detect { |q| q[:n] == @last_exported_line_idx })
|
70
|
+
@semaphore.synchronize do
|
71
|
+
@buffer << itm[:line]
|
72
|
+
@backlog.delete(itm)
|
73
|
+
@last_exported_line_idx += 1
|
74
|
+
end
|
75
|
+
else
|
76
|
+
break
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def write_header_row(row)
|
82
|
+
@file.write(CSV.generate_line(row, col_sep: @col_separator)) if row
|
83
|
+
end
|
84
|
+
|
85
|
+
alias_method :<<, :push
|
86
|
+
end
|
87
|
+
end
|
data/lib/britebox/file_job.rb
CHANGED
@@ -1,45 +1,59 @@
|
|
1
|
-
require 'thread_storm'
|
2
1
|
require 'brite-api'
|
3
2
|
require 'csv'
|
4
3
|
|
5
4
|
module Britebox
|
6
5
|
class FileJob
|
7
6
|
|
8
|
-
attr_reader :file_name, :
|
7
|
+
attr_reader :file_name, :threads_count, :error, :status, :size_total, :size_processed
|
9
8
|
|
10
9
|
EMAIL_PATTERN = /(\S+)@(\S+)/
|
11
10
|
COL_SEPARATORS = [";", "|", "\t"]
|
12
11
|
|
13
|
-
def initialize(file_name, brite_client,
|
12
|
+
def initialize(file_name, brite_client, options = {})
|
14
13
|
@file_name = file_name
|
15
14
|
@brite_client = brite_client
|
16
|
-
@thread_pool = thread_pool
|
17
|
-
@processed_lines = []
|
18
|
-
@status = 'pending'
|
19
|
-
end
|
20
15
|
|
21
|
-
|
22
|
-
@
|
23
|
-
end
|
16
|
+
@threads_count = options[:threads] || Britebox::FileJobPool::THREAD_NUM_DEFAULT
|
17
|
+
@queue = options[:queue]
|
24
18
|
|
25
|
-
|
26
|
-
|
19
|
+
@status = 'pending'
|
20
|
+
|
21
|
+
@size_processed = 0
|
27
22
|
|
28
23
|
unless File.exist?(file_name)
|
29
24
|
report_error!("File #{file_name} not found") and return
|
30
25
|
end
|
31
|
-
|
32
|
-
if file_data.size == 0
|
26
|
+
if (@size_total = File.size(file_name)) == 0
|
33
27
|
report_error!("File #{file_name} is empty") and return
|
34
28
|
end
|
35
29
|
|
30
|
+
@semaphore = Mutex.new
|
31
|
+
end
|
32
|
+
|
33
|
+
def buffer_size
|
34
|
+
@threads_count * 4
|
35
|
+
end
|
36
|
+
|
37
|
+
def percent_complete
|
38
|
+
if @size_total.to_i > 0
|
39
|
+
(100.0 * @size_processed / @size_total).round(1)
|
40
|
+
else
|
41
|
+
0.0
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def verify!(file_name_to)
|
46
|
+
return if @status == 'error'
|
47
|
+
|
36
48
|
test_lines = []
|
37
49
|
begin
|
38
50
|
line_n = 0
|
39
|
-
CSV.
|
51
|
+
CSV.foreach(file_name) do |line|
|
40
52
|
if line && line.size > 0
|
41
53
|
if line_n < 5
|
42
54
|
test_lines << line
|
55
|
+
else
|
56
|
+
break
|
43
57
|
end
|
44
58
|
|
45
59
|
line_n += 1
|
@@ -51,56 +65,66 @@ module Britebox
|
|
51
65
|
|
52
66
|
autoconfigure(test_lines) || return
|
53
67
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
@lines_total = parsed_lines.count
|
58
|
-
@processed_lines = Array.new(@lines_total)
|
59
|
-
@lines_total -= 1 if is_header_row?(parsed_lines.first)
|
68
|
+
# Block here while another FileJob is processing its lines
|
69
|
+
@queue.pop if @queue
|
60
70
|
|
71
|
+
@status = 'verifying'
|
61
72
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
73
|
+
@in_buffer = Queue.new
|
74
|
+
@out_buffer = ExportBuffer.new(file_name_to, @header_row, @col_separator)
|
75
|
+
|
76
|
+
@threads = []
|
77
|
+
@threads_count.times do
|
78
|
+
@threads << Thread.new do
|
79
|
+
loop do
|
80
|
+
item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
|
81
|
+
break if item.nil?
|
82
|
+
|
83
|
+
email = item[:line][@email_index]
|
84
|
+
if email.to_s.match(EMAIL_PATTERN)
|
85
|
+
begin
|
86
|
+
contact = @brite_client.contacts.create(email: email)
|
87
|
+
contact.verify!
|
88
|
+
contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
|
89
|
+
rescue
|
90
|
+
contact_status = ['error', false, false]
|
91
|
+
end
|
92
|
+
else
|
93
|
+
contact_status = ['invalid', false, false]
|
94
|
+
end
|
66
95
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
contact.verify!
|
72
|
-
# ['email_status', 'disposable', 'role_account']
|
73
|
-
contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
|
74
|
-
rescue Exception => ex
|
75
|
-
contact_status = ['error', false, false]
|
96
|
+
@semaphore.synchronize do
|
97
|
+
@out_buffer << {n: item[:n], line: item[:line] + contact_status}
|
98
|
+
@size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
|
99
|
+
end
|
76
100
|
end
|
77
|
-
# Store processed file
|
78
|
-
@processed_lines[idx] = line + contact_status
|
79
101
|
end
|
80
102
|
end
|
81
103
|
|
82
|
-
|
83
|
-
|
104
|
+
idx = 0
|
105
|
+
CSV.foreach(file_name, col_sep: @col_separator) do |line|
|
106
|
+
if idx == 0 && is_header_row?(line)
|
107
|
+
@size_processed += CSV.generate_line(line, col_sep: @col_separator).size
|
108
|
+
next
|
109
|
+
end
|
110
|
+
next if line.nil? || line.size == 0
|
111
|
+
# Throttle file reading
|
112
|
+
sleep(0.1) while @in_buffer.size > buffer_size
|
84
113
|
|
85
|
-
|
86
|
-
|
114
|
+
@in_buffer << {n: idx, line: line}
|
115
|
+
idx += 1
|
116
|
+
end
|
87
117
|
|
88
|
-
@
|
118
|
+
@threads.each{ |m| m.join }
|
89
119
|
|
90
|
-
|
91
|
-
|
120
|
+
@out_buffer.flush_backlog
|
121
|
+
@out_buffer.close
|
92
122
|
|
93
|
-
|
94
|
-
file = File.new(file_name, "w+")
|
123
|
+
@status = 'complete'
|
95
124
|
|
96
|
-
|
97
|
-
@
|
98
|
-
next if line.nil?
|
99
|
-
file.write CSV.generate_line(line, col_sep: @col_separator)
|
100
|
-
end
|
125
|
+
# Release the lock
|
126
|
+
@queue.push(:flag) if @queue
|
101
127
|
|
102
|
-
file.close
|
103
|
-
@status = 'complete'
|
104
128
|
true
|
105
129
|
end
|
106
130
|
|
@@ -128,11 +152,16 @@ module Britebox
|
|
128
152
|
|
129
153
|
|
130
154
|
def autoconfigure(test_lines)
|
155
|
+
if test_lines.nil? || test_lines.size == 0
|
156
|
+
report_error! 'Column separator could not be determined'
|
157
|
+
return
|
158
|
+
end
|
159
|
+
|
131
160
|
@col_separator = nil
|
132
161
|
@header_row = nil
|
133
162
|
@email_index = nil
|
134
163
|
|
135
|
-
if test_lines.
|
164
|
+
if test_lines[0].size > 1
|
136
165
|
@col_separator = ','
|
137
166
|
else
|
138
167
|
COL_SEPARATORS.each do |sep|
|
data/lib/britebox/file_job_pool.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Britebox
|
2
2
|
# Keep current status for all processing FileJobs
|
3
3
|
class FileJobPool
|
4
|
+
include Britebox::Helpers
|
5
|
+
|
4
6
|
attr_reader :file_jobs
|
5
7
|
|
6
8
|
SPINNERS = ["|", "/", "—", "\\"]
|
@@ -8,10 +10,14 @@ module Britebox
|
|
8
10
|
|
9
11
|
def initialize(num_threads = nil)
|
10
12
|
num_threads ||= THREAD_NUM_DEFAULT
|
13
|
+
@num_threads = num_threads
|
14
|
+
|
15
|
+
# Only a single FileJob at a time can own :flag and process its lines
|
16
|
+
@queue = Queue.new
|
17
|
+
@queue << :flag
|
18
|
+
|
11
19
|
@file_jobs = []
|
12
20
|
@refresh_number = 0
|
13
|
-
@fj_pool = ThreadStorm.new size: num_threads
|
14
|
-
@lines_pool = ThreadStorm.new size: num_threads
|
15
21
|
end
|
16
22
|
|
17
23
|
def add(file_job)
|
@@ -23,16 +29,19 @@ module Britebox
|
|
23
29
|
@file_jobs.map{ |fj| fj.file_name }.include? file_name
|
24
30
|
end
|
25
31
|
|
26
|
-
def process_file!(file,
|
27
|
-
file_name = File.expand_path(file,
|
32
|
+
def process_file!(file, in_dir, out_dir, brite_client, fj_options = {})
|
33
|
+
file_name = File.expand_path(file, in_dir)
|
28
34
|
|
29
35
|
# Do not process same files twice
|
30
36
|
return if self.include? file_name
|
31
37
|
|
32
|
-
|
38
|
+
fj_options[:queue] = @queue
|
39
|
+
fj_options[:threads] = @num_threads
|
40
|
+
|
41
|
+
fj = FileJob.new(file_name, brite_client, fj_options)
|
33
42
|
self.add(fj)
|
34
|
-
|
35
|
-
fj.verify!
|
43
|
+
Thread.new do
|
44
|
+
fj.verify!(File.expand_path(file, out_dir))
|
36
45
|
|
37
46
|
case fj.status
|
38
47
|
when 'error'
|
@@ -41,10 +50,8 @@ module Britebox
|
|
41
50
|
File.open(File.expand_path(err_name + '.log', out_dir), 'w+') do |f|
|
42
51
|
f.write fj.error # TODO: add extended error log
|
43
52
|
end
|
44
|
-
when '
|
45
|
-
|
46
|
-
File.delete file_name
|
47
|
-
end
|
53
|
+
when 'complete'
|
54
|
+
File.delete file_name
|
48
55
|
else
|
49
56
|
raise "unexpected error, status: #{fj.status}"
|
50
57
|
end
|
@@ -61,20 +68,23 @@ module Britebox
|
|
61
68
|
print "\r#{clear}#{up}" * @prev_buffer_height
|
62
69
|
end
|
63
70
|
|
64
|
-
buffer = "\nFiles
|
71
|
+
buffer = "\nFiles in list: #{@file_jobs.count}"
|
65
72
|
|
66
73
|
@file_jobs.each do |fj|
|
67
74
|
fname = File.basename(fj.file_name)
|
68
75
|
if fname.length > 20
|
69
76
|
fname = fname[0..16] + '...'
|
70
77
|
end
|
78
|
+
|
71
79
|
if fj.status == 'error'
|
72
80
|
status_str = fj.error
|
81
|
+
elsif fj.status == 'complete'
|
82
|
+
status_str = "100.0 %"
|
73
83
|
else
|
74
|
-
status_str = "#{
|
84
|
+
status_str = "#{fj.percent_complete} %".rjust(7)
|
75
85
|
end
|
76
86
|
|
77
|
-
buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{status_str}"
|
87
|
+
buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{human_size(fj.size_total).rjust(8)} | #{status_str}"
|
78
88
|
end
|
79
89
|
|
80
90
|
buffer << "\n #{SPINNERS[@refresh_number % 4]}\n"
|
data/lib/britebox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: britebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Shapiotko
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: brite-api
|
@@ -25,20 +25,6 @@ dependencies:
|
|
25
25
|
- - '>='
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: 0.0.1
|
28
|
-
- !ruby/object:Gem::Dependency
|
29
|
-
name: thread_storm
|
30
|
-
requirement: !ruby/object:Gem::Requirement
|
31
|
-
requirements:
|
32
|
-
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: 0.7.1
|
35
|
-
type: :runtime
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: !ruby/object:Gem::Requirement
|
38
|
-
requirements:
|
39
|
-
- - ~>
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version: 0.7.1
|
42
28
|
- !ruby/object:Gem::Dependency
|
43
29
|
name: listen
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,8 +48,10 @@ extensions: []
|
|
62
48
|
extra_rdoc_files: []
|
63
49
|
files:
|
64
50
|
- lib/britebox/cli.rb
|
51
|
+
- lib/britebox/export_buffer.rb
|
65
52
|
- lib/britebox/file_job.rb
|
66
53
|
- lib/britebox/file_job_pool.rb
|
54
|
+
- lib/britebox/helpers.rb
|
67
55
|
- lib/britebox/version.rb
|
68
56
|
- lib/britebox.rb
|
69
57
|
- README.md
|