britebox 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/britebox.rb +3 -1
- data/lib/britebox/export_buffer.rb +87 -0
- data/lib/britebox/file_job.rb +82 -53
- data/lib/britebox/file_job_pool.rb +24 -14
- data/lib/britebox/helpers.rb +16 -0
- data/lib/britebox/version.rb +1 -1
- metadata +4 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e800dec5d7cc7526497042fdd135dfeefb49b08
|
4
|
+
data.tar.gz: caef336a484073e5b13fdbe31ab1c72d92155281
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8cf57385c286d5a807c0a3e04fc5c4ed0ec73754ebe1c0072d73951461b1d3285a63b93fe0f4dc94b5cd887e5209e5fe8fc2bdc80cd95257998e146005a87b6
|
7
|
+
data.tar.gz: a887c75d7e6423e5e17fccb2b226f16a13b97c91839676cab6f11d6856aaa84312a34796247dffffa3f73e1f53c1840d2efba86340320ef457a69fadd5b5c4ac
|
data/lib/britebox.rb
CHANGED
data/lib/britebox/export_buffer.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
module Britebox
|
2
|
+
class ExportBuffer
|
3
|
+
def initialize(out_file_name, header_line, col_separator)
|
4
|
+
@file = File.new out_file_name, 'w+'
|
5
|
+
@col_separator = col_separator
|
6
|
+
write_header_row(header_line)
|
7
|
+
@last_exported_line_idx = 0
|
8
|
+
|
9
|
+
@buffer = []
|
10
|
+
@backlog = []
|
11
|
+
@semaphore = Mutex.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def buffer_size
|
15
|
+
100
|
16
|
+
end
|
17
|
+
|
18
|
+
# {n: N, line: [...]}
|
19
|
+
def push(item)
|
20
|
+
check_backlog
|
21
|
+
|
22
|
+
@semaphore.synchronize do
|
23
|
+
if item[:n] == @last_exported_line_idx
|
24
|
+
@buffer << item[:line]
|
25
|
+
@last_exported_line_idx += 1
|
26
|
+
else
|
27
|
+
@backlog << item
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
flush_buffer
|
32
|
+
end
|
33
|
+
|
34
|
+
def flush_backlog
|
35
|
+
@semaphore.synchronize do
|
36
|
+
@backlog.each do |item|
|
37
|
+
@buffer << item[:line]
|
38
|
+
@last_exported_line_idx += 1
|
39
|
+
end
|
40
|
+
end
|
41
|
+
flush_buffer(true)
|
42
|
+
end
|
43
|
+
|
44
|
+
def flush_buffer(force = false)
|
45
|
+
if force || @buffer.size > buffer_size
|
46
|
+
@semaphore.synchronize do
|
47
|
+
@file.write @buffer.map{ |line| CSV.generate_line(line, col_sep: @col_separator) }.join
|
48
|
+
@buffer = []
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def close
|
54
|
+
@file.close
|
55
|
+
end
|
56
|
+
|
57
|
+
def status
|
58
|
+
{
|
59
|
+
buffer: @buffer.size,
|
60
|
+
backlog: @backlog.size,
|
61
|
+
last_exported_line_idx: @last_exported_line_idx,
|
62
|
+
}
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
def check_backlog
|
68
|
+
loop do
|
69
|
+
if (itm = @backlog.detect { |q| q[:n] == @last_exported_line_idx })
|
70
|
+
@semaphore.synchronize do
|
71
|
+
@buffer << itm[:line]
|
72
|
+
@backlog.delete(itm)
|
73
|
+
@last_exported_line_idx += 1
|
74
|
+
end
|
75
|
+
else
|
76
|
+
break
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def write_header_row(row)
|
82
|
+
@file.write(CSV.generate_line(row, col_sep: @col_separator)) if row
|
83
|
+
end
|
84
|
+
|
85
|
+
alias_method :<<, :push
|
86
|
+
end
|
87
|
+
end
|
data/lib/britebox/file_job.rb
CHANGED
@@ -1,45 +1,59 @@
|
|
1
|
-
require 'thread_storm'
|
2
1
|
require 'brite-api'
|
3
2
|
require 'csv'
|
4
3
|
|
5
4
|
module Britebox
|
6
5
|
class FileJob
|
7
6
|
|
8
|
-
attr_reader :file_name, :
|
7
|
+
attr_reader :file_name, :threads_count, :error, :status, :size_total, :size_processed
|
9
8
|
|
10
9
|
EMAIL_PATTERN = /(\S+)@(\S+)/
|
11
10
|
COL_SEPARATORS = [";", "|", "\t"]
|
12
11
|
|
13
|
-
def initialize(file_name, brite_client,
|
12
|
+
def initialize(file_name, brite_client, options = {})
|
14
13
|
@file_name = file_name
|
15
14
|
@brite_client = brite_client
|
16
|
-
@thread_pool = thread_pool
|
17
|
-
@processed_lines = []
|
18
|
-
@status = 'pending'
|
19
|
-
end
|
20
15
|
|
21
|
-
|
22
|
-
@
|
23
|
-
end
|
16
|
+
@threads_count = options[:threads] || Britebox::FileJobPool::THREAD_NUM_DEFAULT
|
17
|
+
@queue = options[:queue]
|
24
18
|
|
25
|
-
|
26
|
-
|
19
|
+
@status = 'pending'
|
20
|
+
|
21
|
+
@size_processed = 0
|
27
22
|
|
28
23
|
unless File.exist?(file_name)
|
29
24
|
report_error!("File #{file_name} not found") and return
|
30
25
|
end
|
31
|
-
|
32
|
-
if file_data.size == 0
|
26
|
+
if (@size_total = File.size(file_name)) == 0
|
33
27
|
report_error!("File #{file_name} is empty") and return
|
34
28
|
end
|
35
29
|
|
30
|
+
@semaphore = Mutex.new
|
31
|
+
end
|
32
|
+
|
33
|
+
def buffer_size
|
34
|
+
@threads_count * 4
|
35
|
+
end
|
36
|
+
|
37
|
+
def percent_complete
|
38
|
+
if @size_total.to_i > 0
|
39
|
+
(100.0 * @size_processed / @size_total).round(1)
|
40
|
+
else
|
41
|
+
0.0
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def verify!(file_name_to)
|
46
|
+
return if @status == 'error'
|
47
|
+
|
36
48
|
test_lines = []
|
37
49
|
begin
|
38
50
|
line_n = 0
|
39
|
-
CSV.
|
51
|
+
CSV.foreach(file_name) do |line|
|
40
52
|
if line && line.size > 0
|
41
53
|
if line_n < 5
|
42
54
|
test_lines << line
|
55
|
+
else
|
56
|
+
break
|
43
57
|
end
|
44
58
|
|
45
59
|
line_n += 1
|
@@ -51,56 +65,66 @@ module Britebox
|
|
51
65
|
|
52
66
|
autoconfigure(test_lines) || return
|
53
67
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
@lines_total = parsed_lines.count
|
58
|
-
@processed_lines = Array.new(@lines_total)
|
59
|
-
@lines_total -= 1 if is_header_row?(parsed_lines.first)
|
68
|
+
# Block processing if some another FileJob processing lines
|
69
|
+
@queue.pop if @queue
|
60
70
|
|
71
|
+
@status = 'verifying'
|
61
72
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
73
|
+
@in_buffer = Queue.new
|
74
|
+
@out_buffer = ExportBuffer.new(file_name_to, @header_row, @col_separator)
|
75
|
+
|
76
|
+
@threads = []
|
77
|
+
@threads_count.times do
|
78
|
+
@threads << Thread.new do
|
79
|
+
loop do
|
80
|
+
item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
|
81
|
+
break if item.nil?
|
82
|
+
|
83
|
+
email = item[:line][@email_index]
|
84
|
+
if email.to_s.match(EMAIL_PATTERN)
|
85
|
+
begin
|
86
|
+
contact = @brite_client.contacts.create(email: email)
|
87
|
+
contact.verify!
|
88
|
+
contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
|
89
|
+
rescue
|
90
|
+
contact_status = ['error', false, false]
|
91
|
+
end
|
92
|
+
else
|
93
|
+
contact_status = ['invalid', false, false]
|
94
|
+
end
|
66
95
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
contact.verify!
|
72
|
-
# ['email_status', 'disposable', 'role_account']
|
73
|
-
contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
|
74
|
-
rescue Exception => ex
|
75
|
-
contact_status = ['error', false, false]
|
96
|
+
@semaphore.synchronize do
|
97
|
+
@out_buffer << {n: item[:n], line: item[:line] + contact_status}
|
98
|
+
@size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
|
99
|
+
end
|
76
100
|
end
|
77
|
-
# Store processed file
|
78
|
-
@processed_lines[idx] = line + contact_status
|
79
101
|
end
|
80
102
|
end
|
81
103
|
|
82
|
-
|
83
|
-
|
104
|
+
idx = 0
|
105
|
+
CSV.foreach(file_name, col_sep: @col_separator) do |line|
|
106
|
+
if idx == 0 && is_header_row?(line)
|
107
|
+
@size_processed += CSV.generate_line(line, col_sep: @col_separator).size
|
108
|
+
next
|
109
|
+
end
|
110
|
+
next if line.nil? || line.size == 0
|
111
|
+
# Throttle file reading
|
112
|
+
sleep(0.1) while @in_buffer.size > buffer_size
|
84
113
|
|
85
|
-
|
86
|
-
|
114
|
+
@in_buffer << {n: idx, line: line}
|
115
|
+
idx += 1
|
116
|
+
end
|
87
117
|
|
88
|
-
@
|
118
|
+
@threads.each{ |m| m.join }
|
89
119
|
|
90
|
-
|
91
|
-
|
120
|
+
@out_buffer.flush_backlog
|
121
|
+
@out_buffer.close
|
92
122
|
|
93
|
-
|
94
|
-
file = File.new(file_name, "w+")
|
123
|
+
@status = 'complete'
|
95
124
|
|
96
|
-
|
97
|
-
@
|
98
|
-
next if line.nil?
|
99
|
-
file.write CSV.generate_line(line, col_sep: @col_separator)
|
100
|
-
end
|
125
|
+
# Release the lock
|
126
|
+
@queue.push(:flag) if @queue
|
101
127
|
|
102
|
-
file.close
|
103
|
-
@status = 'complete'
|
104
128
|
true
|
105
129
|
end
|
106
130
|
|
@@ -128,11 +152,16 @@ module Britebox
|
|
128
152
|
|
129
153
|
|
130
154
|
def autoconfigure(test_lines)
|
155
|
+
if test_lines.nil? || test_lines.size == 0
|
156
|
+
report_error! 'Column separator could not be determined'
|
157
|
+
return
|
158
|
+
end
|
159
|
+
|
131
160
|
@col_separator = nil
|
132
161
|
@header_row = nil
|
133
162
|
@email_index = nil
|
134
163
|
|
135
|
-
if test_lines.
|
164
|
+
if test_lines[0].size > 1
|
136
165
|
@col_separator = ','
|
137
166
|
else
|
138
167
|
COL_SEPARATORS.each do |sep|
|
data/lib/britebox/file_job_pool.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Britebox
|
2
2
|
# Keep current status for all processing FileJobs
|
3
3
|
class FileJobPool
|
4
|
+
include Britebox::Helpers
|
5
|
+
|
4
6
|
attr_reader :file_jobs
|
5
7
|
|
6
8
|
SPINNERS = ["|", "/", "—", "\\"]
|
@@ -8,10 +10,14 @@ module Britebox
|
|
8
10
|
|
9
11
|
def initialize(num_threads = nil)
|
10
12
|
num_threads ||= THREAD_NUM_DEFAULT
|
13
|
+
@num_threads = num_threads
|
14
|
+
|
15
|
+
# Only single FileJob can own :flag & process own lines
|
16
|
+
@queue = Queue.new
|
17
|
+
@queue << :flag
|
18
|
+
|
11
19
|
@file_jobs = []
|
12
20
|
@refresh_number = 0
|
13
|
-
@fj_pool = ThreadStorm.new size: num_threads
|
14
|
-
@lines_pool = ThreadStorm.new size: num_threads
|
15
21
|
end
|
16
22
|
|
17
23
|
def add(file_job)
|
@@ -23,16 +29,19 @@ module Britebox
|
|
23
29
|
@file_jobs.map{ |fj| fj.file_name }.include? file_name
|
24
30
|
end
|
25
31
|
|
26
|
-
def process_file!(file,
|
27
|
-
file_name = File.expand_path(file,
|
32
|
+
def process_file!(file, in_dir, out_dir, brite_client, fj_options = {})
|
33
|
+
file_name = File.expand_path(file, in_dir)
|
28
34
|
|
29
35
|
# Do not process same files twice
|
30
36
|
return if self.include? file_name
|
31
37
|
|
32
|
-
|
38
|
+
fj_options[:queue] = @queue
|
39
|
+
fj_options[:threads] = @num_threads
|
40
|
+
|
41
|
+
fj = FileJob.new(file_name, brite_client, fj_options)
|
33
42
|
self.add(fj)
|
34
|
-
|
35
|
-
fj.verify!
|
43
|
+
Thread.new do
|
44
|
+
fj.verify!(File.expand_path(file, out_dir))
|
36
45
|
|
37
46
|
case fj.status
|
38
47
|
when 'error'
|
@@ -41,10 +50,8 @@ module Britebox
|
|
41
50
|
File.open(File.expand_path(err_name + '.log', out_dir), 'w+') do |f|
|
42
51
|
f.write fj.error # TODO: add extended error log
|
43
52
|
end
|
44
|
-
when '
|
45
|
-
|
46
|
-
File.delete file_name
|
47
|
-
end
|
53
|
+
when 'complete'
|
54
|
+
File.delete file_name
|
48
55
|
else
|
49
56
|
raise "unexpected error, status: #{fj.status}"
|
50
57
|
end
|
@@ -61,20 +68,23 @@ module Britebox
|
|
61
68
|
print "\r#{clear}#{up}" * @prev_buffer_height
|
62
69
|
end
|
63
70
|
|
64
|
-
buffer = "\nFiles
|
71
|
+
buffer = "\nFiles in list: #{@file_jobs.count}"
|
65
72
|
|
66
73
|
@file_jobs.each do |fj|
|
67
74
|
fname = File.basename(fj.file_name)
|
68
75
|
if fname.length > 20
|
69
76
|
fname = fname[0..16] + '...'
|
70
77
|
end
|
78
|
+
|
71
79
|
if fj.status == 'error'
|
72
80
|
status_str = fj.error
|
81
|
+
elsif fj.status == 'complete'
|
82
|
+
status_str = "100.0 %"
|
73
83
|
else
|
74
|
-
status_str = "#{
|
84
|
+
status_str = "#{fj.percent_complete} %".rjust(7)
|
75
85
|
end
|
76
86
|
|
77
|
-
buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{status_str}"
|
87
|
+
buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{human_size(fj.size_total).rjust(8)} | #{status_str}"
|
78
88
|
end
|
79
89
|
|
80
90
|
buffer << "\n #{SPINNERS[@refresh_number % 4]}\n"
|
data/lib/britebox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: britebox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Shapiotko
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: brite-api
|
@@ -25,20 +25,6 @@ dependencies:
|
|
25
25
|
- - '>='
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: 0.0.1
|
28
|
-
- !ruby/object:Gem::Dependency
|
29
|
-
name: thread_storm
|
30
|
-
requirement: !ruby/object:Gem::Requirement
|
31
|
-
requirements:
|
32
|
-
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: 0.7.1
|
35
|
-
type: :runtime
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: !ruby/object:Gem::Requirement
|
38
|
-
requirements:
|
39
|
-
- - ~>
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version: 0.7.1
|
42
28
|
- !ruby/object:Gem::Dependency
|
43
29
|
name: listen
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,8 +48,10 @@ extensions: []
|
|
62
48
|
extra_rdoc_files: []
|
63
49
|
files:
|
64
50
|
- lib/britebox/cli.rb
|
51
|
+
- lib/britebox/export_buffer.rb
|
65
52
|
- lib/britebox/file_job.rb
|
66
53
|
- lib/britebox/file_job_pool.rb
|
54
|
+
- lib/britebox/helpers.rb
|
67
55
|
- lib/britebox/version.rb
|
68
56
|
- lib/britebox.rb
|
69
57
|
- README.md
|