britebox 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/britebox.rb +4 -1
- data/lib/britebox/file_job.rb +72 -213
- data/lib/britebox/file_job_timer.rb +47 -0
- data/lib/britebox/format_recognizer.rb +144 -0
- data/lib/britebox/lines_verifier.rb +79 -0
- data/lib/britebox/version.rb +1 -1
- data/resources/assets/javascripts/compiled-coffee.js +5 -3
- data/resources/assets/javascripts/controllers/file_jobs_ctrl.coffee +3 -2
- data/resources/assets/javascripts/controllers/settings_ctrl.coffee +1 -1
- data/resources/public/javascripts/application.js +5 -3
- data/resources/public/javascripts/bootstrap-3.0.0.min.js +6 -0
- data/resources/public/stylesheets/bootstrap-3.0.0.min.css +9 -0
- data/resources/views/_settings_modal.erb +1 -1
- data/resources/views/index.erb +5 -8
- metadata +7 -5
- data/resources/assets/images/screenshot.png +0 -0
- data/resources/public/javascripts/bootstrap-3.0.0.RC1.min.js +0 -6
- data/resources/public/stylesheets/bootstrap-3.0.0.RC1.min.css +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0949a7ddc1540e0c8374818c3647930eb3560ded
|
4
|
+
data.tar.gz: 24c7939e3c0ca1e318ea9ff8af22c485649558b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0a6695c78f1daed658d42c1826cb7ad5d43efdb9f3fe35bc7e371a6804e726f5c7d05a47f7a98722a897fd6c3a8efb028165367528d93aa0aa22d4a022d9f3f
|
7
|
+
data.tar.gz: b71b27e62b1877698515972d0afe8935edbfd262cac51c2087193fdcd83acfa271eac77ba93e7d6535a20f6ed09fcc46cb8b381c7002ac0e7c5e4158156ac2eb
|
data/lib/britebox.rb
CHANGED
@@ -3,8 +3,11 @@ require 'britebox/version'
|
|
3
3
|
require 'britebox/helpers'
|
4
4
|
require 'britebox/config'
|
5
5
|
require 'britebox/file_job'
|
6
|
+
require 'britebox/file_job_timer'
|
6
7
|
require 'britebox/file_job_pool'
|
7
8
|
require 'britebox/export_buffer'
|
8
9
|
require 'britebox/event_log'
|
9
10
|
require 'britebox/web_ui'
|
10
|
-
require 'britebox/filtered_error_io'
|
11
|
+
require 'britebox/filtered_error_io'
|
12
|
+
require 'britebox/format_recognizer'
|
13
|
+
require 'britebox/lines_verifier'
|
data/lib/britebox/file_job.rb
CHANGED
@@ -4,11 +4,10 @@ require 'csv'
|
|
4
4
|
module Britebox
|
5
5
|
class FileJob
|
6
6
|
|
7
|
-
attr_reader :file_name, :threads_count, :error, :status, :size_total,
|
8
|
-
:
|
7
|
+
attr_reader :file_name, :threads_count, :error, :status, :size_total,
|
8
|
+
:timer, :queue, :semaphore, :brite_client
|
9
9
|
|
10
|
-
|
11
|
-
COL_SEPARATORS = [";", "|", "\t"]
|
10
|
+
attr_accessor :size_processed
|
12
11
|
|
13
12
|
def initialize(file_name, brite_client, options = {})
|
14
13
|
@file_name = file_name
|
@@ -21,6 +20,9 @@ module Britebox
|
|
21
20
|
|
22
21
|
@size_processed = 0
|
23
22
|
|
23
|
+
@semaphore = Mutex.new
|
24
|
+
@timer = FileJobTimer.new
|
25
|
+
|
24
26
|
unless File.exist?(file_name)
|
25
27
|
report_error!("File not found")
|
26
28
|
return
|
@@ -29,8 +31,18 @@ module Britebox
|
|
29
31
|
report_error!("File is empty")
|
30
32
|
return
|
31
33
|
end
|
34
|
+
end
|
32
35
|
|
33
|
-
|
36
|
+
def started_at
|
37
|
+
@timer.started_at
|
38
|
+
end
|
39
|
+
|
40
|
+
def processed_at
|
41
|
+
@timer.ended_at
|
42
|
+
end
|
43
|
+
|
44
|
+
def duration
|
45
|
+
@timer.duration
|
34
46
|
end
|
35
47
|
|
36
48
|
def as_json
|
@@ -41,8 +53,8 @@ module Britebox
|
|
41
53
|
threads: @threads_count,
|
42
54
|
size_total: @size_total,
|
43
55
|
size_processed: @size_processed,
|
44
|
-
started_at:
|
45
|
-
processed_at:
|
56
|
+
started_at: started_at,
|
57
|
+
processed_at: processed_at,
|
46
58
|
duration: duration,
|
47
59
|
percent_complete: percent_complete,
|
48
60
|
error: @error
|
@@ -57,13 +69,6 @@ module Britebox
|
|
57
69
|
@threads_count * 4
|
58
70
|
end
|
59
71
|
|
60
|
-
def duration
|
61
|
-
if @started_at
|
62
|
-
end_time = @processed_at || Time.now
|
63
|
-
end_time - @started_at
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
72
|
def percent_complete
|
68
73
|
if @size_total.to_i > 0
|
69
74
|
(100.0 * @size_processed / @size_total).round(1)
|
@@ -94,14 +99,31 @@ module Britebox
|
|
94
99
|
|
95
100
|
def status=(new_status)
|
96
101
|
return if status == new_status
|
102
|
+
return if verifying? && new_status == 'pending' # avoid double-clicking
|
103
|
+
|
104
|
+
if new_status == 'verifying'
|
105
|
+
raise("status 'verifying' can't be set, use 'pending' instead")
|
106
|
+
end
|
97
107
|
|
98
108
|
# Release processing slot
|
99
|
-
if
|
100
|
-
|
109
|
+
if new_status == 'cancelled'
|
110
|
+
release_flag
|
101
111
|
end
|
102
112
|
|
103
|
-
if new_status == '
|
104
|
-
|
113
|
+
if verifying? && new_status == 'paused'
|
114
|
+
release_flag
|
115
|
+
end
|
116
|
+
|
117
|
+
if new_status == 'pending'
|
118
|
+
Thread.new do
|
119
|
+
while @getting_flag
|
120
|
+
sleep(0.1)
|
121
|
+
end
|
122
|
+
get_flag
|
123
|
+
@semaphore.synchronize do
|
124
|
+
@status = 'verifying'
|
125
|
+
end
|
126
|
+
end
|
105
127
|
end
|
106
128
|
|
107
129
|
@status = new_status
|
@@ -110,106 +132,36 @@ module Britebox
|
|
110
132
|
def verify!(file_name_to)
|
111
133
|
return if error?
|
112
134
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
test_lines << line
|
120
|
-
else
|
121
|
-
break
|
122
|
-
end
|
123
|
-
|
124
|
-
line_n += 1
|
125
|
-
end
|
126
|
-
end
|
127
|
-
rescue Exception => ex
|
128
|
-
report_error!(ex.message.to_s) and return
|
135
|
+
recognizer = FormatRecognizer.new(@file_name)
|
136
|
+
if recognizer.recognize!
|
137
|
+
opts = recognizer.opts
|
138
|
+
else
|
139
|
+
report_error! recognizer.error
|
140
|
+
return
|
129
141
|
end
|
130
142
|
|
131
|
-
autoconfigure(test_lines) || return
|
132
143
|
|
133
144
|
# Block processing if some another FileJob processing lines
|
134
|
-
|
145
|
+
loop do
|
146
|
+
break if verifying?
|
147
|
+
get_flag
|
135
148
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
@threads = []
|
143
|
-
@threads_count.times do
|
144
|
-
@threads << Thread.new do
|
145
|
-
loop do
|
146
|
-
# Pause all processing
|
147
|
-
loop do
|
148
|
-
break unless paused?
|
149
|
-
sleep(0.1)
|
150
|
-
end
|
151
|
-
|
152
|
-
break if cancelled?
|
153
|
-
|
154
|
-
# Wait for processing slot
|
155
|
-
if pending?
|
156
|
-
@semaphore.synchronize do
|
157
|
-
@queue.pop if @queue && pending?
|
158
|
-
@status = 'verifying'
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
|
163
|
-
break if item.nil?
|
164
|
-
|
165
|
-
email = item[:line][@email_index]
|
166
|
-
if email.to_s.match(EMAIL_PATTERN)
|
167
|
-
if Britebox::Config.simulate
|
168
|
-
# Do not send real requests in this mode
|
169
|
-
sleep(1)
|
170
|
-
contact_status = ['unknown', false, false]
|
171
|
-
else
|
172
|
-
begin
|
173
|
-
contact = @brite_client.contacts.create(email: email)
|
174
|
-
contact.verify!
|
175
|
-
contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
|
176
|
-
rescue
|
177
|
-
contact_status = ['error', false, false]
|
178
|
-
end
|
179
|
-
end
|
180
|
-
else
|
181
|
-
contact_status = ['invalid', false, false]
|
182
|
-
end
|
183
|
-
|
184
|
-
@semaphore.synchronize do
|
185
|
-
@out_buffer << {n: item[:n], line: item[:line] + contact_status}
|
186
|
-
@size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
|
187
|
-
end
|
188
|
-
end
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
idx = 0
|
193
|
-
CSV.foreach(file_name, col_sep: @col_separator) do |line|
|
194
|
-
if idx == 0 && is_header_row?(line)
|
195
|
-
@size_processed += CSV.generate_line(line, col_sep: @col_separator).size
|
196
|
-
next
|
149
|
+
if paused?
|
150
|
+
release_flag
|
151
|
+
sleep 0.3
|
152
|
+
else
|
153
|
+
break
|
197
154
|
end
|
198
|
-
next if line.nil? || line.size == 0
|
199
|
-
# Throttle file reading
|
200
|
-
break if cancelled?
|
201
|
-
sleep(0.1) while @in_buffer.size > buffer_size && !cancelled?
|
202
|
-
|
203
|
-
@in_buffer << {n: idx, line: line}
|
204
|
-
idx += 1
|
205
155
|
end
|
206
156
|
|
207
|
-
|
157
|
+
if pending? || verifying?
|
158
|
+
@timer.start
|
159
|
+
@status = 'verifying'
|
208
160
|
|
209
|
-
|
210
|
-
@out_buffer.close
|
161
|
+
LinesVerifier.new(self).process!(file_name_to, opts)
|
211
162
|
|
212
|
-
|
163
|
+
@timer.stop
|
164
|
+
end
|
213
165
|
|
214
166
|
if cancelled?
|
215
167
|
File.delete file_name_to
|
@@ -217,126 +169,33 @@ module Britebox
|
|
217
169
|
@status = 'complete'
|
218
170
|
end
|
219
171
|
|
220
|
-
|
221
|
-
@queue.push(:flag) if @queue
|
172
|
+
release_flag
|
222
173
|
|
223
174
|
true
|
224
175
|
end
|
225
176
|
|
226
177
|
private
|
227
178
|
|
228
|
-
def
|
229
|
-
|
230
|
-
v = v.first if v.instance_of? Array
|
231
|
-
return false if v.to_s.match(EMAIL_PATTERN)
|
232
|
-
end
|
233
|
-
!row.join(" ").downcase.match("email").nil?
|
234
|
-
end
|
235
|
-
|
236
|
-
def find_email_index(row)
|
237
|
-
row.each_with_index do |value, index|
|
238
|
-
return index if value.to_s.match(EMAIL_PATTERN)
|
239
|
-
end
|
240
|
-
nil
|
241
|
-
end
|
242
|
-
|
243
|
-
def report_error!(text)
|
244
|
-
@status = 'error'
|
245
|
-
@error = text
|
179
|
+
def release_flag
|
180
|
+
@queue.push(:flag) if @queue
|
246
181
|
end
|
247
182
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
report_error! 'Column separator could not be determined'
|
252
|
-
return
|
253
|
-
end
|
254
|
-
|
255
|
-
@col_separator = nil
|
256
|
-
@header_row = nil
|
257
|
-
@email_index = nil
|
258
|
-
|
259
|
-
if test_lines[0].size > 1
|
260
|
-
@col_separator = ','
|
261
|
-
else
|
262
|
-
COL_SEPARATORS.each do |sep|
|
263
|
-
if is_separator?(sep, test_lines)
|
264
|
-
@col_separator = sep
|
265
|
-
break
|
266
|
-
end
|
267
|
-
end
|
183
|
+
def get_flag
|
184
|
+
@semaphore.synchronize do
|
185
|
+
@getting_flag = true
|
268
186
|
end
|
269
187
|
|
270
|
-
|
271
|
-
if @col_separator.nil? && test_lines.first.size == 1 && !is_separator?(' ', test_lines)
|
272
|
-
@col_separator = ','
|
273
|
-
end
|
274
|
-
|
275
|
-
if @col_separator.nil?
|
276
|
-
report_error! 'Column separator could not be determined'
|
277
|
-
return
|
278
|
-
end
|
279
|
-
|
280
|
-
test_rows = test_lines.map do |line|
|
281
|
-
if @col_separator == ","
|
282
|
-
line
|
283
|
-
else
|
284
|
-
line.first.split(@col_separator)
|
285
|
-
end
|
286
|
-
end
|
287
|
-
|
288
|
-
test_rows.each do |row|
|
289
|
-
if (index = find_email_index row)
|
290
|
-
@email_index = index
|
291
|
-
break
|
292
|
-
end
|
293
|
-
end
|
294
|
-
|
295
|
-
if @email_index.nil?
|
296
|
-
report_error! "Email column could not be determined."
|
297
|
-
return
|
298
|
-
end
|
299
|
-
|
300
|
-
configure_header_row(test_rows)
|
301
|
-
|
302
|
-
true
|
303
|
-
end
|
304
|
-
|
305
|
-
|
306
|
-
def configure_header_row(test_rows)
|
307
|
-
plus_headers = ['email_status', 'disposable', 'role_account']
|
188
|
+
@queue.pop if @queue
|
308
189
|
|
309
|
-
|
310
|
-
|
311
|
-
else
|
312
|
-
hr = []
|
313
|
-
test_rows.first.count.times do |i|
|
314
|
-
if i == @email_index
|
315
|
-
hr << "email"
|
316
|
-
else
|
317
|
-
hr << "column_#{i}"
|
318
|
-
end
|
319
|
-
end
|
190
|
+
@semaphore.synchronize do
|
191
|
+
@getting_flag = false
|
320
192
|
end
|
321
|
-
@header_row = hr + plus_headers
|
322
193
|
end
|
323
194
|
|
324
|
-
def
|
325
|
-
|
326
|
-
|
327
|
-
if fcount == 1
|
328
|
-
false
|
329
|
-
else
|
330
|
-
lines.each do |line|
|
331
|
-
cc = line.first.split(sep).size
|
332
|
-
same_count += 1 if cc == fcount
|
333
|
-
end
|
334
|
-
same_count == lines.count
|
335
|
-
end
|
195
|
+
def report_error!(text)
|
196
|
+
@status = 'error'
|
197
|
+
@error = text
|
336
198
|
end
|
337
199
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
200
|
end
|
342
201
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Britebox
  # Stopwatch used to time a file job.
  #
  # The timer accumulates ("banks") the time spent in each running segment so
  # that paused intervals are excluded from #duration. Wall-clock timestamps of
  # the first start and of the final stop are exposed as #started_at and
  # #ended_at.
  class FileJobTimer
    # @return [Time, nil] when #start was last called, nil if never started
    # @return [Time, nil] when the timer was stopped, nil while it has not
    #   been stopped (or after it has been started/resumed again)
    attr_reader :started_at, :ended_at

    def initialize
      @banked_time = 0
      @is_running = false
    end

    # Begins a fresh measurement, discarding any previously banked time.
    def start
      @banked_time = 0
      # A restarted timer has not ended yet; drop the end stamp of a previous
      # run so #ended_at never reports a stale value.
      @ended_at = nil
      @started_at = @resumed_at = Time.now
      @is_running = true
    end

    # @return [Boolean] true while a segment is being measured
    def running?
      @is_running
    end

    # Suspends the measurement and banks the elapsed segment.
    # Does nothing when the timer is not running.
    def pause
      return unless running?

      @is_running = false
      @banked_time += (Time.now - @resumed_at)
    end

    # Continues the measurement after #pause (or #stop).
    # Does nothing when the timer is already running.
    def resume
      return if running?

      # Running again means the timer is no longer "ended".
      @ended_at = nil
      @resumed_at = Time.now
      @is_running = true
    end

    # Ends the measurement and records the end timestamp.
    #
    # Idempotent: calling it again on an already stopped timer keeps the first
    # end timestamp, so #ended_at stays consistent with #duration.
    #
    # @return [Time] the end timestamp
    def stop
      pause
      @ended_at ||= Time.now
    end

    # @return [Numeric] seconds spent running so far, excluding paused time
    def duration
      if running?
        @banked_time + (Time.now - @resumed_at)
      else
        @banked_time
      end
    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
module Britebox
  # Guesses the layout of a delimited text file from its first few lines.
  #
  # After a successful #recognize!, #opts holds:
  #   :col_separator - the detected column separator
  #   :header_row    - the header cells (taken from the file or generated)
  #                    followed by PLUS_HEADERS
  #   :email_index   - index of the first column that looks like an email
  # On failure #error holds a human readable message.
  class FormatRecognizer
    attr_reader :opts, :error

    EMAIL_PATTERN = /(\S+)@(\S+)/
    COL_SEPARATORS = [";", "|", "\t"].freeze
    PLUS_HEADERS = ['email_status', 'disposable', 'role_account'].freeze

    # Number of non-empty lines sampled from the top of the file.
    SAMPLE_SIZE = 5

    # @param file_name [String] path of the file to inspect
    def initialize(file_name)
      @file_name = file_name
      @opts = {col_separator: nil, header_row: nil, email_index: nil}
    end

    # Samples the file and fills #opts.
    #
    # @return [Boolean] true when the format was recognized; false otherwise,
    #   in which case #error describes the problem
    def recognize!
      test_lines = []
      begin
        CSV.foreach(@file_name) do |line|
          next if line.nil? || line.empty?

          test_lines << line
          # Stop as soon as the sample is complete instead of parsing one
          # more line that would be thrown away.
          break if test_lines.size >= SAMPLE_SIZE
        end
      rescue StandardError => ex
        # StandardError covers IO, encoding and malformed-CSV failures without
        # swallowing signals or SystemExit the way `rescue Exception` would.
        @error = ex.message.to_s
        return false
      end

      autoconfigure(test_lines)
    end

    # @param row [Array] cells of one row
    # @return [Integer, nil] index of the first cell matching EMAIL_PATTERN
    def find_email_index(row)
      row.each_with_index do |value, index|
        return index if value.to_s.match(EMAIL_PATTERN)
      end
      nil
    end

    # A row is treated as a header when none of its cells looks like an email
    # address and the word "email" appears somewhere in it.
    #
    # @param row [Array] cells of one row
    # @return [Boolean]
    def self.is_header_row?(row)
      row.each do |v|
        v = v.first if v.instance_of? Array
        return false if v.to_s.match(EMAIL_PATTERN)
      end
      !row.join(" ").downcase.match("email").nil?
    end

    private

    # Derives separator, email column and header row from the sampled lines.
    # Lines arrive as parsed by CSV with the default comma separator, so a
    # file using another separator shows up as single-cell rows.
    #
    # @return [Boolean] true on success, false (with @error set) otherwise
    def autoconfigure(test_lines)
      if test_lines.nil? || test_lines.size == 0
        @error = 'Column separator could not be determined'
        return false
      end

      if test_lines[0].size > 1
        @opts[:col_separator] = ','
      else
        COL_SEPARATORS.each do |sep|
          if is_separator?(sep, test_lines)
            @opts[:col_separator] = sep
            break
          end
        end
      end

      # Single column file
      if @opts[:col_separator].nil? && test_lines.first.size == 1 && !is_separator?(' ', test_lines)
        @opts[:col_separator] = ','
      end

      if @opts[:col_separator].nil?
        @error = 'Column separator could not be determined'
        return false
      end

      test_rows = test_lines.map do |line|
        if @opts[:col_separator] == ","
          line
        else
          # to_s: CSV yields nil for an empty leading cell
          line.first.to_s.split(@opts[:col_separator])
        end
      end

      test_rows.each do |row|
        if (index = find_email_index row)
          @opts[:email_index] = index
          break
        end
      end

      if @opts[:email_index].nil?
        @error = "Email column could not be determined."
        return false
      end

      configure_header_row(test_rows)

      true
    end

    # Uses the first row as header when it looks like one, otherwise generates
    # "column_N" names with "email" at the detected email index. PLUS_HEADERS
    # is appended in both cases.
    def configure_header_row(test_rows)
      if self.class.is_header_row?(test_rows.first)
        hr = test_rows.first
      else
        hr = []
        test_rows.first.count.times do |i|
          if i == @opts[:email_index]
            hr << "email"
          else
            hr << "column_#{i}"
          end
        end
      end
      @opts[:header_row] = hr + PLUS_HEADERS
    end

    # Checks whether splitting the first cell of every sampled line by +sep+
    # yields the same number (greater than one) of columns on each line.
    def is_separator?(sep, lines)
      # to_s guards against a nil first cell (line starting with a comma),
      # which previously raised NoMethodError outside of any rescue.
      fcount = lines.first.first.to_s.split(sep).size
      return false if fcount == 1

      lines.all? { |line| line.first.to_s.split(sep).size == fcount }
    end
  end
end
|