britebox 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/britebox.rb +4 -1
- data/lib/britebox/file_job.rb +72 -213
- data/lib/britebox/file_job_timer.rb +47 -0
- data/lib/britebox/format_recognizer.rb +144 -0
- data/lib/britebox/lines_verifier.rb +79 -0
- data/lib/britebox/version.rb +1 -1
- data/resources/assets/javascripts/compiled-coffee.js +5 -3
- data/resources/assets/javascripts/controllers/file_jobs_ctrl.coffee +3 -2
- data/resources/assets/javascripts/controllers/settings_ctrl.coffee +1 -1
- data/resources/public/javascripts/application.js +5 -3
- data/resources/public/javascripts/bootstrap-3.0.0.min.js +6 -0
- data/resources/public/stylesheets/bootstrap-3.0.0.min.css +9 -0
- data/resources/views/_settings_modal.erb +1 -1
- data/resources/views/index.erb +5 -8
- metadata +7 -5
- data/resources/assets/images/screenshot.png +0 -0
- data/resources/public/javascripts/bootstrap-3.0.0.RC1.min.js +0 -6
- data/resources/public/stylesheets/bootstrap-3.0.0.RC1.min.css +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0949a7ddc1540e0c8374818c3647930eb3560ded
|
4
|
+
data.tar.gz: 24c7939e3c0ca1e318ea9ff8af22c485649558b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0a6695c78f1daed658d42c1826cb7ad5d43efdb9f3fe35bc7e371a6804e726f5c7d05a47f7a98722a897fd6c3a8efb028165367528d93aa0aa22d4a022d9f3f
|
7
|
+
data.tar.gz: b71b27e62b1877698515972d0afe8935edbfd262cac51c2087193fdcd83acfa271eac77ba93e7d6535a20f6ed09fcc46cb8b381c7002ac0e7c5e4158156ac2eb
|
data/lib/britebox.rb
CHANGED
@@ -3,8 +3,11 @@ require 'britebox/version'
|
|
3
3
|
require 'britebox/helpers'
|
4
4
|
require 'britebox/config'
|
5
5
|
require 'britebox/file_job'
|
6
|
+
require 'britebox/file_job_timer'
|
6
7
|
require 'britebox/file_job_pool'
|
7
8
|
require 'britebox/export_buffer'
|
8
9
|
require 'britebox/event_log'
|
9
10
|
require 'britebox/web_ui'
|
10
|
-
require 'britebox/filtered_error_io'
|
11
|
+
require 'britebox/filtered_error_io'
|
12
|
+
require 'britebox/format_recognizer'
|
13
|
+
require 'britebox/lines_verifier'
|
data/lib/britebox/file_job.rb
CHANGED
@@ -4,11 +4,10 @@ require 'csv'
|
|
4
4
|
module Britebox
|
5
5
|
class FileJob
|
6
6
|
|
7
|
-
attr_reader :file_name, :threads_count, :error, :status, :size_total,
|
8
|
-
:
|
7
|
+
attr_reader :file_name, :threads_count, :error, :status, :size_total,
|
8
|
+
:timer, :queue, :semaphore, :brite_client
|
9
9
|
|
10
|
-
|
11
|
-
COL_SEPARATORS = [";", "|", "\t"]
|
10
|
+
attr_accessor :size_processed
|
12
11
|
|
13
12
|
def initialize(file_name, brite_client, options = {})
|
14
13
|
@file_name = file_name
|
@@ -21,6 +20,9 @@ module Britebox
|
|
21
20
|
|
22
21
|
@size_processed = 0
|
23
22
|
|
23
|
+
@semaphore = Mutex.new
|
24
|
+
@timer = FileJobTimer.new
|
25
|
+
|
24
26
|
unless File.exist?(file_name)
|
25
27
|
report_error!("File not found")
|
26
28
|
return
|
@@ -29,8 +31,18 @@ module Britebox
|
|
29
31
|
report_error!("File is empty")
|
30
32
|
return
|
31
33
|
end
|
34
|
+
end
|
32
35
|
|
33
|
-
|
36
|
+
def started_at
|
37
|
+
@timer.started_at
|
38
|
+
end
|
39
|
+
|
40
|
+
def processed_at
|
41
|
+
@timer.ended_at
|
42
|
+
end
|
43
|
+
|
44
|
+
def duration
|
45
|
+
@timer.duration
|
34
46
|
end
|
35
47
|
|
36
48
|
def as_json
|
@@ -41,8 +53,8 @@ module Britebox
|
|
41
53
|
threads: @threads_count,
|
42
54
|
size_total: @size_total,
|
43
55
|
size_processed: @size_processed,
|
44
|
-
started_at:
|
45
|
-
processed_at:
|
56
|
+
started_at: started_at,
|
57
|
+
processed_at: processed_at,
|
46
58
|
duration: duration,
|
47
59
|
percent_complete: percent_complete,
|
48
60
|
error: @error
|
@@ -57,13 +69,6 @@ module Britebox
|
|
57
69
|
@threads_count * 4
|
58
70
|
end
|
59
71
|
|
60
|
-
def duration
|
61
|
-
if @started_at
|
62
|
-
end_time = @processed_at || Time.now
|
63
|
-
end_time - @started_at
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
72
|
def percent_complete
|
68
73
|
if @size_total.to_i > 0
|
69
74
|
(100.0 * @size_processed / @size_total).round(1)
|
@@ -94,14 +99,31 @@ module Britebox
|
|
94
99
|
|
95
100
|
def status=(new_status)
|
96
101
|
return if status == new_status
|
102
|
+
return if verifying? && new_status == 'pending' # avoid double-clicking
|
103
|
+
|
104
|
+
if new_status == 'verifying'
|
105
|
+
raise("status 'verifying' can't be set, use 'pending' instead")
|
106
|
+
end
|
97
107
|
|
98
108
|
# Release processing slot
|
99
|
-
if
|
100
|
-
|
109
|
+
if new_status == 'cancelled'
|
110
|
+
release_flag
|
101
111
|
end
|
102
112
|
|
103
|
-
if new_status == '
|
104
|
-
|
113
|
+
if verifying? && new_status == 'paused'
|
114
|
+
release_flag
|
115
|
+
end
|
116
|
+
|
117
|
+
if new_status == 'pending'
|
118
|
+
Thread.new do
|
119
|
+
while @getting_flag
|
120
|
+
sleep(0.1)
|
121
|
+
end
|
122
|
+
get_flag
|
123
|
+
@semaphore.synchronize do
|
124
|
+
@status = 'verifying'
|
125
|
+
end
|
126
|
+
end
|
105
127
|
end
|
106
128
|
|
107
129
|
@status = new_status
|
@@ -110,106 +132,36 @@ module Britebox
|
|
110
132
|
def verify!(file_name_to)
|
111
133
|
return if error?
|
112
134
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
test_lines << line
|
120
|
-
else
|
121
|
-
break
|
122
|
-
end
|
123
|
-
|
124
|
-
line_n += 1
|
125
|
-
end
|
126
|
-
end
|
127
|
-
rescue Exception => ex
|
128
|
-
report_error!(ex.message.to_s) and return
|
135
|
+
recognizer = FormatRecognizer.new(@file_name)
|
136
|
+
if recognizer.recognize!
|
137
|
+
opts = recognizer.opts
|
138
|
+
else
|
139
|
+
report_error! recognizer.error
|
140
|
+
return
|
129
141
|
end
|
130
142
|
|
131
|
-
autoconfigure(test_lines) || return
|
132
143
|
|
133
144
|
# Block processing if some another FileJob processing lines
|
134
|
-
|
145
|
+
loop do
|
146
|
+
break if verifying?
|
147
|
+
get_flag
|
135
148
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
@threads = []
|
143
|
-
@threads_count.times do
|
144
|
-
@threads << Thread.new do
|
145
|
-
loop do
|
146
|
-
# Pause all processing
|
147
|
-
loop do
|
148
|
-
break unless paused?
|
149
|
-
sleep(0.1)
|
150
|
-
end
|
151
|
-
|
152
|
-
break if cancelled?
|
153
|
-
|
154
|
-
# Wait for processing slot
|
155
|
-
if pending?
|
156
|
-
@semaphore.synchronize do
|
157
|
-
@queue.pop if @queue && pending?
|
158
|
-
@status = 'verifying'
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
|
163
|
-
break if item.nil?
|
164
|
-
|
165
|
-
email = item[:line][@email_index]
|
166
|
-
if email.to_s.match(EMAIL_PATTERN)
|
167
|
-
if Britebox::Config.simulate
|
168
|
-
# Do not send real requests in this mode
|
169
|
-
sleep(1)
|
170
|
-
contact_status = ['unknown', false, false]
|
171
|
-
else
|
172
|
-
begin
|
173
|
-
contact = @brite_client.contacts.create(email: email)
|
174
|
-
contact.verify!
|
175
|
-
contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
|
176
|
-
rescue
|
177
|
-
contact_status = ['error', false, false]
|
178
|
-
end
|
179
|
-
end
|
180
|
-
else
|
181
|
-
contact_status = ['invalid', false, false]
|
182
|
-
end
|
183
|
-
|
184
|
-
@semaphore.synchronize do
|
185
|
-
@out_buffer << {n: item[:n], line: item[:line] + contact_status}
|
186
|
-
@size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
|
187
|
-
end
|
188
|
-
end
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
idx = 0
|
193
|
-
CSV.foreach(file_name, col_sep: @col_separator) do |line|
|
194
|
-
if idx == 0 && is_header_row?(line)
|
195
|
-
@size_processed += CSV.generate_line(line, col_sep: @col_separator).size
|
196
|
-
next
|
149
|
+
if paused?
|
150
|
+
release_flag
|
151
|
+
sleep 0.3
|
152
|
+
else
|
153
|
+
break
|
197
154
|
end
|
198
|
-
next if line.nil? || line.size == 0
|
199
|
-
# Throttle file reading
|
200
|
-
break if cancelled?
|
201
|
-
sleep(0.1) while @in_buffer.size > buffer_size && !cancelled?
|
202
|
-
|
203
|
-
@in_buffer << {n: idx, line: line}
|
204
|
-
idx += 1
|
205
155
|
end
|
206
156
|
|
207
|
-
|
157
|
+
if pending? || verifying?
|
158
|
+
@timer.start
|
159
|
+
@status = 'verifying'
|
208
160
|
|
209
|
-
|
210
|
-
@out_buffer.close
|
161
|
+
LinesVerifier.new(self).process!(file_name_to, opts)
|
211
162
|
|
212
|
-
|
163
|
+
@timer.stop
|
164
|
+
end
|
213
165
|
|
214
166
|
if cancelled?
|
215
167
|
File.delete file_name_to
|
@@ -217,126 +169,33 @@ module Britebox
|
|
217
169
|
@status = 'complete'
|
218
170
|
end
|
219
171
|
|
220
|
-
|
221
|
-
@queue.push(:flag) if @queue
|
172
|
+
release_flag
|
222
173
|
|
223
174
|
true
|
224
175
|
end
|
225
176
|
|
226
177
|
private
|
227
178
|
|
228
|
-
def
|
229
|
-
|
230
|
-
v = v.first if v.instance_of? Array
|
231
|
-
return false if v.to_s.match(EMAIL_PATTERN)
|
232
|
-
end
|
233
|
-
!row.join(" ").downcase.match("email").nil?
|
234
|
-
end
|
235
|
-
|
236
|
-
def find_email_index(row)
|
237
|
-
row.each_with_index do |value, index|
|
238
|
-
return index if value.to_s.match(EMAIL_PATTERN)
|
239
|
-
end
|
240
|
-
nil
|
241
|
-
end
|
242
|
-
|
243
|
-
def report_error!(text)
|
244
|
-
@status = 'error'
|
245
|
-
@error = text
|
179
|
+
def release_flag
|
180
|
+
@queue.push(:flag) if @queue
|
246
181
|
end
|
247
182
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
report_error! 'Column separator could not be determined'
|
252
|
-
return
|
253
|
-
end
|
254
|
-
|
255
|
-
@col_separator = nil
|
256
|
-
@header_row = nil
|
257
|
-
@email_index = nil
|
258
|
-
|
259
|
-
if test_lines[0].size > 1
|
260
|
-
@col_separator = ','
|
261
|
-
else
|
262
|
-
COL_SEPARATORS.each do |sep|
|
263
|
-
if is_separator?(sep, test_lines)
|
264
|
-
@col_separator = sep
|
265
|
-
break
|
266
|
-
end
|
267
|
-
end
|
183
|
+
def get_flag
|
184
|
+
@semaphore.synchronize do
|
185
|
+
@getting_flag = true
|
268
186
|
end
|
269
187
|
|
270
|
-
|
271
|
-
if @col_separator.nil? && test_lines.first.size == 1 && !is_separator?(' ', test_lines)
|
272
|
-
@col_separator = ','
|
273
|
-
end
|
274
|
-
|
275
|
-
if @col_separator.nil?
|
276
|
-
report_error! 'Column separator could not be determined'
|
277
|
-
return
|
278
|
-
end
|
279
|
-
|
280
|
-
test_rows = test_lines.map do |line|
|
281
|
-
if @col_separator == ","
|
282
|
-
line
|
283
|
-
else
|
284
|
-
line.first.split(@col_separator)
|
285
|
-
end
|
286
|
-
end
|
287
|
-
|
288
|
-
test_rows.each do |row|
|
289
|
-
if (index = find_email_index row)
|
290
|
-
@email_index = index
|
291
|
-
break
|
292
|
-
end
|
293
|
-
end
|
294
|
-
|
295
|
-
if @email_index.nil?
|
296
|
-
report_error! "Email column could not be determined."
|
297
|
-
return
|
298
|
-
end
|
299
|
-
|
300
|
-
configure_header_row(test_rows)
|
301
|
-
|
302
|
-
true
|
303
|
-
end
|
304
|
-
|
305
|
-
|
306
|
-
def configure_header_row(test_rows)
|
307
|
-
plus_headers = ['email_status', 'disposable', 'role_account']
|
188
|
+
@queue.pop if @queue
|
308
189
|
|
309
|
-
|
310
|
-
|
311
|
-
else
|
312
|
-
hr = []
|
313
|
-
test_rows.first.count.times do |i|
|
314
|
-
if i == @email_index
|
315
|
-
hr << "email"
|
316
|
-
else
|
317
|
-
hr << "column_#{i}"
|
318
|
-
end
|
319
|
-
end
|
190
|
+
@semaphore.synchronize do
|
191
|
+
@getting_flag = false
|
320
192
|
end
|
321
|
-
@header_row = hr + plus_headers
|
322
193
|
end
|
323
194
|
|
324
|
-
def
|
325
|
-
|
326
|
-
|
327
|
-
if fcount == 1
|
328
|
-
false
|
329
|
-
else
|
330
|
-
lines.each do |line|
|
331
|
-
cc = line.first.split(sep).size
|
332
|
-
same_count += 1 if cc == fcount
|
333
|
-
end
|
334
|
-
same_count == lines.count
|
335
|
-
end
|
195
|
+
def report_error!(text)
|
196
|
+
@status = 'error'
|
197
|
+
@error = text
|
336
198
|
end
|
337
199
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
200
|
end
|
342
201
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Britebox
  # Stopwatch for a file job that can be paused and resumed.
  #
  # Elapsed time is accumulated ("banked") across run intervals, so time spent
  # paused is not counted. +started_at+ and +ended_at+ are wall-clock Time
  # values meant for display; the elapsed time itself is measured with the
  # monotonic clock so that system clock adjustments cannot make +duration+
  # jump or go negative.
  class FileJobTimer
    # @return [Time, nil] wall-clock time of the most recent #start
    # @return [Time, nil] wall-clock time of the most recent #stop, nil while running
    attr_reader :started_at, :ended_at

    def initialize
      @banked_time = 0
      @is_running = false
    end

    # Starts (or restarts) the timer from zero, discarding banked time.
    def start
      @banked_time = 0
      @ended_at = nil # a freshly started timer must not report a stale end time
      @started_at = Time.now
      @resumed_at = monotonic_now
      @is_running = true
    end

    # @return [Boolean] true while time is being accumulated
    def running?
      @is_running
    end

    # Suspends the timer and banks the time elapsed since the last
    # start/resume. Does nothing when the timer is not running.
    def pause
      return unless running?

      @is_running = false
      @banked_time += (monotonic_now - @resumed_at)
    end

    # Continues a paused timer. Does nothing when the timer is already running.
    def resume
      return if running?

      @ended_at = nil # running again, so there is no end time yet
      @resumed_at = monotonic_now
      @is_running = true
    end

    # Stops the timer and records the wall-clock end time.
    def stop
      pause
      @ended_at = Time.now
    end

    # @return [Numeric] seconds accumulated so far, including the interval
    #   currently in progress when the timer is running
    def duration
      if running?
        @banked_time + (monotonic_now - @resumed_at)
      else
        @banked_time
      end
    end

    private

    # Monotonic timestamp in seconds; only differences between two readings
    # are meaningful.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'csv'

module Britebox
  # Samples the first non-empty rows of a delimited text file and works out
  # how to read it: the column separator, the index of the column holding
  # e-mail addresses, and the header row to write to the output (the file's
  # own header if it has one, otherwise a generated one), extended with the
  # verification result columns.
  #
  # After a successful #recognize! the findings are available in +opts+;
  # after a failed one the reason is available in +error+.
  class FormatRecognizer
    attr_reader :opts, :error

    EMAIL_PATTERN = /(\S+)@(\S+)/
    COL_SEPARATORS = [";", "|", "\t"].freeze
    PLUS_HEADERS = ['email_status', 'disposable', 'role_account'].freeze

    # Number of non-empty rows sampled from the top of the file.
    SAMPLE_SIZE = 5

    # @param file_name [String] path of the file to inspect
    def initialize(file_name)
      @file_name = file_name
      @opts = {col_separator: nil, header_row: nil, email_index: nil}
    end

    # Reads the sample rows and fills +opts+.
    #
    # @return [Boolean] true on success; false when the file cannot be read or
    #   its format cannot be determined (+error+ then holds the message)
    def recognize!
      test_lines = []
      begin
        CSV.foreach(@file_name) do |line|
          next if line.nil? || line.empty?

          test_lines << line
          # Stop as soon as the sample is full; later rows are not needed here.
          break if test_lines.size >= SAMPLE_SIZE
        end
      rescue StandardError => ex
        # StandardError covers I/O, encoding and malformed-CSV errors without
        # swallowing Interrupt, SystemExit or NoMemoryError.
        @error = ex.message.to_s
        return false
      end

      autoconfigure(test_lines)
    end

    # @param row [Array] cells of one row
    # @return [Integer, nil] index of the first cell that looks like an e-mail
    #   address, or nil when no cell does
    def find_email_index(row)
      row.each_with_index do |value, index|
        return index if value.to_s.match(EMAIL_PATTERN)
      end
      nil
    end

    # A row counts as a header when none of its cells looks like an e-mail
    # address and the word "email" appears somewhere in it (case-insensitive).
    #
    # @param row [Array] cells of one row; a cell that is itself an Array is
    #   represented by its first element
    # @return [Boolean]
    def self.is_header_row?(row)
      row.each do |v|
        v = v.first if v.instance_of? Array
        return false if v.to_s.match(EMAIL_PATTERN)
      end
      row.join(" ").downcase.include?("email")
    end

    private

    # Derives separator, e-mail column and header row from the sampled lines.
    # Sets +error+ and returns false when any of them cannot be determined.
    def autoconfigure(test_lines)
      if test_lines.nil? || test_lines.empty?
        @error = 'Column separator could not be determined'
        return false
      end

      @opts[:col_separator] = detect_separator(test_lines)

      if @opts[:col_separator].nil?
        @error = 'Column separator could not be determined'
        return false
      end

      test_rows = test_lines.map do |line|
        if @opts[:col_separator] == ","
          line
        else
          # The sample was parsed with the default comma separator, so the
          # whole physical line sits in the first cell. to_s guards against a
          # nil first cell (e.g. a line that starts with a comma).
          line.first.to_s.split(@opts[:col_separator])
        end
      end

      test_rows.each do |row|
        if (index = find_email_index row)
          @opts[:email_index] = index
          break
        end
      end

      if @opts[:email_index].nil?
        @error = "Email column could not be determined."
        return false
      end

      configure_header_row(test_rows)

      true
    end

    # Picks the column separator for the sampled lines.
    #
    # @return [String, nil] the separator, or nil when none fits
    def detect_separator(test_lines)
      # The comma-based parse already produced several cells: comma it is.
      return ',' if test_lines.first.size > 1

      candidate = COL_SEPARATORS.find { |sep| is_separator?(sep, test_lines) }
      return candidate if candidate

      # Single column file: treat as comma-separated unless the rows split
      # consistently on whitespace, in which case the format is ambiguous.
      ',' unless is_separator?(' ', test_lines)
    end

    # Stores the header row in +opts+: the first sampled row when it is a
    # header, otherwise generated names ("email" for the e-mail column,
    # "column_N" for the rest), followed by the result columns.
    def configure_header_row(test_rows)
      if self.class.is_header_row?(test_rows.first)
        hr = test_rows.first
      else
        hr = []
        test_rows.first.count.times do |i|
          if i == @opts[:email_index]
            hr << "email"
          else
            hr << "column_#{i}"
          end
        end
      end
      @opts[:header_row] = hr + PLUS_HEADERS
    end

    # True when splitting the first cell of every sampled line on +sep+ gives
    # the same number of parts for all lines, and that number is not 1.
    def is_separator?(sep, lines)
      fcount = lines.first.first.to_s.split(sep).size
      return false if fcount == 1

      # to_s: a row such as ",c" parses to [nil, "c"]; a nil first cell simply
      # counts as zero parts instead of raising NoMethodError.
      lines.all? { |line| line.first.to_s.split(sep).size == fcount }
    end
  end
end
|