britebox 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: af52b5e18fb92d49ca360ef3a68e4a5b65825e3c
4
- data.tar.gz: 1c70ba5f08bdadef985eee62dda15a4ff435aa10
3
+ metadata.gz: 0949a7ddc1540e0c8374818c3647930eb3560ded
4
+ data.tar.gz: 24c7939e3c0ca1e318ea9ff8af22c485649558b9
5
5
  SHA512:
6
- metadata.gz: 83d66b045e53371ab19a2f777cd8336474b41f6fbf6bc146c8af0c7e47b07541bf0fb49a02ef80cd3ead29dba115872f64e0b0107a6a6b03e3f3978293f3b7a1
7
- data.tar.gz: 831f8252a4e93a07a4384eacfc88e722feba7b79ce25489ff2fb14b201ba7d010682ff5ab806506d11ef864bd2760f8f9fd686506c82ff029b690d7ebf07bbf1
6
+ metadata.gz: c0a6695c78f1daed658d42c1826cb7ad5d43efdb9f3fe35bc7e371a6804e726f5c7d05a47f7a98722a897fd6c3a8efb028165367528d93aa0aa22d4a022d9f3f
7
+ data.tar.gz: b71b27e62b1877698515972d0afe8935edbfd262cac51c2087193fdcd83acfa271eac77ba93e7d6535a20f6ed09fcc46cb8b381c7002ac0e7c5e4158156ac2eb
@@ -3,8 +3,11 @@ require 'britebox/version'
3
3
  require 'britebox/helpers'
4
4
  require 'britebox/config'
5
5
  require 'britebox/file_job'
6
+ require 'britebox/file_job_timer'
6
7
  require 'britebox/file_job_pool'
7
8
  require 'britebox/export_buffer'
8
9
  require 'britebox/event_log'
9
10
  require 'britebox/web_ui'
10
- require 'britebox/filtered_error_io'
11
+ require 'britebox/filtered_error_io'
12
+ require 'britebox/format_recognizer'
13
+ require 'britebox/lines_verifier'
@@ -4,11 +4,10 @@ require 'csv'
4
4
  module Britebox
5
5
  class FileJob
6
6
 
7
- attr_reader :file_name, :threads_count, :error, :status, :size_total, :size_processed,
8
- :started_at, :processed_at
7
+ attr_reader :file_name, :threads_count, :error, :status, :size_total,
8
+ :timer, :queue, :semaphore, :brite_client
9
9
 
10
- EMAIL_PATTERN = /(\S+)@(\S+)/
11
- COL_SEPARATORS = [";", "|", "\t"]
10
+ attr_accessor :size_processed
12
11
 
13
12
  def initialize(file_name, brite_client, options = {})
14
13
  @file_name = file_name
@@ -21,6 +20,9 @@ module Britebox
21
20
 
22
21
  @size_processed = 0
23
22
 
23
+ @semaphore = Mutex.new
24
+ @timer = FileJobTimer.new
25
+
24
26
  unless File.exist?(file_name)
25
27
  report_error!("File not found")
26
28
  return
@@ -29,8 +31,18 @@ module Britebox
29
31
  report_error!("File is empty")
30
32
  return
31
33
  end
34
+ end
32
35
 
33
- @semaphore = Mutex.new
36
+ def started_at
37
+ @timer.started_at
38
+ end
39
+
40
+ def processed_at
41
+ @timer.ended_at
42
+ end
43
+
44
+ def duration
45
+ @timer.duration
34
46
  end
35
47
 
36
48
  def as_json
@@ -41,8 +53,8 @@ module Britebox
41
53
  threads: @threads_count,
42
54
  size_total: @size_total,
43
55
  size_processed: @size_processed,
44
- started_at: @started_at,
45
- processed_at: @processed_at,
56
+ started_at: started_at,
57
+ processed_at: processed_at,
46
58
  duration: duration,
47
59
  percent_complete: percent_complete,
48
60
  error: @error
@@ -57,13 +69,6 @@ module Britebox
57
69
  @threads_count * 4
58
70
  end
59
71
 
60
- def duration
61
- if @started_at
62
- end_time = @processed_at || Time.now
63
- end_time - @started_at
64
- end
65
- end
66
-
67
72
  def percent_complete
68
73
  if @size_total.to_i > 0
69
74
  (100.0 * @size_processed / @size_total).round(1)
@@ -94,14 +99,31 @@ module Britebox
94
99
 
95
100
  def status=(new_status)
96
101
  return if status == new_status
102
+ return if verifying? && new_status == 'pending' # avoid double-clicking
103
+
104
+ if new_status == 'verifying'
105
+ raise("status 'verifying' can't be set, use 'pending' instead")
106
+ end
97
107
 
98
108
  # Release processing slot
99
- if verifying? && ['paused', 'cancelled'].include?(new_status)
100
- @queue.push(:flag) if @queue
109
+ if new_status == 'cancelled'
110
+ release_flag
101
111
  end
102
112
 
103
- if new_status == 'verifying'
104
- raise("status 'verifying' can't be set, use 'pending' instead")
113
+ if verifying? && new_status == 'paused'
114
+ release_flag
115
+ end
116
+
117
+ if new_status == 'pending'
118
+ Thread.new do
119
+ while @getting_flag
120
+ sleep(0.1)
121
+ end
122
+ get_flag
123
+ @semaphore.synchronize do
124
+ @status = 'verifying'
125
+ end
126
+ end
105
127
  end
106
128
 
107
129
  @status = new_status
@@ -110,106 +132,36 @@ module Britebox
110
132
  def verify!(file_name_to)
111
133
  return if error?
112
134
 
113
- test_lines = []
114
- begin
115
- line_n = 0
116
- CSV.foreach(file_name) do |line|
117
- if line && line.size > 0
118
- if line_n < 5
119
- test_lines << line
120
- else
121
- break
122
- end
123
-
124
- line_n += 1
125
- end
126
- end
127
- rescue Exception => ex
128
- report_error!(ex.message.to_s) and return
135
+ recognizer = FormatRecognizer.new(@file_name)
136
+ if recognizer.recognize!
137
+ opts = recognizer.opts
138
+ else
139
+ report_error! recognizer.error
140
+ return
129
141
  end
130
142
 
131
- autoconfigure(test_lines) || return
132
143
 
133
144
  # Block processing if some another FileJob processing lines
134
- @queue.pop if @queue
145
+ loop do
146
+ break if verifying?
147
+ get_flag
135
148
 
136
- @started_at = Time.now
137
- @status = 'verifying'
138
-
139
- @in_buffer = Queue.new
140
- @out_buffer = ExportBuffer.new(file_name_to, @header_row, @col_separator)
141
-
142
- @threads = []
143
- @threads_count.times do
144
- @threads << Thread.new do
145
- loop do
146
- # Pause all processing
147
- loop do
148
- break unless paused?
149
- sleep(0.1)
150
- end
151
-
152
- break if cancelled?
153
-
154
- # Wait for processing slot
155
- if pending?
156
- @semaphore.synchronize do
157
- @queue.pop if @queue && pending?
158
- @status = 'verifying'
159
- end
160
- end
161
-
162
- item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
163
- break if item.nil?
164
-
165
- email = item[:line][@email_index]
166
- if email.to_s.match(EMAIL_PATTERN)
167
- if Britebox::Config.simulate
168
- # Do not send real requests in this mode
169
- sleep(1)
170
- contact_status = ['unknown', false, false]
171
- else
172
- begin
173
- contact = @brite_client.contacts.create(email: email)
174
- contact.verify!
175
- contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
176
- rescue
177
- contact_status = ['error', false, false]
178
- end
179
- end
180
- else
181
- contact_status = ['invalid', false, false]
182
- end
183
-
184
- @semaphore.synchronize do
185
- @out_buffer << {n: item[:n], line: item[:line] + contact_status}
186
- @size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
187
- end
188
- end
189
- end
190
- end
191
-
192
- idx = 0
193
- CSV.foreach(file_name, col_sep: @col_separator) do |line|
194
- if idx == 0 && is_header_row?(line)
195
- @size_processed += CSV.generate_line(line, col_sep: @col_separator).size
196
- next
149
+ if paused?
150
+ release_flag
151
+ sleep 0.3
152
+ else
153
+ break
197
154
  end
198
- next if line.nil? || line.size == 0
199
- # Throttle file reading
200
- break if cancelled?
201
- sleep(0.1) while @in_buffer.size > buffer_size && !cancelled?
202
-
203
- @in_buffer << {n: idx, line: line}
204
- idx += 1
205
155
  end
206
156
 
207
- @threads.each{ |m| m.join }
157
+ if pending? || verifying?
158
+ @timer.start
159
+ @status = 'verifying'
208
160
 
209
- @out_buffer.flush_backlog
210
- @out_buffer.close
161
+ LinesVerifier.new(self).process!(file_name_to, opts)
211
162
 
212
- @processed_at = Time.now
163
+ @timer.stop
164
+ end
213
165
 
214
166
  if cancelled?
215
167
  File.delete file_name_to
@@ -217,126 +169,33 @@ module Britebox
217
169
  @status = 'complete'
218
170
  end
219
171
 
220
- # Release the lock
221
- @queue.push(:flag) if @queue
172
+ release_flag
222
173
 
223
174
  true
224
175
  end
225
176
 
226
177
  private
227
178
 
228
- def is_header_row?(row)
229
- row.each do |v|
230
- v = v.first if v.instance_of? Array
231
- return false if v.to_s.match(EMAIL_PATTERN)
232
- end
233
- !row.join(" ").downcase.match("email").nil?
234
- end
235
-
236
- def find_email_index(row)
237
- row.each_with_index do |value, index|
238
- return index if value.to_s.match(EMAIL_PATTERN)
239
- end
240
- nil
241
- end
242
-
243
- def report_error!(text)
244
- @status = 'error'
245
- @error = text
179
+ def release_flag
180
+ @queue.push(:flag) if @queue
246
181
  end
247
182
 
248
-
249
- def autoconfigure(test_lines)
250
- if test_lines.nil? || test_lines.size == 0
251
- report_error! 'Column separator could not be determined'
252
- return
253
- end
254
-
255
- @col_separator = nil
256
- @header_row = nil
257
- @email_index = nil
258
-
259
- if test_lines[0].size > 1
260
- @col_separator = ','
261
- else
262
- COL_SEPARATORS.each do |sep|
263
- if is_separator?(sep, test_lines)
264
- @col_separator = sep
265
- break
266
- end
267
- end
183
+ def get_flag
184
+ @semaphore.synchronize do
185
+ @getting_flag = true
268
186
  end
269
187
 
270
- # Single column file
271
- if @col_separator.nil? && test_lines.first.size == 1 && !is_separator?(' ', test_lines)
272
- @col_separator = ','
273
- end
274
-
275
- if @col_separator.nil?
276
- report_error! 'Column separator could not be determined'
277
- return
278
- end
279
-
280
- test_rows = test_lines.map do |line|
281
- if @col_separator == ","
282
- line
283
- else
284
- line.first.split(@col_separator)
285
- end
286
- end
287
-
288
- test_rows.each do |row|
289
- if (index = find_email_index row)
290
- @email_index = index
291
- break
292
- end
293
- end
294
-
295
- if @email_index.nil?
296
- report_error! "Email column could not be determined."
297
- return
298
- end
299
-
300
- configure_header_row(test_rows)
301
-
302
- true
303
- end
304
-
305
-
306
- def configure_header_row(test_rows)
307
- plus_headers = ['email_status', 'disposable', 'role_account']
188
+ @queue.pop if @queue
308
189
 
309
- if is_header_row?(test_rows.first)
310
- hr = test_rows.first
311
- else
312
- hr = []
313
- test_rows.first.count.times do |i|
314
- if i == @email_index
315
- hr << "email"
316
- else
317
- hr << "column_#{i}"
318
- end
319
- end
190
+ @semaphore.synchronize do
191
+ @getting_flag = false
320
192
  end
321
- @header_row = hr + plus_headers
322
193
  end
323
194
 
324
- def is_separator?(sep, lines)
325
- same_count = 0
326
- fcount = lines.first.first.split(sep).size
327
- if fcount == 1
328
- false
329
- else
330
- lines.each do |line|
331
- cc = line.first.split(sep).size
332
- same_count += 1 if cc == fcount
333
- end
334
- same_count == lines.count
335
- end
195
+ def report_error!(text)
196
+ @status = 'error'
197
+ @error = text
336
198
  end
337
199
 
338
-
339
-
340
-
341
200
  end
342
201
  end
@@ -0,0 +1,47 @@
module Britebox
  # Stopwatch used to measure how long a job has actually been running.
  #
  # The timer can be paused and resumed; time spent while paused is not
  # counted. `started_at` / `ended_at` are wall-clock timestamps meant for
  # display, while the elapsed time itself is measured with the monotonic
  # clock so that system clock adjustments cannot distort `duration`.
  class FileJobTimer
    # @return [Time, nil] wall-clock time of the last `start`, nil if never started
    # @return [Time, nil] wall-clock time of the last `stop`, nil if not stopped
    #   since the last `start`
    attr_reader :started_at, :ended_at

    def initialize
      @banked_time = 0
      @is_running = false
    end

    # Begins a fresh measurement, discarding any previously accumulated time.
    def start
      @banked_time = 0
      # A previous run's end time must not leak into the new run.
      @ended_at = nil
      @started_at = Time.now
      @resumed_tick = monotonic_now
      @is_running = true
    end

    # @return [Boolean] true while time is being accumulated
    def running?
      @is_running
    end

    # Stops accumulating time and banks what was measured since the last
    # `start`/`resume`. No-op when the timer is not running.
    def pause
      return unless running?

      @is_running = false
      @banked_time += monotonic_now - @resumed_tick
    end

    # Continues accumulating time after a `pause`. No-op when already running.
    def resume
      return if running?

      @resumed_tick = monotonic_now
      @is_running = true
    end

    # Finishes the measurement and records the wall-clock end time.
    def stop
      pause
      @ended_at = Time.now
    end

    # @return [Numeric] seconds accumulated so far, excluding paused periods
    #   (0 if the timer was never started)
    def duration
      return @banked_time unless running?

      @banked_time + (monotonic_now - @resumed_tick)
    end

    private

    # Monotonic clock reading in seconds; unaffected by wall-clock changes.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end
@@ -0,0 +1,144 @@
module Britebox
  # Samples the first non-empty rows of a delimited text file and works out
  # how the file should be parsed: the column separator, the index of the
  # column holding e-mail addresses, and the header row to use for export.
  #
  # After a successful `recognize!` the result is available in `opts`
  # (keys :col_separator, :header_row, :email_index). On failure `error`
  # holds a human-readable message.
  class FormatRecognizer
    attr_reader :opts, :error

    EMAIL_PATTERN = /(\S+)@(\S+)/
    # Candidate separators tried when the default comma split yields one column.
    COL_SEPARATORS = [";", "|", "\t"].freeze
    # Columns appended to every header row.
    PLUS_HEADERS = ['email_status', 'disposable', 'role_account'].freeze
    # Number of non-empty rows sampled for detection.
    SAMPLE_SIZE = 5

    # @param file_name [String] path of the file to inspect
    def initialize(file_name)
      @file_name = file_name
      @opts = {col_separator: nil, header_row: nil, email_index: nil}
    end

    # Reads the sample rows and fills `opts`.
    #
    # @return [Boolean] true when the format was recognized; false otherwise,
    #   in which case `error` describes the problem
    def recognize!
      test_lines = []
      begin
        CSV.foreach(@file_name) do |line|
          next if line.nil? || line.empty?
          break if test_lines.size >= SAMPLE_SIZE

          test_lines << line
        end
      rescue StandardError => ex
        # Only ordinary failures (missing file, malformed CSV, bad encoding)
        # are reported; signals and fatal errors must keep propagating.
        @error = ex.message.to_s
        return false
      end

      autoconfigure(test_lines)
    end

    # @param row [Array] parsed cells of one row
    # @return [Integer, nil] index of the first cell that looks like an
    #   e-mail address, or nil when there is none
    def find_email_index(row)
      row.each_with_index do |value, index|
        return index if value.to_s.match(EMAIL_PATTERN)
      end
      nil
    end

    # A row is treated as a header when no cell looks like an e-mail address
    # and the word "email" appears somewhere in it (case-insensitive).
    #
    # @param row [Array] parsed cells of one row
    # @return [Boolean]
    def self.is_header_row?(row)
      row.each do |v|
        v = v.first if v.instance_of? Array
        return false if v.to_s.match(EMAIL_PATTERN)
      end
      !row.join(" ").downcase.match("email").nil?
    end

    private

    # Determines separator, e-mail column and header row from the sample.
    # Returns true on success; on failure sets @error and returns false.
    def autoconfigure(test_lines)
      if test_lines.nil? || test_lines.empty?
        @error = 'Column separator could not be determined'
        return false
      end

      if test_lines[0].size > 1
        # The default comma parse already produced several columns.
        @opts[:col_separator] = ','
      else
        @opts[:col_separator] = COL_SEPARATORS.find { |sep| is_separator?(sep, test_lines) }
      end

      # Single column file
      if @opts[:col_separator].nil? && test_lines.first.size == 1 && !is_separator?(' ', test_lines)
        @opts[:col_separator] = ','
      end

      if @opts[:col_separator].nil?
        @error = 'Column separator could not be determined'
        return false
      end

      test_rows = test_lines.map do |line|
        if @opts[:col_separator] == ","
          line
        else
          line.first.split(@opts[:col_separator])
        end
      end

      test_rows.each do |row|
        if (index = find_email_index row)
          @opts[:email_index] = index
          break
        end
      end

      if @opts[:email_index].nil?
        @error = "Email column could not be determined."
        return false
      end

      configure_header_row(test_rows)

      true
    end

    # Uses the first sampled row as the header when it looks like one,
    # otherwise synthesizes "column_N" names (with "email" for the e-mail
    # column). The extra result columns are appended in both cases.
    def configure_header_row(test_rows)
      first_row = test_rows.first
      hr =
        if self.class.is_header_row?(first_row)
          first_row
        else
          Array.new(first_row.count) do |i|
            i == @opts[:email_index] ? "email" : "column_#{i}"
          end
        end
      @opts[:header_row] = hr + PLUS_HEADERS
    end

    # True when splitting the first cell of every sampled line by `sep`
    # yields the same number of parts as the first line does, and that
    # number is not 1.
    def is_separator?(sep, lines)
      # to_s: a sampled row such as ",x" parses to [nil, "x"], so the first
      # cell may be nil even though the row itself is not empty.
      fcount = lines.first.first.to_s.split(sep).size
      return false if fcount == 1

      lines.all? { |line| line.first.to_s.split(sep).size == fcount }
    end
  end
end