britebox 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: af52b5e18fb92d49ca360ef3a68e4a5b65825e3c
4
- data.tar.gz: 1c70ba5f08bdadef985eee62dda15a4ff435aa10
3
+ metadata.gz: 0949a7ddc1540e0c8374818c3647930eb3560ded
4
+ data.tar.gz: 24c7939e3c0ca1e318ea9ff8af22c485649558b9
5
5
  SHA512:
6
- metadata.gz: 83d66b045e53371ab19a2f777cd8336474b41f6fbf6bc146c8af0c7e47b07541bf0fb49a02ef80cd3ead29dba115872f64e0b0107a6a6b03e3f3978293f3b7a1
7
- data.tar.gz: 831f8252a4e93a07a4384eacfc88e722feba7b79ce25489ff2fb14b201ba7d010682ff5ab806506d11ef864bd2760f8f9fd686506c82ff029b690d7ebf07bbf1
6
+ metadata.gz: c0a6695c78f1daed658d42c1826cb7ad5d43efdb9f3fe35bc7e371a6804e726f5c7d05a47f7a98722a897fd6c3a8efb028165367528d93aa0aa22d4a022d9f3f
7
+ data.tar.gz: b71b27e62b1877698515972d0afe8935edbfd262cac51c2087193fdcd83acfa271eac77ba93e7d6535a20f6ed09fcc46cb8b381c7002ac0e7c5e4158156ac2eb
@@ -3,8 +3,11 @@ require 'britebox/version'
3
3
  require 'britebox/helpers'
4
4
  require 'britebox/config'
5
5
  require 'britebox/file_job'
6
+ require 'britebox/file_job_timer'
6
7
  require 'britebox/file_job_pool'
7
8
  require 'britebox/export_buffer'
8
9
  require 'britebox/event_log'
9
10
  require 'britebox/web_ui'
10
- require 'britebox/filtered_error_io'
11
+ require 'britebox/filtered_error_io'
12
+ require 'britebox/format_recognizer'
13
+ require 'britebox/lines_verifier'
@@ -4,11 +4,10 @@ require 'csv'
4
4
  module Britebox
5
5
  class FileJob
6
6
 
7
- attr_reader :file_name, :threads_count, :error, :status, :size_total, :size_processed,
8
- :started_at, :processed_at
7
+ attr_reader :file_name, :threads_count, :error, :status, :size_total,
8
+ :timer, :queue, :semaphore, :brite_client
9
9
 
10
- EMAIL_PATTERN = /(\S+)@(\S+)/
11
- COL_SEPARATORS = [";", "|", "\t"]
10
+ attr_accessor :size_processed
12
11
 
13
12
  def initialize(file_name, brite_client, options = {})
14
13
  @file_name = file_name
@@ -21,6 +20,9 @@ module Britebox
21
20
 
22
21
  @size_processed = 0
23
22
 
23
+ @semaphore = Mutex.new
24
+ @timer = FileJobTimer.new
25
+
24
26
  unless File.exist?(file_name)
25
27
  report_error!("File not found")
26
28
  return
@@ -29,8 +31,18 @@ module Britebox
29
31
  report_error!("File is empty")
30
32
  return
31
33
  end
34
+ end
32
35
 
33
- @semaphore = Mutex.new
36
+ def started_at
37
+ @timer.started_at
38
+ end
39
+
40
+ def processed_at
41
+ @timer.ended_at
42
+ end
43
+
44
+ def duration
45
+ @timer.duration
34
46
  end
35
47
 
36
48
  def as_json
@@ -41,8 +53,8 @@ module Britebox
41
53
  threads: @threads_count,
42
54
  size_total: @size_total,
43
55
  size_processed: @size_processed,
44
- started_at: @started_at,
45
- processed_at: @processed_at,
56
+ started_at: started_at,
57
+ processed_at: processed_at,
46
58
  duration: duration,
47
59
  percent_complete: percent_complete,
48
60
  error: @error
@@ -57,13 +69,6 @@ module Britebox
57
69
  @threads_count * 4
58
70
  end
59
71
 
60
- def duration
61
- if @started_at
62
- end_time = @processed_at || Time.now
63
- end_time - @started_at
64
- end
65
- end
66
-
67
72
  def percent_complete
68
73
  if @size_total.to_i > 0
69
74
  (100.0 * @size_processed / @size_total).round(1)
@@ -94,14 +99,31 @@ module Britebox
94
99
 
95
100
  def status=(new_status)
96
101
  return if status == new_status
102
+ return if verifying? && new_status == 'pending' # avoid double-clicking
103
+
104
+ if new_status == 'verifying'
105
+ raise("status 'verifying' can't be set, use 'pending' instead")
106
+ end
97
107
 
98
108
  # Release processing slot
99
- if verifying? && ['paused', 'cancelled'].include?(new_status)
100
- @queue.push(:flag) if @queue
109
+ if new_status == 'cancelled'
110
+ release_flag
101
111
  end
102
112
 
103
- if new_status == 'verifying'
104
- raise("status 'verifying' can't be set, use 'pending' instead")
113
+ if verifying? && new_status == 'paused'
114
+ release_flag
115
+ end
116
+
117
+ if new_status == 'pending'
118
+ Thread.new do
119
+ while @getting_flag
120
+ sleep(0.1)
121
+ end
122
+ get_flag
123
+ @semaphore.synchronize do
124
+ @status = 'verifying'
125
+ end
126
+ end
105
127
  end
106
128
 
107
129
  @status = new_status
@@ -110,106 +132,36 @@ module Britebox
110
132
  def verify!(file_name_to)
111
133
  return if error?
112
134
 
113
- test_lines = []
114
- begin
115
- line_n = 0
116
- CSV.foreach(file_name) do |line|
117
- if line && line.size > 0
118
- if line_n < 5
119
- test_lines << line
120
- else
121
- break
122
- end
123
-
124
- line_n += 1
125
- end
126
- end
127
- rescue Exception => ex
128
- report_error!(ex.message.to_s) and return
135
+ recognizer = FormatRecognizer.new(@file_name)
136
+ if recognizer.recognize!
137
+ opts = recognizer.opts
138
+ else
139
+ report_error! recognizer.error
140
+ return
129
141
  end
130
142
 
131
- autoconfigure(test_lines) || return
132
143
 
133
144
  # Block processing if some another FileJob processing lines
134
- @queue.pop if @queue
145
+ loop do
146
+ break if verifying?
147
+ get_flag
135
148
 
136
- @started_at = Time.now
137
- @status = 'verifying'
138
-
139
- @in_buffer = Queue.new
140
- @out_buffer = ExportBuffer.new(file_name_to, @header_row, @col_separator)
141
-
142
- @threads = []
143
- @threads_count.times do
144
- @threads << Thread.new do
145
- loop do
146
- # Pause all processing
147
- loop do
148
- break unless paused?
149
- sleep(0.1)
150
- end
151
-
152
- break if cancelled?
153
-
154
- # Wait for processing slot
155
- if pending?
156
- @semaphore.synchronize do
157
- @queue.pop if @queue && pending?
158
- @status = 'verifying'
159
- end
160
- end
161
-
162
- item = Timeout.timeout(1) { @in_buffer.pop } rescue nil
163
- break if item.nil?
164
-
165
- email = item[:line][@email_index]
166
- if email.to_s.match(EMAIL_PATTERN)
167
- if Britebox::Config.simulate
168
- # Do not send real requests in this mode
169
- sleep(1)
170
- contact_status = ['unknown', false, false]
171
- else
172
- begin
173
- contact = @brite_client.contacts.create(email: email)
174
- contact.verify!
175
- contact_status = [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
176
- rescue
177
- contact_status = ['error', false, false]
178
- end
179
- end
180
- else
181
- contact_status = ['invalid', false, false]
182
- end
183
-
184
- @semaphore.synchronize do
185
- @out_buffer << {n: item[:n], line: item[:line] + contact_status}
186
- @size_processed += CSV.generate_line(item[:line], col_sep: @col_separator).size
187
- end
188
- end
189
- end
190
- end
191
-
192
- idx = 0
193
- CSV.foreach(file_name, col_sep: @col_separator) do |line|
194
- if idx == 0 && is_header_row?(line)
195
- @size_processed += CSV.generate_line(line, col_sep: @col_separator).size
196
- next
149
+ if paused?
150
+ release_flag
151
+ sleep 0.3
152
+ else
153
+ break
197
154
  end
198
- next if line.nil? || line.size == 0
199
- # Throttle file reading
200
- break if cancelled?
201
- sleep(0.1) while @in_buffer.size > buffer_size && !cancelled?
202
-
203
- @in_buffer << {n: idx, line: line}
204
- idx += 1
205
155
  end
206
156
 
207
- @threads.each{ |m| m.join }
157
+ if pending? || verifying?
158
+ @timer.start
159
+ @status = 'verifying'
208
160
 
209
- @out_buffer.flush_backlog
210
- @out_buffer.close
161
+ LinesVerifier.new(self).process!(file_name_to, opts)
211
162
 
212
- @processed_at = Time.now
163
+ @timer.stop
164
+ end
213
165
 
214
166
  if cancelled?
215
167
  File.delete file_name_to
@@ -217,126 +169,33 @@ module Britebox
217
169
  @status = 'complete'
218
170
  end
219
171
 
220
- # Release the lock
221
- @queue.push(:flag) if @queue
172
+ release_flag
222
173
 
223
174
  true
224
175
  end
225
176
 
226
177
  private
227
178
 
228
- def is_header_row?(row)
229
- row.each do |v|
230
- v = v.first if v.instance_of? Array
231
- return false if v.to_s.match(EMAIL_PATTERN)
232
- end
233
- !row.join(" ").downcase.match("email").nil?
234
- end
235
-
236
- def find_email_index(row)
237
- row.each_with_index do |value, index|
238
- return index if value.to_s.match(EMAIL_PATTERN)
239
- end
240
- nil
241
- end
242
-
243
- def report_error!(text)
244
- @status = 'error'
245
- @error = text
179
+ def release_flag
180
+ @queue.push(:flag) if @queue
246
181
  end
247
182
 
248
-
249
- def autoconfigure(test_lines)
250
- if test_lines.nil? || test_lines.size == 0
251
- report_error! 'Column separator could not be determined'
252
- return
253
- end
254
-
255
- @col_separator = nil
256
- @header_row = nil
257
- @email_index = nil
258
-
259
- if test_lines[0].size > 1
260
- @col_separator = ','
261
- else
262
- COL_SEPARATORS.each do |sep|
263
- if is_separator?(sep, test_lines)
264
- @col_separator = sep
265
- break
266
- end
267
- end
183
+ def get_flag
184
+ @semaphore.synchronize do
185
+ @getting_flag = true
268
186
  end
269
187
 
270
- # Single column file
271
- if @col_separator.nil? && test_lines.first.size == 1 && !is_separator?(' ', test_lines)
272
- @col_separator = ','
273
- end
274
-
275
- if @col_separator.nil?
276
- report_error! 'Column separator could not be determined'
277
- return
278
- end
279
-
280
- test_rows = test_lines.map do |line|
281
- if @col_separator == ","
282
- line
283
- else
284
- line.first.split(@col_separator)
285
- end
286
- end
287
-
288
- test_rows.each do |row|
289
- if (index = find_email_index row)
290
- @email_index = index
291
- break
292
- end
293
- end
294
-
295
- if @email_index.nil?
296
- report_error! "Email column could not be determined."
297
- return
298
- end
299
-
300
- configure_header_row(test_rows)
301
-
302
- true
303
- end
304
-
305
-
306
- def configure_header_row(test_rows)
307
- plus_headers = ['email_status', 'disposable', 'role_account']
188
+ @queue.pop if @queue
308
189
 
309
- if is_header_row?(test_rows.first)
310
- hr = test_rows.first
311
- else
312
- hr = []
313
- test_rows.first.count.times do |i|
314
- if i == @email_index
315
- hr << "email"
316
- else
317
- hr << "column_#{i}"
318
- end
319
- end
190
+ @semaphore.synchronize do
191
+ @getting_flag = false
320
192
  end
321
- @header_row = hr + plus_headers
322
193
  end
323
194
 
324
- def is_separator?(sep, lines)
325
- same_count = 0
326
- fcount = lines.first.first.split(sep).size
327
- if fcount == 1
328
- false
329
- else
330
- lines.each do |line|
331
- cc = line.first.split(sep).size
332
- same_count += 1 if cc == fcount
333
- end
334
- same_count == lines.count
335
- end
195
+ def report_error!(text)
196
+ @status = 'error'
197
+ @error = text
336
198
  end
337
199
 
338
-
339
-
340
-
341
200
  end
342
201
  end
@@ -0,0 +1,47 @@
1
+ module Britebox
2
+ class FileJobTimer
3
+ attr_reader :started_at, :ended_at
4
+
5
+ def initialize
6
+ @banked_time = 0
7
+ @is_running = false
8
+ end
9
+
10
+ def start
11
+ @banked_time = 0
12
+ @started_at = @resumed_at = Time.now
13
+ @is_running = true
14
+ end
15
+
16
+ def running?
17
+ @is_running
18
+ end
19
+
20
+ def pause
21
+ return unless running?
22
+
23
+ @is_running = false
24
+ @banked_time += (Time.now - @resumed_at)
25
+ end
26
+
27
+ def resume
28
+ return if running?
29
+
30
+ @resumed_at = Time.now
31
+ @is_running = true
32
+ end
33
+
34
+ def stop
35
+ pause
36
+ @ended_at = Time.now
37
+ end
38
+
39
+ def duration
40
+ if running?
41
+ @banked_time + (Time.now - @resumed_at)
42
+ else
43
+ @banked_time
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,144 @@
1
+ module Britebox
2
+ class FormatRecognizer
3
+ attr_reader :opts, :error
4
+
5
+ EMAIL_PATTERN = /(\S+)@(\S+)/
6
+ COL_SEPARATORS = [";", "|", "\t"]
7
+ PLUS_HEADERS = ['email_status', 'disposable', 'role_account']
8
+
9
+
10
+
11
+ def initialize(file_name)
12
+ @file_name = file_name
13
+ @opts = {col_separator: nil, header_row: nil, email_index: nil}
14
+ end
15
+
16
+ def recognize!
17
+ test_lines = []
18
+ begin
19
+ line_n = 0
20
+ CSV.foreach(@file_name) do |line|
21
+ if line && line.size > 0
22
+ if line_n < 5
23
+ test_lines << line
24
+ else
25
+ break
26
+ end
27
+
28
+ line_n += 1
29
+ end
30
+ end
31
+ rescue Exception => ex
32
+ @error = ex.message.to_s
33
+ return false
34
+ end
35
+
36
+ autoconfigure(test_lines) || return
37
+
38
+ true
39
+ end
40
+
41
+
42
+ def find_email_index(row)
43
+ row.each_with_index do |value, index|
44
+ return index if value.to_s.match(EMAIL_PATTERN)
45
+ end
46
+ nil
47
+ end
48
+
49
+ def self.is_header_row?(row)
50
+ row.each do |v|
51
+ v = v.first if v.instance_of? Array
52
+ return false if v.to_s.match(EMAIL_PATTERN)
53
+ end
54
+ !row.join(" ").downcase.match("email").nil?
55
+ end
56
+
57
+ private
58
+
59
+ def autoconfigure(test_lines)
60
+ if test_lines.nil? || test_lines.size == 0
61
+ @error = 'Column separator could not be determined'
62
+ return false
63
+ end
64
+
65
+ if test_lines[0].size > 1
66
+ @opts[:col_separator] = ','
67
+ else
68
+ COL_SEPARATORS.each do |sep|
69
+ if is_separator?(sep, test_lines)
70
+ @opts[:col_separator] = sep
71
+ break
72
+ end
73
+ end
74
+ end
75
+
76
+ # Single column file
77
+ if @opts[:col_separator].nil? && test_lines.first.size == 1 && !is_separator?(' ', test_lines)
78
+ @opts[:col_separator] = ','
79
+ end
80
+
81
+ if @opts[:col_separator].nil?
82
+ @error = 'Column separator could not be determined'
83
+ return false
84
+ end
85
+
86
+ test_rows = test_lines.map do |line|
87
+ if @opts[:col_separator] == ","
88
+ line
89
+ else
90
+ line.first.split(@opts[:col_separator])
91
+ end
92
+ end
93
+
94
+ test_rows.each do |row|
95
+ if (index = find_email_index row)
96
+ @opts[:email_index] = index
97
+ break
98
+ end
99
+ end
100
+
101
+ if @opts[:email_index].nil?
102
+ @error = "Email column could not be determined."
103
+ return false
104
+ end
105
+
106
+ configure_header_row(test_rows)
107
+
108
+ true
109
+ end
110
+
111
+ def configure_header_row(test_rows)
112
+ if self.class.is_header_row?(test_rows.first)
113
+ hr = test_rows.first
114
+ else
115
+ hr = []
116
+ test_rows.first.count.times do |i|
117
+ if i == @opts[:email_index]
118
+ hr << "email"
119
+ else
120
+ hr << "column_#{i}"
121
+ end
122
+ end
123
+ end
124
+ @opts[:header_row] = hr + PLUS_HEADERS
125
+ end
126
+
127
+ def is_separator?(sep, lines)
128
+ same_count = 0
129
+ fcount = lines.first.first.split(sep).size
130
+ if fcount == 1
131
+ false
132
+ else
133
+ lines.each do |line|
134
+ cc = line.first.split(sep).size
135
+ same_count += 1 if cc == fcount
136
+ end
137
+ same_count == lines.count
138
+ end
139
+ end
140
+
141
+
142
+
143
+ end
144
+ end