britebox 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 933b24652e4d8e01a6cba8e94c78826441a6833b
4
+ data.tar.gz: 71d0fe270ab25989b2157fbdb5f40bd78b70bdd3
5
+ SHA512:
6
+ metadata.gz: 7014db8bbe827d9030e69f9e377d2b04fcb43550a64742af704277d6c221204fbe8ff006cd8342ace8960bcd9f97fa84fd3215a1d1a44a735cd049ae3001673d
7
+ data.tar.gz: 664811ca1f66bb90f5dbc3de04317deebedaebf1dad076c7c5821674d622a0a2a17db5aeaeba8ff5016775dd18d4dcfd29a2854dfdffb29b65732976608f3703
data/LICENSE ADDED
File without changes
@@ -0,0 +1,4 @@
1
+ britebox
2
+ ========
3
+
4
+ BriteVerify API CLI tool
@@ -0,0 +1,76 @@
1
#!/usr/bin/env ruby
# Command-line entry point for britebox.
#
#   britebox -v | --version
#   britebox watch -k APIKEY -d WATCHDIR [-o OUTPUTDIR] [-t THREADS]
#
# Parses the command line and hands the collected options to
# Britebox::CLI#watch.

require 'optparse'
require 'britebox'
require 'britebox/cli'

# An exception in any thread terminates the whole process.
Thread.abort_on_exception = true
THREAD_NUM_MAX = 10

commands = ['watch']

if ARGV.size == 1 && ['-v', '--version'].include?(ARGV[0])
  puts Britebox::VERSION
  exit 0
end

# ARGV[0] is nil when no arguments were given, which is not a known command.
unless commands.include?(ARGV[0])
  puts 'usage: britebox COMMAND [OPTIONS]'
  puts "  COMMAND: #{commands.join(', ')}"
  puts '  run britebox COMMAND --help to get more information about each command'
  exit 1
end

# Hook for options shared by every command; none are defined at the moment.
def common_opts(opts)
end

cli = Britebox::CLI.new

command = ARGV.shift
if command == 'watch'
  params = {}

  parser = OptionParser.new do |opts|
    opts.banner = 'usage: britebox watch [OPTIONS]'

    opts.on('-d', '--dir WATCHDIR', 'Watch this directory for incoming files') do |v|
      params[:dir] = v
    end

    opts.on('-o', '--output OUTPUTDIR', 'Place verified files into this directory') do |v|
      params[:output] = v
    end

    opts.on('-k', '--apikey APIKEY', 'BriteVerify API Key') do |v|
      params[:api_key] = v
    end

    # Integer coercion makes OptionParser reject values such as "3abc"
    # (String#to_i would silently read that as 3).
    opts.on('-t', '--threads THREADS', Integer,
            "Maximum number of parallel threads used for processing, " +
            "Default is #{Britebox::FileJobPool::THREAD_NUM_DEFAULT}, " +
            "Maximum is #{THREAD_NUM_MAX}") do |v|
      unless (1..THREAD_NUM_MAX).include?(v)
        puts "Threads number should be in range 1..#{THREAD_NUM_MAX}"
        exit 1
      end
      params[:threads] = v
    end

    common_opts(opts)
  end

  begin
    parser.parse!
  rescue OptionParser::ParseError => e
    puts e.message
    exit 1
  end

  # Report missing required options as a usage error instead of letting
  # Britebox::CLI#watch raise and print a backtrace.
  if params[:api_key].nil?
    puts 'Please provide BriteVerify API Key'
    puts parser
    exit 1
  end

  if params[:dir].nil?
    puts 'Please provide directory-to-watch'
    puts parser
    exit 1
  end

  begin
    cli.watch(params)
  rescue Interrupt
    # Ctrl-C is the documented way to quit; leave without a backtrace.
    puts
    exit 130
  end
end
75
+
76
+
@@ -0,0 +1,4 @@
1
+
2
+ require 'britebox/version'
3
+ require 'britebox/file_job'
4
+ require 'britebox/file_job_pool'
@@ -0,0 +1,63 @@
1
+ require 'listen'
2
+ require 'fileutils'
3
+
4
module Britebox
  # Implements the `watch` command: monitors a directory for csv/txt files and
  # hands every new or modified file to a FileJobPool for verification.
  class CLI
    MONITOR_EXTENSIONS = ['csv', 'txt'].freeze

    # Watch a directory and keep printing the pool status until interrupted.
    #
    # params - Hash with the keys:
    #          :api_key - BriteVerify API key (required)
    #          :dir     - directory to watch (required)
    #          :output  - directory for results; defaults to "verified"
    #                     inside the watched directory
    #          :threads - thread count passed to FileJobPool (optional)
    #
    # Raises RuntimeError when :api_key or :dir is missing, or when the output
    # directory is the watched directory itself.
    def watch(params)
      @api_key = params[:api_key] || raise("Please provide BriteVerify API Key")
      raise("Please provide directory-to-watch") unless params[:dir]

      @dir = File.expand_path(params[:dir])
      @out_dir = if params[:output]
                   File.expand_path(params[:output])
                 else
                   File.expand_path('verified', @dir)
                 end

      # Results are written under the source file's name and the source is
      # deleted afterwards, so both directories must not be the same.
      raise("Output directory must differ from the watched directory") if @out_dir == @dir

      # Processed files are moved/written here; make sure it exists up front.
      FileUtils.mkdir_p(@out_dir)

      fj_options = {threads: params[:threads]}

      puts "Watching directory #{@dir}"
      puts "Output directory is #{@out_dir}"
      puts "Press Ctrl-C to quit"

      fj_pool = FileJobPool.new(params[:threads])
      brite_client = BriteAPI::Client.new(@api_key)

      # NOTE(review): the code below relies on Listen.to starting the listener
      # without blocking when given a block; confirm for the listen version in
      # use.
      Listen.to(@dir, listen_options(@dir, @out_dir)) do |modified, added, _removed|
        (modified + added).each do |file|
          fj_pool.process_file!(file, @dir, @out_dir, brite_client, fj_options)
        end
      end

      # Trigger change for existing files
      existing = MONITOR_EXTENSIONS.flat_map { |ext| Dir.glob(File.join(@dir, "*.#{ext}")) }
      FileUtils.touch(existing)

      # Update current status in cycle
      loop do
        fj_pool.print_status
        sleep 0.5
      end
    end

    private

    # Build the options hash for Listen: only monitored extensions are
    # reported, and the output directory is ignored when it lives inside the
    # watched directory.
    def listen_options(dir, out_dir)
      options = {filter: /\.(#{MONITOR_EXTENSIONS.join('|')})\z/}

      prefix = dir.end_with?(File::SEPARATOR) ? dir : dir + File::SEPARATOR
      if out_dir.start_with?(prefix)
        relative = out_dir[prefix.size..-1]
        separator = Regexp.escape(File::SEPARATOR)
        # Escape the path and require a path boundary after it, so that e.g.
        # "verified_list.csv" is not mistaken for the "verified" directory.
        # presumably Listen matches ignore patterns against paths relative to
        # the watched directory; verify against the listen documentation.
        options[:ignore] = /\A#{Regexp.escape(relative)}(?:#{separator}|\z)/
      end

      options
    end
  end
end
@@ -0,0 +1,218 @@
1
+ require 'thread_storm'
2
+ require 'brite-api'
3
+ require 'csv'
4
+
5
module Britebox
  # Verifies the e-mail addresses found in one delimited text file.
  #
  # #verify! detects the column separator, the e-mail column and an optional
  # header row, then looks every row up through the given API client.
  # #export! writes the rows, extended with the verification result, to a
  # new file.
  #
  # status moves through 'pending' -> 'verifying' -> 'exporting' -> 'complete',
  # or ends in 'error' with the reason available through #error.
  class FileJob

    attr_reader :file_name, :lines_total, :error, :status, :processed_lines

    EMAIL_PATTERN = /(\S+)@(\S+)/
    # Separators tried when a comma-parsed line yields a single column.
    COL_SEPARATORS = [";", "|", "\t"].freeze
    # Number of leading non-empty lines inspected by #autoconfigure.
    SAMPLE_SIZE = 5
    # Columns appended to every exported row.
    RESULT_HEADERS = ['email_status', 'disposable', 'role_account'].freeze

    # file_name    - path of the file to verify
    # brite_client - API client; must respond to contacts.create(email: ...)
    # thread_pool  - pool whose #execute runs a block and returns an object
    #                responding to #join
    # options      - accepted for interface compatibility; currently unused
    def initialize(file_name, brite_client, thread_pool, options = {})
      @file_name = file_name
      @brite_client = brite_client
      @thread_pool = thread_pool
      @processed_lines = []
      @status = 'pending'
    end

    # Number of rows that already have a verification result.
    def lines_processed
      @processed_lines.compact.count
    end

    # Read the file, detect its layout and verify every data row.
    #
    # Returns true on success (status becomes 'exporting'). On failure the
    # status becomes 'error', #error holds the reason and nil is returned.
    def verify!
      @status = 'verifying'

      return report_error!("File #{file_name} not found") unless File.exist?(file_name)

      file_data = File.read file_name
      return report_error!("File #{file_name} is empty") if file_data.empty?

      # First pass with the default (comma) separator: validates the file and
      # collects a few non-empty lines for layout detection.
      test_lines = []
      begin
        CSV.parse(file_data) do |line|
          next if line.nil? || line.empty?
          test_lines << line if test_lines.size < SAMPLE_SIZE
        end
      rescue StandardError => ex
        return report_error!(ex.message.to_s)
      end

      # A file made of blank lines only has nothing to detect a layout from.
      return report_error!("File #{file_name} is empty") if test_lines.empty?

      autoconfigure(test_lines) || return

      begin
        # Blank lines are skipped here as well, consistent with the sample pass.
        parsed_lines = CSV.parse(file_data, col_sep: @col_separator, skip_blanks: true)
      rescue CSV::MalformedCSVError => ex
        return report_error!(ex.message.to_s)
      end
      file_data = nil # Free up resources

      has_header = !parsed_lines.empty? && is_header_row?(parsed_lines.first)

      @lines_total = parsed_lines.count
      # One slot per parsed line; the header slot (if any) stays nil.
      @processed_lines = Array.new(@lines_total)
      @lines_total -= 1 if has_header

      # spin up multiple processing threads
      jobs = []
      parsed_lines.each_with_index do |line, idx|
        next if idx == 0 && has_header

        jobs << @thread_pool.execute do
          @processed_lines[idx] = line + contact_status(line[@email_index])
        end
      end

      # Wait for all threads
      jobs.each(&:join)

      # Free up resources
      parsed_lines = nil

      @status = 'exporting'

      true
    end

    # Write the header row and all processed rows to file_name using the
    # detected column separator. Sets status to 'complete' and returns true.
    def export!(file_name)
      # Block form closes the file even when writing raises.
      File.open(file_name, "w+") do |file|
        file.write(CSV.generate_line(@header_row, col_sep: @col_separator)) if @header_row
        @processed_lines.each do |line|
          next if line.nil?
          file.write CSV.generate_line(line, col_sep: @col_separator)
        end
      end

      @status = 'complete'
      true
    end

    private

    # Look one address up through the API client.
    # Returns [status, disposable, role_address]; any StandardError raised by
    # the lookup is reported as ['error', false, false].
    def contact_status(email)
      contact = @brite_client.contacts.create(email: email)
      contact.verify!
      [contact.status, contact.response[:email]['disposable'], contact.response[:email]['role_address']]
    rescue StandardError
      ['error', false, false]
    end

    # A header row contains the word "email" but no actual e-mail address.
    def is_header_row?(row)
      return false if row.nil?

      has_address = row.any? do |value|
        value = value.first if value.instance_of? Array
        value.to_s =~ EMAIL_PATTERN
      end
      return false if has_address

      row.join(" ").downcase.include?("email")
    end

    # Index of the first cell that looks like an e-mail address, or nil.
    def find_email_index(row)
      row.index { |value| value.to_s =~ EMAIL_PATTERN }
    end

    # Record a failure. Returns nil so callers can `return report_error!(...)`.
    def report_error!(text)
      @status = 'error'
      @error = text
      nil
    end

    # Detect column separator, e-mail column and header row from the sample
    # lines (as parsed with the comma separator).
    # Returns true on success; reports an error and returns nil otherwise.
    def autoconfigure(test_lines)
      @col_separator = nil
      @header_row = nil
      @email_index = nil

      @col_separator = detect_separator(test_lines)
      return report_error!('Column separator could not be determined') if @col_separator.nil?

      test_rows = test_lines.map do |line|
        if @col_separator == ","
          line
        else
          line.first.to_s.split(@col_separator)
        end
      end

      test_rows.each do |row|
        @email_index = find_email_index(row)
        break if @email_index
      end

      return report_error!("Email column could not be determined.") if @email_index.nil?

      configure_header_row(test_rows)

      true
    end

    # Pick the column separator for the sample lines, or nil when the layout
    # is ambiguous.
    def detect_separator(test_lines)
      # More than one column after comma parsing: the file is comma separated.
      return ',' if test_lines.first.size > 1

      detected = COL_SEPARATORS.find { |sep| is_separator?(sep, test_lines) }
      return detected if detected

      # Single column file: none of the alternative separators occurs in the
      # first line (e.g. a plain list of addresses), or the lines are
      # consistently space separated.
      first_cell = test_lines.first.first.to_s
      if COL_SEPARATORS.none? { |sep| first_cell.include?(sep) } || is_separator?(' ', test_lines)
        ','
      end
    end

    # Build the exported header: the file's own header row, or generated
    # column names when it has none, followed by the result columns.
    def configure_header_row(test_rows)
      first_row = test_rows.first

      names = if is_header_row?(first_row)
                first_row
              else
                Array.new(first_row.count) { |i| i == @email_index ? "email" : "column_#{i}" }
              end

      @header_row = names + RESULT_HEADERS
    end

    # sep is accepted when it splits the first cell of the first line into
    # several parts and every sample line splits into the same number of parts.
    def is_separator?(sep, lines)
      expected = lines.first.first.to_s.split(sep).size
      return false if expected < 2

      # to_s guards against a nil first cell (a line starting with a comma).
      lines.all? { |line| line.first.to_s.split(sep).size == expected }
    end

  end
end
@@ -0,0 +1,90 @@
1
require 'fileutils'

module Britebox
  # Keep current status for all processing FileJobs
  #
  # Owns two ThreadStorm pools: one runs whole files, the other is handed to
  # each FileJob for its per-line work.
  class FileJobPool
    attr_reader :file_jobs

    SPINNERS = ["|", "/", "—", "\\"].freeze
    THREAD_NUM_DEFAULT = 8

    # num_threads - size of each thread pool; THREAD_NUM_DEFAULT when nil
    def initialize(num_threads = nil)
      num_threads ||= THREAD_NUM_DEFAULT
      @file_jobs = []
      @refresh_number = 0
      @fj_pool = ThreadStorm.new size: num_threads
      @lines_pool = ThreadStorm.new size: num_threads
    end

    # Register a job unless one with the same file name is already known.
    def add(file_job)
      return if include? file_job.file_name
      @file_jobs << file_job
    end

    # True when a job for file_name has been registered.
    def include?(file_name)
      @file_jobs.any? { |fj| fj.file_name == file_name }
    end

    # Schedule verification of one file.
    #
    # file         - path of the file, absolute or relative to dir
    # dir          - watched directory
    # out_dir      - directory receiving the results
    # brite_client - API client passed on to the FileJob
    # fj_options   - options passed on to the FileJob
    #
    # On success the result is written below out_dir (keeping the file's path
    # relative to dir) and the source file is deleted. On error the source is
    # moved to out_dir as NAME_error.EXT next to a NAME_error.EXT.log file
    # holding the error text.
    #
    # NOTE: a file name is only ever processed once per pool, because
    # registered jobs are never removed from #file_jobs.
    def process_file!(file, dir, out_dir, brite_client, fj_options)
      file_name = File.expand_path(file, dir)

      # Do not process same files twice
      return if include?(file_name)

      fj = FileJob.new(file_name, brite_client, @lines_pool, fj_options)
      add(fj)
      @fj_pool.execute do
        fj.verify!

        case fj.status
        when 'error'
          store_failure(fj, out_dir)
        when 'exporting'
          store_result(fj, dir, out_dir)
        else
          raise "unexpected error, status: #{fj.status}"
        end
      end
    end

    # Redraw the status table in place: one line per job plus a spinner.
    def print_status
      clear = "\e[K"
      up = "\e[A"

      # Move the cursor back over the previously printed block.
      print "\r#{clear}#{up}" * @prev_buffer_height if @prev_buffer_height

      buffer = "\nFiles processing: #{@file_jobs.count}"

      @file_jobs.each do |fj|
        fname = File.basename(fj.file_name)
        fname = fname[0..16] + '...' if fname.length > 20

        status_str = if fj.status == 'error'
                       fj.error
                     else
                       "#{(fj.lines_processed || '-')} / #{fj.lines_total || '-'}"
                     end

        buffer << "\n#{fname.ljust(20)} | #{fj.status.to_s.ljust(9)} | #{status_str}"
      end

      buffer << "\n #{SPINNERS[@refresh_number % SPINNERS.size]}\n"

      print buffer

      @prev_buffer_height = buffer.count("\n")
      @refresh_number += 1
    end

    private

    # Export a verified job below out_dir and remove the source file.
    def store_result(fj, dir, out_dir)
      target = File.expand_path(relative_name(fj.file_name, dir), out_dir)
      FileUtils.mkdir_p(File.dirname(target))

      exported = fj.export!(target)
      # Never delete the file that was just written.
      File.delete(fj.file_name) if exported && target != fj.file_name
    end

    # Move a failed source file to out_dir and write its error log.
    def store_failure(fj, out_dir)
      ext = File.extname(fj.file_name)
      err_name = File.basename(fj.file_name, ext) + '_error' + ext

      FileUtils.mkdir_p(out_dir)
      # The source may be gone already (e.g. the "not found" error).
      if File.exist?(fj.file_name)
        FileUtils.mv(fj.file_name, File.expand_path(err_name, out_dir))
      end

      File.open(File.expand_path(err_name + '.log', out_dir), 'w+') do |f|
        f.write fj.error.to_s # TODO: add extended error log
      end
    end

    # Path of file_name relative to dir; the bare file name when file_name
    # does not live below dir. Keeps absolute paths from resolving back to
    # the source file when combined with out_dir.
    def relative_name(file_name, dir)
      base = File.expand_path(dir)
      prefix = base.end_with?(File::SEPARATOR) ? base : base + File::SEPARATOR

      if file_name.start_with?(prefix)
        file_name[prefix.size..-1]
      else
        File.basename(file_name)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Britebox
  # Gem version; printed by `britebox --version`.
  VERSION = '0.0.1'.freeze
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: britebox
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alexander Shapiotko
8
+ - BriteVerify
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: brite-api
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - '>='
19
+ - !ruby/object:Gem::Version
20
+ version: 0.0.1
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - '>='
26
+ - !ruby/object:Gem::Version
27
+ version: 0.0.1
28
+ - !ruby/object:Gem::Dependency
29
+ name: thread_storm
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ~>
33
+ - !ruby/object:Gem::Version
34
+ version: 0.7.1
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ~>
40
+ - !ruby/object:Gem::Version
41
+ version: 0.7.1
42
+ - !ruby/object:Gem::Dependency
43
+ name: listen
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - '>='
47
+ - !ruby/object:Gem::Version
48
+ version: 0.0.1
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - '>='
54
+ - !ruby/object:Gem::Version
55
+ version: 0.0.1
56
+ description: BriteVerify API CLI tool
57
+ email:
58
+ - support@briteverify.com
59
+ executables:
60
+ - britebox
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - lib/britebox/cli.rb
65
+ - lib/britebox/file_job.rb
66
+ - lib/britebox/file_job_pool.rb
67
+ - lib/britebox/version.rb
68
+ - lib/britebox.rb
69
+ - README.md
70
+ - LICENSE
71
+ - bin/britebox
72
+ homepage: https://github.com/thousandsofthem/britebox
73
+ licenses: []
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '1.9'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - '>='
87
+ - !ruby/object:Gem::Version
88
+ version: 1.3.6
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.0.0
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: BriteVerify API CLI tool
95
+ test_files: []