result2csv 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,6 @@
# Development bundle for the result2csv gem; every gem is resolved from
# the public RubyGems index.
source "https://rubygems.org"

# HTTP access to the 80legs API and to result URLs.
gem "rest-client"
# URI parsing and URI-template expansion.
gem "addressable"
gem "highline"
# Version 1 of the AWS SDK (AWS::S3).
gem "aws-sdk-v1"
gem "dotenv"
data/Gemfile.lock ADDED
@@ -0,0 +1,37 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ addressable (2.3.8)
5
+ aws-sdk-v1 (1.66.0)
6
+ json (~> 1.4)
7
+ nokogiri (>= 1.4.4)
8
+ domain_name (0.5.24)
9
+ unf (>= 0.0.5, < 1.0.0)
10
+ highline (1.7.3)
11
+ http-cookie (1.0.2)
12
+ domain_name (~> 0.5)
13
+ json (1.8.3)
14
+ mime-types (2.6.1)
15
+ mini_portile (0.6.2)
16
+ netrc (0.10.3)
17
+ nokogiri (1.6.6.2)
18
+ mini_portile (~> 0.6.0)
19
+ rest-client (1.8.0)
20
+ http-cookie (>= 1.0.2, < 2.0)
21
+ mime-types (>= 1.16, < 3.0)
22
+ netrc (~> 0.7)
23
+ unf (0.1.4)
24
+ unf_ext
25
+ unf_ext (0.0.7.1)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ addressable
32
+ aws-sdk-v1
33
+ highline
34
+ rest-client
35
+
36
+ BUNDLED WITH
37
+ 1.10.6
data/README.md ADDED
File without changes
data/bin/result2csv ADDED
@@ -0,0 +1,9 @@
# Command-line entry point: downloads the JSON result at the URL given as
# the first argument, converts it to CSV and writes the CSV into the
# current working directory.
#
# Usage: result2csv RESULT_URL
require 'result2csv'
#require_relative '../lib/result2csv'

# Dotenv.load

result_url = ARGV[0]
# Without a URL the conversion would fail deep inside URL parsing; stop
# early with a readable message and a non-zero exit status instead.
abort "Usage: result2csv RESULT_URL" if result_url.nil? || result_url.strip.empty?

# Output name: the object key up to its first '.', with "_csv.csv" appended.
object_name = "#{Result2csv::Converter.s3_object_key(result_url).split('.').first}_csv.csv"
# The two trailing arguments (user token, result id) are only forwarded to
# the progress reporter, so placeholder values are passed here.
csv = Result2csv::Converter.convert_to_csv(result_url, 1, 1)
Result2csv::Converter.write_csv_to_file(object_name, csv)
data/lib/.DS_Store ADDED
Binary file
@@ -0,0 +1,18 @@
# csv must be loaded BEFORE the override below is defined: loading csv
# installs its own single-row Array#to_csv, so a later (lazy) require
# would silently replace this matrix-oriented version.
require 'csv'

Array.class_eval do
  # Renders an array of rows (each row itself an array of cells) as CSV
  # text, one output line per row. Rows that are nil or empty are skipped.
  #
  # @return [String] the generated CSV document
  def to_csv
    CSV.generate(:encoding => "utf-8") do |csv|
      each do |row|
        # nil must be tested first; nil does not respond to #empty?.
        next if row.nil? || row.empty?

        csv << row
      end
    end
  end
end
13
+
14
+ Hash.class_eval do
15
+ def method_missing(name, *args, &block)
16
+ self.has_key?(name.to_s) ? self[name.to_s] : super
17
+ end
18
+ end
@@ -0,0 +1,166 @@
require 'json'

module Result2csv
  # Class-level helpers that talk to the 80legs results API, locate result
  # objects in S3 and flatten a JSON result payload into CSV text.
  #
  # Relies on RestClient, Addressable and AWS (aws-sdk-v1) being loaded by
  # the gem's entry file, and on the Array#to_csv override from this gem for
  # the final matrix-to-CSV step.
  class Converter
    # Bucket used by every S3 lookup in this class.
    RESULTS_BUCKET = "datafiniti-voltron-results".freeze
    # Maximum number of characters kept per CSV cell.
    # NOTE(review): presumably chosen to match a spreadsheet cell limit - confirm.
    MAX_CELL_SIZE = 32_767
    # Seven days, expressed in seconds.
    ONE_WEEK_IN_SECONDS = 604_800

    # Requests the result listing of a crawl from the 80legs API, restricted
    # to the "url" field and to the date one week before now.
    #
    # @param options [Hash] requires :user_token and :crawl_name
    # @return [Hash] {body: <response body>, status: <HTTP status code>}
    def self.retrieve(options)
      week_ago = Time.at(Time.now.gmtime.to_i - ONE_WEEK_IN_SECONDS).strftime("%F")
      date_string = '{"' + week_ago + '":""}'
      template = Addressable::Template.new("https://#{options[:user_token]}:@api.80legs.com/v2/results/#{options[:crawl_name]}/{?query*}")
      request_url = template.expand({
        "query" => {
          "fields" => ["url"],
          "dates" => date_string
        }
      })
      # Locals instead of class-level instance variables, so one call can
      # never observe the status/body left behind by another.
      status = body = nil
      RestClient.get(request_url.to_s) do |response, _request|
        status = response.code
        body = response.body
      end
      {body: body, status: status}
    end

    # Returns the parsed JSON listing for a crawl, or '' when the API
    # answered with a status of 400 or above.
    #
    # @param options [Hash] requires :user_token and :crawl_name
    def self.get_results_url(options)
      result = retrieve(crawl_name: options[:crawl_name], user_token: options[:user_token])
      return JSON.parse(result[:body]) if result[:status] < 400

      ''
    end

    # Derives an object key from a result URL by joining the first two
    # segments of its path with '/'.
    def self.s3_object_key(url)
      uri = Addressable::URI.parse(url)
      uri.path.split('/')[1, 2].join('/')
    end

    # Returns the object stored under +key+ in the results bucket.
    def self.s3_object(key)
      bucket.objects[key]
    end

    # Returns a GET URL (as a String) for the CSV companion object of a
    # result URL.
    def self.s3_csv_file(url)
      s3_object(csv_object_key(url)).url_for(:get, endpoint: "s3.amazonaws.com", :response_content_disposition => "attachment", :response_content_type => "application/csv").to_s
    end

    # Whether the CSV companion object exists. Best effort: an error raised
    # while asking S3 is reported as false.
    def self.has_csv?(url)
      key = csv_object_key(url)
      begin
        s3_object(key).exists?
      rescue StandardError
        false
      end
    end

    # Negation of has_csv?. Also accepts a Hash carrying the URL under
    # :result_url.
    def self.does_not_have_csv?(result_url)
      result_url = result_url[:result_url] if result_url.is_a? Hash
      !has_csv?(result_url)
    end

    # Downloads +url+ and returns the parsed JSON document.
    def self.file(url)
      JSON.parse(RestClient.get(url))
    end

    # Returns the results bucket.
    def self.bucket
      AWS::S3.new.buckets[RESULTS_BUCKET]
    end

    # Returns a GET URL (as a String) for the object a result URL points at.
    #
    # @param url [String] result URL
    # @param content_type [String] content type requested for the response
    def self.s3_url(url, content_type="application/json")
      # The option is spelled :response_content_type; a misspelled key is
      # not a recognised url_for option.
      s3_object(s3_object_key(url)).url_for(:get, endpoint: "s3.amazonaws.com", :response_content_disposition => "attachment", :response_content_type => content_type).to_s
    end

    # Writes +csv+ to the results bucket under +object_name+.
    def self.write_csv_to_s3(object_name, csv)
      bucket.objects[object_name.to_s].write(csv)
    end

    # Writes +csv+ to a file in the current directory, named after the last
    # '/'-separated segment of +object_name+.
    def self.write_csv_to_file(object_name, csv)
      File.open("#{object_name.split('/').last}", 'w') { |file| file.write(csv) }
    end

    # Normalises one cell value: nil becomes "", a value that parses as JSON
    # is replaced by the parsed value's string form, anything else is taken
    # via to_s. The text is cut to MAX_CELL_SIZE characters, double quotes
    # are removed and commas become semicolons.
    #
    # @param string [Object, nil] raw cell value
    # @return [String]
    def self.truncate_to_max_cell_size(string)
      return "" if string.nil?

      text = begin
        JSON.parse(string).to_s
      rescue StandardError
        # Not a JSON document (or not a String at all): use it verbatim.
        string.to_s
      end
      # The cut is applied on both paths so no cell exceeds the limit.
      text[0, MAX_CELL_SIZE].gsub('"', "").gsub(",", ";")
    end

    # Header row: the keys of the first result object, or [] when there are
    # no results.
    def self.parse_csv_headers(result_file)
      first_object = result_file.first
      first_object ? first_object.keys.to_a : []
    end

    # NOTE(review): Yajl is not required anywhere in the visible sources -
    # confirm the constant is available before relying on this.
    def self.parser
      Yajl::Parser.new
    end

    # Builds one CSV row from the values of +object+, in the object's own
    # value order. +columns+ is accepted for compatibility and not used.
    def self.results_values_to_row(object, columns)
      object.values.map { |value| truncate_to_max_cell_size(value) }
    end

    # Downloads the JSON result at +url+ and returns it as CSV text.
    # +user_token+ and +result_id+ are only forwarded to progress reporting.
    # Leaves the JSON module untouched (no freezing of shared library state).
    def self.convert_to_csv(url, user_token, result_id)
      # report_csv_conversion_progress(user_token, result_id, "downloading")
      cached_result = RestClient.get(url)
      result_file = parse_result_payload(cached_result)
      # report_csv_conversion_progress(user_token, result_id, "starting")
      create_matrix(result_file, user_token, result_id).to_csv
    end

    # Builds the CSV matrix: the header row followed by one row per result
    # object, reporting a percentage after each object. An empty result set
    # yields a matrix holding only an empty header row.
    def self.create_matrix(result_file, user_token, result_id)
      headers = parse_csv_headers(result_file)
      matrix = [headers]
      total_size = result_file.size
      result_file.each_with_index do |object, index|
        matrix << results_values_to_row(object, headers.size)
        progress = (((index + 1).to_f / total_size.to_f) * 100).to_i
        report_csv_conversion_progress(user_token, result_id, "converting: #{progress}%")# if progress % 10 == 0
      end
      matrix
    end

    # Prints +message+ over the current terminal line.
    def self.report_csv_conversion_progress(user_token, result_id, message)
      # RealtimeMessage.publish(user_token, 'conversion-status', {:progress => message, :id => result_id})
      print "\r#{message}"
    end

    # Sets the "downloaded" flag of a result through the 80legs API.
    #
    # @return [Integer] HTTP status code of the response
    def self.toggle_downloaded_state(user, id, state)
      # return EightyLegsApi.conn(token: user.token).put("/results/#{id}", {downloaded: state}.to_json, :content_type => :json)
      RestClient.put("https://#{user.token}:@api.80legs.com/v2/results/#{id}", {downloaded: state}.to_json, :content_type => :json) do |response|
        return response.code
      end
    end

    # Manual smoke test: converts a result, uploads the CSV to S3 and prints
    # the download URL. Calls this class's own helpers.
    def self.test_csv(result_url)
      object_name = csv_object_key(result_url)
      csv = convert_to_csv(result_url, 1, 1)
      write_csv_to_s3(object_name, csv)
      puts s3_csv_file(result_url).to_s
    end

    # Key of the CSV companion object: the object key up to its first '.',
    # with "_csv.csv" appended.
    def self.csv_object_key(url)
      "#{s3_object_key(url).split('.').first}_csv.csv"
    end

    # Parses a result payload; if that fails, retries with every "]," turned
    # into ",", then with commas at the start of a line removed.
    def self.parse_result_payload(payload)
      JSON.parse(payload)
    rescue StandardError
      begin
        JSON.parse(payload.gsub(/\],/, ','))
      rescue StandardError
        JSON.parse(payload.gsub(/^,/, ''))
      end
    end

    private_class_method :csv_object_key, :parse_result_payload
  end
end
@@ -0,0 +1,3 @@
module Result2csv
  # Gem version; read by the gemspec. Frozen so the shared constant cannot
  # be mutated in place.
  VERSION = '0.1'.freeze
end
data/lib/result2csv.rb ADDED
@@ -0,0 +1,11 @@
1
+ require 'rest-client'
2
+ require 'addressable/uri'
3
+ require 'addressable/template'
4
+ require 'highline/import'
5
+ require 'aws-sdk-v1'
6
+
module Result2csv
  # Loads every Ruby file in the result2csv/ directory next to this file.
  # The glob result is sorted so the load order is the same on every
  # platform; "_overrides.rb" therefore loads ahead of the other files.
  Dir[File.join(File.dirname(__FILE__), 'result2csv', '*.rb')].sort.each do |file|
    require file
  end
end
@@ -0,0 +1,25 @@
# coding: utf-8
# Gem specification for result2csv.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'result2csv/version'

Gem::Specification.new do |spec|
  spec.name          = "result2csv"
  spec.version       = Result2csv::VERSION
  spec.authors       = ["Nick Prokesch"]
  spec.email         = ["nick@prokes.ch"]
  spec.summary       = %q{Converts json results to csv}
  spec.description   = %q{For use with 80legs}
  spec.homepage      = "http://nick.prokes.ch"
  spec.license       = "MIT"

  # Package the git-tracked files, minus macOS Finder metadata.
  # NUL-separated output keeps paths containing whitespace intact.
  spec.files         = `git ls-files -z`.split("\x0").reject { |path| File.basename(path) == ".DS_Store" }
  spec.require_paths = ["lib"]
  spec.executables   = ['result2csv']

  spec.add_dependency "addressable", "~> 2"
  spec.add_dependency "highline", "~> 1"
  spec.add_dependency "rest-client", "~> 1"
  spec.add_dependency "aws-sdk-v1", "~> 1"
  spec.add_dependency "dotenv", "~> 1"
end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: result2csv
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Nick Prokesch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: addressable
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: highline
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rest-client
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: aws-sdk-v1
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1'
69
+ - !ruby/object:Gem::Dependency
70
+ name: dotenv
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1'
83
+ description: For use with 80legs
84
+ email:
85
+ - nick@prokes.ch
86
+ executables:
87
+ - result2csv
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".DS_Store"
92
+ - ".gitignore"
93
+ - ".ruby-version"
94
+ - 122988_2_csv.csv
95
+ - Gemfile
96
+ - Gemfile.lock
97
+ - README.md
98
+ - bin/result2csv
99
+ - lib/.DS_Store
100
+ - lib/result2csv.rb
101
+ - lib/result2csv/_overrides.rb
102
+ - lib/result2csv/converter.rb
103
+ - lib/result2csv/version.rb
104
+ - result2csv.gemspec
105
+ homepage: http://nick.prokes.ch
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.4.8
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Converts json results to csv
129
+ test_files: []