datahunter 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/hunter +1 -1
- data/lib/datahunter/base.rb +49 -42
- data/lib/datahunter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a69c30628131f2921984bddf97d7285b72df1fec
|
4
|
+
data.tar.gz: db641793fbb347d570595ee76cf47e5e5534e4f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 962d15c0e12844ec7c99fa69ed8f1e566b629c5f5b2f4d6e239d36907a54b9d761fbbf3166b02f58cbe17ab1bad5e7e5f9a6e2a84dce1d4b76639a1bb6ff28cb
|
7
|
+
data.tar.gz: ed0e562e83f233bb4491ace62c157c0dca9a417d413d4a319fef8bbc6e7cc1d1a9fb2d5abe63cbdc88af73a74deaf2b2cc5d56c17c596d83d92bed8e2257a65d
|
data/bin/hunter
CHANGED
@@ -53,7 +53,7 @@ command :find do |c|
|
|
53
53
|
|
54
54
|
case ask "### get the data? (y/n)".colorize(:yellow)
|
55
55
|
when 'y'
|
56
|
-
if datasets[index].has_key?("resources")
|
56
|
+
if datasets[index].has_key?("resources") and datasets[index]["resources"].any?
|
57
57
|
Datahunter.download_the_data datasets[index]
|
58
58
|
else
|
59
59
|
Datahunter.open_in_browser datasets[index]["uri"]
|
data/lib/datahunter/base.rb
CHANGED
@@ -2,14 +2,16 @@ require 'colorize'
|
|
2
2
|
require 'launchy'
|
3
3
|
require 'downloadr'
|
4
4
|
require 'addressable/uri'
|
5
|
-
# require 'Pathname'
|
6
5
|
|
7
6
|
module Datahunter
|
8
7
|
|
9
8
|
# DATASETS_URL = "http://localhost:3000/api/datasets/"
|
10
9
|
DATASETS_URL = "http://shrouded-harbor-5877.herokuapp.com/api/datasets/"
|
11
10
|
FEEDBACK_URL = "https://docs.google.com/forms/d/1yNzZjCCXvWHQCbWz4sx-nui3LafeeLcT7FF9T-vbKvw/viewform"
|
12
|
-
REQUEST_URL =
|
11
|
+
REQUEST_URL =
|
12
|
+
"https://docs.google.com/forms/d/1NRKWmb_mcpKJmrutXvZSZnysM_v0rfLhjD897H3Myrw/viewform?usp=send_form"
|
13
|
+
|
14
|
+
@extensions = ["json", "csv", "xml", "zip", "gz", "xls", "xlsx", "tsv"]
|
13
15
|
|
14
16
|
def self.datasets_url tag, geo=nil, temp=nil
|
15
17
|
tag = tag.downcase.split.first if tag
|
@@ -41,7 +43,44 @@ module Datahunter
|
|
41
43
|
puts ("score: ".colorize(:green) + "#{dataset["huntscore"]}")
|
42
44
|
puts
|
43
45
|
end
|
44
|
-
|
46
|
+
|
47
|
+
def self.print_downloadable_links resources
|
48
|
+
resources.each_with_index do |dl, i|
|
49
|
+
puts ("#{i}. ".colorize(:yellow) +
|
50
|
+
"#{dl["title"]} - ".colorize(:blue) +
|
51
|
+
"#{dl["format"]}".colorize(:green))
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.parse_extension_from_uri uri
|
56
|
+
uri.basename.split(".").last
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.download_file url, format="", alt_url=""
|
60
|
+
uri = Addressable::URI.parse(url)
|
61
|
+
extension = Datahunter.parse_extension_from_uri uri
|
62
|
+
|
63
|
+
if !@extensions.include? extension
|
64
|
+
Launchy.open(url, options = {})
|
65
|
+
else
|
66
|
+
location = Dir.pwd
|
67
|
+
uri = Addressable::URI.parse(url)
|
68
|
+
file_name = uri.basename
|
69
|
+
loc = location + "/" + file_name
|
70
|
+
|
71
|
+
case ask ("Create/overwrite #{loc}?(y/rename/n)".colorize(:yellow))
|
72
|
+
when 'rename'
|
73
|
+
loc = ask "Path/to/filename: ".colorize(:yellow)
|
74
|
+
when 'n'
|
75
|
+
abort("Ok then")
|
76
|
+
end
|
77
|
+
puts "Start downloading..."
|
78
|
+
Downloadr::HTTP.download(url, loc)
|
79
|
+
puts "Your file has been downloaded ;)".colorize(:green)
|
80
|
+
Datahunter.print_excuse_and_alternative_url_message alt_url
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
45
84
|
def self.download_the_data dataset
|
46
85
|
resources = dataset["resources"]
|
47
86
|
number_of_downloadable_links = resources.size
|
@@ -52,11 +91,11 @@ module Datahunter
|
|
52
91
|
Datahunter.print_downloadable_links resources
|
53
92
|
dl = ask("### which one? (0/1/...)".colorize(:yellow), Integer) {|i| i.in = 0..(number_of_downloadable_links - 1)}
|
54
93
|
end
|
55
|
-
|
94
|
+
|
56
95
|
dl = dl.to_i
|
57
96
|
Datahunter.download_file(resources[dl]["url"], resources[dl]["format"], dataset["uri"])
|
58
97
|
end
|
59
|
-
|
98
|
+
|
60
99
|
def self.open_in_browser url
|
61
100
|
if url =~ /\A#{URI::regexp}\z/
|
62
101
|
puts "You can't download this dataset directly, but you should from there"
|
@@ -90,45 +129,13 @@ module Datahunter
|
|
90
129
|
"if you just want to give us a feedback, don't hesitate!".colorize(:red)
|
91
130
|
end
|
92
131
|
|
93
|
-
def self.print_bad_uri_message
|
94
|
-
puts "The URL given by the publisher is not valid. We'll try to find out why "\
|
95
|
-
"as soon as we can!".colorize(:red)
|
96
|
-
end
|
97
|
-
|
98
|
-
private
|
99
|
-
|
100
|
-
def self.print_downloadable_links resources
|
101
|
-
resources.each_with_index do |dl, i|
|
102
|
-
puts ("#{i}. ".colorize(:yellow) +
|
103
|
-
"#{dl["title"]} - ".colorize(:blue) +
|
104
|
-
"#{dl["format"]}".colorize(:green))
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.download_file url, format="", alt_url=""
|
109
|
-
if format == "HTML"
|
110
|
-
Launchy.open(url, options = {})
|
111
|
-
else
|
112
|
-
location = Dir.pwd
|
113
|
-
uri = Addressable::URI.parse(url)
|
114
|
-
file_name = uri.basename
|
115
|
-
loc = location + "/" + file_name
|
116
|
-
|
117
|
-
case ask ("Create/overwrite #{loc}?(y/rename/n)".colorize(:yellow))
|
118
|
-
when 'rename'
|
119
|
-
loc = ask "Path/to/filename: ".colorize(:yellow)
|
120
|
-
when 'n'
|
121
|
-
abort("Ok then")
|
122
|
-
end
|
123
|
-
puts "Start downloading..."
|
124
|
-
Downloadr::HTTP.download(url, loc)
|
125
|
-
puts "Your file has been downloaded ;)".colorize(:green)
|
126
|
-
Datahunter.print_excuse_and_alternative_url_message alt_url
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
132
|
def self.print_excuse_and_alternative_url_message alt_url=""
|
131
133
|
puts "If this is not the file you expected, it's maybe because publisher don't always keep the metadata up-to-date. We try to clean most of uri's and check the url. Anyway you may be able to download your file by hand here:"
|
132
134
|
puts "#{alt_url}".colorize(:blue)
|
133
135
|
end
|
136
|
+
|
137
|
+
def self.print_bad_uri_message
|
138
|
+
puts "The URL given by the publisher is not valid. We'll try to find out why "\
|
139
|
+
"as soon as we can!".colorize(:red)
|
140
|
+
end
|
134
141
|
end
|
data/lib/datahunter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahunter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Terpo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|