nsrr 0.1.0.beta1 → 0.1.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 155cc2e206c5b662c49888859663f40c4e376c21
4
- data.tar.gz: 21461c274e3324827ed970425e91647d9fca2ad9
3
+ metadata.gz: 3924a64a6b689e857473a865bbc08a9b0753c575
4
+ data.tar.gz: 54066220485da3828fee75c6864f95bf91e30ae6
5
5
  SHA512:
6
- metadata.gz: 396491cfb57a83a188a9a4abd1e455ff95b3abff322f2a1301694d80334f85e016b49920ef2829e0b2650706c08d06cd2440f4a88d71793fb46a30cc612d43c9
7
- data.tar.gz: c0d6949622fc0ec591013f2b8260b86f5f2b4df51410c80d6a2f6041487a57bd5a92a5951a3bf474229cb618ebc357ee2bcb1269aad5b9e1e3333c824e47bfbd
6
+ metadata.gz: 21f4d39b700cc218e210ca77fafec61ade8fed81da9ee48ba2ec59e650539c5c9642f6490b5394a7da58c335d391c8f1fae4d2adc641f818bb468bf1e38de8b8
7
+ data.tar.gz: 16686364fce98068357195fdbd790661cfa1615fe051d2aaba08445643def2dd0f298ba4ce1ecd19e5df4ec390d331fdb2cb453ab9984f3f502ad2b60fc0029d
data/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  ## 0.1.0
2
2
 
3
3
  ### Enhancements
4
+ - Added a `nsrr console` command that allows users to access and download datasets and files
5
+ - Datasets can be loaded in the console environment
6
+ - `d = Dataset.find 'shhs'`
7
+ - Dataset files can be downloaded as well
8
+ - `d.download`
9
+ - The download function can include a path, method, and depth
10
+ - **path**
11
+ - can be `nil` to download entire dataset or a string to specify a folder
12
+ - **method**
13
+ - 'md5' [default]
14
+ - Checks whether a downloaded file exists with the same MD5 checksum as the online version; if so, skips that file
15
+ - 'fresh'
16
+ - Downloads every file without checking if it was already downloaded
17
+ - 'fast'
18
+ - Only checks whether a downloaded file exists with the same file size as the online version; if so, skips that file
19
+ - **depth**
20
+ - 'recursive' [default]
21
+ - Downloads files in selected path folder and all subfolders
22
+ - 'shallow'
23
+ - Only downloads files in selected path folder
4
24
  - Added a `nsrr version` command that returns the current version of the nsrr gem
5
25
  - Added testing framework to more easily add new tests for new features
data/README.md CHANGED
@@ -26,6 +26,104 @@ Or install it yourself as:
26
26
 
27
27
  ## Usage
28
28
 
29
+ ### Open the console and download entire datasets
30
+
31
+ ```console
32
+ $ nsrr console
33
+ ```
34
+
35
+ ```
36
+ > d = Dataset.find 'shhs'
37
+ > d.download
38
+
39
+ File Integrity Check Method: md5
40
+ Depth: recursive
41
+ Get your token here: https://sleepdata.org/token
42
+ Please enter your download token:
43
+ create shhs/
44
+ create shhs/datasets
45
+ identical shhs-cvd-dataset-0.4.0.csv
46
+ identical shhs-data-dictionary-0.4.0-domains.csv
47
+ identical shhs-data-dictionary-0.4.0-forms.csv
48
+ identical shhs-data-dictionary-0.4.0-variables.csv
49
+ identical shhs1-dataset-0.4.0.csv
50
+ ...
51
+ ```
52
+
53
+ **method**
54
+ - 'md5' [default]
55
+ - Checks whether a downloaded file exists with the same MD5 checksum as the online version; if so, skips that file
56
+ - 'fresh'
57
+ - Downloads every file without checking if it was already downloaded
58
+ - 'fast'
59
+ - Only checks whether a downloaded file exists with the same file size as the online version; if so, skips that file
60
+
61
+ **depth**
62
+ - 'recursive' [default]
63
+ - Downloads files in selected path folder and all subfolders
64
+ - 'shallow'
65
+ - Only downloads files in selected path folder
66
+
67
+ For example, to download only the shhs1 edfs folder and skip MD5 file validation:
68
+
69
+ ```
70
+ > d = Dataset.find 'shhs'
71
+ > d.download('edfs/shhs1', method: 'fast', depth: 'shallow')
72
+
73
+ File Integrity Check Method: fast
74
+ Depth: shallow
75
+ Get your token here: https://sleepdata.org/token
76
+ Please enter your download token:
77
+
78
+ create shhs/edfs/shhs1
79
+ download 100001.EDF
80
+ download 100002.EDF
81
+ download 100003.EDF
82
+ download 100004.EDF
83
+ ...
84
+ ```
85
+
86
+ You can press `Ctrl-C` to interrupt the download, then rerun the command to resume:
87
+
88
+ ```
89
+ > d = Dataset.find 'shhs'
90
+ > d.download
91
+
92
+ File Integrity Check Method: md5
93
+ Depth: recursive
94
+ Get your token here: https://sleepdata.org/token
95
+ Please enter your download token:
96
+
97
+ create shhs/
98
+ create shhs/datasets
99
+ download shhs-cvd-dataset-0.4.0.csv
100
+ download shhs-data-dictionary-0.4.0-domains.csv
101
+ ^C
102
+ Interrupted
103
+
104
+ Finished in 4.384734 seconds.
105
+
106
+ 1 folder created, 2 files downloaded, 60 MiBs downloaded, 0 files skipped, 0 files failed
107
+
108
+ > d.download
109
+
110
+ File Integrity Check Method: md5
111
+ Depth: recursive
112
+ Get your token here: https://sleepdata.org/token
113
+ Please enter your download token:
114
+
115
+ create shhs/
116
+ create shhs/datasets
117
+ identical shhs-cvd-dataset-0.4.0.csv
118
+ identical shhs-data-dictionary-0.4.0-domains.csv
119
+ ^C
120
+ Interrupted
121
+
122
+ Finished in 2.384734 seconds.
123
+
124
+ 1 folder created, 0 files downloaded, 0 MiBs downloaded, 2 files skipped, 0 files failed
125
+ ```
126
+
29
127
  ### Show the version of the NSRR gem
30
128
 
31
129
  ```
data/lib/nsrr.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "nsrr/version"
2
2
 
3
3
  Nsrr::COMMANDS = {
4
+ 'c' => :console,
4
5
  'v' => :version
5
6
  }
6
7
 
@@ -9,12 +10,36 @@ module Nsrr
9
10
  self.send((Nsrr::COMMANDS[argv.first.to_s.scan(/\w/).first] || :help), argv)
10
11
  end
11
12
 
13
# Starts the interactive NSRR console.
#
# The console command is required lazily, inside the method, so that IRB and
# the model classes are only loaded when the console is actually requested.
#
# @param argv [Array<String>] command-line arguments, forwarded unchanged to
#   Nsrr::Commands::Console.start
def self.console(argv)
  require 'nsrr/commands/console'
  Nsrr::Commands::Console.start(argv)
end
34
+
12
35
  def self.help(argv)
13
36
  puts <<-EOT
14
37
 
15
38
  Usage: nsrr COMMAND [ARGS]
16
39
 
17
40
  The most common nsrr commands are:
41
+ [c]onsole Load an interactive console to access
42
+ and download datasets and files
18
43
  [v]ersion Returns the version of nsrr gem
19
44
 
20
45
  Commands can be referenced by the first letter:
@@ -0,0 +1,28 @@
1
+ require 'irb'
2
+ require 'irb/completion'
3
+ require 'nsrr/models/all'
4
+
5
module Nsrr
  module Commands
    # Thin wrapper that launches an IRB session for the `nsrr console` command.
    class Console
      # The object whose `start` method opens the session (IRB).
      attr_reader :console

      # Builds a console from the given arguments and starts it right away.
      def self.start(*args)
        instance = new(*args)
        instance.start
      end

      # @param argv [Array<String>] command-line arguments (not used here)
      def initialize(argv)
        # Empties the process-wide ARGV before IRB is started.
        # NOTE(review): presumably so IRB does not try to interpret the
        # `console` argument itself -- confirm.
        ARGV.clear
        @console = IRB
      end

      # Prints the version banner and hands control to the console.
      def start
        banner = "Loading environment (Nsrr #{Nsrr::VERSION::STRING})"
        puts banner
        console.start
      end
    end
  end
end
@@ -0,0 +1 @@
1
module Nsrr
  # Base URL used to build dataset, manifest and file-download URLs.
  # Defined inside `module Nsrr` so this file can be loaded on its own, and
  # frozen because it is only ever interpolated into other strings.
  WEBSITE = "https://sleepdata.org".freeze
end
@@ -0,0 +1,48 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
4
module Nsrr
  module Helpers
    # Downloads a single remote file over HTTP(S) and writes it to a local path.
    #
    # Failures are reported through #error instead of being raised, so a caller
    # can record a failed file and carry on with the next one.
    class DownloadRequest
      class << self
        # Convenience wrapper: builds a request and performs it immediately.
        #
        # @return [Integer, String] see #get
        def get(*args)
          new(*args).get
        end
      end

      # url       -> parsed URI of the remote file (nil if the URL was invalid)
      # error     -> nil on success, otherwise a human-readable message
      # file_size -> size in bytes of the written file (0 until a download succeeds)
      attr_reader :url, :error, :file_size

      # @param url [String] remote URL; unsafe characters (e.g. spaces) are escaped
      # @param download_folder [String] despite the name, the full local path
      #   of the file that will be written
      def initialize(url, download_folder)
        @download_folder = download_folder
        @file_size = 0
        @error = nil
        begin
          # URI.escape was removed in Ruby 3.0; the RFC 2396 parser performs
          # the same escaping.
          @url = URI.parse(URI::RFC2396_Parser.new.escape(url))
          @http = Net::HTTP.new(@url.host, @url.port)
          @http.use_ssl = true if @url.scheme == 'https'
        rescue StandardError
          @error = 'Invalid Token'
        end
      end

      # Performs the GET request and, on a 200 response, writes the body to the
      # local path.
      #
      # @return [Integer, String] the written file size on success, otherwise
      #   the error message (also available through #error)
      def get
        # A request that could not be built has no @url/@http to work with.
        return @error if @error

        response = @http.start do |http|
          # request_uri keeps the query string and is never empty, unlike path.
          http.request(Net::HTTP::Get.new(@url.request_uri))
        end
        handle_response(response)
      rescue StandardError => e
        # Network and filesystem problems become a per-file error rather than
        # aborting the caller. Interrupt is not a StandardError, so Ctrl-C
        # still propagates.
        @error = "#{e.class.name}: #{e.message}"
      end

      private

      # Saves the body of a successful response or records why it failed.
      def handle_response(response)
        case response.code
        when '200'
          ::File.open(@download_folder, 'wb') do |local_file|
            local_file.write(response.body)
          end
          @file_size = ::File.size(@download_folder)
        when '302'
          @error = 'Token Not Authorized to Access Specified File'
        else
          @error = "#{response.code} #{response.class.name}"
        end
      end
    end
  end
end
47
+
48
+
@@ -0,0 +1,9 @@
1
module Nsrr
  module Helpers
    # Utility methods for working with option hashes.
    class HashHelper
      # Returns a new hash whose keys are converted to symbols.
      #
      # Keys that cannot be converted (anything that does not respond to
      # `to_sym`, such as Integers) are kept unchanged instead of raising, and
      # a nil hash is treated as empty. The input hash is not modified.
      #
      # @param hash [Hash, nil] the hash to convert
      # @return [Hash] a new hash with symbolized keys
      def self.symbolize_keys(hash)
        (hash || {}).each_with_object({}) do |(key, value), result|
          symbol_key = key.respond_to?(:to_sym) ? key.to_sym : key
          result[symbol_key] = value
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'net/http'
2
+ require 'json'
3
+
4
module Nsrr
  module Helpers
    # Fetches a URL over HTTP(S) and parses the response body as JSON.
    class JsonRequest
      class << self
        # Convenience wrapper: builds a request and performs it immediately.
        #
        # @return [Object, nil] see #get
        def get(*args)
          new(*args).get
        end
      end

      # The parsed URI this request will fetch.
      attr_reader :url

      # @param url [String] the URL to fetch
      def initialize(url)
        @url = URI.parse(url)
        @http = Net::HTTP.new(@url.host, @url.port)
        @http.use_ssl = true if @url.scheme == 'https'
      end

      # Performs the GET request.
      #
      # @return [Object, nil] the parsed JSON document, or nil when the body is
      #   missing or is not valid JSON
      def get
        response = @http.start do |http|
          # request_uri keeps the query string and yields "/" for a URL with no
          # path, where an empty path would make Net::HTTP::Get.new raise.
          http.request(Net::HTTP::Get.new(@url.request_uri))
        end
        parse_json(response.body)
      end

      private

      # Parses +body+ as JSON, returning nil instead of raising when the body
      # cannot be parsed.
      def parse_json(body)
        JSON.parse(body)
      rescue JSON::ParserError, TypeError, EncodingError
        nil
      end
    end
  end
end
31
+
32
+
@@ -0,0 +1 @@
1
+ require 'nsrr/models/dataset'
@@ -0,0 +1,126 @@
1
+ require 'colorize'
2
+ require 'fileutils'
3
+
4
+ require 'nsrr/helpers/constants'
5
+ require 'nsrr/helpers/hash_helper'
6
+ require 'nsrr/helpers/json_request'
7
+
8
+ require 'nsrr/models/file'
9
+
10
module Nsrr
  module Models
    # A dataset hosted on the NSRR website, with helpers to list its files and
    # download them into a local folder named after the dataset slug.
    class Dataset
      # Looks a dataset up by slug.
      #
      # @param slug [String] dataset identifier, e.g. 'shhs'
      # @return [Dataset, nil] the dataset, or nil when no JSON could be fetched
      def self.find(slug)
        json = Nsrr::Helpers::JsonRequest.get("#{Nsrr::WEBSITE}/datasets/#{slug}.json")
        new(json) if json
      end

      attr_reader :slug, :name

      # @param json [Hash] dataset attributes; reads the 'slug' and 'name' keys
      def initialize(json = {})
        @slug = json['slug']
        @name = json['name']
        @files = {}
        @download_token = nil
      end

      # Lists the entries (files and folders) directly inside +path+.
      # The manifest for each path is fetched once and then cached.
      #
      # @param path [String, nil] folder inside the dataset; nil for the root
      # @return [Array<Nsrr::Models::File>]
      def files(path = nil)
        @files[path] ||= begin
          json = Nsrr::Helpers::JsonRequest.get("#{Nsrr::WEBSITE}/datasets/#{@slug}/json_manifest/#{path}")
          (json || []).map { |file_json| Nsrr::Models::File.new(file_json) }
        end
      end

      # Names of the folders directly inside +path+.
      #
      # @param path [String, nil] folder inside the dataset; nil for the root
      # @return [Array<String>]
      def folders(path = nil)
        files(path).reject(&:is_file).map(&:name)
      end

      # Downloads the files below +path+ and prints a summary.
      #
      # Options (a Hash passed after +path+; String or Symbol keys and values):
      #   method:
      #     'md5'       => [default] Skips a file when a local copy with the same md5 as the online version exists
      #     'fresh'     => Downloads every file without checking if it was already downloaded
      #     'fast'      => Skips a file when a local copy with the same file size as the online version exists
      #   depth:
      #     'recursive' => [default] Downloads files in selected path folder and all subfolders
      #     'shallow'   => Only downloads files in selected path folder
      #
      # @param path [String, nil] folder to download; nil for the whole dataset
      # @return [nil]
      def download(path = nil, *args)
        options = Nsrr::Helpers::HashHelper.symbolize_keys(args.first || {})
        # Normalize to strings so `depth: :shallow` behaves like `depth: 'shallow'`.
        options[:method] = (options[:method] || 'md5').to_s
        options[:depth] = (options[:depth] || 'recursive').to_s
        reset_counters

        begin
          if @download_token.to_s == ''
            puts " File Integrity Check Method: " + options[:method].to_s.colorize(:white)
            puts " Depth: " + options[:depth].to_s.colorize(:white)
            set_download_token
          end

          @start_time = Time.now

          download_helper(path, options)
        rescue *interrupt_errors
          puts "\n Interrupted".colorize(:red)
        end

        print_summary
        nil
      end

      # Creates the local folder for +path+, downloads its files and, when
      # options[:depth] is 'recursive', descends into its subfolders.
      def download_helper(path, options)
        current_folder = ::File.join(slug.to_s, path.to_s)
        create_folder(current_folder)

        entries = files(path)

        entries.select(&:is_file).each do |file|
          result = file.download(options[:method], current_folder, @download_token)
          case result
          when 'fail'
            @files_failed += 1
          when 'skip'
            @files_skipped += 1
          else
            # Any other result is the number of bytes written.
            @files_downloaded += 1
            @downloaded_bytes += result
          end
        end

        return unless options[:depth].to_s == 'recursive'

        entries.reject(&:is_file).each do |subfolder|
          download_helper([path, subfolder.name].compact.join('/'), options)
        end
      end

      # Creates +folder+ (and any missing parents) and counts it.
      def create_folder(folder)
        puts " create".colorize(:white) + " #{folder}"
        FileUtils.mkdir_p folder
        @folders_created += 1
      end

      # Prompts for the download token and remembers it for later downloads.
      def set_download_token
        puts " Get your token here: " + "https://sleepdata.org/token".colorize(:blue)
        print "Please enter your download token: "
        # Read from $stdin explicitly: Kernel#gets would read from files named
        # in ARGV. `to_s` covers end-of-input, where gets returns nil.
        @download_token = $stdin.gets.to_s.chomp
      end

      private

      # Resets the per-download statistics.
      def reset_counters
        @folders_created = 0
        @files_downloaded = 0
        @downloaded_bytes = 0
        @files_skipped = 0
        @files_failed = 0
        @start_time = nil
      end

      # Exceptions that mean "the user pressed Ctrl-C". IRB::Abort is only
      # listed when IRB is loaded, so the rescue clause cannot raise NameError.
      def interrupt_errors
        defined?(IRB::Abort) ? [Interrupt, IRB::Abort] : [Interrupt]
      end

      # Prints the elapsed time and the download statistics.
      def print_summary
        # @start_time is still nil when the download was interrupted at the
        # token prompt.
        elapsed = @start_time ? Time.now - @start_time : 0.0
        @downloaded_megabytes = @downloaded_bytes / (1024 * 1024)

        puts "\nFinished in #{elapsed} seconds."
        parts = [
          "\n#{pluralize(@folders_created, 'folder')} created".colorize(:white),
          "#{pluralize(@files_downloaded, 'file')} downloaded".colorize(:green),
          "#{pluralize(@downloaded_megabytes, 'MiB')} downloaded".colorize(:green),
          "#{pluralize(@files_skipped, 'file')} skipped".colorize(:blue),
          "#{pluralize(@files_failed, 'file')} failed".colorize(:red)
        ]
        puts parts.join(", ") + "\n\n"
      end

      # "1 file", "2 files", "0 files".
      def pluralize(count, noun)
        "#{count} #{noun}#{'s' if count != 1}"
      end
    end
  end
end
124
+
125
# Top-level shortcut so `Dataset.find 'shhs'` can be typed without the
# Nsrr::Models namespace.
class Dataset < Nsrr::Models::Dataset; end
@@ -0,0 +1,98 @@
1
+ # Models a downloadable file or folder
2
+ require 'digest/md5'
3
+
4
+ require 'nsrr/helpers/constants'
5
+ require 'nsrr/helpers/download_request'
6
+
7
module Nsrr
  module Models
    # Models a downloadable file or folder listed in a dataset manifest.
    class File
      attr_reader :name, :is_file, :web_file_size, :web_checksum

      # @param json [Hash] manifest entry; reads the 'file_name', 'checksum',
      #   'is_file', 'file_size', 'dataset' and 'file_path' keys
      def initialize(json = {})
        @name = json['file_name']
        @web_checksum = json['checksum']
        @is_file = json['is_file']
        @web_file_size = json['file_size']
        @dataset_slug = json['dataset']
        @file_path = json['file_path']
      end

      # Downloads the file into +path+ unless the chosen check says the local
      # copy is already up to date.
      #
      # method (String or Symbol):
      #   'md5'   => Skips the file when a local copy with the same md5 as the online version exists
      #   'fresh' => Downloads the file without checking if it was already downloaded
      #   'fast'  => Skips the file when a local copy with the same file size as the online version exists
      # Any other value is treated like 'fast'.
      #
      # @param path [String] folder, relative to the working directory
      # @param token [String] download token
      # @return [Integer, String] bytes written, or 'skip' / 'fail'
      def download(method, path, token)
        # to_s so that :md5 / :fresh select the same mode as 'md5' / 'fresh'.
        case method.to_s
        when 'md5'
          md5_matches?(path) ? skip : force_download(path, token)
        when 'fresh'
          force_download(path, token)
        else
          file_size_matches?(path) ? skip : force_download(path, token)
        end
      end

      # Downloads the file unconditionally and reports the outcome.
      #
      # @return [Integer, String] bytes written on success, 'fail' otherwise
      def force_download(path, token)
        download_url = "#{Nsrr::WEBSITE}/datasets/#{@dataset_slug}/files/a/#{token}/m/nsrr-gem-v#{Nsrr::VERSION::STRING.gsub('.', '-')}/#{@file_path.to_s}"
        download_request = Nsrr::Helpers::DownloadRequest.new(download_url, local_path(path))
        download_request.get

        if download_request.error.to_s == ''
          puts " download".colorize(:green) + " #{@name}"
          download_request.file_size
        else
          puts " failed".colorize(:red) + " #{@name}"
          puts " #{download_request.error}"
          'fail'
        end
      end

      # Reports that the local copy is up to date.
      #
      # @return [String] 'skip'
      def skip
        puts " identical".colorize(:blue) + " #{name}"
        'skip'
      end

      # @return [Boolean] whether the local copy has the online md5 checksum
      def md5_matches?(path)
        @web_checksum == local_checksum(path)
      end

      # @return [String] md5 hex digest of the local copy, "" when it is missing
      def local_checksum(path)
        target = local_path(path)
        ::File.exist?(target) ? Digest::MD5.file(target).hexdigest : ""
      end

      # @return [Boolean] whether the local copy has the online file size
      def file_size_matches?(path)
        @web_file_size == local_filesize(path)
      end

      # @return [Integer] size in bytes of the local copy, -1 when it is missing
      def local_filesize(path)
        target = local_path(path)
        ::File.exist?(target) ? ::File.size(target) : -1
      end

      private

      # Absolute path of this file inside +path+ under the working directory.
      def local_path(path)
        ::File.join(Dir.pwd, path.to_s, name.to_s)
      end
    end
  end
end
98
+
data/lib/nsrr/version.rb CHANGED
@@ -3,7 +3,7 @@ module Nsrr
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
5
  TINY = 0
6
- BUILD = "beta1" # nil, "pre", "rc", "rc2"
6
+ BUILD = "beta2" # nil, "pre", "rc", "rc2"
7
7
 
8
8
  STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.')
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nsrr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.beta1
4
+ version: 0.1.0.beta2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Remo Mueller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-25 00:00:00.000000000 Z
11
+ date: 2014-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -95,6 +95,14 @@ files:
95
95
  - Rakefile
96
96
  - bin/nsrr
97
97
  - lib/nsrr.rb
98
+ - lib/nsrr/commands/console.rb
99
+ - lib/nsrr/helpers/constants.rb
100
+ - lib/nsrr/helpers/download_request.rb
101
+ - lib/nsrr/helpers/hash_helper.rb
102
+ - lib/nsrr/helpers/json_request.rb
103
+ - lib/nsrr/models/all.rb
104
+ - lib/nsrr/models/dataset.rb
105
+ - lib/nsrr/models/file.rb
98
106
  - lib/nsrr/version.rb
99
107
  - nsrr.gemspec
100
108
  homepage: https://github.com/nsrr/nsrr-gem