nsrr 0.1.0.beta1 → 0.1.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 155cc2e206c5b662c49888859663f40c4e376c21
4
- data.tar.gz: 21461c274e3324827ed970425e91647d9fca2ad9
3
+ metadata.gz: 3924a64a6b689e857473a865bbc08a9b0753c575
4
+ data.tar.gz: 54066220485da3828fee75c6864f95bf91e30ae6
5
5
  SHA512:
6
- metadata.gz: 396491cfb57a83a188a9a4abd1e455ff95b3abff322f2a1301694d80334f85e016b49920ef2829e0b2650706c08d06cd2440f4a88d71793fb46a30cc612d43c9
7
- data.tar.gz: c0d6949622fc0ec591013f2b8260b86f5f2b4df51410c80d6a2f6041487a57bd5a92a5951a3bf474229cb618ebc357ee2bcb1269aad5b9e1e3333c824e47bfbd
6
+ metadata.gz: 21f4d39b700cc218e210ca77fafec61ade8fed81da9ee48ba2ec59e650539c5c9642f6490b5394a7da58c335d391c8f1fae4d2adc641f818bb468bf1e38de8b8
7
+ data.tar.gz: 16686364fce98068357195fdbd790661cfa1615fe051d2aaba08445643def2dd0f298ba4ce1ecd19e5df4ec390d331fdb2cb453ab9984f3f502ad2b60fc0029d
data/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  ## 0.1.0
2
2
 
3
3
  ### Enhancements
4
+ - Added a `nsrr console` command that allows users to access and download datasets and files
5
+ - Datasets can be loaded in the console environment
6
+ - `d = Dataset.find 'shhs'`
7
+ - Dataset files can be downloaded as well
8
+ - `d.download`
9
+ - The download function can include a path, method, and depth
10
+ - **path**
11
+ - can be `nil` to download entire dataset or a string to specify a folder
12
+ - **method**
13
+ - 'md5' [default]
14
+ - Checks if a downloaded file exists with the same md5 as the online version; if so, skips that file
15
+ - 'fresh'
16
+ - Downloads every file without checking if it was already downloaded
17
+ - 'fast'
18
+ - Only checks if a downloaded file exists with the same file size as the online version; if so, skips that file
19
+ - **depth**
20
+ - 'recursive' [default]
21
+ - Downloads files in selected path folder and all subfolders
22
+ - 'shallow'
23
+ - Only downloads files in selected path folder
4
24
  - Added a `nsrr version` command that returns the current version of the nsrr gem
5
25
  - Added testing framework to more easily add new tests for new features
data/README.md CHANGED
@@ -26,6 +26,104 @@ Or install it yourself as:
26
26
 
27
27
  ## Usage
28
28
 
29
+ ### Open the console and download entire datasets
30
+
31
+ ```console
32
+ $ nsrr console
33
+ ```
34
+
35
+ ```
36
+ > d = Dataset.find 'shhs'
37
+ > d.download
38
+
39
+ File Integrity Check Method: md5
40
+ Depth: recursive
41
+ Get your token here: https://sleepdata.org/token
42
+ Please enter your download token:
43
+ create shhs/
44
+ create shhs/datasets
45
+ identical shhs-cvd-dataset-0.4.0.csv
46
+ identical shhs-data-dictionary-0.4.0-domains.csv
47
+ identical shhs-data-dictionary-0.4.0-forms.csv
48
+ identical shhs-data-dictionary-0.4.0-variables.csv
49
+ identical shhs1-dataset-0.4.0.csv
50
+ ...
51
+ ```
52
+
53
+ **method**
54
+ - 'md5' [default]
55
+ - Checks if a downloaded file exists with the same md5 as the online version; if so, skips that file
56
+ - 'fresh'
57
+ - Downloads every file without checking if it was already downloaded
58
+ - 'fast'
59
+ - Only checks if a downloaded file exists with the same file size as the online version; if so, skips that file
60
+
61
+ **depth**
62
+ - 'recursive' [default]
63
+ - Downloads files in selected path folder and all subfolders
64
+ - 'shallow'
65
+ - Only downloads files in selected path folder
66
+
67
+ For example, to download only the shhs1 edfs folder and skip MD5 file validation:
68
+
69
+ ```
70
+ > d = Dataset.find 'shhs'
71
+ > d.download('edfs/shhs1', method: 'fast', depth: 'shallow')
72
+
73
+ File Integrity Check Method: fast
74
+ Depth: shallow
75
+ Get your token here: https://sleepdata.org/token
76
+ Please enter your download token:
77
+
78
+ create shhs/edfs/shhs1
79
+ download 100001.EDF
80
+ download 100002.EDF
81
+ download 100003.EDF
82
+ download 100004.EDF
83
+ ...
84
+ ```
85
+
86
+ You can press `Ctrl-C` to interrupt the download, then rerun the command to resume; files that were already downloaded are skipped:
87
+
88
+ ```
89
+ > d = Dataset.find 'shhs'
90
+ > d.download
91
+
92
+ File Integrity Check Method: md5
93
+ Depth: recursive
94
+ Get your token here: https://sleepdata.org/token
95
+ Please enter your download token:
96
+
97
+ create shhs/
98
+ create shhs/datasets
99
+ download shhs-cvd-dataset-0.4.0.csv
100
+ download shhs-data-dictionary-0.4.0-domains.csv
101
+ ^C
102
+ Interrupted
103
+
104
+ Finished in 4.384734 seconds.
105
+
106
+ 1 folder created, 2 files downloaded, 60 MiBs downloaded, 0 files skipped, 0 files failed
107
+
108
+ > d.download
109
+
110
+ File Integrity Check Method: md5
111
+ Depth: recursive
112
+ Get your token here: https://sleepdata.org/token
113
+ Please enter your download token:
114
+
115
+ create shhs/
116
+ create shhs/datasets
117
+ identical shhs-cvd-dataset-0.4.0.csv
118
+ identical shhs-data-dictionary-0.4.0-domains.csv
119
+ ^C
120
+ Interrupted
121
+
122
+ Finished in 2.384734 seconds.
123
+
124
+ 1 folder created, 0 files downloaded, 0 MiBs downloaded, 2 files skipped, 0 files failed
125
+ ```
126
+
29
127
  ### Show the version of the NSRR gem
30
128
 
31
129
  ```
data/lib/nsrr.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "nsrr/version"
2
2
 
3
3
  Nsrr::COMMANDS = {
4
+ 'c' => :console,
4
5
  'v' => :version
5
6
  }
6
7
 
@@ -9,12 +10,36 @@ module Nsrr
9
10
  self.send((Nsrr::COMMANDS[argv.first.to_s.scan(/\w/).first] || :help), argv)
10
11
  end
11
12
 
13
# Starts the interactive console (the `nsrr console` command).
#
# The console command file is required here, rather than at load time, so that
# it (and the IRB and model requires at its top) is only loaded when the
# console is actually requested.
#
# argv - the command-line arguments, passed through unchanged to
#        Nsrr::Commands::Console.start
def self.console(argv)
  require 'nsrr/commands/console'
  Nsrr::Commands::Console.start(argv)
end
34
+
12
35
  def self.help(argv)
13
36
  puts <<-EOT
14
37
 
15
38
  Usage: nsrr COMMAND [ARGS]
16
39
 
17
40
  The most common nsrr commands are:
41
+ [c]onsole Load an interactive console to access
42
+ and download datasets and files
18
43
  [v]ersion Returns the version of nsrr gem
19
44
 
20
45
  Commands can be referenced by the first letter:
@@ -0,0 +1,28 @@
1
+ require 'irb'
2
+ require 'irb/completion'
3
+ require 'nsrr/models/all'
4
+
5
module Nsrr
  module Commands
    # Wraps IRB as the interactive session behind the `nsrr console` command.
    class Console
      attr_reader :console

      # Builds a console from the given arguments and starts it immediately.
      def self.start(*args)
        new(*args).start
      end

      # argv is accepted for the command interface but is not read here;
      # the process-wide ARGV is emptied as part of construction.
      def initialize(argv)
        @console = IRB
        ARGV.clear
      end

      # Prints the version banner, then hands control to the wrapped console.
      def start
        puts "Loading environment (Nsrr #{Nsrr::VERSION::STRING})"
        console.start
      end
    end
  end
end
@@ -0,0 +1 @@
1
# Base URL of the website that dataset metadata and file downloads are
# requested from. Frozen so the shared string cannot be mutated in place.
Nsrr::WEBSITE = "https://sleepdata.org".freeze
@@ -0,0 +1,48 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
4
module Nsrr
  module Helpers
    # Downloads the body of a single URL into a local file over HTTP(S).
    #
    # After #get, #error is nil on success or a short message describing the
    # failure, and #file_size holds the size in bytes of the written file
    # (0 when nothing was written).
    class DownloadRequest
      class << self
        # Convenience wrapper: builds a request from +args+ and performs it.
        def get(*args)
          new(*args).get
        end
      end

      attr_reader :url, :error, :file_size

      # url             - String URL of the remote file; it is escaped before
      #                   being parsed.
      # download_folder - local path the response body is written to (it is
      #                   passed directly to File.open, so it names the file).
      #
      # Any failure while escaping or parsing the URL is recorded in #error as
      # 'Invalid Token' instead of being raised.
      def initialize(url, download_folder)
        @download_folder = download_folder
        @file_size = 0
        @error = nil
        begin
          # URI.escape was removed in Ruby 3.0; the default parser's #escape
          # performs the same escaping.
          escaped_url = URI::DEFAULT_PARSER.escape(url)
          @url = URI.parse(escaped_url)
          @http = Net::HTTP.new(@url.host, @url.port)
          @http.use_ssl = true if @url.scheme == 'https'
        rescue StandardError
          @error = 'Invalid Token'
        end
      end

      # Performs the GET request.
      #
      # On a 200 response the body is written to the local path and #file_size
      # is updated. A 302 response is reported as an unauthorized token; any
      # other status is reported as "<code> <response class>".
      #
      # Returns the new file size on success, the error message on an HTTP
      # failure, or nil when the request could not be constructed.
      def get
        # Construction failed: @url/@http are unusable and #error already says why.
        return if @error

        req = Net::HTTP::Get.new(@url.path)
        response = @http.start { |http| http.request(req) }

        case response.code
        when '200'
          ::File.open(@download_folder, 'wb') do |local_file|
            local_file.write(response.body)
          end
          @file_size = ::File.size(@download_folder)
        when '302'
          @error = 'Token Not Authorized to Access Specified File'
        else
          @error = "#{response.code} #{response.class.name}"
        end
      end
    end
  end
end
47
+
48
+
@@ -0,0 +1,9 @@
1
module Nsrr
  module Helpers
    # Small hash utilities.
    class HashHelper
      # Returns a new hash with every key converted to a symbol.
      #
      # Keys that cannot be converted (they do not respond to +to_sym+, e.g.
      # integers) are kept unchanged instead of raising NoMethodError.
      # The given hash is not modified.
      def self.symbolize_keys(hash)
        hash.each_with_object({}) do |(key, value), result|
          result[key.respond_to?(:to_sym) ? key.to_sym : key] = value
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'net/http'
2
+ require 'json'
3
+
4
module Nsrr
  module Helpers
    # Fetches a URL over HTTP(S) and parses the response body as JSON.
    class JsonRequest
      class << self
        # Convenience wrapper: builds a request from +args+ and performs it.
        def get(*args)
          new(*args).get
        end
      end

      attr_reader :url

      # url - String URL to request; it is parsed as given (no escaping).
      def initialize(url)
        @url = URI.parse(url)
        @http = Net::HTTP.new(@url.host, @url.port)
        @http.use_ssl = true if @url.scheme == 'https'
      end

      # Performs the GET request and returns the parsed JSON, or nil when the
      # body is missing or is not valid JSON. Errors raised by the HTTP
      # request itself are not rescued here.
      def get
        req = Net::HTTP::Get.new(@url.path)
        response = @http.start { |http| http.request(req) }

        begin
          JSON.parse(response.body)
        rescue JSON::JSONError, TypeError, EncodingError
          # Only parse-related failures mean "no usable JSON"; anything else
          # is a real bug and should surface.
          nil
        end
      end
    end
  end
end
31
+
32
+
@@ -0,0 +1 @@
1
+ require 'nsrr/models/dataset'
@@ -0,0 +1,126 @@
1
+ require 'colorize'
2
+ require 'fileutils'
3
+
4
+ require 'nsrr/helpers/constants'
5
+ require 'nsrr/helpers/hash_helper'
6
+ require 'nsrr/helpers/json_request'
7
+
8
+ require 'nsrr/models/file'
9
+
10
module Nsrr
  module Models
    # A dataset on the website, identified by its slug. Lists the dataset's
    # files and folders and downloads them into a local folder named after
    # the slug.
    class Dataset
      # Requests "<website>/datasets/<slug>.json" and wraps the result.
      # Returns nil when the request yields no JSON.
      def self.find(slug)
        json = Nsrr::Helpers::JsonRequest.get("#{Nsrr::WEBSITE}/datasets/#{slug}.json")
        new(json) if json
      end

      attr_reader :slug, :name

      # json - Hash with 'slug' and 'name' keys.
      def initialize(json = {})
        @slug = json['slug']
        @name = json['name']
        @files = {}            # manifest entries, cached per requested path
        @download_token = nil  # asked for on the first download, then reused
      end

      # Returns the manifest entries (files and folders) for +path+ as
      # Nsrr::Models::File objects. The result is cached per path.
      def files(path = nil)
        @files[path] ||= begin
          json = Nsrr::Helpers::JsonRequest.get("#{Nsrr::WEBSITE}/datasets/#{@slug}/json_manifest/#{path}")
          (json || []).collect { |file_json| Nsrr::Models::File.new(file_json) }
        end
      end

      # Returns the names of the folders directly under +path+.
      def folders(path = nil)
        files(path).reject(&:is_file).collect(&:name)
      end

      # Downloads the files under +path+ (nil for the whole dataset) and
      # prints a summary. Always returns nil.
      #
      # Options include:
      # method:
      #   'md5'       => [default] Checks if a downloaded file exists with the same md5 as the online version; if so, skips that file
      #   'fresh'     => Downloads every file without checking if it was already downloaded
      #   'fast'      => Only checks if a downloaded file exists with the same file size as the online version; if so, skips that file
      # depth:
      #   'recursive' => [default] Downloads files in selected path folder and all subfolders
      #   'shallow'   => Only downloads files in selected path folder
      def download(path = nil, *args)
        options = Nsrr::Helpers::HashHelper.symbolize_keys(args.first || {})
        options[:method] ||= 'md5'
        options[:depth] ||= 'recursive'
        @folders_created = 0
        @files_downloaded = 0
        @downloaded_bytes = 0
        @files_skipped = 0
        @files_failed = 0
        # Set up front so the summary below can always compute an elapsed
        # time, even when the user interrupts while entering the token.
        @start_time = Time.now

        begin
          if @download_token.to_s == ''
            puts " File Integrity Check Method: " + options[:method].to_s.colorize(:white)
            puts " Depth: " + options[:depth].to_s.colorize(:white)
            set_download_token()
          end

          # Restart the clock so time spent typing the token is not counted.
          @start_time = Time.now

          download_helper(path, options)
        rescue Interrupt, IRB::Abort
          puts "\n Interrupted".colorize(:red)
        end

        @downloaded_megabytes = @downloaded_bytes / (1024 * 1024)

        puts "\nFinished in #{Time.now - @start_time} seconds."
        puts "\n#{@folders_created} folder#{"s" if @folders_created != 1} created".colorize(:white) + ", " +
             "#{@files_downloaded} file#{"s" if @files_downloaded != 1} downloaded".colorize(:green) + ", " +
             "#{@downloaded_megabytes} MiB#{"s" if @downloaded_megabytes != 1} downloaded".colorize(:green) + ", " +
             "#{@files_skipped} file#{"s" if @files_skipped != 1} skipped".colorize(:blue) + ", " +
             "#{@files_failed} file#{"s" if @files_failed != 1} failed".colorize(:red) + "\n\n"
        nil
      end

      # Creates the local folder for +path+, downloads the files directly in
      # it, and, when depth is 'recursive', repeats for each subfolder.
      # Updates the counters that #download reports.
      def download_helper(path, options)
        current_folder = ::File.join(slug.to_s, path.to_s)
        create_folder(current_folder)

        files(path).select(&:is_file).each do |file|
          result = file.download(options[:method], current_folder, @download_token)
          case result
          when 'fail'
            @files_failed += 1
          when 'skip'
            @files_skipped += 1
          else
            # Any other result is the number of bytes downloaded.
            @files_downloaded += 1
            @downloaded_bytes += result
          end
        end

        if options[:depth] == 'recursive'
          files(path).reject(&:is_file).each do |file|
            folder = [path, file.name].compact.join('/')
            download_helper(folder, options)
          end
        end
      end

      # Announces and creates +folder+ (including missing parents) and counts it.
      def create_folder(folder)
        puts " create".colorize(:white) + " #{folder}"
        FileUtils.mkdir_p folder
        @folders_created += 1
      end

      # Prompts for the download token and stores it for later downloads.
      def set_download_token
        puts " Get your token here: " + "https://sleepdata.org/token".colorize(:blue)
        print "Please enter your download token: "
        # gets returns nil at end of input; treat that as an empty token.
        @download_token = gets.to_s.chomp
      end
    end
  end
end

# Top-level shortcut so the class can be referenced as plain `Dataset`.
class Dataset < Nsrr::Models::Dataset
end
@@ -0,0 +1,98 @@
1
+ # Models a downloadable file or folder
2
+ require 'digest/md5'
3
+
4
+ require 'nsrr/helpers/constants'
5
+ require 'nsrr/helpers/download_request'
6
+
7
module Nsrr
  module Models
    # Models a downloadable file or folder, built from one manifest entry.
    class File
      attr_reader :name, :is_file, :web_file_size, :web_checksum

      # json - Hash with the keys 'file_name', 'checksum', 'is_file',
      #        'file_size', 'dataset' and 'file_path'.
      def initialize(json = {})
        @name = json['file_name']
        @web_checksum = json['checksum']
        @is_file = json['is_file']
        @web_file_size = json['file_size']
        @dataset_slug = json['dataset']
        @file_path = json['file_path']
      end

      # Downloads the file into +path+ (relative to the current directory)
      # unless the chosen check says the local copy is already up to date.
      #
      # method:
      #   'md5'   => Checks if a downloaded file exists with the same md5 as the online version; if so, skips that file
      #   'fresh' => Downloads every file without checking if it was already downloaded
      #   'fast'  => Only checks if a downloaded file exists with the same file size as the online version; if so, skips that file
      #              (any other value behaves like 'fast')
      #
      # Returns 'skip', 'fail', or the number of bytes downloaded.
      def download(method, path, token)
        case method
        when 'md5'
          md5_matches?(path) ? skip : force_download(path, token)
        when 'fresh'
          force_download(path, token)
        else
          file_size_matches?(path) ? skip : force_download(path, token)
        end
      end

      # Downloads the file unconditionally and reports the outcome.
      # Returns the downloaded size in bytes, or 'fail' on error.
      def force_download(path, token)
        download_folder = local_path(path)
        download_url = "#{Nsrr::WEBSITE}/datasets/#{@dataset_slug}/files/a/#{token}/m/nsrr-gem-v#{Nsrr::VERSION::STRING.gsub('.', '-')}/#{@file_path.to_s}"
        download_request = Nsrr::Helpers::DownloadRequest.new(download_url, download_folder)
        download_request.get

        if download_request.error.to_s == ''
          puts " download".colorize(:green) + " #{@name}"
          download_request.file_size
        else
          puts " failed".colorize(:red) + " #{@name}"
          puts " #{download_request.error}"
          'fail'
        end
      end

      # Reports the file as already present and returns 'skip'.
      def skip
        puts " identical".colorize(:blue) + " #{name}"
        'skip'
      end

      # True when a local copy exists and its md5 equals the online checksum.
      # A missing local file never matches, even if the manifest reports a
      # blank checksum (which would otherwise equal the "" placeholder that
      # local_checksum returns for missing files).
      def md5_matches?(path)
        ::File.exist?(local_path(path)) && @web_checksum == local_checksum(path)
      end

      # Returns the md5 hex digest of the local copy, or "" when it is missing.
      def local_checksum(path)
        download_folder = local_path(path)
        if ::File.exist?(download_folder)
          Digest::MD5.file(download_folder).hexdigest
        else
          ""
        end
      end

      # True when the local copy's size equals the online file size.
      def file_size_matches?(path)
        @web_file_size == local_filesize(path)
      end

      # Returns the size in bytes of the local copy, or -1 when it is missing.
      def local_filesize(path)
        download_folder = local_path(path)
        if ::File.exist?(download_folder)
          ::File.size(download_folder)
        else
          -1
        end
      end

      private

      # Location of the local copy: <current directory>/<path>/<file name>.
      def local_path(path)
        ::File.join(Dir.pwd, path.to_s, name.to_s)
      end
    end
  end
end
98
+
data/lib/nsrr/version.rb CHANGED
@@ -3,7 +3,7 @@ module Nsrr
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
5
  TINY = 0
6
- BUILD = "beta1" # nil, "pre", "rc", "rc2"
6
+ BUILD = "beta2" # nil, "pre", "rc", "rc2"
7
7
 
8
8
  STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.')
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nsrr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.beta1
4
+ version: 0.1.0.beta2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Remo Mueller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-25 00:00:00.000000000 Z
11
+ date: 2014-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -95,6 +95,14 @@ files:
95
95
  - Rakefile
96
96
  - bin/nsrr
97
97
  - lib/nsrr.rb
98
+ - lib/nsrr/commands/console.rb
99
+ - lib/nsrr/helpers/constants.rb
100
+ - lib/nsrr/helpers/download_request.rb
101
+ - lib/nsrr/helpers/hash_helper.rb
102
+ - lib/nsrr/helpers/json_request.rb
103
+ - lib/nsrr/models/all.rb
104
+ - lib/nsrr/models/dataset.rb
105
+ - lib/nsrr/models/file.rb
98
106
  - lib/nsrr/version.rb
99
107
  - nsrr.gemspec
100
108
  homepage: https://github.com/nsrr/nsrr-gem