doi_extractor 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +54 -0
  3. data/Capfile +34 -0
  4. data/Gemfile +6 -0
  5. data/README.md +50 -0
  6. data/Rakefile +43 -0
  7. data/bin/doi_extractor +18 -0
  8. data/config/deploy.rb +34 -0
  9. data/config/deploy/production.rb +65 -0
  10. data/doi_extractor.gemspec +29 -0
  11. data/lib/doi_extractor.rb +25 -0
  12. data/lib/doi_extractor/cancel_command.rb +24 -0
  13. data/lib/doi_extractor/command.rb +81 -0
  14. data/lib/doi_extractor/command_line_parser.rb +154 -0
  15. data/lib/doi_extractor/create_command.rb +32 -0
  16. data/lib/doi_extractor/download_command.rb +109 -0
  17. data/lib/doi_extractor/download_location.rb +86 -0
  18. data/lib/doi_extractor/errors.rb +34 -0
  19. data/lib/doi_extractor/ipums_client.rb +159 -0
  20. data/lib/doi_extractor/ipums_uri_builder.rb +51 -0
  21. data/lib/doi_extractor/old_ruby_patch.rb +25 -0
  22. data/lib/doi_extractor/options.rb +132 -0
  23. data/lib/doi_extractor/secrets.rb +18 -0
  24. data/lib/doi_extractor/status_command.rb +62 -0
  25. data/lib/doi_extractor/version.rb +3 -0
  26. data/spec/fixtures/api_creds.yml +2 -0
  27. data/spec/reports/SPEC-DoiExtractor-CancelCommand-when-successful.xml +7 -0
  28. data/spec/reports/SPEC-DoiExtractor-CancelCommand.xml +3 -0
  29. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-create-command-with-email.xml +14 -0
  30. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-create-command.xml +9 -0
  31. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-download-command.xml +9 -0
  32. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-status-command.xml +9 -0
  33. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-an-invalid-command.xml +9 -0
  34. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-an-unknown-option.xml +5 -0
  35. data/spec/reports/SPEC-DoiExtractor-CommandLineParser.xml +3 -0
  36. data/spec/reports/SPEC-DoiExtractor-CreateCommand-when-an-existing-extract-is-processing.xml +7 -0
  37. data/spec/reports/SPEC-DoiExtractor-CreateCommand-when-successful.xml +7 -0
  38. data/spec/reports/SPEC-DoiExtractor-CreateCommand.xml +3 -0
  39. data/spec/reports/SPEC-DoiExtractor-DownloadCommand-user-cancels-download.xml +7 -0
  40. data/spec/reports/SPEC-DoiExtractor-DownloadCommand-when-an-extract-is-available-when-force-is-not-set.xml +5 -0
  41. data/spec/reports/SPEC-DoiExtractor-DownloadCommand-when-an-extract-is-available.xml +13 -0
  42. data/spec/reports/SPEC-DoiExtractor-DownloadCommand.xml +3 -0
  43. data/spec/reports/SPEC-DoiExtractor-DownloadLocation.xml +11 -0
  44. data/spec/reports/SPEC-DoiExtractor-IpumsClient.xml +7 -0
  45. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder-internal-environment.xml +5 -0
  46. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder-live-environment.xml +5 -0
  47. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder-local-environment.xml +5 -0
  48. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder.xml +3 -0
  49. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-create-command-with-invalid-doi-version.xml +5 -0
  50. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-create-command.xml +7 -0
  51. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-download-command.xml +7 -0
  52. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-status-command.xml +7 -0
  53. data/spec/reports/SPEC-DoiExtractor-Options-for-command.xml +3 -0
  54. data/spec/reports/SPEC-DoiExtractor-Options-when-setting-path-values.xml +9 -0
  55. data/spec/reports/SPEC-DoiExtractor-Options.xml +5 -0
  56. data/spec/reports/SPEC-DoiExtractor-Secrets.xml +7 -0
  57. data/spec/reports/SPEC-DoiExtractor-StatusCommand.xml +5 -0
  58. data/spec/spec_helper.rb +20 -0
  59. data/spec/support/test_input.rb +36 -0
  60. data/spec/unit/cancel_command_spec.rb +28 -0
  61. data/spec/unit/command_line_parser_spec.rb +68 -0
  62. data/spec/unit/create_command_spec.rb +44 -0
  63. data/spec/unit/download_command_spec.rb +139 -0
  64. data/spec/unit/download_location_spec.rb +71 -0
  65. data/spec/unit/ipums_client_spec.rb +23 -0
  66. data/spec/unit/ipums_uri_builder_spec.rb +26 -0
  67. data/spec/unit/options_spec.rb +86 -0
  68. data/spec/unit/secrets_spec.rb +14 -0
  69. data/spec/unit/status_command_spec.rb +46 -0
  70. metadata +282 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: '092d2a2486613b1db9414d267cfccef66d4adc17'
4
+ data.tar.gz: 36df18eeaba0c2999ef7d7b448fa83543f1787c2
5
+ SHA512:
6
+ metadata.gz: 9022ebc38079cf1d779a006ca8a91aee2aab6f298245d2af300493a7653e4754be9e45df3d579e3374229c3991af24c9a41c7719eb451d9f10884cbcd5d8bdd1
7
+ data.tar.gz: b46e08cd9ef16a96c4d54e140b87e1a752200bc2cad89f1ca88af54eb3a9b02ce2ff2428a6404c1f61db118a1d18d7969882159cc1a33c7cafff75171cb1dfea
data/.gitignore ADDED
@@ -0,0 +1,54 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /spec/testdata/
10
+ /test/tmp/
11
+ /test/version_tmp/
12
+ /tmp/
13
+ /log/
14
+
15
+ Gemfile.lock
16
+
17
+ # Used by dotenv library to load environment variables.
18
+ # .env
19
+
20
+ ## Specific to RubyMotion:
21
+ .dat*
22
+ .repl_history
23
+ build/
24
+ *.bridgesupport
25
+ build-iPhoneOS/
26
+ build-iPhoneSimulator/
27
+
28
+ ## Specific to RubyMotion (use of CocoaPods):
29
+ #
30
+ # We recommend against adding the Pods directory to your .gitignore. However
31
+ # you should judge for yourself, the pros and cons are mentioned at:
32
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
33
+ #
34
+ # vendor/Pods/
35
+
36
+ ## Documentation cache and generated files:
37
+ /.yardoc/
38
+ /_yardoc/
39
+ /doc/
40
+ /rdoc/
41
+
42
+ ## Environment normalization:
43
+ /.bundle/
44
+ /vendor/bundle
45
+ /lib/bundler/man/
46
+
47
+ # for a library or gem, you might want to ignore these files since the code is
48
+ # intended to run in multiple environments; otherwise, check them in:
49
+ # Gemfile.lock
50
+ # .ruby-version
51
+ # .ruby-gemset
52
+
53
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
54
+ .rvmrc
data/Capfile ADDED
@@ -0,0 +1,34 @@
1
+ # Load DSL and set up stages
2
+ require "capistrano/setup"
3
+
4
+ # Include default deployment tasks
5
+ require "capistrano/deploy"
6
+
7
+ # Include doi_extractor version
8
+ require File.expand_path('../lib/doi_extractor/version', __FILE__)
9
+
10
+ # Load the SCM plugin appropriate to your project:
11
+ require "capistrano/scm/git"
12
+ install_plugin Capistrano::SCM::Git
13
+
14
+ # Include tasks from other gems included in your Gemfile
15
+ #
16
+ # For documentation on these, see for example:
17
+ #
18
+ # https://github.com/capistrano/rvm
19
+ # https://github.com/capistrano/rbenv
20
+ # https://github.com/capistrano/chruby
21
+ # https://github.com/capistrano/bundler
22
+ # https://github.com/capistrano/rails
23
+ # https://github.com/capistrano/passenger
24
+ #
25
+ # require "capistrano/rvm"
26
+ # require "capistrano/rbenv"
27
+ # require "capistrano/chruby"
28
+ require "capistrano/bundler"
29
+ # require "capistrano/rails/assets"
30
+ # require "capistrano/rails/migrations"
31
+ # require "capistrano/passenger"
32
+
33
+ # Load custom tasks from `lib/capistrano/tasks` if you have any defined
34
+ Dir.glob("lib/capistrano/tasks/*.rake").each { |r| import r }
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
4
+
5
+ gem "capistrano", "3.8.1", platform: :ruby
6
+ gem 'capistrano-bundler', '~> 1.2', platform: :ruby
data/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # DOI Extractor
2
+
3
+ ## Use
4
+
5
+ The DOI Extractor is a command-line tool for generating, submitting, downloading,
6
+ and packaging a set of extracts for DOI archival. It currently supports four commands:
7
+ `cancel`, `create`, `download`, and `status`.
8
+
9
+ #### Cancel
10
+ Cancels a submitted DOI extract group. Will fail all associated pending extracts.
11
+
12
+ #### Create
13
+ Creates a new DOI extract group and its associated extracts, then submits them.
14
+ Optionally takes an email address and will send a notification when all extracts are
15
+ ready to download.
16
+
17
+ #### Download
18
+ Once all extracts for a DOI group are complete, this command will download and package them.
19
+
20
+ #### Status
21
+ Prints summary information about recent DOI extracts or detailed information about a
22
+ particular extract.
23
+
24
+ ### Further Information
25
+
26
+ See `doi_extractor <COMMAND> --help` for more detailed options.
27
+
28
+ ## Examples
29
+
30
+ ### Create and submit a new DOI extract group for USA
31
+ `doi_extractor create -p usa -v 6.5 -E delbert@umn.edu`
32
+
33
+ ### Check the status of a submitted extract group
34
+ `doi_extractor status -p usa -i 5`
35
+
36
+ ### Download the completed extracts to the default location
37
+ `doi_extractor download -p usa -i 5`
38
+
39
+ ## Deployment
40
+
41
+ The DOI Extractor is a gem that gets installed to the shared jruby environment
42
+ (/pkg/mpctools/jruby).
43
+
44
+ It may be deployed using capistrano: `cap production deploy`
45
+
46
+ ### Ruby Version Notes
47
+
48
+ This gem is deployed with Capistrano 3.8, which is not supported under JRuby 1.7. For deployment, use a modern Ruby (MRI works fine).
49
+
50
+ The CI build uses JRuby to ensure the Gemfile is valid in that environment and that all specs pass.
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+ require 'ci/reporter/rake/rspec'
4
+
5
+ RSpec::Core::RakeTask.new(:spec => 'ci:setup:rspec')
6
+
7
+ task :test => :spec
8
+ task :default => :spec
9
+
10
+ task :disable_rubygems do
11
+ ENV['gem_push'] = 'no'
12
+ end
13
+
14
+ Rake::Task[:release].enhance [:disable_rubygems]
15
+
16
+ namespace :spec do
17
+
18
+ desc "integration test the JSON API endpoints"
19
+ RSpec::Core::RakeTask.new(:integration) do |t|
20
+ # set ENABLE_INTEGRATION so that :integration tagged
21
+ # specs are run
22
+ ENV['ENABLE_INTEGRATION'] = 'true'
23
+
24
+ # only run those files in the 'integration' directory
25
+ t.pattern = "./spec/integration{,/*/**}/*_spec.rb"
26
+ end
27
+
28
+ desc 'Run unit and integration tests'
29
+ task :all do
30
+ ENV['ENABLE_INTEGRATION'] = 'true'
31
+ Rake::Task[:spec].invoke
32
+ end
33
+ end
34
+
35
+ namespace :test do
36
+ task :integration => 'spec:integration'
37
+ task :all => 'spec:all'
38
+ end
39
+
40
+ desc "Open an irb session preloaded with this library"
41
+ task :console do
42
+ sh "irb -rubygems -I lib -r doi_extractor.rb"
43
+ end
data/bin/doi_extractor ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'doi_extractor'
4
+
5
+ parser = DoiExtractor::CommandLineParser.parse(ARGV)
6
+
7
+ if parser.valid?
8
+ cmd = DoiExtractor::Command.for(parser.options)
9
+ begin
10
+ cmd.execute
11
+ rescue DoiExtractor::CommandFailError => e
12
+ STDERR.puts e.error_report
13
+ exit e.exit_code
14
+ end
15
+ else
16
+ puts parser
17
+ exit 1
18
+ end
data/config/deploy.rb ADDED
@@ -0,0 +1,34 @@
1
+ # config valid only for current version of Capistrano
2
+ lock "3.8.1"
3
+
4
+ set :application, 'doi_extractor'
5
+ set :repo_url, 'git@github.umn.edu:mpc/doi_extractor.git'
6
+ set :branch, `git rev-parse --abbrev-ref HEAD`.chomp
7
+ set :deploy_to, '/pkg/ipums/programming/doi_extractor'
8
+
9
+
10
+ # Default value for :linked_files is []
11
+ # append :linked_files, "config/database.yml", "config/secrets.yml"
12
+ # Default value for linked_dirs is []
13
+ # set :linked_dirs, %w{vendor/bundle}
14
+
15
+ set :app_version, DoiExtractor::VERSION
16
+ set :jruby_path, -> { "/pkg/mpctools/jruby/current" }
17
+ set :default_env, -> { {path: "#{fetch(:jruby_path)}/bin:$PATH"} }
18
+ set :keep_releases, 5
19
+ set :user, `logname`.chomp
20
+
21
+ set :bundle_flags, '--quiet'
22
+ set :bundle_without, 'deployment'
23
+
24
+ after 'deploy:published', :jruby_install
25
+
26
+ desc 'Installs the gem into the shared jruby env'
27
+ task :jruby_install do
28
+ on roles(:app) do
29
+ within current_path do
30
+ execute :rake, 'build'
31
+ execute "PATH=#{fetch(:jruby_path)}/bin:$PATH gem install --wrapper --bindir #{fetch(:jruby_path)}/bin #{current_path}/pkg/doi_extractor-#{fetch(:app_version)}.gem"
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,65 @@
1
+
2
+ server 'ipums-internal-web.pop.umn.edu', user: fetch(:user), roles: %w(app)
3
+
4
+
5
+ # server-based syntax
6
+ # ======================
7
+ # Defines a single server with a list of roles and multiple properties.
8
+ # You can define all roles on a single server, or split them:
9
+
10
+ # server "example.com", user: "deploy", roles: %w{app db web}, my_property: :my_value
11
+ # server "example.com", user: "deploy", roles: %w{app web}, other_property: :other_value
12
+ # server "db.example.com", user: "deploy", roles: %w{db}
13
+
14
+
15
+
16
+ # role-based syntax
17
+ # ==================
18
+
19
+ # Defines a role with one or multiple servers. The primary server in each
20
+ # group is considered to be the first unless any hosts have the primary
21
+ # property set. Specify the username and a domain or IP for the server.
22
+ # Don't use `:all`, it's a meta role.
23
+
24
+ # role :app, %w{deploy@example.com}, my_property: :my_value
25
+ # role :web, %w{user1@primary.com user2@additional.com}, other_property: :other_value
26
+ # role :db, %w{deploy@example.com}
27
+
28
+
29
+
30
+ # Configuration
31
+ # =============
32
+ # You can set any configuration variable like in config/deploy.rb
33
+ # These variables are then only loaded and set in this stage.
34
+ # For available Capistrano configuration variables see the documentation page.
35
+ # http://capistranorb.com/documentation/getting-started/configuration/
36
+ # Feel free to add new variables to customise your setup.
37
+
38
+
39
+
40
+ # Custom SSH Options
41
+ # ==================
42
+ # You may pass any option but keep in mind that net/ssh understands a
43
+ # limited set of options, consult the Net::SSH documentation.
44
+ # http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start
45
+ #
46
+ # Global options
47
+ # --------------
48
+ # set :ssh_options, {
49
+ # keys: %w(/home/rlisowski/.ssh/id_rsa),
50
+ # forward_agent: false,
51
+ # auth_methods: %w(password)
52
+ # }
53
+ #
54
+ # The server-based syntax can be used to override options:
55
+ # ------------------------------------
56
+ # server "example.com",
57
+ # user: "user_name",
58
+ # roles: %w{web app},
59
+ # ssh_options: {
60
+ # user: "user_name", # overrides user setting above
61
+ # keys: %w(/home/user_name/.ssh/id_rsa),
62
+ # forward_agent: false,
63
+ # auth_methods: %w(publickey password)
64
+ # # password: "please use keys"
65
+ # }
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'doi_extractor/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "doi_extractor"
8
+ spec.version = DoiExtractor::VERSION
9
+ spec.authors = ["Dan Elbert"]
10
+ spec.email = ["delbert@umn.edu"]
11
+ spec.homepage = 'https://github.umn.edu/mpc/doi_extractor'
12
+ spec.summary = %q{Command line tool to package IPUMS DOI extracts}
13
+
14
+ spec.files = Dir['bin/**/*'] + Dir['config/**/*'] + Dir['lib/**/*'] + Dir['spec/**/*'] + %w(.gitignore Capfile doi_extractor.gemspec Gemfile Rakefile README.md)
15
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
16
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
17
+ spec.require_paths = ["lib"]
18
+
19
+ spec.add_runtime_dependency 'json'
20
+
21
+ spec.add_development_dependency "bundler", "> 1.10"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "public_suffix", "1.4.6" # Locked to this version because of jruby in 1.9 mode
24
+ spec.add_development_dependency "rspec", "~> 3.5.0"
25
+ spec.add_development_dependency "ci_reporter_rspec", "~> 1.0.0"
26
+ spec.add_development_dependency "simplecov", "~> 0.13.0"
27
+ spec.add_development_dependency "simplecov-rcov", "~> 0.2.3"
28
+ spec.add_development_dependency "webmock", "~> 2.3.2"
29
+ end
@@ -0,0 +1,25 @@
1
+ require 'erb'
2
+ require 'fileutils'
3
+ require 'json'
4
+ require 'net/https'
5
+ require 'optparse'
6
+ require 'optparse/uri'
7
+ require 'ostruct'
8
+ require 'stringio'
9
+ require 'uri'
10
+ require 'yaml'
11
+
12
+ require 'doi_extractor/old_ruby_patch'
13
+ require 'doi_extractor/version'
14
+ require 'doi_extractor/command'
15
+ require 'doi_extractor/cancel_command'
16
+ require 'doi_extractor/command_line_parser'
17
+ require 'doi_extractor/create_command'
18
+ require 'doi_extractor/download_location'
19
+ require 'doi_extractor/errors'
20
+ require 'doi_extractor/download_command'
21
+ require 'doi_extractor/ipums_client'
22
+ require 'doi_extractor/ipums_uri_builder'
23
+ require 'doi_extractor/options'
24
+ require 'doi_extractor/secrets'
25
+ require 'doi_extractor/status_command'
@@ -0,0 +1,24 @@
1
+ module DoiExtractor
2
+ class CancelCommand < Command
3
+ def _execute
4
+ doi = ipums_client.get_doi_extract(options.extract_group_id)
5
+ unless doi
6
+ fail('No extract group found')
7
+ end
8
+
9
+ if options.force
10
+ say('skipping user confirmation', true)
11
+ else
12
+ i = ask("Are you sure you want to cancel DOI extract with ID #{doi.id}?")
13
+ if i.to_s.downcase[0...1] != 'y'
14
+ fail('user cancelled')
15
+ end
16
+ end
17
+
18
+ ipums_client.cancel_doi_extract(doi.id)
19
+
20
+ say("Complete")
21
+
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,81 @@
1
+ module DoiExtractor
2
+ class Command
3
+
4
+ def self.for(options)
5
+ case options.command
6
+ when 'status'
7
+ StatusCommand.new(options)
8
+ when 'create'
9
+ CreateCommand.new(options)
10
+ when 'download'
11
+ DownloadCommand.new(options)
12
+ when 'cancel'
13
+ CancelCommand.new(options)
14
+ else
15
+ raise "Invalid options: #{options.inspect}"
16
+ end
17
+ end
18
+
19
+ attr_reader :options, :start_time
20
+ attr_accessor :std_out, :user_input_callback
21
+
22
+ def initialize(options, std_out = STDOUT, input_callback = nil)
23
+ @options = options
24
+ @std_out = std_out
25
+ @user_input_callback = input_callback || default_input_callback
26
+ end
27
+
28
+ def execute
29
+ begin
30
+ @start_time = Time.now
31
+ cmd = self.class.name.split('::').last.sub('Command', '')
32
+ say("starting #{cmd} Command", true)
33
+ say("api: #{options.api_uri}", true)
34
+ say("path: #{options.download_base_path}", true)
35
+ _execute
36
+ say("Finished in #{(Time.now - @start_time).round(2)} seconds", true)
37
+ rescue CommandFailError => e
38
+ raise e
39
+ rescue => e
40
+ raise CommandFailError.new('An Unexpected Error Has Occurred', e)
41
+ end
42
+ end
43
+
44
+ protected
45
+
46
+ # Subclasses should override this method
47
+ def _execute
48
+ end
49
+
50
+ def default_input_callback
51
+ ->(msg) { say_nb("#{msg}\n(y/n) "); STDIN.gets }
52
+ end
53
+
54
+ def ask(msg)
55
+ user_input_callback.call(msg)
56
+ end
57
+
58
+ def say_nb(msg, is_verbose = false)
59
+ if options.verbose || !is_verbose
60
+ std_out.print msg
61
+ end
62
+ end
63
+
64
+ def say(msg, is_verbose = false)
65
+ say_nb(msg + "\n", is_verbose)
66
+ end
67
+
68
+ def fail(msg, ex = nil, code = 1)
69
+ raise CommandFailError.new(msg, ex, code)
70
+ end
71
+
72
+ def ipums_client
73
+ @ipums_client ||= IpumsClient.new(options.api_uri, secrets.api_username, secrets.api_password)
74
+ end
75
+
76
+ def secrets
77
+ @secrets ||= Secrets.new({api_username: options.api_username, api_password: options.api_password})
78
+ end
79
+
80
+ end
81
+ end