doi_extractor 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +54 -0
  3. data/Capfile +34 -0
  4. data/Gemfile +6 -0
  5. data/README.md +50 -0
  6. data/Rakefile +43 -0
  7. data/bin/doi_extractor +18 -0
  8. data/config/deploy.rb +34 -0
  9. data/config/deploy/production.rb +65 -0
  10. data/doi_extractor.gemspec +29 -0
  11. data/lib/doi_extractor.rb +25 -0
  12. data/lib/doi_extractor/cancel_command.rb +24 -0
  13. data/lib/doi_extractor/command.rb +81 -0
  14. data/lib/doi_extractor/command_line_parser.rb +154 -0
  15. data/lib/doi_extractor/create_command.rb +32 -0
  16. data/lib/doi_extractor/download_command.rb +109 -0
  17. data/lib/doi_extractor/download_location.rb +86 -0
  18. data/lib/doi_extractor/errors.rb +34 -0
  19. data/lib/doi_extractor/ipums_client.rb +159 -0
  20. data/lib/doi_extractor/ipums_uri_builder.rb +51 -0
  21. data/lib/doi_extractor/old_ruby_patch.rb +25 -0
  22. data/lib/doi_extractor/options.rb +132 -0
  23. data/lib/doi_extractor/secrets.rb +18 -0
  24. data/lib/doi_extractor/status_command.rb +62 -0
  25. data/lib/doi_extractor/version.rb +3 -0
  26. data/spec/fixtures/api_creds.yml +2 -0
  27. data/spec/reports/SPEC-DoiExtractor-CancelCommand-when-successful.xml +7 -0
  28. data/spec/reports/SPEC-DoiExtractor-CancelCommand.xml +3 -0
  29. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-create-command-with-email.xml +14 -0
  30. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-create-command.xml +9 -0
  31. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-download-command.xml +9 -0
  32. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-a-valid-status-command.xml +9 -0
  33. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-an-invalid-command.xml +9 -0
  34. data/spec/reports/SPEC-DoiExtractor-CommandLineParser-with-an-unknown-option.xml +5 -0
  35. data/spec/reports/SPEC-DoiExtractor-CommandLineParser.xml +3 -0
  36. data/spec/reports/SPEC-DoiExtractor-CreateCommand-when-an-existing-extract-is-processing.xml +7 -0
  37. data/spec/reports/SPEC-DoiExtractor-CreateCommand-when-successful.xml +7 -0
  38. data/spec/reports/SPEC-DoiExtractor-CreateCommand.xml +3 -0
  39. data/spec/reports/SPEC-DoiExtractor-DownloadCommand-user-cancels-download.xml +7 -0
  40. data/spec/reports/SPEC-DoiExtractor-DownloadCommand-when-an-extract-is-available-when-force-is-not-set.xml +5 -0
  41. data/spec/reports/SPEC-DoiExtractor-DownloadCommand-when-an-extract-is-available.xml +13 -0
  42. data/spec/reports/SPEC-DoiExtractor-DownloadCommand.xml +3 -0
  43. data/spec/reports/SPEC-DoiExtractor-DownloadLocation.xml +11 -0
  44. data/spec/reports/SPEC-DoiExtractor-IpumsClient.xml +7 -0
  45. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder-internal-environment.xml +5 -0
  46. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder-live-environment.xml +5 -0
  47. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder-local-environment.xml +5 -0
  48. data/spec/reports/SPEC-DoiExtractor-IpumsUriBuilder.xml +3 -0
  49. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-create-command-with-invalid-doi-version.xml +5 -0
  50. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-create-command.xml +7 -0
  51. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-download-command.xml +7 -0
  52. data/spec/reports/SPEC-DoiExtractor-Options-for-command-with-status-command.xml +7 -0
  53. data/spec/reports/SPEC-DoiExtractor-Options-for-command.xml +3 -0
  54. data/spec/reports/SPEC-DoiExtractor-Options-when-setting-path-values.xml +9 -0
  55. data/spec/reports/SPEC-DoiExtractor-Options.xml +5 -0
  56. data/spec/reports/SPEC-DoiExtractor-Secrets.xml +7 -0
  57. data/spec/reports/SPEC-DoiExtractor-StatusCommand.xml +5 -0
  58. data/spec/spec_helper.rb +20 -0
  59. data/spec/support/test_input.rb +36 -0
  60. data/spec/unit/cancel_command_spec.rb +28 -0
  61. data/spec/unit/command_line_parser_spec.rb +68 -0
  62. data/spec/unit/create_command_spec.rb +44 -0
  63. data/spec/unit/download_command_spec.rb +139 -0
  64. data/spec/unit/download_location_spec.rb +71 -0
  65. data/spec/unit/ipums_client_spec.rb +23 -0
  66. data/spec/unit/ipums_uri_builder_spec.rb +26 -0
  67. data/spec/unit/options_spec.rb +86 -0
  68. data/spec/unit/secrets_spec.rb +14 -0
  69. data/spec/unit/status_command_spec.rb +46 -0
  70. metadata +282 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: '092d2a2486613b1db9414d267cfccef66d4adc17'
4
+ data.tar.gz: 36df18eeaba0c2999ef7d7b448fa83543f1787c2
5
+ SHA512:
6
+ metadata.gz: 9022ebc38079cf1d779a006ca8a91aee2aab6f298245d2af300493a7653e4754be9e45df3d579e3374229c3991af24c9a41c7719eb451d9f10884cbcd5d8bdd1
7
+ data.tar.gz: b46e08cd9ef16a96c4d54e140b87e1a752200bc2cad89f1ca88af54eb3a9b02ce2ff2428a6404c1f61db118a1d18d7969882159cc1a33c7cafff75171cb1dfea
data/.gitignore ADDED
@@ -0,0 +1,54 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /spec/testdata/
10
+ /test/tmp/
11
+ /test/version_tmp/
12
+ /tmp/
13
+ /log/
14
+
15
+ Gemfile.lock
16
+
17
+ # Used by dotenv library to load environment variables.
18
+ # .env
19
+
20
+ ## Specific to RubyMotion:
21
+ .dat*
22
+ .repl_history
23
+ build/
24
+ *.bridgesupport
25
+ build-iPhoneOS/
26
+ build-iPhoneSimulator/
27
+
28
+ ## Specific to RubyMotion (use of CocoaPods):
29
+ #
30
+ # We recommend against adding the Pods directory to your .gitignore. However
31
+ # you should judge for yourself, the pros and cons are mentioned at:
32
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
33
+ #
34
+ # vendor/Pods/
35
+
36
+ ## Documentation cache and generated files:
37
+ /.yardoc/
38
+ /_yardoc/
39
+ /doc/
40
+ /rdoc/
41
+
42
+ ## Environment normalization:
43
+ /.bundle/
44
+ /vendor/bundle
45
+ /lib/bundler/man/
46
+
47
+ # for a library or gem, you might want to ignore these files since the code is
48
+ # intended to run in multiple environments; otherwise, check them in:
49
+ # Gemfile.lock
50
+ # .ruby-version
51
+ # .ruby-gemset
52
+
53
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
54
+ .rvmrc
data/Capfile ADDED
@@ -0,0 +1,34 @@
1
+ # Load DSL and set up stages
2
+ require "capistrano/setup"
3
+
4
+ # Include default deployment tasks
5
+ require "capistrano/deploy"
6
+
7
+ # Include doi_extractor version
8
+ require File.expand_path('../lib/doi_extractor/version', __FILE__)
9
+
10
+ # Load the SCM plugin appropriate to your project:
11
+ require "capistrano/scm/git"
12
+ install_plugin Capistrano::SCM::Git
13
+
14
+ # Include tasks from other gems included in your Gemfile
15
+ #
16
+ # For documentation on these, see for example:
17
+ #
18
+ # https://github.com/capistrano/rvm
19
+ # https://github.com/capistrano/rbenv
20
+ # https://github.com/capistrano/chruby
21
+ # https://github.com/capistrano/bundler
22
+ # https://github.com/capistrano/rails
23
+ # https://github.com/capistrano/passenger
24
+ #
25
+ # require "capistrano/rvm"
26
+ # require "capistrano/rbenv"
27
+ # require "capistrano/chruby"
28
+ require "capistrano/bundler"
29
+ # require "capistrano/rails/assets"
30
+ # require "capistrano/rails/migrations"
31
+ # require "capistrano/passenger"
32
+
33
+ # Load custom tasks from `lib/capistrano/tasks` if you have any defined
34
+ Dir.glob("lib/capistrano/tasks/*.rake").each { |r| import r }
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Fetch gems over TLS; the plain-http endpoint allows tampering in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in doi_extractor.gemspec.
gemspec

# Deployment tooling. Restricted to the :ruby platform because the README notes
# that capistrano 3.8 is not supported under jruby 1.7.
gem "capistrano", "3.8.1", platform: :ruby
gem 'capistrano-bundler', '~> 1.2', platform: :ruby
data/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # DOI Extractor
2
+
3
+ ## Use
4
+
5
+ The DOI Extractor is a command line tool for generating, submitting, downloading,
6
+ and packaging a set of extracts for DOI archival. It currently supports 4 commands:
7
+ `cancel`, `create`, `download`, and `status`.
8
+
9
+ #### Cancel
10
+ Cancels a submitted DOI extract group. Will fail all associated pending extracts.
11
+
12
+ #### Create
13
+ Creates a new DOI extract group and its associated extracts, then submits them.
14
+ Optionally takes an email address and will send a notification when all extracts are
15
+ ready to download.
16
+
17
+ #### Download
18
+ Once all extracts for a DOI group are complete, this command will download and package them.
19
+
20
+ #### Status
21
+ Prints summary information about recent DOI extracts or detailed information about a
22
+ particular extract.
23
+
24
+ ### Further Information
25
+
26
+ See `doi_extractor <COMMAND> --help` for more detailed options.
27
+
28
+ ## Examples
29
+
30
+ ### Create and submit a new DOI extract group for USA
31
+ `doi_extractor create -p usa -v 6.5 -E delbert@umn.edu`
32
+
33
+ ### Check the status of a submitted extract group
34
+ `doi_extractor status -p usa -i 5`
35
+
36
+ ### Download the completed extracts to the default location
37
+ `doi_extractor download -p usa -i 5`
38
+
39
+ ## Deployment
40
+
41
+ The DOI Extractor is a gem that gets installed to the shared jruby environment
42
+ (/pkg/mpctools/jruby).
43
+
44
+ It may be deployed using capistrano: `cap production deploy`
45
+
46
+ ### Ruby Version Notes
47
+
48
+ This gem uses Capistrano 3.8, which is not supported under JRuby 1.7. For deployment, use a modern Ruby (MRI works fine).
49
+
50
+ The CI build uses jruby to ensure the Gemfile is valid in that env and that all specs pass.
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
require "bundler/gem_tasks"
require 'rspec/core/rake_task'
require 'ci/reporter/rake/rspec'

# Default spec task; runs ci:setup:rspec first as a prerequisite.
RSpec::Core::RakeTask.new(:spec => 'ci:setup:rspec')

task :test => :spec
task :default => :spec

# Sets ENV['gem_push'] to 'no' before a release.
# NOTE(review): presumably this stops bundler's release task from pushing the
# gem to rubygems.org -- confirm against Bundler::GemHelper.
task :disable_rubygems do
  ENV['gem_push'] = 'no'
end

Rake::Task[:release].enhance [:disable_rubygems]

namespace :spec do

  desc "integration test the JSON API endpoints"
  RSpec::Core::RakeTask.new(:integration) do |t|
    # Set ENABLE_INTEGRATION so that :integration tagged specs are run
    # (the flag is read outside this file).
    ENV['ENABLE_INTEGRATION'] = 'true'

    # only run those files in the 'integration' directory
    t.pattern = "./spec/integration{,/*/**}/*_spec.rb"
  end

  desc 'Run unit and integration tests'
  task :all do
    ENV['ENABLE_INTEGRATION'] = 'true'
    Rake::Task[:spec].invoke
  end
end

# Aliases so the same suites are reachable as test:integration and test:all.
namespace :test do
  task :integration => 'spec:integration'
  task :all => 'spec:all'
end

desc "Open an irb session preloaded with this library"
task :console do
  # The old `-rubygems` switch is gone: it requires `ubygems`, which was
  # removed in Ruby 2.5, and RubyGems is loaded by default since Ruby 1.9.
  sh "irb -I lib -r doi_extractor.rb"
end
data/bin/doi_extractor ADDED
@@ -0,0 +1,18 @@
1
#!/usr/bin/env ruby

# Command line entry point: parse ARGV, build the matching command, run it.

require 'doi_extractor'

parser = DoiExtractor::CommandLineParser.parse(ARGV)

# Invalid invocation: print the parser itself and exit with status 1.
unless parser.valid?
  puts parser
  exit 1
end

command = DoiExtractor::Command.for(parser.options)

begin
  command.execute
rescue DoiExtractor::CommandFailError => failure
  # Command failures carry their own report text and process exit code.
  STDERR.puts failure.error_report
  exit failure.exit_code
end
data/config/deploy.rb ADDED
@@ -0,0 +1,34 @@
1
# This configuration is locked to a single Capistrano version.
lock "3.8.1"

# Source repository and deployment target.
set :application, 'doi_extractor'
set :repo_url, 'git@github.umn.edu:mpc/doi_extractor.git'
# Deploy the branch that is currently checked out locally.
set :branch, `git rev-parse --abbrev-ref HEAD`.chomp
set :deploy_to, '/pkg/ipums/programming/doi_extractor'


# :linked_files and :linked_dirs keep their defaults ([]). Examples:
# append :linked_files, "config/database.yml", "config/secrets.yml"
# set :linked_dirs, %w{vendor/bundle}

# Gem version, shared jruby location, and a PATH with the jruby bin dir in front.
set :app_version, DoiExtractor::VERSION
set :jruby_path, -> { "/pkg/mpctools/jruby/current" }
set :default_env, -> { {path: "#{fetch(:jruby_path)}/bin:$PATH"} }
set :keep_releases, 5
# Login name of the person running the deploy (output of `logname`).
set :user, `logname`.chomp

set :bundle_flags, '--quiet'
set :bundle_without, 'deployment'

# Install the freshly published release into the shared jruby env.
after 'deploy:published', :jruby_install

desc 'Installs the gem into the shared jruby env'
task :jruby_install do
  on roles(:app) do
    within current_path do
      jruby_bin = "#{fetch(:jruby_path)}/bin"
      built_gem = "#{current_path}/pkg/doi_extractor-#{fetch(:app_version)}.gem"

      # Build the .gem from the current release, then install it with
      # wrapper scripts placed in the jruby bin directory.
      execute :rake, 'build'
      execute "PATH=#{jruby_bin}:$PATH gem install --wrapper --bindir #{jruby_bin} #{built_gem}"
    end
  end
end
@@ -0,0 +1,65 @@
1
+
2
+ server 'ipums-internal-web.pop.umn.edu', user: fetch(:user), roles: %w(app)
3
+
4
+
5
+ # server-based syntax
6
+ # ======================
7
+ # Defines a single server with a list of roles and multiple properties.
8
+ # You can define all roles on a single server, or split them:
9
+
10
+ # server "example.com", user: "deploy", roles: %w{app db web}, my_property: :my_value
11
+ # server "example.com", user: "deploy", roles: %w{app web}, other_property: :other_value
12
+ # server "db.example.com", user: "deploy", roles: %w{db}
13
+
14
+
15
+
16
+ # role-based syntax
17
+ # ==================
18
+
19
+ # Defines a role with one or multiple servers. The primary server in each
20
+ # group is considered to be the first unless any hosts have the primary
21
+ # property set. Specify the username and a domain or IP for the server.
22
+ # Don't use `:all`, it's a meta role.
23
+
24
+ # role :app, %w{deploy@example.com}, my_property: :my_value
25
+ # role :web, %w{user1@primary.com user2@additional.com}, other_property: :other_value
26
+ # role :db, %w{deploy@example.com}
27
+
28
+
29
+
30
+ # Configuration
31
+ # =============
32
+ # You can set any configuration variable like in config/deploy.rb
33
+ # These variables are then only loaded and set in this stage.
34
+ # For available Capistrano configuration variables see the documentation page.
35
+ # http://capistranorb.com/documentation/getting-started/configuration/
36
+ # Feel free to add new variables to customise your setup.
37
+
38
+
39
+
40
+ # Custom SSH Options
41
+ # ==================
42
+ # You may pass any option but keep in mind that net/ssh understands a
43
+ # limited set of options, consult the Net::SSH documentation.
44
+ # http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start
45
+ #
46
+ # Global options
47
+ # --------------
48
+ # set :ssh_options, {
49
+ # keys: %w(/home/rlisowski/.ssh/id_rsa),
50
+ # forward_agent: false,
51
+ # auth_methods: %w(password)
52
+ # }
53
+ #
54
+ # The server-based syntax can be used to override options:
55
+ # ------------------------------------
56
+ # server "example.com",
57
+ # user: "user_name",
58
+ # roles: %w{web app},
59
+ # ssh_options: {
60
+ # user: "user_name", # overrides user setting above
61
+ # keys: %w(/home/user_name/.ssh/id_rsa),
62
+ # forward_agent: false,
63
+ # auth_methods: %w(publickey password)
64
+ # # password: "please use keys"
65
+ # }
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for doi_extractor. The version comes from
# lib/doi_extractor/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'doi_extractor/version'

Gem::Specification.new do |spec|
  spec.name          = "doi_extractor"
  spec.version       = DoiExtractor::VERSION
  spec.authors       = ["Dan Elbert"]
  spec.email         = ["delbert@umn.edu"]
  spec.homepage      = 'https://github.umn.edu/mpc/doi_extractor'
  spec.summary       = %q{Command line tool to package IPUMS DOI extracts}

  # Everything under these directories is packaged, plus a fixed set of
  # top-level project files.
  packaged_dirs   = %w(bin config lib spec)
  top_level_files = %w(.gitignore Capfile doi_extractor.gemspec Gemfile Rakefile README.md)

  spec.files         = packaged_dirs.map { |dir| Dir["#{dir}/**/*"] }.flatten(1) + top_level_files
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency 'json'

  # Development dependencies as [name, requirement] pairs, registered in order.
  [
    ["bundler", "> 1.10"],
    ["rake"],
    ["public_suffix", "1.4.6"], # Locked to this version because of jruby in 1.9 mode
    ["rspec", "~> 3.5.0"],
    ["ci_reporter_rspec", "~> 1.0.0"],
    ["simplecov", "~> 0.13.0"],
    ["simplecov-rcov", "~> 0.2.3"],
    ["webmock", "~> 2.3.2"]
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end
end
@@ -0,0 +1,25 @@
1
+ require 'erb'
2
+ require 'fileutils'
3
+ require 'json'
4
+ require 'net/https'
5
+ require 'optparse'
6
+ require 'optparse/uri'
7
+ require 'ostruct'
8
+ require 'stringio'
9
+ require 'uri'
10
+ require 'yaml'
11
+
12
+ require 'doi_extractor/old_ruby_patch'
13
+ require 'doi_extractor/version'
14
+ require 'doi_extractor/command'
15
+ require 'doi_extractor/cancel_command'
16
+ require 'doi_extractor/command_line_parser'
17
+ require 'doi_extractor/create_command'
18
+ require 'doi_extractor/download_location'
19
+ require 'doi_extractor/errors'
20
+ require 'doi_extractor/download_command'
21
+ require 'doi_extractor/ipums_client'
22
+ require 'doi_extractor/ipums_uri_builder'
23
+ require 'doi_extractor/options'
24
+ require 'doi_extractor/secrets'
25
+ require 'doi_extractor/status_command'
@@ -0,0 +1,24 @@
1
module DoiExtractor
  # Implements the `cancel` command: cancels a submitted DOI extract group
  # after confirming with the user (unless the force option is set).
  class CancelCommand < Command
    # Looks up the extract group named by +options.extract_group_id+, asks for
    # confirmation, then requests cancellation through the IPUMS client.
    #
    # Raises CommandFailError (via #fail) when the group cannot be found or
    # the user does not answer affirmatively.
    def _execute
      doi = ipums_client.get_doi_extract(options.extract_group_id)
      fail('No extract group found') unless doi

      if options.force
        say('skipping user confirmation', true)
      else
        answer = ask("Are you sure you want to cancel DOI extract with ID #{doi.id}?")
        fail('user cancelled') unless confirmed?(answer)
      end

      ipums_client.cancel_doi_extract(doi.id)

      say("Complete")
    end

    private

    # True when the reply begins with "y" or "Y". Surrounding whitespace is
    # ignored so that " yes" still counts; nil (e.g. EOF on stdin) is a "no".
    def confirmed?(answer)
      answer.to_s.strip.downcase.start_with?('y')
    end
  end
end
@@ -0,0 +1,81 @@
1
module DoiExtractor
  # Base class for the command line commands (status, create, download,
  # cancel). Subclasses implement #_execute; callers use Command.for to build
  # the right subclass and then call #execute.
  class Command

    # Returns a new command instance matching +options.command+.
    #
    # Raises RuntimeError when the command name is not recognised.
    def self.for(options)
      case options.command
      when 'status'
        StatusCommand.new(options)
      when 'create'
        CreateCommand.new(options)
      when 'download'
        DownloadCommand.new(options)
      when 'cancel'
        CancelCommand.new(options)
      else
        raise "Invalid options: #{options.inspect}"
      end
    end

    attr_reader :options, :start_time
    attr_accessor :std_out, :user_input_callback

    # options        - parsed options object for this invocation
    # std_out        - IO-like object that receives all output (default STDOUT)
    # input_callback - callable taking a prompt and returning the user's
    #                  answer; defaults to prompting on std_out and reading
    #                  a line from STDIN
    def initialize(options, std_out = STDOUT, input_callback = nil)
      @options = options
      @std_out = std_out
      @user_input_callback = input_callback || default_input_callback
    end

    # Runs the command: records the start time, prints verbose header lines,
    # delegates to #_execute and prints the elapsed time.
    #
    # Raises CommandFailError. Failures raised by the command itself pass
    # through unchanged; any other StandardError is wrapped in one.
    def execute
      @start_time = Time.now
      cmd = self.class.name.split('::').last.sub('Command', '')
      say("starting #{cmd} Command", true)
      say("api: #{options.api_uri}", true)
      say("path: #{options.download_base_path}", true)
      _execute
      say("Finished in #{(Time.now - @start_time).round(2)} seconds", true)
    rescue CommandFailError
      # Deliberate failures already carry their message and exit code.
      raise
    rescue => e
      raise CommandFailError.new('An Unexpected Error Has Occurred', e)
    end

    protected

    # Subclasses should override this method
    def _execute
    end

    # Default prompt: prints the question with a "(y/n) " hint and reads one
    # line from STDIN (nil at end of input).
    def default_input_callback
      ->(msg) { say_nb("#{msg}\n(y/n) "); STDIN.gets }
    end

    # Asks the user a question through the configured input callback.
    def ask(msg)
      user_input_callback.call(msg)
    end

    # Prints +msg+ without a trailing newline. Messages flagged as verbose
    # are printed only when options.verbose is set.
    def say_nb(msg, is_verbose = false)
      if options.verbose || !is_verbose
        std_out.print msg
      end
    end

    # Prints +msg+ followed by a newline, with the same verbosity rule as
    # #say_nb. Interpolation is used so non-String messages (numbers, nil,
    # symbols) are printed rather than raising TypeError.
    def say(msg, is_verbose = false)
      say_nb("#{msg}\n", is_verbose)
    end

    # Aborts the command by raising CommandFailError with the given message,
    # optional underlying exception and process exit code.
    def fail(msg, ex = nil, code = 1)
      raise CommandFailError.new(msg, ex, code)
    end

    # Lazily built API client using the configured URI and credentials.
    def ipums_client
      @ipums_client ||= IpumsClient.new(options.api_uri, secrets.api_username, secrets.api_password)
    end

    # Lazily built Secrets object, given the username and password from options.
    def secrets
      @secrets ||= Secrets.new({api_username: options.api_username, api_password: options.api_password})
    end

  end
end