research_metadata_batch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bc61ec93f5d5bffe445daa23c318a5ddd2a829b01a799a5d46866e9094f9d594
4
+ data.tar.gz: b0b0a8d00afb219911466efb16b3ec024a670af3e307a86426ed0c13de84e1fd
5
+ SHA512:
6
+ metadata.gz: 412d324f9e61ed97b51f3b8a8c1fbb42d9f93f41dd12d98c4487922816cfee9d9387c7e06cc6a0041fc8ccf7096eb352ded2345e8208d6f70a7f8cf45a6ca063
7
+ data.tar.gz: 3235149ee8993d812b876165efa311017ea42ef80747c5c6730b0da3bc27c0208884fbabaf02676bd1da253756e25c1690bb00b76a32137e6dce28c321480f08
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ InstalledFiles
7
+ _yardoc
8
+ coverage
9
+ doc/
10
+ lib/bundler/man
11
+ pkg
12
+ rdoc
13
+ spec/reports
14
+ test/tmp
15
+ test/version_tmp
16
+ tmp
17
+ .idea
@@ -0,0 +1,17 @@
1
+ # Change Log
2
+ All notable changes to this project will be documented in this file.
3
+ This project adheres to [Semantic Versioning](http://semver.org/).
4
+
5
+ ## 0.1.0 - 2018-09-27
6
+ ### Added
7
+ - Support for resource types:
8
+ - dataset
9
+ - event
10
+ - external_organisation
11
+ - journal
12
+ - person
13
+ - organisational_unit
14
+ - project
15
+ - publisher
16
+ - research_output
17
+ - For Pure API 59.
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
source 'https://rubygems.org'

# Expand the :github shorthand into a full HTTPS repository URL.
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# Runtime dependencies are declared in research_metadata_batch.gemspec
gemspec
@@ -0,0 +1,41 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ research_metadata_batch (0.1.0)
5
+ puree (~> 2.2)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ addressable (2.5.2)
11
+ public_suffix (>= 2.0.2, < 4.0)
12
+ domain_name (0.5.20180417)
13
+ unf (>= 0.0.5, < 1.0.0)
14
+ http (2.2.2)
15
+ addressable (~> 2.3)
16
+ http-cookie (~> 1.0)
17
+ http-form_data (~> 1.0.1)
18
+ http_parser.rb (~> 0.6.0)
19
+ http-cookie (1.0.3)
20
+ domain_name (~> 0.5)
21
+ http-form_data (1.0.3)
22
+ http_parser.rb (0.6.0)
23
+ mini_portile2 (2.3.0)
24
+ nokogiri (1.8.4)
25
+ mini_portile2 (~> 2.3.0)
26
+ public_suffix (3.0.3)
27
+ puree (2.2.0)
28
+ http (~> 2.0)
29
+ nokogiri (~> 1.6)
30
+ unf (0.1.4)
31
+ unf_ext
32
+ unf_ext (0.0.7.5)
33
+
34
+ PLATFORMS
35
+ ruby
36
+
37
+ DEPENDENCIES
38
+ research_metadata_batch!
39
+
40
+ BUNDLED WITH
41
+ 1.16.1
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Adrian Albin-Clark
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,106 @@
1
+ # Research Metadata Batch
2
+ For the batch processing of Pure records. Custom actions and log messages can be
3
+ defined in user-defined applications.
4
+
5
+ ## Status
6
+
7
+ [![Gem Version](https://badge.fury.io/rb/research_metadata_batch.svg)](https://badge.fury.io/rb/research_metadata_batch)
8
+ [![Maintainability](https://api.codeclimate.com/v1/badges/d3d1723f2900c3e4774a/maintainability)](https://codeclimate.com/github/lulibrary/research-metadata-batch/maintainability)
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ ```ruby
15
+ gem 'research_metadata_batch'
16
+ ```
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install research_metadata_batch
25
+
26
+ ## Basic usage
27
+ Uses the gem's default behaviour, which merely prints each metadata model to STDOUT.
28
+
29
+ ```ruby
30
+ pure_config = {
31
+ url: ENV['PURE_URL'],
32
+ username: ENV['PURE_USERNAME'],
33
+ password: ENV['PURE_PASSWORD'],
34
+ api_key: ENV['PURE_API_KEY']
35
+ }
36
+ ResearchMetadataBatch::Dataset.new(pure_config: pure_config).process
37
+ ```
38
+
39
+ ## Making an application
40
+ Require this gem, then reopen the base class {ResearchMetadataBatch::Base} as below. Methods from
41
+ {ResearchMetadataBatch::Custom} that are implemented there are inherited by every resource class. Put any
42
+ secondary initialisation in the ``init`` method.
43
+
44
+
45
+ For resource-specific customisation, open up a resource class e.g. {ResearchMetadataBatch::Dataset}. Implement methods from
46
+ {ResearchMetadataBatch::Custom} as resource-specific methods.
47
+
48
+ This example uses Amazon Web Services.
49
+
50
+ ### Base class
51
+ ```ruby
52
+ module ResearchMetadataBatch
53
+ class Base
54
+ def init(aws_config:)
55
+ aws_credentials = Aws::Credentials.new aws_config[:access_key_id],
56
+ aws_config[:secret_access_key]
57
+ @s3_client = Aws::S3::Client.new region: aws_config[:region],
58
+ credentials: aws_credentials
59
+ @s3_bucket = aws_config[:s3_bucket]
60
+ end
61
+
62
+ def act(model)
63
+ # Do something involving Amazon Web Services
64
+ end
65
+ end
66
+ end
67
+ ```
68
+
69
+ ### Resource class
70
+ ```ruby
71
+ module ResearchMetadataBatch
72
+ class Dataset
73
+ # Implement methods from ResearchMetadataBatch::Custom
74
+ end
75
+ end
76
+ ```
77
+
78
+ ### Running a batch process
79
+ ```ruby
80
+ require_relative '/path/to/your/opened/class'
81
+
82
+ pure_config = {
83
+ url: ENV['PURE_URL'],
84
+ username: ENV['PURE_USERNAME'],
85
+ password: ENV['PURE_PASSWORD'],
86
+ api_key: ENV['PURE_API_KEY']
87
+ }
88
+
89
+ aws_config = {
90
+ access_key_id: ENV['AWS_ACCESS_KEY_ID'],
91
+ secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
92
+ region: ENV['AWS_REGION'],
93
+ s3_bucket: 'YOUR_S3_BUCKET'
94
+ }
95
+
96
+ log_file = '/path/to/your/log/file'
97
+
98
+ config = {
99
+ pure_config: pure_config,
100
+ log_file: log_file
101
+ }
102
+
103
+ batch = ResearchMetadataBatch::Dataset.new config
104
+ batch.init aws_config: aws_config
105
+ batch.process
106
+ ```
@@ -0,0 +1,6 @@
1
+ require 'research_metadata_batch/version'
2
+ require 'research_metadata_batch/research_metadata_batch'
3
+
4
# Top-level namespace of the gem: batch processing for the Pure Research
# Information System.
module ResearchMetadataBatch; end
@@ -0,0 +1,134 @@
1
+ require 'logger'
2
+ require 'puree'
3
+ require_relative 'custom'
4
+
5
module ResearchMetadataBatch
  # Engine shared by the resource-specific batch classes. A subclass sets
  # +@resource_type+; this class counts the matching Pure records, fetches
  # them in limit-sized batches and passes each record to the hook methods
  # declared in {ResearchMetadataBatch::Custom}.
  # @note Not to be used directly
  class Base
    include ResearchMetadataBatch::Custom

    # @param pure_config [Hash]
    # @option pure_config [String] :url
    # @option pure_config [String] :username
    # @option pure_config [String] :password
    # @option pure_config [String] :api_key
    # @param log_file [String] Path of the log file, rotated daily. Omit to log to STDOUT.
    def initialize(pure_config:, log_file: nil)
      @pure_config = pure_config
      # Logger only rotates a device it opens itself, so it is given the path
      # rather than an already-open File. 'daily' is the rotation frequency
      # (Logger's second argument).
      @logger = log_file ? Logger.new(log_file, 'daily') : Logger.new(STDOUT)
    end

    # Works through the Pure records of this resource type, batch by batch.
    #
    # @param max [Integer] Number of records to act upon. Omit to act upon as many as possible.
    #   A value below zero or above the number of available records is ignored. Zero acts upon nothing.
    # @param limit [Integer] Pure records limit.
    # @param offset [Integer] Pure records offset. An out-of-range value is reset to 0.
    # @param action [Boolean] Set to false to mock an action.
    # @param delay [Integer] Delay in seconds between limit-sized batches.
    def process(max: nil, limit: 20, offset: 0, action: true, delay: 0)
      records_available = resource_count
      @logger.info "#{records_available} records in Pure before processing"

      run_preflight if action

      # An unusable max means "no cap", which is the same as asking for everything.
      qty_to_find = (max && max.between?(0, records_available)) ? max : records_available

      position = offset.between?(0, records_available - 1) ? offset : 0
      qty_obtained = 0

      # Checking the quota in the loop condition makes max: 0 act upon nothing.
      while qty_obtained < qty_to_find && position < records_available
        batch = fetch_batch limit, position

        batch.each do |model|
          if record_valid? model
            act_on model, position, action
            # A record counts towards max even when acting upon it raised.
            qty_obtained += 1
          else
            @logger.warn "#{log_message_prefix(position, model.uuid)} - record invalid"
          end
          position += 1

          break if qty_obtained == qty_to_find
        end

        # Quota met: leave without the empty-batch check or the delay.
        break if qty_obtained == qty_to_find

        # An empty batch is logged and skipped one position at a time so the
        # loop always advances.
        if batch.empty?
          @logger.error "#{log_message_prefix(position, nil)} - ERROR=system"
          position += 1
        end

        sleep delay
      end

      # Reports the count taken at the start of the run; Pure is not queried again.
      @logger.info "#{records_available} records in Pure after processing"
    end

    private

    # Default action: print the model to STDOUT.
    def act(model)
      puts model.inspect
    end

    # Default validation: every record is accepted.
    def record_valid?(model)
      true
    end

    # @return [String]
    def log_message_prefix(pure_record, pure_uuid)
      "PURE_RECORD=#{pure_record} - PURE_UUID=#{pure_uuid}"
    end

    # Runs the preflight hook and logs its outcome. A failure is logged as an
    # error and does not stop the batch run.
    def run_preflight
      preflight
      @logger.info preflight_success_log_message
    rescue => error
      @logger.error preflight_error_log_message(error)
    end

    # Fetches one batch, logging any error and trying again after 10 seconds.
    # NOTE(review): there is no upper bound on the number of attempts.
    def fetch_batch(limit, position)
      resource_batch limit, position
    rescue => error
      @logger.error error
      sleep 10
      retry
    end

    # Runs act (or mock_act when action is false) on one record. The success
    # message is logged only when the hook returns something; an error is
    # logged and swallowed so the remaining records are still processed.
    def act_on(model, position, action)
      act_msg = action ? act(model) : mock_act(model)
      @logger.info "#{log_message_prefix(position, model.uuid)} - #{act_success_log_message(model, act_msg)}" if act_msg
    rescue => error
      @logger.error "#{log_message_prefix(position, model.uuid)} - ERROR=#{error}"
    end

    # Number of records reported by the Puree extractor for this resource type.
    def resource_count
      resource_class = "Puree::Extractor::#{Puree::Util::String.titleize(@resource_type)}"
      Object.const_get(resource_class).new(@pure_config).count
    end

    # Requests +limit+ records starting at +offset+ from the Pure REST API and
    # passes the response text to the Puree XML collection extractor.
    def resource_batch(limit, offset)
      resource_method = "#{@resource_type}s".to_sym
      client = Puree::REST::Client.new(@pure_config).send resource_method
      response = client.all params: {size: limit, offset: offset}
      Puree::XMLExtractor::Collection.send resource_method, response.to_s
    end
  end
end
@@ -0,0 +1,47 @@
1
module ResearchMetadataBatch
  # Hook methods for applications to implement. Every default below has an
  # empty body and therefore returns nil.
  # @note These methods (except init) are used internally by {ResearchMetadataBatch::Base#process} and have been left public for documentation purposes only
  module Custom
    # Second stage initialisation, e.g. for third party services.
    # @param args [Hash]
    def init(**args); end

    # Anything that has to happen at the start of a batch run.
    def preflight; end

    # Log message for a preflight that completed.
    # @return [String]
    def preflight_success_log_message; end

    # Log message for a preflight that did not complete.
    # @return [String]
    def preflight_error_log_message(error); end

    # Do something with the model metadata.
    # @return [String, nil] Optionally, return something transaction-specific, such as a code/ID from an external service.
    def act(model); end

    # Log message for an act/mock_act that completed.
    # @return [String]
    def act_success_log_message(model, act_msg); end

    # Pretend to do something with the model metadata.
    # @return [String, nil]
    def mock_act(model); end

    # Check for values in the metadata.
    # @return [Boolean]
    def record_valid?(model); end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:dataset+.
  class Dataset < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :dataset
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:event+.
  class Event < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :event
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:external_organisation+.
  class ExternalOrganisation < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :external_organisation
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:journal+.
  class Journal < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :journal
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:organisational_unit+.
  class OrganisationalUnit < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :organisational_unit
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:person+.
  class Person < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :person
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:project+.
  class Project < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :project
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:publisher+.
  class Publisher < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :publisher
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require_relative 'dataset'
2
+ require_relative 'event'
3
+ require_relative 'external_organisation'
4
+ require_relative 'journal'
5
+ require_relative 'organisational_unit'
6
+ require_relative 'person'
7
+ require_relative 'project'
8
+ require_relative 'publisher'
9
+ require_relative 'research_output'
@@ -0,0 +1,25 @@
1
+ require_relative 'base'
2
+
3
module ResearchMetadataBatch
  # Batch class whose resource type is +:research_output+.
  class ResearchOutput < ResearchMetadataBatch::Base
    # (see ResearchMetadataBatch::Base#initialize)
    def initialize(pure_config:, log_file: nil)
      super(pure_config: pure_config, log_file: log_file)
      @resource_type = :research_output
    end

    private

    # The inherited batch is iterated as key/value pairs; the keys are
    # dropped and the values are joined, in order, into a single array.
    # Presumably the keys are research output types - confirm against Puree.
    # @return [Array]
    def resource_batch(limit, offset)
      grouped = super
      grouped.each_with_object([]) do |(_key, models), combined|
        combined.concat models
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
module ResearchMetadataBatch
  # Version of this gem, following Semantic Versioning.
  VERSION = '0.1.0'
end
@@ -0,0 +1,19 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "research_metadata_batch/version"
5
+
6
Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'research_metadata_batch'
  spec.version  = ResearchMetadataBatch::VERSION
  spec.authors  = ['Adrian Albin-Clark']
  spec.email    = ['a.albin-clark@lancaster.ac.uk']
  spec.summary  = 'Batch processing for the Pure Research Information System.'
  spec.homepage = 'https://github.com/lulibrary/research_metadata_batch'
  spec.license  = 'MIT'

  # Contents: every file tracked by git; test files are picked out by directory.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Requirements
  spec.required_ruby_version = '~> 2.1'
  spec.add_dependency 'puree', '~> 2.2'
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: research_metadata_batch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Adrian Albin-Clark
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-10-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: puree
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.2'
27
+ description:
28
+ email:
29
+ - a.albin-clark@lancaster.ac.uk
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".gitignore"
35
+ - CHANGELOG.md
36
+ - Gemfile
37
+ - Gemfile.lock
38
+ - LICENSE.txt
39
+ - README.md
40
+ - lib/research_metadata_batch.rb
41
+ - lib/research_metadata_batch/base.rb
42
+ - lib/research_metadata_batch/custom.rb
43
+ - lib/research_metadata_batch/dataset.rb
44
+ - lib/research_metadata_batch/event.rb
45
+ - lib/research_metadata_batch/external_organisation.rb
46
+ - lib/research_metadata_batch/journal.rb
47
+ - lib/research_metadata_batch/organisational_unit.rb
48
+ - lib/research_metadata_batch/person.rb
49
+ - lib/research_metadata_batch/project.rb
50
+ - lib/research_metadata_batch/publisher.rb
51
+ - lib/research_metadata_batch/research_metadata_batch.rb
52
+ - lib/research_metadata_batch/research_output.rb
53
+ - lib/research_metadata_batch/version.rb
54
+ - research_metadata_batch.gemspec
55
+ homepage: https://github.com/lulibrary/research_metadata_batch
56
+ licenses:
57
+ - MIT
58
+ metadata: {}
59
+ post_install_message:
60
+ rdoc_options: []
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '2.1'
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubyforge_project:
75
+ rubygems_version: 2.7.3
76
+ signing_key:
77
+ specification_version: 4
78
+ summary: Batch processing for the Pure Research Information System.
79
+ test_files: []