research_metadata_batch 0.2.0 → 0.3.0

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry to which they were published. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4dd55837b2cfc04e261c6b4ab714a63b0767839dd07daa4b4d0f5b6f16c4f8aa
4
- data.tar.gz: f9e675684cd2ba0fd35e7c78f4f72451ed4fd9c26e9394c81e4b636450f506fe
3
+ metadata.gz: 9d27b60c2067c5d5faae6b2680f50186e4087c146bdb076f65955dfd05172397
4
+ data.tar.gz: 25c9789f3452dc091c2b180eac05f9038ff63f10152eaee9cc4a4fd68be2e862
5
5
  SHA512:
6
- metadata.gz: 188bd5524e03206bd00f84b33a94f56559f0898f95e02e23df2f44ebf4fba5443a07e3e2b2885038d8800d038bf8d0eb1439ac777ef1d4e552fee1ac6fce3e2a
7
- data.tar.gz: f1936694098d4543dd6d310e7ed0ba6ff1317b827824a7c3edff650dfa9d2754810eeef03686dc9928d82085282deab935656f0cf61f1dc2117eb70f44497e26
6
+ metadata.gz: 103ffbdd89ab5f0056608f07c3f30fb388f294db3ec882cb5e3b4e729c83b0bfedac16ef5047b17b785c7988f065cab55fb32ca4311064bf50879fc1321dc70e
7
+ data.tar.gz: 15e0e3496b5194d4a15b1f07a9c8e69d64412e41622a352d62fc5f82dbc16d8dad621494c81e09b01b5c8213a19dbeb53221621e802a4616ecb1ad7b1df19ecf
data/CHANGELOG.md CHANGED
@@ -2,9 +2,17 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## 0.3.0 - 2018-10-12
6
+ ### Added
7
+ - Support for GET and POST parameters.
8
+ - Simplified logging.
9
+
10
+ ### Changed
11
+ - Record values matching rather than validation.
12
+
5
13
  ## 0.2.0 - 2018-10-05
6
14
  ### Changed
7
- - Record validation
15
+ - Record validation.
8
16
 
9
17
  ## 0.1.0 - 2018-09-27
10
18
  ### Added
data/Gemfile.lock CHANGED
@@ -1,8 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- research_metadata_batch (0.2.0)
5
- puree (~> 2.2)
4
+ research_metadata_batch (0.3.0)
5
+ puree (~> 2.3)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
@@ -24,7 +24,7 @@ GEM
24
24
  nokogiri (1.8.4)
25
25
  mini_portile2 (~> 2.3.0)
26
26
  public_suffix (3.0.3)
27
- puree (2.2.0)
27
+ puree (2.3.0)
28
28
  http (~> 2.0)
29
29
  nokogiri (~> 1.6)
30
30
  unf (0.1.4)
data/README.md CHANGED
@@ -1,6 +1,5 @@
1
1
  # Research Metadata Batch
2
- For the batch processing of Pure records. Custom actions and log messages can be
3
- defined in user-defined applications.
2
+ Batch processing for the Pure Research Information System.
4
3
 
5
4
  ## Status
6
5
 
@@ -36,21 +35,16 @@ pure_config = {
36
35
  ResearchMetadataBatch::Dataset.new(pure_config: pure_config).process
37
36
  ```
38
37
 
39
- ## Making an application
40
- Require this gem, then open up the base class {ResearchMetadataBatch::Base} as below. Implement methods from
41
- {ResearchMetadataBatch::Custom} as inherited methods, including any secondary initialisation using the
42
- ``init`` method.
43
-
44
-
45
- For resource-specific customisation, open up a resource class e.g. {ResearchMetadataBatch::Dataset}. Implement methods from
46
- {ResearchMetadataBatch::Custom} as resource-specific methods.
47
-
38
+ ## Example application
48
39
  This example uses Amazon Web Services.
49
40
 
50
- ### Base class
41
+ ### shared.rb
42
+ Implement methods from {ResearchMetadataBatch::Shared}.
51
43
  ```ruby
52
- module ResearchMetadataBatch
53
- class Base
44
+ require 'aws-sdk-s3'
45
+
46
+ module App
47
+ module Shared
54
48
  def init(aws_config:)
55
49
  aws_credentials = Aws::Credentials.new aws_config[:access_key_id],
56
50
  aws_config[:secret_access_key]
@@ -66,19 +60,21 @@ module ResearchMetadataBatch
66
60
  end
67
61
  ```
68
62
 
69
- ### Resource class
63
+ ### research_output.rb
70
64
  ```ruby
71
- module ResearchMetadataBatch
72
- class Dataset
73
- # Implement methods from ResearchMetadataBatch::Custom
65
+ require_relative 'shared'
66
+
67
+ module App
68
+ class ResearchOutput < ResearchMetadataBatch::ResearchOutput
69
+ include App::Shared
74
70
  end
75
71
  end
76
72
  ```
77
73
 
78
- ### Running a batch process
74
+ ### script.rb
79
75
  ```ruby
80
76
  require 'research_metadata_batch'
81
- # require your opened classes
77
+ require_relative 'research_output'
82
78
 
83
79
  pure_config = {
84
80
  url: ENV['PURE_URL'],
@@ -101,7 +97,15 @@ config = {
101
97
  log_file: log_file
102
98
  }
103
99
 
104
- batch = ResearchMetadataBatch::Dataset.new config
100
+ batch = App::ResearchOutput.new config
105
101
  batch.init aws_config: aws_config
106
- batch.process
102
+ params = {
103
+ size: 50,
104
+ typeUri: [
105
+ '/dk/atira/pure/researchoutput/researchoutputtypes/contributiontojournal/article',
106
+ '/dk/atira/pure/researchoutput/researchoutputtypes/contributiontoconference/paper'
107
+ ]
108
+ }
109
+ batch.process params: params
110
+
107
111
  ```
@@ -1,12 +1,12 @@
1
1
  require 'logger'
2
2
  require 'puree'
3
- require_relative 'custom'
3
+ require_relative 'shared'
4
4
 
5
5
  module ResearchMetadataBatch
6
6
  # @note Not to be used directly
7
7
  class Base
8
- include ResearchMetadataBatch::Custom
9
- # @param pure_config [Hash]
8
+ include ResearchMetadataBatch::Shared
9
+ # @param pure_config [Hash]
10
10
  # @option pure_config [String] :url
11
11
  # @option pure_config [String] :username
12
12
  # @option pure_config [String] :password
  # @option config [String] :password
@@ -21,22 +21,18 @@ module ResearchMetadataBatch
21
21
  end
22
22
  end
23
23
 
24
+ # @param params [Hash] Combined GET and POST parameters for all records
24
25
  # @param max [Fixnum] Number of records to act upon. Omit to act upon as many as possible.
25
- # @param limit [Fixnum] Pure records limit.
26
- # @param offset [Fixnum] Pure records offset.
27
- # @param action [Boolean] Set to false to mock an action.
28
- # @param delay [Fixnum] Delay in seconds between limit-sized batches.
29
- def process(max: nil, limit: 20, offset: 0, action: true, delay: 0)
30
- records_available = resource_count
31
-
32
- @logger.info "#{records_available} records in Pure before processing"
33
- if action
34
- begin
35
- preflight
36
- @logger.info preflight_success_log_message
37
- rescue => error
38
- @logger.info preflight_error_log_message(error)
39
- end
26
+ # @param delay [Fixnum] Delay in seconds between batches.
27
+ def process(params: {}, max: nil, delay: 0)
28
+ offset = params[:offset]
29
+ records_available = resource_count params
30
+ @logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
31
+ begin
32
+ preflight_msg = preflight
33
+ @logger.info "PREFLIGHT=#{preflight_msg}" if preflight_msg
34
+ rescue => error
35
+ @logger.error "PREFLIGHT=#{error}"
40
36
  end
41
37
 
42
38
  if max
@@ -47,7 +43,7 @@ module ResearchMetadataBatch
47
43
  qty_to_find = records_available
48
44
  end
49
45
 
50
- if offset < 0 || offset > records_available - 1
46
+ if !offset || offset < 0 || offset > records_available - 1
51
47
  offset = 0
52
48
  end
53
49
 
@@ -57,35 +53,27 @@ module ResearchMetadataBatch
57
53
  while position < records_available
58
54
  # extract from Pure
59
55
  begin
60
- result = resource_batch limit, position
61
- rescue => e
62
- @logger.error e
56
+ params[:offset] = position
57
+ result = resource_batch params
58
+ rescue => error
59
+ @logger.error "METADATA_EXTRACTION=#{error}"
63
60
  sleep 10
64
61
  redo
65
62
  end
66
63
 
67
64
  result.each do |i|
68
65
 
69
- record_validation_error = validate_record i
70
- if record_validation_error
71
- @logger.warn "#{log_message_prefix(position, i.uuid)} - VALIDATION_ERROR=#{record_validation_error}"
66
+ unless valid? i
67
+ @logger.info "#{log_message_prefix(position, i.uuid)} : VALID=false"
72
68
  position += 1
73
69
  next
74
70
  end
75
71
 
76
72
  begin
77
- if action
78
- act_msg = act i
79
- else
80
- act_msg = mock_act i
81
- end
82
- if act_msg
83
- @logger.info "#{log_message_prefix(position, i.uuid)} - #{act_success_log_message(i, act_msg)}"
84
- else
85
- @logger.info "#{log_message_prefix(position, i.uuid)}"
86
- end
73
+ act_msg = act i
74
+ @logger.info "#{log_message_prefix(position, i.uuid)} : #{act_msg}"
87
75
  rescue => error
88
- @logger.error "#{log_message_prefix(position, i.uuid)} - ERROR=#{error}"
76
+ @logger.error "#{log_message_prefix(position, i.uuid)} : #{error}"
89
77
  end
90
78
 
91
79
  position += 1
@@ -98,37 +86,35 @@ module ResearchMetadataBatch
98
86
 
99
87
  # handle error response
100
88
  if result.empty?
101
- @logger.error "PURE_RECORD=#{position} - ERROR=No data"
89
+ @logger.error "PURE_RECORD=#{position} : METADATA_EXTRACTION=No data"
102
90
  position += 1
103
91
  end
104
92
 
105
93
  sleep delay
106
94
  end
107
95
 
108
- @logger.info "#{records_available} records in Pure after processing"
96
+ @logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
109
97
 
110
98
  end
111
99
 
112
100
  private
113
101
 
114
- def act(model)
115
- puts model.inspect
116
- end
117
-
118
102
  # @return [String]
119
103
  def log_message_prefix(pure_record, pure_uuid)
120
- "PURE_RECORD=#{pure_record} - PURE_UUID=#{pure_uuid}"
104
+ "PURE_RECORD=#{pure_record} : PURE_UUID=#{pure_uuid}"
121
105
  end
122
106
 
123
- def resource_count
107
+ def resource_count(params)
108
+ params = params.dup
124
109
  resource_class = "Puree::Extractor::#{Puree::Util::String.titleize(@resource_type)}"
125
- Object.const_get(resource_class).new(@pure_config).count
110
+ Object.const_get(resource_class).new(@pure_config).count(params)
126
111
  end
127
112
 
128
- def resource_batch(limit, offset)
113
+ def resource_batch(params)
114
+ params = params.dup
129
115
  resource_method = "#{@resource_type}s".to_sym
130
116
  client = Puree::REST::Client.new(@pure_config).send resource_method
131
- response = client.all params: {size: limit, offset: offset}
117
+ response = client.all_complex params: params
132
118
  Puree::XMLExtractor::Collection.send resource_method, response.to_s
133
119
  end
134
120
  end
@@ -11,7 +11,7 @@ module ResearchMetadataBatch
11
11
 
12
12
  private
13
13
 
14
- def resource_batch(limit, offset)
14
+ def resource_batch(params)
15
15
  research_outputs_hash = super
16
16
  research_outputs_array = []
17
17
  research_outputs_hash.each do |k, v|
@@ -1,7 +1,7 @@
1
1
  module ResearchMetadataBatch
2
2
 
3
3
  # @note These methods (except init) are used internally by {ResearchMetadataBatch::Base#process} and have been left public for documentation purposes only
4
- module Custom
4
+ module Shared
5
5
 
6
6
  # Second stage initialisation, perhaps third party services.
7
7
  # @param args [Hash]
@@ -9,37 +9,20 @@ module ResearchMetadataBatch
9
9
  end
10
10
 
11
11
  # Anything to be done at the start of a batch run
12
+ # @return [String, nil] Optionally, return something to indicate what has been done.
12
13
  def preflight
13
14
  end
14
15
 
15
- # Message when preflight method completes
16
- # @return [String]
17
- def preflight_success_log_message
18
- end
19
-
20
- # Message when preflight method does not complete
21
- # @return [String]
22
- def preflight_error_log_message(error)
23
- end
24
-
25
16
  # Do something with model metadata
26
17
  # @return [String, nil] Optionally, return something transaction-specific, such as a code/ID from an external service.
27
18
  def act(model)
19
+ puts model.inspect
28
20
  end
29
21
 
30
- # Message when act/mock_act completes
31
- # @return [String]
32
- def act_success_log_message(model, act_msg)
33
- end
34
-
35
- # Fake doing something with model metadata
36
- # @return [String, nil]
37
- def mock_act(model)
38
- end
39
-
40
- # Check for values in metadata
41
- # @return [String, nil]
42
- def validate_record(model)
22
+ # Check for values in metadata.
23
+ # @return [Boolean]
24
+ def valid?(model)
25
+ true
43
26
  end
44
27
 
45
28
  end
@@ -1,4 +1,4 @@
1
1
  module ResearchMetadataBatch
2
2
  # Semantic version number
3
- VERSION = "0.2.0"
3
+ VERSION = "0.3.0"
4
4
  end
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
14
14
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
15
15
  spec.require_paths = ["lib"]
16
16
  spec.required_ruby_version = '~> 2.1'
17
- spec.add_dependency 'puree', '~> 2.2'
17
+ spec.add_dependency 'puree', '~> 2.3'
18
18
  spec.metadata = {
19
19
  "source_code_uri" => "https://github.com/lulibrary/research_metadata_batch"
20
20
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: research_metadata_batch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adrian Albin-Clark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-05 00:00:00.000000000 Z
11
+ date: 2018-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: puree
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.2'
19
+ version: '2.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.2'
26
+ version: '2.3'
27
27
  description:
28
28
  email:
29
29
  - a.albin-clark@lancaster.ac.uk
@@ -39,7 +39,6 @@ files:
39
39
  - README.md
40
40
  - lib/research_metadata_batch.rb
41
41
  - lib/research_metadata_batch/base.rb
42
- - lib/research_metadata_batch/custom.rb
43
42
  - lib/research_metadata_batch/dataset.rb
44
43
  - lib/research_metadata_batch/event.rb
45
44
  - lib/research_metadata_batch/external_organisation.rb
@@ -50,6 +49,7 @@ files:
50
49
  - lib/research_metadata_batch/publisher.rb
51
50
  - lib/research_metadata_batch/research_metadata_batch.rb
52
51
  - lib/research_metadata_batch/research_output.rb
52
+ - lib/research_metadata_batch/shared.rb
53
53
  - lib/research_metadata_batch/version.rb
54
54
  - research_metadata_batch.gemspec
55
55
  homepage: