research_metadata_batch 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4dd55837b2cfc04e261c6b4ab714a63b0767839dd07daa4b4d0f5b6f16c4f8aa
4
- data.tar.gz: f9e675684cd2ba0fd35e7c78f4f72451ed4fd9c26e9394c81e4b636450f506fe
3
+ metadata.gz: 9d27b60c2067c5d5faae6b2680f50186e4087c146bdb076f65955dfd05172397
4
+ data.tar.gz: 25c9789f3452dc091c2b180eac05f9038ff63f10152eaee9cc4a4fd68be2e862
5
5
  SHA512:
6
- metadata.gz: 188bd5524e03206bd00f84b33a94f56559f0898f95e02e23df2f44ebf4fba5443a07e3e2b2885038d8800d038bf8d0eb1439ac777ef1d4e552fee1ac6fce3e2a
7
- data.tar.gz: f1936694098d4543dd6d310e7ed0ba6ff1317b827824a7c3edff650dfa9d2754810eeef03686dc9928d82085282deab935656f0cf61f1dc2117eb70f44497e26
6
+ metadata.gz: 103ffbdd89ab5f0056608f07c3f30fb388f294db3ec882cb5e3b4e729c83b0bfedac16ef5047b17b785c7988f065cab55fb32ca4311064bf50879fc1321dc70e
7
+ data.tar.gz: 15e0e3496b5194d4a15b1f07a9c8e69d64412e41622a352d62fc5f82dbc16d8dad621494c81e09b01b5c8213a19dbeb53221621e802a4616ecb1ad7b1df19ecf
data/CHANGELOG.md CHANGED
@@ -2,9 +2,17 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## 0.3.0 - 2018-10-12
6
+ ### Added
7
+ - Support for GET and POST parameters.
8
+ - Simplified logging.
9
+
10
+ ### Changed
11
+ - Record values matching rather than validation.
12
+
5
13
  ## 0.2.0 - 2018-10-05
6
14
  ### Changed
7
- - Record validation
15
+ - Record validation.
8
16
 
9
17
  ## 0.1.0 - 2018-09-27
10
18
  ### Added
data/Gemfile.lock CHANGED
@@ -1,8 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- research_metadata_batch (0.2.0)
5
- puree (~> 2.2)
4
+ research_metadata_batch (0.3.0)
5
+ puree (~> 2.3)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
@@ -24,7 +24,7 @@ GEM
24
24
  nokogiri (1.8.4)
25
25
  mini_portile2 (~> 2.3.0)
26
26
  public_suffix (3.0.3)
27
- puree (2.2.0)
27
+ puree (2.3.0)
28
28
  http (~> 2.0)
29
29
  nokogiri (~> 1.6)
30
30
  unf (0.1.4)
data/README.md CHANGED
@@ -1,6 +1,5 @@
1
1
  # Research Metadata Batch
2
- For the batch processing of Pure records. Custom actions and log messages can be
3
- defined in user-defined applications.
2
+ Batch processing for the Pure Research Information System.
4
3
 
5
4
  ## Status
6
5
 
@@ -36,21 +35,16 @@ pure_config = {
36
35
  ResearchMetadataBatch::Dataset.new(pure_config: pure_config).process
37
36
  ```
38
37
 
39
- ## Making an application
40
- Require this gem, then open up the base class {ResearchMetadataBatch::Base} as below. Implement methods from
41
- {ResearchMetadataBatch::Custom} as inherited methods, including any secondary initialisation using the
42
- ``init`` method.
43
-
44
-
45
- For resource-specific customisation, open up a resource class e.g. {ResearchMetadataBatch::Dataset}. Implement methods from
46
- {ResearchMetadataBatch::Custom} as resource-specific methods.
47
-
38
+ ## Example application
48
39
  This example uses Amazon Web Services.
49
40
 
50
- ### Base class
41
+ ### shared.rb
42
+ Implement methods from {ResearchMetadataBatch::Shared}.
51
43
  ```ruby
52
- module ResearchMetadataBatch
53
- class Base
44
+ require 'aws-sdk-s3'
45
+
46
+ module App
47
+ module Shared
54
48
  def init(aws_config:)
55
49
  aws_credentials = Aws::Credentials.new aws_config[:access_key_id],
56
50
  aws_config[:secret_access_key]
@@ -66,19 +60,21 @@ module ResearchMetadataBatch
66
60
  end
67
61
  ```
68
62
 
69
- ### Resource class
63
+ ### research_output.rb
70
64
  ```ruby
71
- module ResearchMetadataBatch
72
- class Dataset
73
- # Implement methods from ResearchMetadataBatch::Custom
65
+ require_relative 'shared'
66
+
67
+ module App
68
+ class ResearchOutput < ResearchMetadataBatch::ResearchOutput
69
+ include App::Shared
74
70
  end
75
71
  end
76
72
  ```
77
73
 
78
- ### Running a batch process
74
+ ### script.rb
79
75
  ```ruby
80
76
  require 'research_metadata_batch'
81
- # require your opened classes
77
+ require_relative 'research_output'
82
78
 
83
79
  pure_config = {
84
80
  url: ENV['PURE_URL'],
@@ -101,7 +97,15 @@ config = {
101
97
  log_file: log_file
102
98
  }
103
99
 
104
- batch = ResearchMetadataBatch::Dataset.new config
100
+ batch = App::ResearchOutput.new config
105
101
  batch.init aws_config: aws_config
106
- batch.process
102
+ params = {
103
+ size: 50,
104
+ typeUri: [
105
+ '/dk/atira/pure/researchoutput/researchoutputtypes/contributiontojournal/article',
106
+ '/dk/atira/pure/researchoutput/researchoutputtypes/contributiontoconference/paper'
107
+ ]
108
+ }
109
+ batch.process params: params
110
+
107
111
  ```
@@ -1,12 +1,12 @@
1
1
  require 'logger'
2
2
  require 'puree'
3
- require_relative 'custom'
3
+ require_relative 'shared'
4
4
 
5
5
  module ResearchMetadataBatch
6
6
  # @note Not to be used directly
7
7
  class Base
8
- include ResearchMetadataBatch::Custom
9
- # @param pure_config [Hash]
8
+ include ResearchMetadataBatch::Shared
9
+ # @param pure_config [Hash]
10
10
  # @option pure_config [String] :url
11
11
  # @option pure_config [String] :username
12
12
  # @option pure_config [String] :password
@@ -21,22 +21,18 @@ module ResearchMetadataBatch
21
21
  end
22
22
  end
23
23
 
24
+ # @param params [Hash] Combined GET and POST parameters for all records
24
25
  # @param max [Integer] Number of records to act upon. Omit to act upon as many as possible.
25
- # @param limit [Fixnum] Pure records limit.
26
- # @param offset [Fixnum] Pure records offset.
27
- # @param action [Boolean] Set to false to mock an action.
28
- # @param delay [Fixnum] Delay in seconds between limit-sized batches.
29
- def process(max: nil, limit: 20, offset: 0, action: true, delay: 0)
30
- records_available = resource_count
31
-
32
- @logger.info "#{records_available} records in Pure before processing"
33
- if action
34
- begin
35
- preflight
36
- @logger.info preflight_success_log_message
37
- rescue => error
38
- @logger.info preflight_error_log_message(error)
39
- end
26
+ # @param delay [Integer] Delay in seconds between batches.
27
+ def process(params: {}, max: nil, delay: 0)
28
+ offset = params[:offset]
29
+ records_available = resource_count params
30
+ @logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
31
+ begin
32
+ preflight_msg = preflight
33
+ @logger.info "PREFLIGHT=#{preflight_msg}" if preflight_msg
34
+ rescue => error
35
+ @logger.error "PREFLIGHT=#{error}"
40
36
  end
41
37
 
42
38
  if max
@@ -47,7 +43,7 @@ module ResearchMetadataBatch
47
43
  qty_to_find = records_available
48
44
  end
49
45
 
50
- if offset < 0 || offset > records_available - 1
46
+ if !offset || offset < 0 || offset > records_available - 1
51
47
  offset = 0
52
48
  end
53
49
 
@@ -57,35 +53,27 @@ module ResearchMetadataBatch
57
53
  while position < records_available
58
54
  # extract from Pure
59
55
  begin
60
- result = resource_batch limit, position
61
- rescue => e
62
- @logger.error e
56
+ params[:offset] = position
57
+ result = resource_batch params
58
+ rescue => error
59
+ @logger.error "METADATA_EXTRACTION=#{error}"
63
60
  sleep 10
64
61
  redo
65
62
  end
66
63
 
67
64
  result.each do |i|
68
65
 
69
- record_validation_error = validate_record i
70
- if record_validation_error
71
- @logger.warn "#{log_message_prefix(position, i.uuid)} - VALIDATION_ERROR=#{record_validation_error}"
66
+ unless valid? i
67
+ @logger.info "#{log_message_prefix(position, i.uuid)} : VALID=false"
72
68
  position += 1
73
69
  next
74
70
  end
75
71
 
76
72
  begin
77
- if action
78
- act_msg = act i
79
- else
80
- act_msg = mock_act i
81
- end
82
- if act_msg
83
- @logger.info "#{log_message_prefix(position, i.uuid)} - #{act_success_log_message(i, act_msg)}"
84
- else
85
- @logger.info "#{log_message_prefix(position, i.uuid)}"
86
- end
73
+ act_msg = act i
74
+ @logger.info "#{log_message_prefix(position, i.uuid)} : #{act_msg}"
87
75
  rescue => error
88
- @logger.error "#{log_message_prefix(position, i.uuid)} - ERROR=#{error}"
76
+ @logger.error "#{log_message_prefix(position, i.uuid)} : #{error}"
89
77
  end
90
78
 
91
79
  position += 1
@@ -98,37 +86,35 @@ module ResearchMetadataBatch
98
86
 
99
87
  # handle error response
100
88
  if result.empty?
101
- @logger.error "PURE_RECORD=#{position} - ERROR=No data"
89
+ @logger.error "PURE_RECORD=#{position} : METADATA_EXTRACTION=No data"
102
90
  position += 1
103
91
  end
104
92
 
105
93
  sleep delay
106
94
  end
107
95
 
108
- @logger.info "#{records_available} records in Pure after processing"
96
+ @logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
109
97
 
110
98
  end
111
99
 
112
100
  private
113
101
 
114
- def act(model)
115
- puts model.inspect
116
- end
117
-
118
102
  # @return [String]
119
103
  def log_message_prefix(pure_record, pure_uuid)
120
- "PURE_RECORD=#{pure_record} - PURE_UUID=#{pure_uuid}"
104
+ "PURE_RECORD=#{pure_record} : PURE_UUID=#{pure_uuid}"
121
105
  end
122
106
 
123
- def resource_count
107
+ def resource_count(params)
108
+ params = params.dup
124
109
  resource_class = "Puree::Extractor::#{Puree::Util::String.titleize(@resource_type)}"
125
- Object.const_get(resource_class).new(@pure_config).count
110
+ Object.const_get(resource_class).new(@pure_config).count(params)
126
111
  end
127
112
 
128
- def resource_batch(limit, offset)
113
+ def resource_batch(params)
114
+ params = params.dup
129
115
  resource_method = "#{@resource_type}s".to_sym
130
116
  client = Puree::REST::Client.new(@pure_config).send resource_method
131
- response = client.all params: {size: limit, offset: offset}
117
+ response = client.all_complex params: params
132
118
  Puree::XMLExtractor::Collection.send resource_method, response.to_s
133
119
  end
134
120
  end
@@ -11,7 +11,7 @@ module ResearchMetadataBatch
11
11
 
12
12
  private
13
13
 
14
- def resource_batch(limit, offset)
14
+ def resource_batch(params)
15
15
  research_outputs_hash = super
16
16
  research_outputs_array = []
17
17
  research_outputs_hash.each do |k, v|
@@ -1,7 +1,7 @@
1
1
  module ResearchMetadataBatch
2
2
 
3
3
  # @note These methods (except init) are used internally by {ResearchMetadataBatch::Base#process} and have been left public for documentation purposes only
4
- module Custom
4
+ module Shared
5
5
 
6
6
  # Second-stage initialisation, e.g. setting up third-party services.
7
7
  # @param args [Hash]
@@ -9,37 +9,20 @@ module ResearchMetadataBatch
9
9
  end
10
10
 
11
11
  # Anything to be done at the start of a batch run
12
+ # @return [String, nil] Optionally, return something to indicate what has been done.
12
13
  def preflight
13
14
  end
14
15
 
15
- # Message when preflight method completes
16
- # @return [String]
17
- def preflight_success_log_message
18
- end
19
-
20
- # Message when preflight method does not complete
21
- # @return [String]
22
- def preflight_error_log_message(error)
23
- end
24
-
25
16
  # Do something with model metadata
26
17
  # @return [String, nil] Optionally, return something transaction-specific, such as a code/ID from an external service.
27
18
  def act(model)
19
+ puts model.inspect
28
20
  end
29
21
 
30
- # Message when act/mock_act completes
31
- # @return [String]
32
- def act_success_log_message(model, act_msg)
33
- end
34
-
35
- # Fake doing something with model metadata
36
- # @return [String, nil]
37
- def mock_act(model)
38
- end
39
-
40
- # Check for values in metadata
41
- # @return [String, nil]
42
- def validate_record(model)
22
+ # Check for values in metadata. A record for which this returns false is logged as VALID=false and skipped.
23
+ # @return [Boolean]
24
+ def valid?(model)
25
+ true
43
26
  end
44
27
 
45
28
  end
@@ -1,4 +1,4 @@
1
1
  module ResearchMetadataBatch
2
2
  # Semantic version number
3
- VERSION = "0.2.0"
3
+ VERSION = "0.3.0"
4
4
  end
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
14
14
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
15
15
  spec.require_paths = ["lib"]
16
16
  spec.required_ruby_version = '~> 2.1'
17
- spec.add_dependency 'puree', '~> 2.2'
17
+ spec.add_dependency 'puree', '~> 2.3'
18
18
  spec.metadata = {
19
19
  "source_code_uri" => "https://github.com/lulibrary/research_metadata_batch"
20
20
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: research_metadata_batch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adrian Albin-Clark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-05 00:00:00.000000000 Z
11
+ date: 2018-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: puree
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.2'
19
+ version: '2.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.2'
26
+ version: '2.3'
27
27
  description:
28
28
  email:
29
29
  - a.albin-clark@lancaster.ac.uk
@@ -39,7 +39,6 @@ files:
39
39
  - README.md
40
40
  - lib/research_metadata_batch.rb
41
41
  - lib/research_metadata_batch/base.rb
42
- - lib/research_metadata_batch/custom.rb
43
42
  - lib/research_metadata_batch/dataset.rb
44
43
  - lib/research_metadata_batch/event.rb
45
44
  - lib/research_metadata_batch/external_organisation.rb
@@ -50,6 +49,7 @@ files:
50
49
  - lib/research_metadata_batch/publisher.rb
51
50
  - lib/research_metadata_batch/research_metadata_batch.rb
52
51
  - lib/research_metadata_batch/research_output.rb
52
+ - lib/research_metadata_batch/shared.rb
53
53
  - lib/research_metadata_batch/version.rb
54
54
  - research_metadata_batch.gemspec
55
55
  homepage: