research_metadata_batch 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/Gemfile.lock +3 -3
- data/README.md +26 -22
- data/lib/research_metadata_batch/base.rb +33 -47
- data/lib/research_metadata_batch/research_output.rb +1 -1
- data/lib/research_metadata_batch/{custom.rb → shared.rb} +7 -24
- data/lib/research_metadata_batch/version.rb +1 -1
- data/research_metadata_batch.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d27b60c2067c5d5faae6b2680f50186e4087c146bdb076f65955dfd05172397
|
4
|
+
data.tar.gz: 25c9789f3452dc091c2b180eac05f9038ff63f10152eaee9cc4a4fd68be2e862
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 103ffbdd89ab5f0056608f07c3f30fb388f294db3ec882cb5e3b4e729c83b0bfedac16ef5047b17b785c7988f065cab55fb32ca4311064bf50879fc1321dc70e
|
7
|
+
data.tar.gz: 15e0e3496b5194d4a15b1f07a9c8e69d64412e41622a352d62fc5f82dbc16d8dad621494c81e09b01b5c8213a19dbeb53221621e802a4616ecb1ad7b1df19ecf
|
data/CHANGELOG.md
CHANGED
@@ -2,9 +2,17 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 0.3.0 - 2018-10-12
|
6
|
+
### Added
|
7
|
+
- Support for GET and POST parameters.
|
8
|
+
- Simplified logging.
|
9
|
+
|
10
|
+
### Changed
|
11
|
+
- Record values matching rather than validation.
|
12
|
+
|
5
13
|
## 0.2.0 - 2018-10-05
|
6
14
|
### Changed
|
7
|
-
- Record validation
|
15
|
+
- Record validation.
|
8
16
|
|
9
17
|
## 0.1.0 - 2018-09-27
|
10
18
|
### Added
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
research_metadata_batch (0.2.0)
|
5
|
-
puree (~> 2.
|
4
|
+
research_metadata_batch (0.3.0)
|
5
|
+
puree (~> 2.3)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
@@ -24,7 +24,7 @@ GEM
|
|
24
24
|
nokogiri (1.8.4)
|
25
25
|
mini_portile2 (~> 2.3.0)
|
26
26
|
public_suffix (3.0.3)
|
27
|
-
puree (2.
|
27
|
+
puree (2.3.0)
|
28
28
|
http (~> 2.0)
|
29
29
|
nokogiri (~> 1.6)
|
30
30
|
unf (0.1.4)
|
data/README.md
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# Research Metadata Batch
|
2
|
-
|
3
|
-
defined in user-defined applications.
|
2
|
+
Batch processing for the Pure Research Information System.
|
4
3
|
|
5
4
|
## Status
|
6
5
|
|
@@ -36,21 +35,16 @@ pure_config = {
|
|
36
35
|
ResearchMetadataBatch::Dataset.new(pure_config: pure_config).process
|
37
36
|
```
|
38
37
|
|
39
|
-
##
|
40
|
-
Require this gem, then open up the base class {ResearchMetadataBatch::Base} as below. Implement methods from
|
41
|
-
{ResearchMetadataBatch::Custom} as inherited methods, including any secondary initialisation using the
|
42
|
-
``init`` method.
|
43
|
-
|
44
|
-
|
45
|
-
For resource-specific customisation, open up a resource class e.g. {ResearchMetadataBatch::Dataset}. Implement methods from
|
46
|
-
{ResearchMetadataBatch::Custom} as resource-specific methods.
|
47
|
-
|
38
|
+
## Example application
|
48
39
|
This example uses Amazon Web Services.
|
49
40
|
|
50
|
-
###
|
41
|
+
### shared.rb
|
42
|
+
Implement methods from {ResearchMetadataBatch::Shared}.
|
51
43
|
```ruby
|
52
|
-
|
53
|
-
|
44
|
+
require 'aws-sdk-s3'
|
45
|
+
|
46
|
+
module App
|
47
|
+
module Shared
|
54
48
|
def init(aws_config:)
|
55
49
|
aws_credentials = Aws::Credentials.new aws_config[:access_key_id],
|
56
50
|
aws_config[:secret_access_key]
|
@@ -66,19 +60,21 @@ module ResearchMetadataBatch
|
|
66
60
|
end
|
67
61
|
```
|
68
62
|
|
69
|
-
###
|
63
|
+
### research_output.rb
|
70
64
|
```ruby
|
71
|
-
|
72
|
-
|
73
|
-
|
65
|
+
require_relative 'shared'
|
66
|
+
|
67
|
+
module App
|
68
|
+
class ResearchOutput < ResearchMetadataBatch::ResearchOutput
|
69
|
+
include App::Shared
|
74
70
|
end
|
75
71
|
end
|
76
72
|
```
|
77
73
|
|
78
|
-
###
|
74
|
+
### script.rb
|
79
75
|
```ruby
|
80
76
|
require 'research_metadata_batch'
|
81
|
-
|
77
|
+
require_relative 'research_output'
|
82
78
|
|
83
79
|
pure_config = {
|
84
80
|
url: ENV['PURE_URL'],
|
@@ -101,7 +97,15 @@ config = {
|
|
101
97
|
log_file: log_file
|
102
98
|
}
|
103
99
|
|
104
|
-
batch =
|
100
|
+
batch = App::ResearchOutput.new config
|
105
101
|
batch.init aws_config: aws_config
|
106
|
-
|
102
|
+
params = {
|
103
|
+
size: 50,
|
104
|
+
typeUri: [
|
105
|
+
'/dk/atira/pure/researchoutput/researchoutputtypes/contributiontojournal/article',
|
106
|
+
'/dk/atira/pure/researchoutput/researchoutputtypes/contributiontoconference/paper'
|
107
|
+
]
|
108
|
+
}
|
109
|
+
batch.process params: params
|
110
|
+
|
107
111
|
```
|
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'logger'
|
2
2
|
require 'puree'
|
3
|
-
require_relative 'custom'
|
3
|
+
require_relative 'shared'
|
4
4
|
|
5
5
|
module ResearchMetadataBatch
|
6
6
|
# @note Not to be used directly
|
7
7
|
class Base
|
8
|
-
include ResearchMetadataBatch::Custom
|
9
|
-
|
8
|
+
include ResearchMetadataBatch::Shared
|
9
|
+
# @param pure_config [Hash]
|
10
10
|
# @option config [String] :url
|
11
11
|
# @option config [String] :username
|
12
12
|
# @option config [String] :password
|
@@ -21,22 +21,18 @@ module ResearchMetadataBatch
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
# @param params [Hash] Combined GET and POST parameters for all records
|
24
25
|
# @param max [Fixnum] Number of records to act upon. Omit to act upon as many as possible.
|
25
|
-
# @param
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
preflight
|
36
|
-
@logger.info preflight_success_log_message
|
37
|
-
rescue => error
|
38
|
-
@logger.info preflight_error_log_message(error)
|
39
|
-
end
|
26
|
+
# @param delay [Fixnum] Delay in seconds between batches.
|
27
|
+
def process(params: {}, max: nil, delay: 0)
|
28
|
+
offset = params[:offset]
|
29
|
+
records_available = resource_count params
|
30
|
+
@logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
|
31
|
+
begin
|
32
|
+
preflight_msg = preflight
|
33
|
+
@logger.info "PREFLIGHT=#{preflight_msg}" if preflight_msg
|
34
|
+
rescue => error
|
35
|
+
@logger.error "PREFLIGHT=#{error}"
|
40
36
|
end
|
41
37
|
|
42
38
|
if max
|
@@ -47,7 +43,7 @@ module ResearchMetadataBatch
|
|
47
43
|
qty_to_find = records_available
|
48
44
|
end
|
49
45
|
|
50
|
-
if offset < 0 || offset > records_available - 1
|
46
|
+
if !offset || offset < 0 || offset > records_available - 1
|
51
47
|
offset = 0
|
52
48
|
end
|
53
49
|
|
@@ -57,35 +53,27 @@ module ResearchMetadataBatch
|
|
57
53
|
while position < records_available
|
58
54
|
# extract from Pure
|
59
55
|
begin
|
60
|
-
|
61
|
-
|
62
|
-
|
56
|
+
params[:offset] = position
|
57
|
+
result = resource_batch params
|
58
|
+
rescue => error
|
59
|
+
@logger.error "METADATA_EXTRACTION=#{error}"
|
63
60
|
sleep 10
|
64
61
|
redo
|
65
62
|
end
|
66
63
|
|
67
64
|
result.each do |i|
|
68
65
|
|
69
|
-
|
70
|
-
|
71
|
-
@logger.warn "#{log_message_prefix(position, i.uuid)} - VALIDATION_ERROR=#{record_validation_error}"
|
66
|
+
unless valid? i
|
67
|
+
@logger.info "#{log_message_prefix(position, i.uuid)} : VALID=false"
|
72
68
|
position += 1
|
73
69
|
next
|
74
70
|
end
|
75
71
|
|
76
72
|
begin
|
77
|
-
|
78
|
-
|
79
|
-
else
|
80
|
-
act_msg = mock_act i
|
81
|
-
end
|
82
|
-
if act_msg
|
83
|
-
@logger.info "#{log_message_prefix(position, i.uuid)} - #{act_success_log_message(i, act_msg)}"
|
84
|
-
else
|
85
|
-
@logger.info "#{log_message_prefix(position, i.uuid)}"
|
86
|
-
end
|
73
|
+
act_msg = act i
|
74
|
+
@logger.info "#{log_message_prefix(position, i.uuid)} : #{act_msg}"
|
87
75
|
rescue => error
|
88
|
-
@logger.error "#{log_message_prefix(position, i.uuid)}
|
76
|
+
@logger.error "#{log_message_prefix(position, i.uuid)} : #{error}"
|
89
77
|
end
|
90
78
|
|
91
79
|
position += 1
|
@@ -98,37 +86,35 @@ module ResearchMetadataBatch
|
|
98
86
|
|
99
87
|
# handle error response
|
100
88
|
if result.empty?
|
101
|
-
@logger.error "PURE_RECORD=#{position}
|
89
|
+
@logger.error "PURE_RECORD=#{position} : METADATA_EXTRACTION=No data"
|
102
90
|
position += 1
|
103
91
|
end
|
104
92
|
|
105
93
|
sleep delay
|
106
94
|
end
|
107
95
|
|
108
|
-
@logger.info "
|
96
|
+
@logger.info "PURE_RECORDS_AVAILABLE=#{records_available}"
|
109
97
|
|
110
98
|
end
|
111
99
|
|
112
100
|
private
|
113
101
|
|
114
|
-
def act(model)
|
115
|
-
puts model.inspect
|
116
|
-
end
|
117
|
-
|
118
102
|
# @return [String]
|
119
103
|
def log_message_prefix(pure_record, pure_uuid)
|
120
|
-
"PURE_RECORD=#{pure_record}
|
104
|
+
"PURE_RECORD=#{pure_record} : PURE_UUID=#{pure_uuid}"
|
121
105
|
end
|
122
106
|
|
123
|
-
def resource_count
|
107
|
+
def resource_count(params)
|
108
|
+
params = params.dup
|
124
109
|
resource_class = "Puree::Extractor::#{Puree::Util::String.titleize(@resource_type)}"
|
125
|
-
Object.const_get(resource_class).new(@pure_config).count
|
110
|
+
Object.const_get(resource_class).new(@pure_config).count(params)
|
126
111
|
end
|
127
112
|
|
128
|
-
def resource_batch(
|
113
|
+
def resource_batch(params)
|
114
|
+
params = params.dup
|
129
115
|
resource_method = "#{@resource_type}s".to_sym
|
130
116
|
client = Puree::REST::Client.new(@pure_config).send resource_method
|
131
|
-
response = client.
|
117
|
+
response = client.all_complex params: params
|
132
118
|
Puree::XMLExtractor::Collection.send resource_method, response.to_s
|
133
119
|
end
|
134
120
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module ResearchMetadataBatch
|
2
2
|
|
3
3
|
# @note These methods (except init) are used internally by {ResearchMetadataBatch::Base#process} and have been left public for documentation purposes only
|
4
|
-
module Custom
|
4
|
+
module Shared
|
5
5
|
|
6
6
|
# Second stage initialisation, perhaps third party services.
|
7
7
|
# @param args [Hash]
|
@@ -9,37 +9,20 @@ module ResearchMetadataBatch
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Anything to be done at the start of a batch run
|
12
|
+
# @return [String, nil] Optionally, return something to indicate what has been done.
|
12
13
|
def preflight
|
13
14
|
end
|
14
15
|
|
15
|
-
# Message when preflight method completes
|
16
|
-
# @return [String]
|
17
|
-
def preflight_success_log_message
|
18
|
-
end
|
19
|
-
|
20
|
-
# Message when preflight method does not complete
|
21
|
-
# @return [String]
|
22
|
-
def preflight_error_log_message(error)
|
23
|
-
end
|
24
|
-
|
25
16
|
# Do something with model metadata
|
26
17
|
# @return [String, nil] Optionally, return something transaction-specific, such as a code/ID from an external service.
|
27
18
|
def act(model)
|
19
|
+
puts model.inspect
|
28
20
|
end
|
29
21
|
|
30
|
-
#
|
31
|
-
# @return [
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
# Fake doing something with model metadata
|
36
|
-
# @return [String, nil]
|
37
|
-
def mock_act(model)
|
38
|
-
end
|
39
|
-
|
40
|
-
# Check for values in metadata
|
41
|
-
# @return [String, nil]
|
42
|
-
def validate_record(model)
|
22
|
+
# Check for values in metadata.
|
23
|
+
# @return [Boolean]
|
24
|
+
def valid?(model)
|
25
|
+
true
|
43
26
|
end
|
44
27
|
|
45
28
|
end
|
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
15
15
|
spec.require_paths = ["lib"]
|
16
16
|
spec.required_ruby_version = '~> 2.1'
|
17
|
-
spec.add_dependency 'puree', '~> 2.
|
17
|
+
spec.add_dependency 'puree', '~> 2.3'
|
18
18
|
spec.metadata = {
|
19
19
|
"source_code_uri" => "https://github.com/lulibrary/research_metadata_batch"
|
20
20
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: research_metadata_batch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: puree
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2.
|
19
|
+
version: '2.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.3'
|
27
27
|
description:
|
28
28
|
email:
|
29
29
|
- a.albin-clark@lancaster.ac.uk
|
@@ -39,7 +39,6 @@ files:
|
|
39
39
|
- README.md
|
40
40
|
- lib/research_metadata_batch.rb
|
41
41
|
- lib/research_metadata_batch/base.rb
|
42
|
-
- lib/research_metadata_batch/custom.rb
|
43
42
|
- lib/research_metadata_batch/dataset.rb
|
44
43
|
- lib/research_metadata_batch/event.rb
|
45
44
|
- lib/research_metadata_batch/external_organisation.rb
|
@@ -50,6 +49,7 @@ files:
|
|
50
49
|
- lib/research_metadata_batch/publisher.rb
|
51
50
|
- lib/research_metadata_batch/research_metadata_batch.rb
|
52
51
|
- lib/research_metadata_batch/research_output.rb
|
52
|
+
- lib/research_metadata_batch/shared.rb
|
53
53
|
- lib/research_metadata_batch/version.rb
|
54
54
|
- research_metadata_batch.gemspec
|
55
55
|
homepage:
|