research_metadata_batch 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +7 -3
- data/README.md +10 -14
- data/lib/research_metadata_batch/base.rb +25 -17
- data/lib/research_metadata_batch/shared.rb +2 -2
- data/lib/research_metadata_batch/version.rb +1 -1
- data/research_metadata_batch.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6dfb82b4d3d35c9e38a757050f207c3adee991510369c98704ec77dafda3dff2
|
4
|
+
data.tar.gz: 1d4fecd07215e5e928bd76289d740f09c29a9c3144d566a03daf44d2086024c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75fc086bc6591d49058e2402293f964a6b0b00698e6236b0fdd50887b538951f585d5cd6a6e67a88b8d990b8f60b6a1fa37d4210077c647e2920f7065873c28e
|
7
|
+
data.tar.gz: c6363b1d95e72f6aeb9c332649d2c74fb2582bd100ce3ebcb2c7d701ea23ace6712a508678d721e587a4263fb456e2925afb2c5a0301e795930dc0c60ecc3386
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 0.4.0 - 2018-11-02
|
6
|
+
### Changed
|
7
|
+
- Logging as JSON.
|
8
|
+
|
5
9
|
## 0.3.0 - 2018-10-12
|
6
10
|
### Added
|
7
11
|
- Support for GET and POST parameters.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
research_metadata_batch (0.3.0)
|
4
|
+
research_metadata_batch (0.4.0)
|
5
|
+
ougai (~> 1.7)
|
5
6
|
puree (~> 2.3)
|
6
7
|
|
7
8
|
GEM
|
@@ -21,10 +22,13 @@ GEM
|
|
21
22
|
http-form_data (1.0.3)
|
22
23
|
http_parser.rb (0.6.0)
|
23
24
|
mini_portile2 (2.3.0)
|
24
|
-
nokogiri (1.8.
|
25
|
+
nokogiri (1.8.5)
|
25
26
|
mini_portile2 (~> 2.3.0)
|
27
|
+
oj (3.6.10)
|
28
|
+
ougai (1.7.0)
|
29
|
+
oj (~> 3.4)
|
26
30
|
public_suffix (3.0.3)
|
27
|
-
puree (2.
|
31
|
+
puree (2.4.0)
|
28
32
|
http (~> 2.0)
|
29
33
|
nokogiri (~> 1.6)
|
30
34
|
unf (0.1.4)
|
data/README.md
CHANGED
@@ -35,26 +35,25 @@ pure_config = {
|
|
35
35
|
ResearchMetadataBatch::Dataset.new(pure_config: pure_config).process
|
36
36
|
```
|
37
37
|
|
38
|
-
##
|
39
|
-
|
38
|
+
## Creating an application
|
39
|
+
Either open up classes or create subclasses to implement application-specific behaviour.
|
40
|
+
|
41
|
+
This example creates subclasses and uses Amazon Web Services.
|
40
42
|
|
41
43
|
### shared.rb
|
42
44
|
Implement methods from {ResearchMetadataBatch::Shared}.
|
43
45
|
```ruby
|
44
|
-
require
|
46
|
+
# require aws sdk
|
45
47
|
|
46
48
|
module App
|
47
49
|
module Shared
|
48
50
|
def init(aws_config:)
|
49
|
-
|
50
|
-
aws_config[:secret_access_key]
|
51
|
-
@s3_client = Aws::S3::Client.new region: aws_config[:region],
|
52
|
-
credentials: aws_credentials
|
53
|
-
@s3_bucket = aws_config[:s3_bucket]
|
51
|
+
# Do something with :aws_config
|
54
52
|
end
|
55
53
|
|
56
54
|
def act(model)
|
57
|
-
# Do something
|
55
|
+
# Do something with Amazon Web Services
|
56
|
+
return {key1: 'some_value', key2: 'another_value', msg: 'what_happened'}
|
58
57
|
end
|
59
58
|
end
|
60
59
|
end
|
@@ -84,10 +83,7 @@ pure_config = {
|
|
84
83
|
}
|
85
84
|
|
86
85
|
aws_config = {
|
87
|
-
|
88
|
-
secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
89
|
-
region: ENV['AWS_REGION'],
|
90
|
-
s3_bucket: 'YOUR_S3_BUCKET'
|
86
|
+
# details
|
91
87
|
}
|
92
88
|
|
93
89
|
log_file = '/path/to/your/log/file'
|
@@ -108,4 +104,4 @@ params = {
|
|
108
104
|
}
|
109
105
|
batch.process params: params
|
110
106
|
|
111
|
-
```
|
107
|
+
```
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'logger'
|
1
|
+
require 'ougai'
|
2
2
|
require 'puree'
|
3
3
|
require_relative 'shared'
|
4
4
|
|
@@ -15,9 +15,9 @@ module ResearchMetadataBatch
|
|
15
15
|
def initialize(pure_config:, log_file: nil)
|
16
16
|
@pure_config = pure_config
|
17
17
|
if log_file
|
18
|
-
@logger = Logger.new File.new(log_file, 'a'), 20, 'daily'
|
18
|
+
@logger = Ougai::Logger.new File.new(log_file, 'a'), 20, 'daily'
|
19
19
|
else
|
20
|
-
@logger = Logger.new(STDOUT)
|
20
|
+
@logger = Ougai::Logger.new(STDOUT)
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
@@ -27,12 +27,12 @@ module ResearchMetadataBatch
|
|
27
27
|
def process(params: {}, max: nil, delay: 0)
|
28
28
|
offset = params[:offset]
|
29
29
|
records_available = resource_count params
|
30
|
-
|
30
|
+
log_records_available records_available
|
31
31
|
begin
|
32
|
-
|
33
|
-
@logger.info
|
32
|
+
preflight_h = preflight
|
33
|
+
@logger.info({preflight: preflight_h}) if preflight_h.is_a?(Hash) && !preflight_h.empty?
|
34
34
|
rescue => error
|
35
|
-
@logger.error
|
35
|
+
@logger.error({preflight: error})
|
36
36
|
end
|
37
37
|
|
38
38
|
if max
|
@@ -56,7 +56,7 @@ module ResearchMetadataBatch
|
|
56
56
|
params[:offset] = position
|
57
57
|
result = resource_batch params
|
58
58
|
rescue => error
|
59
|
-
@logger.error
|
59
|
+
@logger.error({metadata_extraction: error})
|
60
60
|
sleep 10
|
61
61
|
redo
|
62
62
|
end
|
@@ -64,16 +64,20 @@ module ResearchMetadataBatch
|
|
64
64
|
result.each do |i|
|
65
65
|
|
66
66
|
unless valid? i
|
67
|
-
@logger.info
|
67
|
+
@logger.info log_entry_core(position, i.uuid).merge({valid: false})
|
68
68
|
position += 1
|
69
69
|
next
|
70
70
|
end
|
71
71
|
|
72
72
|
begin
|
73
|
-
|
74
|
-
|
73
|
+
act_h = act i
|
74
|
+
act_log_entry = log_entry_core(position, i.uuid)
|
75
|
+
act_log_entry.merge!(act_h) if act_h.is_a?(Hash) && !act_h.empty?
|
76
|
+
@logger.info act_log_entry
|
75
77
|
rescue => error
|
76
|
-
|
78
|
+
act_error_log_entry = log_entry_core(position, i.uuid)
|
79
|
+
act_error_log_entry.merge!({error: error})
|
80
|
+
@logger.error act_error_log_entry
|
77
81
|
end
|
78
82
|
|
79
83
|
position += 1
|
@@ -86,22 +90,26 @@ module ResearchMetadataBatch
|
|
86
90
|
|
87
91
|
# handle error response
|
88
92
|
if result.empty?
|
89
|
-
@logger.error
|
93
|
+
@logger.error({pure_record: position, metadata_extraction: 'No data'})
|
90
94
|
position += 1
|
91
95
|
end
|
92
96
|
|
93
97
|
sleep delay
|
94
98
|
end
|
95
99
|
|
96
|
-
|
100
|
+
log_records_available records_available
|
97
101
|
|
98
102
|
end
|
99
103
|
|
100
104
|
private
|
101
105
|
|
102
|
-
|
103
|
-
|
104
|
-
|
106
|
+
def log_records_available(count)
|
107
|
+
@logger.info({pure_records_available: count})
|
108
|
+
end
|
109
|
+
|
110
|
+
# @return [Hash]
|
111
|
+
def log_entry_core(pure_record, pure_uuid)
|
112
|
+
{pure_record: pure_record, pure_uuid: pure_uuid}
|
105
113
|
end
|
106
114
|
|
107
115
|
def resource_count(params)
|
@@ -9,12 +9,12 @@ module ResearchMetadataBatch
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Anything to be done at the start of a batch run
|
12
|
-
# @return [
|
12
|
+
# @return [Hash, nil] Optionally, return something to indicate what has been done.
|
13
13
|
def preflight
|
14
14
|
end
|
15
15
|
|
16
16
|
# Do something with model metadata
|
17
|
-
# @return [
|
17
|
+
# @return [Hash, nil] Optionally, return something transaction-specific, such as a code/ID from an external service.
|
18
18
|
def act(model)
|
19
19
|
puts model.inspect
|
20
20
|
end
|
@@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.require_paths = ["lib"]
|
16
16
|
spec.required_ruby_version = '~> 2.1'
|
17
17
|
spec.add_dependency 'puree', '~> 2.3'
|
18
|
+
spec.add_dependency 'ougai', '~> 1.7'
|
18
19
|
spec.metadata = {
|
19
20
|
"source_code_uri" => "https://github.com/lulibrary/research_metadata_batch"
|
20
21
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: research_metadata_batch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: puree
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: ougai
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.7'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.7'
|
27
41
|
description:
|
28
42
|
email:
|
29
43
|
- a.albin-clark@lancaster.ac.uk
|