research_metadata_batch 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +7 -3
- data/README.md +10 -14
- data/lib/research_metadata_batch/base.rb +25 -17
- data/lib/research_metadata_batch/shared.rb +2 -2
- data/lib/research_metadata_batch/version.rb +1 -1
- data/research_metadata_batch.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6dfb82b4d3d35c9e38a757050f207c3adee991510369c98704ec77dafda3dff2
|
4
|
+
data.tar.gz: 1d4fecd07215e5e928bd76289d740f09c29a9c3144d566a03daf44d2086024c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75fc086bc6591d49058e2402293f964a6b0b00698e6236b0fdd50887b538951f585d5cd6a6e67a88b8d990b8f60b6a1fa37d4210077c647e2920f7065873c28e
|
7
|
+
data.tar.gz: c6363b1d95e72f6aeb9c332649d2c74fb2582bd100ce3ebcb2c7d701ea23ace6712a508678d721e587a4263fb456e2925afb2c5a0301e795930dc0c60ecc3386
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 0.4.0 - 2018-11-02
|
6
|
+
### Changed
|
7
|
+
- Logging as JSON.
|
8
|
+
|
5
9
|
## 0.3.0 - 2018-10-12
|
6
10
|
### Added
|
7
11
|
- Support for GET and POST parameters.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
research_metadata_batch (0.3.0)
|
4
|
+
research_metadata_batch (0.4.0)
|
5
|
+
ougai (~> 1.7)
|
5
6
|
puree (~> 2.3)
|
6
7
|
|
7
8
|
GEM
|
@@ -21,10 +22,13 @@ GEM
|
|
21
22
|
http-form_data (1.0.3)
|
22
23
|
http_parser.rb (0.6.0)
|
23
24
|
mini_portile2 (2.3.0)
|
24
|
-
nokogiri (1.8.
|
25
|
+
nokogiri (1.8.5)
|
25
26
|
mini_portile2 (~> 2.3.0)
|
27
|
+
oj (3.6.10)
|
28
|
+
ougai (1.7.0)
|
29
|
+
oj (~> 3.4)
|
26
30
|
public_suffix (3.0.3)
|
27
|
-
puree (2.
|
31
|
+
puree (2.4.0)
|
28
32
|
http (~> 2.0)
|
29
33
|
nokogiri (~> 1.6)
|
30
34
|
unf (0.1.4)
|
data/README.md
CHANGED
@@ -35,26 +35,25 @@ pure_config = {
|
|
35
35
|
ResearchMetadataBatch::Dataset.new(pure_config: pure_config).process
|
36
36
|
```
|
37
37
|
|
38
|
-
##
|
39
|
-
|
38
|
+
## Creating an application
|
39
|
+
Either open up classes or create subclasses to implement application-specific behaviour.
|
40
|
+
|
41
|
+
This example creates subclasses and uses Amazon Web Services.
|
40
42
|
|
41
43
|
### shared.rb
|
42
44
|
Implement methods from {ResearchMetadataBatch::Shared}.
|
43
45
|
```ruby
|
44
|
-
require
|
46
|
+
# require aws sdk
|
45
47
|
|
46
48
|
module App
|
47
49
|
module Shared
|
48
50
|
def init(aws_config:)
|
49
|
-
|
50
|
-
aws_config[:secret_access_key]
|
51
|
-
@s3_client = Aws::S3::Client.new region: aws_config[:region],
|
52
|
-
credentials: aws_credentials
|
53
|
-
@s3_bucket = aws_config[:s3_bucket]
|
51
|
+
# Do something with :aws_config
|
54
52
|
end
|
55
53
|
|
56
54
|
def act(model)
|
57
|
-
# Do something
|
55
|
+
# Do something with Amazon Web Services
|
56
|
+
return {key1: 'some_value', key2: 'another_value', msg: 'what_happened'}
|
58
57
|
end
|
59
58
|
end
|
60
59
|
end
|
@@ -84,10 +83,7 @@ pure_config = {
|
|
84
83
|
}
|
85
84
|
|
86
85
|
aws_config = {
|
87
|
-
|
88
|
-
secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
89
|
-
region: ENV['AWS_REGION'],
|
90
|
-
s3_bucket: 'YOUR_S3_BUCKET'
|
86
|
+
# details
|
91
87
|
}
|
92
88
|
|
93
89
|
log_file = '/path/to/your/log/file'
|
@@ -108,4 +104,4 @@ params = {
|
|
108
104
|
}
|
109
105
|
batch.process params: params
|
110
106
|
|
111
|
-
```
|
107
|
+
```
|
data/lib/research_metadata_batch/base.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'logger'
|
1
|
+
require 'ougai'
|
2
2
|
require 'puree'
|
3
3
|
require_relative 'shared'
|
4
4
|
|
@@ -15,9 +15,9 @@ module ResearchMetadataBatch
|
|
15
15
|
def initialize(pure_config:, log_file: nil)
|
16
16
|
@pure_config = pure_config
|
17
17
|
if log_file
|
18
|
-
@logger = Logger.new File.new(log_file, 'a'), 20, 'daily'
|
18
|
+
@logger = Ougai::Logger.new File.new(log_file, 'a'), 20, 'daily'
|
19
19
|
else
|
20
|
-
@logger = Logger.new(STDOUT)
|
20
|
+
@logger = Ougai::Logger.new(STDOUT)
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
@@ -27,12 +27,12 @@ module ResearchMetadataBatch
|
|
27
27
|
def process(params: {}, max: nil, delay: 0)
|
28
28
|
offset = params[:offset]
|
29
29
|
records_available = resource_count params
|
30
|
-
|
30
|
+
log_records_available records_available
|
31
31
|
begin
|
32
|
-
|
33
|
-
@logger.info
|
32
|
+
preflight_h = preflight
|
33
|
+
@logger.info({preflight: preflight_h}) if preflight_h.is_a?(Hash) && !preflight_h.empty?
|
34
34
|
rescue => error
|
35
|
-
@logger.error
|
35
|
+
@logger.error({preflight: error})
|
36
36
|
end
|
37
37
|
|
38
38
|
if max
|
@@ -56,7 +56,7 @@ module ResearchMetadataBatch
|
|
56
56
|
params[:offset] = position
|
57
57
|
result = resource_batch params
|
58
58
|
rescue => error
|
59
|
-
@logger.error
|
59
|
+
@logger.error({metadata_extraction: error})
|
60
60
|
sleep 10
|
61
61
|
redo
|
62
62
|
end
|
@@ -64,16 +64,20 @@ module ResearchMetadataBatch
|
|
64
64
|
result.each do |i|
|
65
65
|
|
66
66
|
unless valid? i
|
67
|
-
@logger.info
|
67
|
+
@logger.info log_entry_core(position, i.uuid).merge({valid: false})
|
68
68
|
position += 1
|
69
69
|
next
|
70
70
|
end
|
71
71
|
|
72
72
|
begin
|
73
|
-
|
74
|
-
|
73
|
+
act_h = act i
|
74
|
+
act_log_entry = log_entry_core(position, i.uuid)
|
75
|
+
act_log_entry.merge!(act_h) if act_h.is_a?(Hash) && !act_h.empty?
|
76
|
+
@logger.info act_log_entry
|
75
77
|
rescue => error
|
76
|
-
|
78
|
+
act_error_log_entry = log_entry_core(position, i.uuid)
|
79
|
+
act_error_log_entry.merge!({error: error})
|
80
|
+
@logger.error act_error_log_entry
|
77
81
|
end
|
78
82
|
|
79
83
|
position += 1
|
@@ -86,22 +90,26 @@ module ResearchMetadataBatch
|
|
86
90
|
|
87
91
|
# handle error response
|
88
92
|
if result.empty?
|
89
|
-
@logger.error
|
93
|
+
@logger.error({pure_record: position, metadata_extraction: 'No data'})
|
90
94
|
position += 1
|
91
95
|
end
|
92
96
|
|
93
97
|
sleep delay
|
94
98
|
end
|
95
99
|
|
96
|
-
|
100
|
+
log_records_available records_available
|
97
101
|
|
98
102
|
end
|
99
103
|
|
100
104
|
private
|
101
105
|
|
102
|
-
|
103
|
-
|
104
|
-
|
106
|
+
def log_records_available(count)
|
107
|
+
@logger.info({pure_records_available: count})
|
108
|
+
end
|
109
|
+
|
110
|
+
# @return [Hash]
|
111
|
+
def log_entry_core(pure_record, pure_uuid)
|
112
|
+
{pure_record: pure_record, pure_uuid: pure_uuid}
|
105
113
|
end
|
106
114
|
|
107
115
|
def resource_count(params)
|
data/lib/research_metadata_batch/shared.rb
CHANGED
@@ -9,12 +9,12 @@ module ResearchMetadataBatch
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Anything to be done at the start of a batch run
|
12
|
-
# @return [
|
12
|
+
# @return [Hash, nil] Optionally, return something to indicate what has been done.
|
13
13
|
def preflight
|
14
14
|
end
|
15
15
|
|
16
16
|
# Do something with model metadata
|
17
|
-
# @return [
|
17
|
+
# @return [Hash, nil] Optionally, return something transaction-specific, such as a code/ID from an external service.
|
18
18
|
def act(model)
|
19
19
|
puts model.inspect
|
20
20
|
end
|
data/research_metadata_batch.gemspec
CHANGED
@@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.require_paths = ["lib"]
|
16
16
|
spec.required_ruby_version = '~> 2.1'
|
17
17
|
spec.add_dependency 'puree', '~> 2.3'
|
18
|
+
spec.add_dependency 'ougai', '~> 1.7'
|
18
19
|
spec.metadata = {
|
19
20
|
"source_code_uri" => "https://github.com/lulibrary/research_metadata_batch"
|
20
21
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: research_metadata_batch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: puree
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: ougai
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.7'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.7'
|
27
41
|
description:
|
28
42
|
email:
|
29
43
|
- a.albin-clark@lancaster.ac.uk
|