maltese 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/maltese/cli.rb +3 -3
- data/lib/maltese/sitemap.rb +7 -6
- data/lib/maltese/version.rb +1 -1
- data/spec/cli_spec.rb +4 -12
- data/spec/fixtures/vcr_cassettes/Maltese_CLI/sitemap/should_succeed.yml +21 -21
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +13 -13
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +129 -15
- data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +18 -132
- data/spec/spec_helper.rb +1 -1
- metadata +1 -3
- data/lib/maltese/utils.rb +0 -87
- data/public/sitemap.xml.gz +0 -0
@@ -10,23 +10,23 @@ http_interactions:
|
|
10
10
|
User-Agent:
|
11
11
|
- fog-core/1.37.0
|
12
12
|
X-Amz-Date:
|
13
|
-
-
|
13
|
+
- 20170226T231704Z
|
14
14
|
X-Amz-Content-Sha256:
|
15
15
|
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
16
16
|
Authorization:
|
17
17
|
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/us-east-1/s3/aws4_request,
|
18
|
-
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=
|
18
|
+
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=e7ab88d4f355ede1b9a312b6ad3d72fdfd1c921c87d229a45b758576115d5c1f
|
19
19
|
response:
|
20
20
|
status:
|
21
21
|
code: 200
|
22
22
|
message: ''
|
23
23
|
headers:
|
24
24
|
X-Amz-Id-2:
|
25
|
-
-
|
25
|
+
- ZQuecF0xSq/j7wEmiyzLd5SdChGL+GP6rs2rB/6dVutYOjLO/16M9pB4vwHJTeE9FXx+xkGgGHI=
|
26
26
|
X-Amz-Request-Id:
|
27
|
-
-
|
27
|
+
- B4BF21A1FAEAF07C
|
28
28
|
Date:
|
29
|
-
- Sun, 26 Feb 2017
|
29
|
+
- Sun, 26 Feb 2017 23:17:06 GMT
|
30
30
|
Content-Type:
|
31
31
|
- application/xml
|
32
32
|
Server:
|
@@ -35,12 +35,12 @@ http_interactions:
|
|
35
35
|
encoding: UTF-8
|
36
36
|
string: |-
|
37
37
|
<?xml version="1.0" encoding="UTF-8"?>
|
38
|
-
<ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>8170cc616e9d1833db4bade0c7af0010f04464343a1004b67a00fbcb41e361de</ID><DisplayName>infrastructure</DisplayName></Owner><Buckets><Bucket><Name>api.datacite.org</Name><CreationDate>2017-02-09T07:05:20.000Z</CreationDate></Bucket><Bucket><Name>api.test.datacite.org</Name><CreationDate>2017-02-09T07:05:21.000Z</CreationDate></Bucket><Bucket><Name>archive.datacite.org</Name><CreationDate>2016-12-26T07:02:24.000Z</CreationDate></Bucket><Bucket><Name>archive.test.datacite.org</Name><CreationDate>2017-01-13T19:04:30.000Z</CreationDate></Bucket><Bucket><Name>assets.datacite.org</Name><CreationDate>2016-04-29T17:51:32.000Z</CreationDate></Bucket><Bucket><Name>assets.test.datacite.org</Name><CreationDate>2017-01-13T18:47:02.000Z</CreationDate></Bucket><Bucket><Name>blog.datacite.org</Name><CreationDate>2015-09-26T08:59:21.000Z</CreationDate></Bucket><Bucket><Name>blog.test.datacite.org</Name><CreationDate>2017-01-13T18:54:23.000Z</CreationDate></Bucket><Bucket><Name>crosscite.org</Name><CreationDate>2016-01-05T22:11:03.000Z</CreationDate></Bucket><Bucket><Name>data.datacite.org</Name><CreationDate>2016-08-29T17:11:58.000Z</CreationDate></Bucket><Bucket><Name>data.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>datacite.org</Name><CreationDate>2016-01-04T21:53:46.000Z</CreationDate></Bucket><Bucket><Name>lagotto.io</Name><CreationDate>2016-01-10T10:12:40.000Z</CreationDate></Bucket><Bucket><Name>ops.datacite.org</Name><CreationDate>2016-06-03T19:38:04.000Z</CreationDate></Bucket><Bucket><Name>papertrail.datacite.org</Name><CreationDate>2016-01-04T23:25:19.000Z</CreationDate></Bucket><Bucket><Name>pidapalooza.org</Name><CreationDate>2016-06-16T07:28:25.000Z</CreationDate></Bucket><Bucket><Name>schema.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>schema.test.datacite.org</Name><CreationDate>2017-01-10T11:47:55.000Z</CreationDate></Bucket><Bucket><Name>search.datacite.org</Name><CreationDate>2017-02-26T20:00:32.000Z</CreationDate></Bucket><Bucket><Name>search.test.datacite.org</Name><CreationDate>2017-02-26T20:00:32.000Z</CreationDate></Bucket><Bucket><Name>
|
38
|
+
<ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>8170cc616e9d1833db4bade0c7af0010f04464343a1004b67a00fbcb41e361de</ID><DisplayName>infrastructure</DisplayName></Owner><Buckets><Bucket><Name>api.datacite.org</Name><CreationDate>2017-02-09T07:05:20.000Z</CreationDate></Bucket><Bucket><Name>api.test.datacite.org</Name><CreationDate>2017-02-09T07:05:21.000Z</CreationDate></Bucket><Bucket><Name>archive.datacite.org</Name><CreationDate>2016-12-26T07:02:24.000Z</CreationDate></Bucket><Bucket><Name>archive.test.datacite.org</Name><CreationDate>2017-01-13T19:04:30.000Z</CreationDate></Bucket><Bucket><Name>assets.datacite.org</Name><CreationDate>2016-04-29T17:51:32.000Z</CreationDate></Bucket><Bucket><Name>assets.test.datacite.org</Name><CreationDate>2017-01-13T18:47:02.000Z</CreationDate></Bucket><Bucket><Name>blog.datacite.org</Name><CreationDate>2015-09-26T08:59:21.000Z</CreationDate></Bucket><Bucket><Name>blog.test.datacite.org</Name><CreationDate>2017-01-13T18:54:23.000Z</CreationDate></Bucket><Bucket><Name>crosscite.org</Name><CreationDate>2016-01-05T22:11:03.000Z</CreationDate></Bucket><Bucket><Name>data.datacite.org</Name><CreationDate>2016-08-29T17:11:58.000Z</CreationDate></Bucket><Bucket><Name>data.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>datacite.org</Name><CreationDate>2016-01-04T21:53:46.000Z</CreationDate></Bucket><Bucket><Name>lagotto.io</Name><CreationDate>2016-01-10T10:12:40.000Z</CreationDate></Bucket><Bucket><Name>ops.datacite.org</Name><CreationDate>2016-06-03T19:38:04.000Z</CreationDate></Bucket><Bucket><Name>papertrail.datacite.org</Name><CreationDate>2016-01-04T23:25:19.000Z</CreationDate></Bucket><Bucket><Name>pidapalooza.org</Name><CreationDate>2016-06-16T07:28:25.000Z</CreationDate></Bucket><Bucket><Name>schema.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>schema.test.datacite.org</Name><CreationDate>2017-01-10T11:47:55.000Z</CreationDate></Bucket><Bucket><Name>search.datacite.org</Name><CreationDate>2017-02-26T20:00:32.000Z</CreationDate></Bucket><Bucket><Name>search.test.datacite.org</Name><CreationDate>2017-02-26T20:00:32.000Z</CreationDate></Bucket><Bucket><Name>stats.datacite.org</Name><CreationDate>2016-08-29T10:21:19.000Z</CreationDate></Bucket><Bucket><Name>stats.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>test.datacite.org</Name><CreationDate>2017-01-08T21:50:53.000Z</CreationDate></Bucket><Bucket><Name>www.crosscite.org</Name><CreationDate>2017-01-09T10:18:11.000Z</CreationDate></Bucket><Bucket><Name>www.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>www.test.datacite.org</Name><CreationDate>2017-01-13T19:50:11.000Z</CreationDate></Bucket></Buckets></ListAllMyBucketsResult>
|
39
39
|
http_version:
|
40
40
|
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
41
41
|
- request:
|
42
42
|
method: put
|
43
|
-
uri: https://s3-eu-west-1.amazonaws.com/search.datacite.org/sitemaps/sitemap.xml.gz
|
43
|
+
uri: https://s3-eu-west-1.amazonaws.com/search.datacite.org/sitemaps-test/sitemap.xml.gz
|
44
44
|
body:
|
45
45
|
encoding: US-ASCII
|
46
46
|
string: ''
|
@@ -52,7 +52,7 @@ http_interactions:
|
|
52
52
|
X-Amz-Acl:
|
53
53
|
- public-read
|
54
54
|
X-Amz-Date:
|
55
|
-
-
|
55
|
+
- 20170226T231706Z
|
56
56
|
X-Amz-Content-Sha256:
|
57
57
|
- STREAMING-AWS4-HMAC-SHA256-PAYLOAD
|
58
58
|
X-Amz-Decoded-Content-Length:
|
@@ -60,20 +60,20 @@ http_interactions:
|
|
60
60
|
Authorization:
|
61
61
|
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/eu-west-1/s3/aws4_request,
|
62
62
|
SignedHeaders=content-type;host;x-amz-acl;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length,
|
63
|
-
Signature=
|
63
|
+
Signature=283085acb7b37369e262e1b0c5e4c38e2e7bac57843fc4410466dc9b3e36fe65
|
64
64
|
response:
|
65
65
|
status:
|
66
66
|
code: 200
|
67
67
|
message: ''
|
68
68
|
headers:
|
69
69
|
X-Amz-Id-2:
|
70
|
-
-
|
70
|
+
- Sxubg7JQjr/97ZhF/T7dDhmbsAVOVIqrmMN0ZdpAO6aXm8O/io0MOGGiubgWhUw3C9lJYjsGprc=
|
71
71
|
X-Amz-Request-Id:
|
72
|
-
-
|
72
|
+
- ED7CC1EEB6DFF0E0
|
73
73
|
Date:
|
74
|
-
- Sun, 26 Feb 2017
|
74
|
+
- Sun, 26 Feb 2017 23:17:06 GMT
|
75
75
|
Etag:
|
76
|
-
- '"
|
76
|
+
- '"cec15d6b7a72b73c565fc102e43f98b2"'
|
77
77
|
Content-Length:
|
78
78
|
- '0'
|
79
79
|
Server:
|
@@ -10,37 +10,151 @@ http_interactions:
|
|
10
10
|
User-Agent:
|
11
11
|
- fog-core/1.37.0
|
12
12
|
X-Amz-Date:
|
13
|
-
-
|
13
|
+
- 20150407T220000Z
|
14
14
|
X-Amz-Content-Sha256:
|
15
15
|
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
16
16
|
Authorization:
|
17
|
-
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/
|
18
|
-
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=
|
17
|
+
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20150407/us-east-1/s3/aws4_request,
|
18
|
+
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=2fe3044f0e0fd2b7ad2caeb9e6af67ab3de66969b6bbe67cf4e6d53621e33391
|
19
19
|
response:
|
20
20
|
status:
|
21
|
-
code:
|
21
|
+
code: 403
|
22
22
|
message: ''
|
23
23
|
headers:
|
24
|
+
X-Amz-Request-Id:
|
25
|
+
- AFF611ADFC0BB4FF
|
24
26
|
X-Amz-Id-2:
|
25
|
-
-
|
27
|
+
- "/bNnD4tgFXPfaowrE4eiO7FozZYXEYgHiENHDySPcRCX35tWZjANdnIBsCC9Q6kkixEiusHKQBU="
|
28
|
+
Content-Type:
|
29
|
+
- application/xml
|
30
|
+
Date:
|
31
|
+
- Sun, 26 Feb 2017 23:17:02 GMT
|
32
|
+
Server:
|
33
|
+
- AmazonS3
|
34
|
+
body:
|
35
|
+
encoding: UTF-8
|
36
|
+
string: |-
|
37
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
38
|
+
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>20150407T220000Z</RequestTime><ServerTime>2017-02-26T23:17:04Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>AFF611ADFC0BB4FF</RequestId><HostId>/bNnD4tgFXPfaowrE4eiO7FozZYXEYgHiENHDySPcRCX35tWZjANdnIBsCC9Q6kkixEiusHKQBU=</HostId></Error>
|
39
|
+
http_version:
|
40
|
+
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
41
|
+
- request:
|
42
|
+
method: get
|
43
|
+
uri: https://s3.amazonaws.com/
|
44
|
+
body:
|
45
|
+
encoding: US-ASCII
|
46
|
+
string: ''
|
47
|
+
headers:
|
48
|
+
User-Agent:
|
49
|
+
- fog-core/1.37.0
|
50
|
+
X-Amz-Date:
|
51
|
+
- 20150407T220000Z
|
52
|
+
X-Amz-Content-Sha256:
|
53
|
+
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
54
|
+
Authorization:
|
55
|
+
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20150407/us-east-1/s3/aws4_request,
|
56
|
+
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=2fe3044f0e0fd2b7ad2caeb9e6af67ab3de66969b6bbe67cf4e6d53621e33391
|
57
|
+
response:
|
58
|
+
status:
|
59
|
+
code: 403
|
60
|
+
message: ''
|
61
|
+
headers:
|
26
62
|
X-Amz-Request-Id:
|
27
|
-
-
|
63
|
+
- 6C3AF366E95921DC
|
64
|
+
X-Amz-Id-2:
|
65
|
+
- NExE51oG2se2Z7H3VsKE+brqZWFRNxa4+GtaSv+ik5coyoqkoIc8ntppA0iWmbK/zBFJp4eq4x0=
|
66
|
+
Content-Type:
|
67
|
+
- application/xml
|
28
68
|
Date:
|
29
|
-
- Sun, 26 Feb 2017
|
69
|
+
- Sun, 26 Feb 2017 23:17:03 GMT
|
70
|
+
Server:
|
71
|
+
- AmazonS3
|
72
|
+
body:
|
73
|
+
encoding: UTF-8
|
74
|
+
string: |-
|
75
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
76
|
+
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>20150407T220000Z</RequestTime><ServerTime>2017-02-26T23:17:04Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>6C3AF366E95921DC</RequestId><HostId>NExE51oG2se2Z7H3VsKE+brqZWFRNxa4+GtaSv+ik5coyoqkoIc8ntppA0iWmbK/zBFJp4eq4x0=</HostId></Error>
|
77
|
+
http_version:
|
78
|
+
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
79
|
+
- request:
|
80
|
+
method: get
|
81
|
+
uri: https://s3.amazonaws.com/
|
82
|
+
body:
|
83
|
+
encoding: US-ASCII
|
84
|
+
string: ''
|
85
|
+
headers:
|
86
|
+
User-Agent:
|
87
|
+
- fog-core/1.37.0
|
88
|
+
X-Amz-Date:
|
89
|
+
- 20150407T220000Z
|
90
|
+
X-Amz-Content-Sha256:
|
91
|
+
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
92
|
+
Authorization:
|
93
|
+
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20150407/us-east-1/s3/aws4_request,
|
94
|
+
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=2fe3044f0e0fd2b7ad2caeb9e6af67ab3de66969b6bbe67cf4e6d53621e33391
|
95
|
+
response:
|
96
|
+
status:
|
97
|
+
code: 403
|
98
|
+
message: ''
|
99
|
+
headers:
|
100
|
+
X-Amz-Request-Id:
|
101
|
+
- 330970DE0017172F
|
102
|
+
X-Amz-Id-2:
|
103
|
+
- 0z7s2hCXs5OjxgbjnpbDPuQt9r6spEdLnK+5sosrKFY8q9gyJxZ1gQivArIFwMSRrHFGwUjayt4=
|
30
104
|
Content-Type:
|
31
105
|
- application/xml
|
106
|
+
Date:
|
107
|
+
- Sun, 26 Feb 2017 23:17:03 GMT
|
108
|
+
Server:
|
109
|
+
- AmazonS3
|
110
|
+
body:
|
111
|
+
encoding: UTF-8
|
112
|
+
string: |-
|
113
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
114
|
+
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>20150407T220000Z</RequestTime><ServerTime>2017-02-26T23:17:04Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>330970DE0017172F</RequestId><HostId>0z7s2hCXs5OjxgbjnpbDPuQt9r6spEdLnK+5sosrKFY8q9gyJxZ1gQivArIFwMSRrHFGwUjayt4=</HostId></Error>
|
115
|
+
http_version:
|
116
|
+
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
117
|
+
- request:
|
118
|
+
method: get
|
119
|
+
uri: https://s3.amazonaws.com/
|
120
|
+
body:
|
121
|
+
encoding: US-ASCII
|
122
|
+
string: ''
|
123
|
+
headers:
|
124
|
+
User-Agent:
|
125
|
+
- fog-core/1.37.0
|
126
|
+
X-Amz-Date:
|
127
|
+
- 20150407T220000Z
|
128
|
+
X-Amz-Content-Sha256:
|
129
|
+
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
130
|
+
Authorization:
|
131
|
+
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20150407/us-east-1/s3/aws4_request,
|
132
|
+
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=2fe3044f0e0fd2b7ad2caeb9e6af67ab3de66969b6bbe67cf4e6d53621e33391
|
133
|
+
response:
|
134
|
+
status:
|
135
|
+
code: 403
|
136
|
+
message: ''
|
137
|
+
headers:
|
138
|
+
X-Amz-Request-Id:
|
139
|
+
- B51EF66A02713CF1
|
140
|
+
X-Amz-Id-2:
|
141
|
+
- G6gHwBkRjHX7vinr62hb+Y93oAgmT/moREnayTOWuDSJN4yATUkTUasqVwpdNKuJQhuZOWJiutI=
|
142
|
+
Content-Type:
|
143
|
+
- application/xml
|
144
|
+
Date:
|
145
|
+
- Sun, 26 Feb 2017 23:17:04 GMT
|
32
146
|
Server:
|
33
147
|
- AmazonS3
|
34
148
|
body:
|
35
149
|
encoding: UTF-8
|
36
150
|
string: |-
|
37
151
|
<?xml version="1.0" encoding="UTF-8"?>
|
38
|
-
<
|
152
|
+
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>20150407T220000Z</RequestTime><ServerTime>2017-02-26T23:17:05Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>B51EF66A02713CF1</RequestId><HostId>G6gHwBkRjHX7vinr62hb+Y93oAgmT/moREnayTOWuDSJN4yATUkTUasqVwpdNKuJQhuZOWJiutI=</HostId></Error>
|
39
153
|
http_version:
|
40
154
|
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
41
155
|
- request:
|
42
156
|
method: put
|
43
|
-
uri: https://s3-eu-west-1.amazonaws.com/search.datacite.org/sitemaps/sitemap.xml.gz
|
157
|
+
uri: https://s3-eu-west-1.amazonaws.com/search.datacite.org/sitemaps-test/sitemap.xml.gz
|
44
158
|
body:
|
45
159
|
encoding: US-ASCII
|
46
160
|
string: ''
|
@@ -52,7 +166,7 @@ http_interactions:
|
|
52
166
|
X-Amz-Acl:
|
53
167
|
- public-read
|
54
168
|
X-Amz-Date:
|
55
|
-
-
|
169
|
+
- 20170226T231704Z
|
56
170
|
X-Amz-Content-Sha256:
|
57
171
|
- STREAMING-AWS4-HMAC-SHA256-PAYLOAD
|
58
172
|
X-Amz-Decoded-Content-Length:
|
@@ -60,20 +174,20 @@ http_interactions:
|
|
60
174
|
Authorization:
|
61
175
|
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/eu-west-1/s3/aws4_request,
|
62
176
|
SignedHeaders=content-type;host;x-amz-acl;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length,
|
63
|
-
Signature=
|
177
|
+
Signature=7cb583308e13da8753991703d16f56513c29cceb683bdd4f04f993d6794faac3
|
64
178
|
response:
|
65
179
|
status:
|
66
180
|
code: 200
|
67
181
|
message: ''
|
68
182
|
headers:
|
69
183
|
X-Amz-Id-2:
|
70
|
-
-
|
184
|
+
- aYVn0IlwhiMfeiIgZngv3IsDHTkIWoLlONhJK8WE9yNHbL2rnrlQIS8K6wqFN0JmfKt0PdXXd68=
|
71
185
|
X-Amz-Request-Id:
|
72
|
-
-
|
186
|
+
- 61DAFAF29518E861
|
73
187
|
Date:
|
74
|
-
- Sun, 26 Feb 2017
|
188
|
+
- Sun, 26 Feb 2017 23:17:05 GMT
|
75
189
|
Etag:
|
76
|
-
- '"
|
190
|
+
- '"157fe411131d9f960f73c9195ee315bd"'
|
77
191
|
Content-Length:
|
78
192
|
- '0'
|
79
193
|
Server:
|
@@ -19,7 +19,7 @@ http_interactions:
|
|
19
19
|
Server:
|
20
20
|
- openresty/1.11.2.2
|
21
21
|
Date:
|
22
|
-
- Sun, 26 Feb 2017
|
22
|
+
- Sun, 26 Feb 2017 23:17:53 GMT
|
23
23
|
Content-Type:
|
24
24
|
- application/json;charset=UTF-8
|
25
25
|
Connection:
|
@@ -30,7 +30,7 @@ http_interactions:
|
|
30
30
|
- GET, POST, OPTIONS
|
31
31
|
body:
|
32
32
|
encoding: UTF-8
|
33
|
-
string: '{"responseHeader":{"status":0,"QTime":
|
33
|
+
string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":2521,"start":0,"docs":[]}}
|
34
34
|
|
35
35
|
'
|
36
36
|
http_version:
|
@@ -54,7 +54,7 @@ http_interactions:
|
|
54
54
|
Server:
|
55
55
|
- openresty/1.11.2.2
|
56
56
|
Date:
|
57
|
-
- Sun, 26 Feb 2017
|
57
|
+
- Sun, 26 Feb 2017 23:17:53 GMT
|
58
58
|
Content-Type:
|
59
59
|
- application/json;charset=UTF-8
|
60
60
|
Connection:
|
@@ -80,151 +80,37 @@ http_interactions:
|
|
80
80
|
User-Agent:
|
81
81
|
- fog-core/1.37.0
|
82
82
|
X-Amz-Date:
|
83
|
-
-
|
83
|
+
- 20170226T231704Z
|
84
84
|
X-Amz-Content-Sha256:
|
85
85
|
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
86
86
|
Authorization:
|
87
|
-
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/
|
88
|
-
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=
|
87
|
+
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/us-east-1/s3/aws4_request,
|
88
|
+
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=e7ab88d4f355ede1b9a312b6ad3d72fdfd1c921c87d229a45b758576115d5c1f
|
89
89
|
response:
|
90
90
|
status:
|
91
|
-
code:
|
91
|
+
code: 200
|
92
92
|
message: ''
|
93
93
|
headers:
|
94
|
-
X-Amz-Request-Id:
|
95
|
-
- F1F1935B9D093F3B
|
96
94
|
X-Amz-Id-2:
|
97
|
-
-
|
98
|
-
Content-Type:
|
99
|
-
- application/xml
|
100
|
-
Date:
|
101
|
-
- Sun, 26 Feb 2017 20:03:09 GMT
|
102
|
-
Server:
|
103
|
-
- AmazonS3
|
104
|
-
body:
|
105
|
-
encoding: UTF-8
|
106
|
-
string: |-
|
107
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
108
|
-
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>20150407T220001Z</RequestTime><ServerTime>2017-02-26T20:03:11Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>F1F1935B9D093F3B</RequestId><HostId>sGMBvTChZ4S+owf6quTgVg2rHd+0Vw4+0KomLLJHZziKUgbCDRItUwATwjsIT3Wldz/LNPs58jw=</HostId></Error>
|
109
|
-
http_version:
|
110
|
-
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
111
|
-
- request:
|
112
|
-
method: get
|
113
|
-
uri: https://s3.amazonaws.com/
|
114
|
-
body:
|
115
|
-
encoding: US-ASCII
|
116
|
-
string: ''
|
117
|
-
headers:
|
118
|
-
User-Agent:
|
119
|
-
- fog-core/1.37.0
|
120
|
-
X-Amz-Date:
|
121
|
-
- 20150407T220001Z
|
122
|
-
X-Amz-Content-Sha256:
|
123
|
-
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
124
|
-
Authorization:
|
125
|
-
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20150407/us-east-1/s3/aws4_request,
|
126
|
-
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=63ae9b360414ffd780af6bec925edc820e5991b83ce6eab4d03861342340d476
|
127
|
-
response:
|
128
|
-
status:
|
129
|
-
code: 403
|
130
|
-
message: ''
|
131
|
-
headers:
|
95
|
+
- YW0AEI4DrC2+XZqK3P9yS7wmBhYI8WSadoF94kNZhhz7Cd2KaMYcxP+s8J/QtKWEX1OLrSdgAxQ=
|
132
96
|
X-Amz-Request-Id:
|
133
|
-
-
|
134
|
-
X-Amz-Id-2:
|
135
|
-
- pbCD9EJXef72LT9E3rF9Kg/Z1WwRDTZOmXOndbVm/rVbN0W2QnJyhordjReIFVGFsMG3y9cFXME=
|
136
|
-
Content-Type:
|
137
|
-
- application/xml
|
97
|
+
- E4F3A2FFAADFF9A2
|
138
98
|
Date:
|
139
|
-
- Sun, 26 Feb 2017
|
140
|
-
Server:
|
141
|
-
- AmazonS3
|
142
|
-
body:
|
143
|
-
encoding: UTF-8
|
144
|
-
string: |-
|
145
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
146
|
-
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>20150407T220001Z</RequestTime><ServerTime>2017-02-26T20:03:11Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>C9A6B93BA12A87BD</RequestId><HostId>pbCD9EJXef72LT9E3rF9Kg/Z1WwRDTZOmXOndbVm/rVbN0W2QnJyhordjReIFVGFsMG3y9cFXME=</HostId></Error>
|
147
|
-
http_version:
|
148
|
-
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
149
|
-
- request:
|
150
|
-
method: get
|
151
|
-
uri: https://s3.amazonaws.com/
|
152
|
-
body:
|
153
|
-
encoding: US-ASCII
|
154
|
-
string: ''
|
155
|
-
headers:
|
156
|
-
User-Agent:
|
157
|
-
- fog-core/1.37.0
|
158
|
-
X-Amz-Date:
|
159
|
-
- 20150407T220001Z
|
160
|
-
X-Amz-Content-Sha256:
|
161
|
-
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
162
|
-
Authorization:
|
163
|
-
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20150407/us-east-1/s3/aws4_request,
|
164
|
-
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=63ae9b360414ffd780af6bec925edc820e5991b83ce6eab4d03861342340d476
|
165
|
-
response:
|
166
|
-
status:
|
167
|
-
code: 403
|
168
|
-
message: ''
|
169
|
-
headers:
|
170
|
-
X-Amz-Request-Id:
|
171
|
-
- 04FBF6BEDA1B20A8
|
172
|
-
X-Amz-Id-2:
|
173
|
-
- XuOHzBTtiAFfNV4wZEED7xsUWIxn11sYx5uM47rWoQjfeDnW1TH4/+5FBTsjGBeen1XJ4h1ywbA=
|
99
|
+
- Sun, 26 Feb 2017 23:17:55 GMT
|
174
100
|
Content-Type:
|
175
101
|
- application/xml
|
176
|
-
Date:
|
177
|
-
- Sun, 26 Feb 2017 20:03:10 GMT
|
178
|
-
Server:
|
179
|
-
- AmazonS3
|
180
|
-
body:
|
181
|
-
encoding: UTF-8
|
182
|
-
string: |-
|
183
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
184
|
-
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>20150407T220001Z</RequestTime><ServerTime>2017-02-26T20:03:12Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>04FBF6BEDA1B20A8</RequestId><HostId>XuOHzBTtiAFfNV4wZEED7xsUWIxn11sYx5uM47rWoQjfeDnW1TH4/+5FBTsjGBeen1XJ4h1ywbA=</HostId></Error>
|
185
|
-
http_version:
|
186
|
-
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
187
|
-
- request:
|
188
|
-
method: get
|
189
|
-
uri: https://s3.amazonaws.com/
|
190
|
-
body:
|
191
|
-
encoding: US-ASCII
|
192
|
-
string: ''
|
193
|
-
headers:
|
194
|
-
User-Agent:
|
195
|
-
- fog-core/1.37.0
|
196
|
-
X-Amz-Date:
|
197
|
-
- 20150407T220001Z
|
198
|
-
X-Amz-Content-Sha256:
|
199
|
-
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
200
|
-
Authorization:
|
201
|
-
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20150407/us-east-1/s3/aws4_request,
|
202
|
-
SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=63ae9b360414ffd780af6bec925edc820e5991b83ce6eab4d03861342340d476
|
203
|
-
response:
|
204
|
-
status:
|
205
|
-
code: 403
|
206
|
-
message: ''
|
207
|
-
headers:
|
208
|
-
X-Amz-Request-Id:
|
209
|
-
- F355B67176738D33
|
210
|
-
X-Amz-Id-2:
|
211
|
-
- wKrrJy/Rz8Yaa/XUjwpcGb//ae93pPZoRKnA+vbOEnJH8zKA13p4G/0u8zhu7lhqcpV+kSK1lgY=
|
212
|
-
Content-Type:
|
213
|
-
- application/xml
|
214
|
-
Date:
|
215
|
-
- Sun, 26 Feb 2017 20:03:11 GMT
|
216
102
|
Server:
|
217
103
|
- AmazonS3
|
218
104
|
body:
|
219
105
|
encoding: UTF-8
|
220
106
|
string: |-
|
221
107
|
<?xml version="1.0" encoding="UTF-8"?>
|
222
|
-
<
|
108
|
+
<ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>8170cc616e9d1833db4bade0c7af0010f04464343a1004b67a00fbcb41e361de</ID><DisplayName>infrastructure</DisplayName></Owner><Buckets><Bucket><Name>api.datacite.org</Name><CreationDate>2017-02-09T07:05:20.000Z</CreationDate></Bucket><Bucket><Name>api.test.datacite.org</Name><CreationDate>2017-02-09T07:05:21.000Z</CreationDate></Bucket><Bucket><Name>archive.datacite.org</Name><CreationDate>2016-12-26T07:02:24.000Z</CreationDate></Bucket><Bucket><Name>archive.test.datacite.org</Name><CreationDate>2017-01-13T19:04:30.000Z</CreationDate></Bucket><Bucket><Name>assets.datacite.org</Name><CreationDate>2016-04-29T17:51:32.000Z</CreationDate></Bucket><Bucket><Name>assets.test.datacite.org</Name><CreationDate>2017-01-13T18:47:02.000Z</CreationDate></Bucket><Bucket><Name>blog.datacite.org</Name><CreationDate>2015-09-26T08:59:21.000Z</CreationDate></Bucket><Bucket><Name>blog.test.datacite.org</Name><CreationDate>2017-01-13T18:54:23.000Z</CreationDate></Bucket><Bucket><Name>crosscite.org</Name><CreationDate>2016-01-05T22:11:03.000Z</CreationDate></Bucket><Bucket><Name>data.datacite.org</Name><CreationDate>2016-08-29T17:11:58.000Z</CreationDate></Bucket><Bucket><Name>data.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>datacite.org</Name><CreationDate>2016-01-04T21:53:46.000Z</CreationDate></Bucket><Bucket><Name>lagotto.io</Name><CreationDate>2016-01-10T10:12:40.000Z</CreationDate></Bucket><Bucket><Name>ops.datacite.org</Name><CreationDate>2016-06-03T19:38:04.000Z</CreationDate></Bucket><Bucket><Name>papertrail.datacite.org</Name><CreationDate>2016-01-04T23:25:19.000Z</CreationDate></Bucket><Bucket><Name>pidapalooza.org</Name><CreationDate>2016-06-16T07:28:25.000Z</CreationDate></Bucket><Bucket><Name>schema.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>schema.test.datacite.org</Name><CreationDate>2017-01-10T11:47:55.000Z</CreationDate></Bucket><Bucket><Name>search.datacite.org</Name><CreationDate>2017-02-26T20:00:32.000Z</CreationDate></Bucket><Bucket><Name>search.test.datacite.org</Name><CreationDate>2017-02-26T20:00:32.000Z</CreationDate></Bucket><Bucket><Name>stats.datacite.org</Name><CreationDate>2016-08-29T10:21:19.000Z</CreationDate></Bucket><Bucket><Name>stats.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>test.datacite.org</Name><CreationDate>2017-01-08T21:50:53.000Z</CreationDate></Bucket><Bucket><Name>www.crosscite.org</Name><CreationDate>2017-01-09T10:18:11.000Z</CreationDate></Bucket><Bucket><Name>www.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>www.test.datacite.org</Name><CreationDate>2017-01-13T19:50:11.000Z</CreationDate></Bucket></Buckets></ListAllMyBucketsResult>
|
223
109
|
http_version:
|
224
110
|
recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
|
225
111
|
- request:
|
226
112
|
method: put
|
227
|
-
uri: https://s3-eu-west-1.amazonaws.com/search.datacite.org/sitemaps/sitemap.xml.gz
|
113
|
+
uri: https://s3-eu-west-1.amazonaws.com/search.datacite.org/sitemaps-test/sitemap.xml.gz
|
228
114
|
body:
|
229
115
|
encoding: US-ASCII
|
230
116
|
string: ''
|
@@ -236,7 +122,7 @@ http_interactions:
|
|
236
122
|
X-Amz-Acl:
|
237
123
|
- public-read
|
238
124
|
X-Amz-Date:
|
239
|
-
-
|
125
|
+
- 20170226T231755Z
|
240
126
|
X-Amz-Content-Sha256:
|
241
127
|
- STREAMING-AWS4-HMAC-SHA256-PAYLOAD
|
242
128
|
X-Amz-Decoded-Content-Length:
|
@@ -244,20 +130,20 @@ http_interactions:
|
|
244
130
|
Authorization:
|
245
131
|
- AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/eu-west-1/s3/aws4_request,
|
246
132
|
SignedHeaders=content-type;host;x-amz-acl;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length,
|
247
|
-
Signature=
|
133
|
+
Signature=bf87c1e1f8d8e4953520591f838baf2c4581ad1ad6a6ae8e16279127ef6a00f4
|
248
134
|
response:
|
249
135
|
status:
|
250
136
|
code: 200
|
251
137
|
message: ''
|
252
138
|
headers:
|
253
139
|
X-Amz-Id-2:
|
254
|
-
-
|
140
|
+
- aJuVURJE59LNw6VCI9dzyr4lcmXDmSqBP53P3qV/ROVTu/L2+DiqePGctgUf8AK2s5djv1h9xHg=
|
255
141
|
X-Amz-Request-Id:
|
256
|
-
-
|
142
|
+
- 43C955CD826AF51D
|
257
143
|
Date:
|
258
|
-
- Sun, 26 Feb 2017
|
144
|
+
- Sun, 26 Feb 2017 23:17:56 GMT
|
259
145
|
Etag:
|
260
|
-
- '"
|
146
|
+
- '"7427ddae1f77df30471b29fba613264e"'
|
261
147
|
Content-Length:
|
262
148
|
- '0'
|
263
149
|
Server:
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: maltese
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
@@ -252,10 +252,8 @@ files:
|
|
252
252
|
- lib/maltese.rb
|
253
253
|
- lib/maltese/cli.rb
|
254
254
|
- lib/maltese/sitemap.rb
|
255
|
-
- lib/maltese/utils.rb
|
256
255
|
- lib/maltese/version.rb
|
257
256
|
- maltese.gemspec
|
258
|
-
- public/sitemap.xml.gz
|
259
257
|
- spec/cli_spec.rb
|
260
258
|
- spec/fixtures/sitemap.json
|
261
259
|
- spec/fixtures/sitemap_nil.json
|
data/lib/maltese/utils.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
module Maltese
|
2
|
-
module Utils
|
3
|
-
# load ENV variables from container environment if json file exists
|
4
|
-
# see https://github.com/phusion/baseimage-docker#envvar_dumps
|
5
|
-
env_json_file = "/etc/container_environment.json"
|
6
|
-
if File.size?(env_json_file).to_i > 2
|
7
|
-
env_vars = JSON.parse(File.read(env_json_file))
|
8
|
-
env_vars.each { |k, v| ENV[k] = v }
|
9
|
-
end
|
10
|
-
|
11
|
-
def queue_jobs(options={})
|
12
|
-
options[:offset] = options[:offset].to_i || 0
|
13
|
-
options[:rows] = options[:rows].presence || job_batch_size
|
14
|
-
|
15
|
-
total = get_total(options)
|
16
|
-
|
17
|
-
if total > 0
|
18
|
-
puts process_data(options.merge(total: total))
|
19
|
-
else
|
20
|
-
puts "No works found for date range #{from_date} - #{until_date}."
|
21
|
-
end
|
22
|
-
|
23
|
-
# return number of works queued
|
24
|
-
total
|
25
|
-
end
|
26
|
-
|
27
|
-
def get_total(options={})
|
28
|
-
query_url = get_query_url(options.merge(rows: 0))
|
29
|
-
result = Maremma.get(query_url, options)
|
30
|
-
result.body.fetch("data", {}).fetch("response", {}).fetch("numFound", 0)
|
31
|
-
end
|
32
|
-
|
33
|
-
def get_query_url(options={})
|
34
|
-
updated = "updated:[#{from_date}T00:00:00Z TO #{until_date}T23:59:59Z]"
|
35
|
-
fq = "#{updated} AND has_metadata:true AND is_active:true"
|
36
|
-
|
37
|
-
params = { q: "*:*",
|
38
|
-
fq: fq,
|
39
|
-
start: options[:offset],
|
40
|
-
rows: options[:rows],
|
41
|
-
fl: "doi,updated",
|
42
|
-
sort: "updated asc",
|
43
|
-
wt: "json" }
|
44
|
-
url + URI.encode_www_form(params)
|
45
|
-
end
|
46
|
-
|
47
|
-
def process_data(options = {})
|
48
|
-
options[:start_time] = Time.now
|
49
|
-
|
50
|
-
# walk through paginated results
|
51
|
-
total_pages = (options[:total].to_f / job_batch_size).ceil
|
52
|
-
|
53
|
-
(0...total_pages).each do |page|
|
54
|
-
options[:offset] = page * job_batch_size
|
55
|
-
data = get_data(options.merge(timeout: timeout))
|
56
|
-
parse_data(data)
|
57
|
-
end
|
58
|
-
|
59
|
-
push_data(options)
|
60
|
-
end
|
61
|
-
|
62
|
-
def get_data(options={})
|
63
|
-
query_url = get_query_url(options)
|
64
|
-
Maremma.get(query_url, options)
|
65
|
-
end
|
66
|
-
|
67
|
-
def parse_data(result, options={})
|
68
|
-
return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
|
69
|
-
|
70
|
-
items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
|
71
|
-
Array(items).each do |item|
|
72
|
-
loc = "/works/" + item.fetch("doi")
|
73
|
-
sitemap.add loc, changefreq: "monthly", lastmod: item.fetch("updated")
|
74
|
-
end
|
75
|
-
sitemap.sitemap.link_count
|
76
|
-
end
|
77
|
-
|
78
|
-
def push_data(options={})
|
79
|
-
# sync time with AWS S3 before uploading
|
80
|
-
fog_storage.sync_clock
|
81
|
-
|
82
|
-
sitemap.finalize!
|
83
|
-
time_taken = Time.now - options[:start_time]
|
84
|
-
sitemap.sitemap_index.stats_summary(:time_taken => time_taken)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
data/public/sitemap.xml.gz
DELETED
Binary file
|