maltese 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +19 -0
  3. data/.gitignore +55 -0
  4. data/.rubocop.yml +1156 -0
  5. data/.travis.yml +26 -0
  6. data/CHANGELOG.md +5 -0
  7. data/Dockerfile +16 -0
  8. data/Gemfile +3 -0
  9. data/Gemfile.lock +128 -0
  10. data/LICENSE.md +21 -0
  11. data/README.md +51 -0
  12. data/bin/maltese +5 -0
  13. data/lib/maltese/cli.rb +30 -0
  14. data/lib/maltese/sitemap.rb +140 -0
  15. data/lib/maltese/utils.rb +87 -0
  16. data/lib/maltese/version.rb +3 -0
  17. data/lib/maltese.rb +8 -0
  18. data/maltese.gemspec +37 -0
  19. data/public/sitemap.xml.gz +0 -0
  20. data/spec/cli_spec.rb +43 -0
  21. data/spec/fixtures/sitemap.json +7574 -0
  22. data/spec/fixtures/sitemap_nil.json +11 -0
  23. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +38 -0
  24. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +38 -0
  25. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_no_works.yml +38 -0
  26. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/get_total/with_works.yml +38 -0
  27. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +86 -0
  28. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +86 -0
  29. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Solr_API.yml +38 -0
  30. data/spec/fixtures/vcr_cassettes/Maltese_Sitemap/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Solr_API.yml +270 -0
  31. data/spec/sitemap_spec.rb +111 -0
  32. data/spec/spec_helper.rb +95 -0
  33. metadata +310 -0
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://search.datacite.org/api?fl=doi,updated&fq=updated:%5B2005-04-07T00:00:00Z%20TO%202005-04-08T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=*:*&rows=0&sort=updated%20asc&start=0&wt=json
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - Maremma - https://github.com/datacite/maremma
12
+ Accept:
13
+ - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5
14
+ response:
15
+ status:
16
+ code: 200
17
+ message: ''
18
+ headers:
19
+ Server:
20
+ - openresty/1.11.2.2
21
+ Date:
22
+ - Sun, 26 Feb 2017 16:36:49 GMT
23
+ Content-Type:
24
+ - application/json;charset=UTF-8
25
+ Connection:
26
+ - keep-alive
27
+ Access-Control-Allow-Origin:
28
+ - "*"
29
+ Access-Control-Allow-Methods:
30
+ - GET, POST, OPTIONS
31
+ body:
32
+ encoding: UTF-8
33
+ string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":0,"start":0,"docs":[]}}
34
+
35
+ '
36
+ http_version:
37
+ recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
38
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://search.datacite.org/api?fl=doi,updated&fq=updated:%5B2015-04-07T00:00:00Z%20TO%202015-04-08T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=*:*&rows=0&sort=updated%20asc&start=0&wt=json
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - Maremma - https://github.com/datacite/maremma
12
+ Accept:
13
+ - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5
14
+ response:
15
+ status:
16
+ code: 200
17
+ message: ''
18
+ headers:
19
+ Server:
20
+ - openresty/1.11.2.2
21
+ Date:
22
+ - Sun, 26 Feb 2017 16:36:49 GMT
23
+ Content-Type:
24
+ - application/json;charset=UTF-8
25
+ Connection:
26
+ - keep-alive
27
+ Access-Control-Allow-Origin:
28
+ - "*"
29
+ Access-Control-Allow-Methods:
30
+ - GET, POST, OPTIONS
31
+ body:
32
+ encoding: UTF-8
33
+ string: '{"responseHeader":{"status":0,"QTime":0},"response":{"numFound":2521,"start":0,"docs":[]}}
34
+
35
+ '
36
+ http_version:
37
+ recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
38
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,86 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://s3.amazonaws.com/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - fog-core/1.37.0
12
+ X-Amz-Date:
13
+ - 20170226T163647Z
14
+ X-Amz-Content-Sha256:
15
+ - e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
16
+ Authorization:
17
+ - AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/us-east-1/s3/aws4_request,
18
+ SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=a870dd62c29353f528db2bd649e4c885f853a687daecbfe3f6018a8d5e16304a
19
+ response:
20
+ status:
21
+ code: 200
22
+ message: ''
23
+ headers:
24
+ X-Amz-Id-2:
25
+ - "+M/2XPFQG6EvOmdSScfFqmtVWsk5on5+B/Ht7Y+bLCN7PAOu7OqMNOWpQlKvlMmstRnDSBV7aoM="
26
+ X-Amz-Request-Id:
27
+ - CE22FCA4A6FEA599
28
+ Date:
29
+ - Sun, 26 Feb 2017 16:36:51 GMT
30
+ Content-Type:
31
+ - application/xml
32
+ Server:
33
+ - AmazonS3
34
+ body:
35
+ encoding: UTF-8
36
+ string: |-
37
+ <?xml version="1.0" encoding="UTF-8"?>
38
+ <ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>8170cc616e9d1833db4bade0c7af0010f04464343a1004b67a00fbcb41e361de</ID><DisplayName>infrastructure</DisplayName></Owner><Buckets><Bucket><Name>api.datacite.org</Name><CreationDate>2017-02-09T07:05:20.000Z</CreationDate></Bucket><Bucket><Name>api.test.datacite.org</Name><CreationDate>2017-02-09T07:05:21.000Z</CreationDate></Bucket><Bucket><Name>archive.datacite.org</Name><CreationDate>2016-12-26T07:02:24.000Z</CreationDate></Bucket><Bucket><Name>archive.test.datacite.org</Name><CreationDate>2017-01-13T19:04:30.000Z</CreationDate></Bucket><Bucket><Name>assets.datacite.org</Name><CreationDate>2016-04-29T17:51:32.000Z</CreationDate></Bucket><Bucket><Name>assets.test.datacite.org</Name><CreationDate>2017-01-13T18:47:02.000Z</CreationDate></Bucket><Bucket><Name>blog.datacite.org</Name><CreationDate>2015-09-26T08:59:21.000Z</CreationDate></Bucket><Bucket><Name>blog.test.datacite.org</Name><CreationDate>2017-01-13T18:54:23.000Z</CreationDate></Bucket><Bucket><Name>crosscite.org</Name><CreationDate>2016-01-05T22:11:03.000Z</CreationDate></Bucket><Bucket><Name>data.datacite.org</Name><CreationDate>2016-08-29T17:11:58.000Z</CreationDate></Bucket><Bucket><Name>data.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>datacite.org</Name><CreationDate>2016-01-04T21:53:46.000Z</CreationDate></Bucket><Bucket><Name>lagotto.io</Name><CreationDate>2016-01-10T10:12:40.000Z</CreationDate></Bucket><Bucket><Name>ops.datacite.org</Name><CreationDate>2016-06-03T19:38:04.000Z</CreationDate></Bucket><Bucket><Name>papertrail.datacite.org</Name><CreationDate>2016-01-04T23:25:19.000Z</CreationDate></Bucket><Bucket><Name>pidapalooza.org</Name><CreationDate>2016-06-16T07:28:25.000Z</CreationDate></Bucket><Bucket><Name>schema.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>schema.test.datacite.org</Name><CreationDate>2017-01-10T11:47:55.000Z</CreationDate></Bucket><Bucket><Name>sitemaps.datacite.org</Name><CreationDate>2017-01-12T21:41:18.000Z</CreationDate></Bucket><Bucket><Name><FOG_DIRECTORY></Name><CreationDate>2017-01-27T07:13:20.000Z</CreationDate></Bucket><Bucket><Name>stats.datacite.org</Name><CreationDate>2016-08-29T10:21:19.000Z</CreationDate></Bucket><Bucket><Name>stats.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>test.datacite.org</Name><CreationDate>2017-01-08T21:50:53.000Z</CreationDate></Bucket><Bucket><Name>www.crosscite.org</Name><CreationDate>2017-01-09T10:18:11.000Z</CreationDate></Bucket><Bucket><Name>www.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>www.test.datacite.org</Name><CreationDate>2017-01-13T19:50:11.000Z</CreationDate></Bucket></Buckets></ListAllMyBucketsResult>
39
+ http_version:
40
+ recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
41
+ - request:
42
+ method: put
43
+ uri: https://s3-eu-west-1.amazonaws.com/sitemaps.datacite.org/sitemaps/sitemap.xml.gz
44
+ body:
45
+ encoding: US-ASCII
46
+ string: ''
47
+ headers:
48
+ User-Agent:
49
+ - fog-core/1.37.0
50
+ Content-Type:
51
+ - application/gzip
52
+ X-Amz-Acl:
53
+ - public-read
54
+ X-Amz-Date:
55
+ - 20170226T163651Z
56
+ X-Amz-Content-Sha256:
57
+ - STREAMING-AWS4-HMAC-SHA256-PAYLOAD
58
+ X-Amz-Decoded-Content-Length:
59
+ - '329'
60
+ Authorization:
61
+ - AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/eu-west-1/s3/aws4_request,
62
+ SignedHeaders=content-type;host;x-amz-acl;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length,
63
+ Signature=b63de16f8fac0b0003e6ae933232ad53711e436c6199b1c4f65d564ee1ea0247
64
+ response:
65
+ status:
66
+ code: 200
67
+ message: ''
68
+ headers:
69
+ X-Amz-Id-2:
70
+ - CsT9jP4uG+6GjUHkSnIGDeIm/gn2PW/DZZXgQeutpThEnCAfCtZ83Ex/UYkToD31191RPcorEsQ=
71
+ X-Amz-Request-Id:
72
+ - 6FB657528EF232AB
73
+ Date:
74
+ - Sun, 26 Feb 2017 16:36:51 GMT
75
+ Etag:
76
+ - '"c0671f5811543e1366ac05b118673cba"'
77
+ Content-Length:
78
+ - '0'
79
+ Server:
80
+ - AmazonS3
81
+ body:
82
+ encoding: UTF-8
83
+ string: ''
84
+ http_version:
85
+ recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
86
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,86 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://s3.amazonaws.com/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - fog-core/1.37.0
12
+ X-Amz-Date:
13
+ - 20170226T163651Z
14
+ X-Amz-Content-Sha256:
15
+ - e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
16
+ Authorization:
17
+ - AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/us-east-1/s3/aws4_request,
18
+ SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=6c2ef0efb87b485fb3b170845ed3be9683956c0c852508154c872a7fd24267e7
19
+ response:
20
+ status:
21
+ code: 200
22
+ message: ''
23
+ headers:
24
+ X-Amz-Id-2:
25
+ - scHGYPqdsMF0Ub8FXjqgJrFFybFuBIRH99xKtKffAOnoDLJHcBYOtn1vSFjJU59NydvrVIoYR30=
26
+ X-Amz-Request-Id:
27
+ - B06AE09620BE920D
28
+ Date:
29
+ - Sun, 26 Feb 2017 16:36:52 GMT
30
+ Content-Type:
31
+ - application/xml
32
+ Server:
33
+ - AmazonS3
34
+ body:
35
+ encoding: UTF-8
36
+ string: |-
37
+ <?xml version="1.0" encoding="UTF-8"?>
38
+ <ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>8170cc616e9d1833db4bade0c7af0010f04464343a1004b67a00fbcb41e361de</ID><DisplayName>infrastructure</DisplayName></Owner><Buckets><Bucket><Name>api.datacite.org</Name><CreationDate>2017-02-09T07:05:20.000Z</CreationDate></Bucket><Bucket><Name>api.test.datacite.org</Name><CreationDate>2017-02-09T07:05:21.000Z</CreationDate></Bucket><Bucket><Name>archive.datacite.org</Name><CreationDate>2016-12-26T07:02:24.000Z</CreationDate></Bucket><Bucket><Name>archive.test.datacite.org</Name><CreationDate>2017-01-13T19:04:30.000Z</CreationDate></Bucket><Bucket><Name>assets.datacite.org</Name><CreationDate>2016-04-29T17:51:32.000Z</CreationDate></Bucket><Bucket><Name>assets.test.datacite.org</Name><CreationDate>2017-01-13T18:47:02.000Z</CreationDate></Bucket><Bucket><Name>blog.datacite.org</Name><CreationDate>2015-09-26T08:59:21.000Z</CreationDate></Bucket><Bucket><Name>blog.test.datacite.org</Name><CreationDate>2017-01-13T18:54:23.000Z</CreationDate></Bucket><Bucket><Name>crosscite.org</Name><CreationDate>2016-01-05T22:11:03.000Z</CreationDate></Bucket><Bucket><Name>data.datacite.org</Name><CreationDate>2016-08-29T17:11:58.000Z</CreationDate></Bucket><Bucket><Name>data.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>datacite.org</Name><CreationDate>2016-01-04T21:53:46.000Z</CreationDate></Bucket><Bucket><Name>lagotto.io</Name><CreationDate>2016-01-10T10:12:40.000Z</CreationDate></Bucket><Bucket><Name>ops.datacite.org</Name><CreationDate>2016-06-03T19:38:04.000Z</CreationDate></Bucket><Bucket><Name>papertrail.datacite.org</Name><CreationDate>2016-01-04T23:25:19.000Z</CreationDate></Bucket><Bucket><Name>pidapalooza.org</Name><CreationDate>2016-06-16T07:28:25.000Z</CreationDate></Bucket><Bucket><Name>schema.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>schema.test.datacite.org</Name><CreationDate>2017-01-10T11:47:55.000Z</CreationDate></Bucket><Bucket><Name>sitemaps.datacite.org</Name><CreationDate>2017-01-12T21:41:18.000Z</CreationDate></Bucket><Bucket><Name><FOG_DIRECTORY></Name><CreationDate>2017-01-27T07:13:20.000Z</CreationDate></Bucket><Bucket><Name>stats.datacite.org</Name><CreationDate>2016-08-29T10:21:19.000Z</CreationDate></Bucket><Bucket><Name>stats.test.datacite.org</Name><CreationDate>2017-01-13T20:17:34.000Z</CreationDate></Bucket><Bucket><Name>test.datacite.org</Name><CreationDate>2017-01-08T21:50:53.000Z</CreationDate></Bucket><Bucket><Name>www.crosscite.org</Name><CreationDate>2017-01-09T10:18:11.000Z</CreationDate></Bucket><Bucket><Name>www.datacite.org</Name><CreationDate>2015-09-26T08:01:48.000Z</CreationDate></Bucket><Bucket><Name>www.test.datacite.org</Name><CreationDate>2017-01-13T19:50:11.000Z</CreationDate></Bucket></Buckets></ListAllMyBucketsResult>
39
+ http_version:
40
+ recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
41
+ - request:
42
+ method: put
43
+ uri: https://s3-eu-west-1.amazonaws.com/sitemaps.datacite.org/sitemaps/sitemap.xml.gz
44
+ body:
45
+ encoding: US-ASCII
46
+ string: ''
47
+ headers:
48
+ User-Agent:
49
+ - fog-core/1.37.0
50
+ Content-Type:
51
+ - application/gzip
52
+ X-Amz-Acl:
53
+ - public-read
54
+ X-Amz-Date:
55
+ - 20170226T163652Z
56
+ X-Amz-Content-Sha256:
57
+ - STREAMING-AWS4-HMAC-SHA256-PAYLOAD
58
+ X-Amz-Decoded-Content-Length:
59
+ - '29028'
60
+ Authorization:
61
+ - AWS4-HMAC-SHA256 Credential=<AWS_ACCESS_KEY_ID>/20170226/eu-west-1/s3/aws4_request,
62
+ SignedHeaders=content-type;host;x-amz-acl;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length,
63
+ Signature=ddd4a2ae86829a3f25ce393cd4a89f0ccd89a78dbd84cb12bb628697f7dc36d7
64
+ response:
65
+ status:
66
+ code: 200
67
+ message: ''
68
+ headers:
69
+ X-Amz-Id-2:
70
+ - 0OLFDCRTyz69EqCcoTT53/Z/Wd97mLcelgrTLy6V6j062MluMtQRTdDt49KB7zg8m+0HonJcxzA=
71
+ X-Amz-Request-Id:
72
+ - 064AEFC828E17346
73
+ Date:
74
+ - Sun, 26 Feb 2017 16:36:52 GMT
75
+ Etag:
76
+ - '"0a80f98d181c22cf9b6de16c57e741c5"'
77
+ Content-Length:
78
+ - '0'
79
+ Server:
80
+ - AmazonS3
81
+ body:
82
+ encoding: UTF-8
83
+ string: ''
84
+ http_version:
85
+ recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
86
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://search.datacite.org/api?fl=doi,updated&fq=updated:%5B2005-04-07T00:00:00Z%20TO%202005-04-08T23:59:59Z%5D%20AND%20has_metadata:true%20AND%20is_active:true&q=*:*&rows=0&sort=updated%20asc&start=0&wt=json
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - Maremma - https://github.com/datacite/maremma
12
+ Accept:
13
+ - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5
14
+ response:
15
+ status:
16
+ code: 200
17
+ message: ''
18
+ headers:
19
+ Server:
20
+ - openresty/1.11.2.2
21
+ Date:
22
+ - Sun, 26 Feb 2017 16:36:48 GMT
23
+ Content-Type:
24
+ - application/json;charset=UTF-8
25
+ Connection:
26
+ - keep-alive
27
+ Access-Control-Allow-Origin:
28
+ - "*"
29
+ Access-Control-Allow-Methods:
30
+ - GET, POST, OPTIONS
31
+ body:
32
+ encoding: UTF-8
33
+ string: '{"responseHeader":{"status":0,"QTime":1},"response":{"numFound":0,"start":0,"docs":[]}}
34
+
35
+ '
36
+ http_version:
37
+ recorded_at: Tue, 07 Apr 2015 22:00:00 GMT
38
+ recorded_with: VCR 3.0.3