bolognese 0.7.2 → 0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -1
  3. data/README.md +25 -16
  4. data/bolognese.gemspec +2 -1
  5. data/codemeta.json +39 -0
  6. data/lib/bolognese.rb +4 -0
  7. data/lib/bolognese/array.rb +11 -0
  8. data/lib/bolognese/author_utils.rb +35 -21
  9. data/lib/bolognese/bibtex.rb +4 -4
  10. data/lib/bolognese/codemeta.rb +8 -13
  11. data/lib/bolognese/crossref.rb +22 -20
  12. data/lib/bolognese/datacite.rb +61 -61
  13. data/lib/bolognese/datacite_json.rb +208 -0
  14. data/lib/bolognese/datacite_utils.rb +17 -48
  15. data/lib/bolognese/metadata.rb +83 -22
  16. data/lib/bolognese/schema_org.rb +42 -16
  17. data/lib/bolognese/utils.rb +79 -13
  18. data/lib/bolognese/version.rb +1 -1
  19. data/lib/bolognese/whitelist_scrubber.rb +45 -0
  20. data/spec/array_spec.rb +20 -0
  21. data/spec/author_utils_spec.rb +93 -9
  22. data/spec/bibtex_spec.rb +4 -4
  23. data/spec/cli_spec.rb +5 -0
  24. data/spec/codemeta_spec.rb +41 -31
  25. data/spec/crossref_spec.rb +47 -72
  26. data/spec/datacite_json_spec.rb +65 -0
  27. data/spec/datacite_spec.rb +67 -83
  28. data/spec/datacite_utils_spec.rb +9 -14
  29. data/spec/fixtures/datacite.json +49 -0
  30. data/spec/fixtures/datacite_software.json +18 -0
  31. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_datacite_json.yml +214 -0
  32. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/author_from_schema_org/with_id.yml +930 -0
  33. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/author_to_schema_org/with_id.yml +930 -0
  34. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/authors_as_string/author.yml +137 -860
  35. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/authors_as_string/no_author.yml +137 -860
  36. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/authors_as_string/single_author.yml +137 -860
  37. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/authors_as_string/with_organization.yml +137 -860
  38. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/from_schema_org/with_id.yml +930 -0
  39. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_name_identifier/has_ORCID.yml +155 -0
  40. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_name_identifier/has_no_ORCID.yml +134 -0
  41. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_familyName.yml +155 -0
  42. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_name_in_display-order.yml +186 -0
  43. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_name_in_display-order_with_ORCID.yml +177 -0
  44. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_name_in_sort-order.yml +173 -0
  45. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/is_organization.yml +207 -0
  46. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_comma.yml +207 -0
  47. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_family_name.yml +207 -0
  48. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_id.yml +207 -0
  49. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_no_info.yml +207 -0
  50. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_type_organization.yml +207 -0
  51. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_type_person.yml +207 -0
  52. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/sanitize/should_only_keep_specific_tags.yml +930 -0
  53. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/sanitize/should_remove_a_tags.yml +930 -0
  54. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/to_schema_org/with_id.yml +930 -0
  55. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/related_identifier.yml +173 -0
  56. data/spec/fixtures/vcr_cassettes/Bolognese_DataciteJson/get_metadata_as_bibtex/BlogPosting.yml +155 -0
  57. data/spec/schema_org_spec.rb +17 -14
  58. data/spec/utils_spec.rb +32 -2
  59. metadata +54 -4
@@ -0,0 +1,173 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://doi.org/10.5061/dryad.8515
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - Maremma - https://github.com/datacite/maremma
12
+ Accept:
13
+ - application/vnd.datacite.datacite+xml
14
+ response:
15
+ status:
16
+ code: 303
17
+ message: ''
18
+ headers:
19
+ Server:
20
+ - Apache-Coyote/1.1
21
+ Vary:
22
+ - Accept
23
+ Location:
24
+ - http://data.datacite.org/10.5061%2Fdryad.8515
25
+ Expires:
26
+ - Tue, 28 Feb 2017 19:33:44 GMT
27
+ Content-Type:
28
+ - text/html;charset=utf-8
29
+ Content-Length:
30
+ - '175'
31
+ Date:
32
+ - Tue, 28 Feb 2017 18:40:50 GMT
33
+ body:
34
+ encoding: UTF-8
35
+ string: |-
36
+ <html><head><title>Handle Redirect</title></head>
37
+ <body><a href="http://data.datacite.org/10.5061%2Fdryad.8515">http://data.datacite.org/10.5061%2Fdryad.8515</a></body></html>
38
+ http_version:
39
+ recorded_at: Tue, 28 Feb 2017 18:40:50 GMT
40
+ - request:
41
+ method: get
42
+ uri: http://data.datacite.org/10.5061%2Fdryad.8515
43
+ body:
44
+ encoding: US-ASCII
45
+ string: ''
46
+ headers:
47
+ User-Agent:
48
+ - Maremma - https://github.com/datacite/maremma
49
+ Accept:
50
+ - application/vnd.datacite.datacite+xml
51
+ response:
52
+ status:
53
+ code: 301
54
+ message: ''
55
+ headers:
56
+ Server:
57
+ - openresty/1.11.2.2
58
+ Date:
59
+ - Tue, 28 Feb 2017 18:40:51 GMT
60
+ Content-Type:
61
+ - text/html
62
+ Content-Length:
63
+ - '191'
64
+ Connection:
65
+ - keep-alive
66
+ Location:
67
+ - https://data.datacite.org/10.5061%2Fdryad.8515
68
+ body:
69
+ encoding: UTF-8
70
+ string: "<html>\r\n<head><title>301 Moved Permanently</title></head>\r\n<body
71
+ bgcolor=\"white\">\r\n<center><h1>301 Moved Permanently</h1></center>\r\n<hr><center>openresty/1.11.2.2</center>\r\n</body>\r\n</html>\r\n"
72
+ http_version:
73
+ recorded_at: Tue, 28 Feb 2017 18:40:51 GMT
74
+ - request:
75
+ method: get
76
+ uri: https://data.datacite.org/10.5061%2Fdryad.8515
77
+ body:
78
+ encoding: US-ASCII
79
+ string: ''
80
+ headers:
81
+ User-Agent:
82
+ - Maremma - https://github.com/datacite/maremma
83
+ Accept:
84
+ - application/vnd.datacite.datacite+xml
85
+ response:
86
+ status:
87
+ code: 200
88
+ message: ''
89
+ headers:
90
+ Server:
91
+ - openresty/1.11.2.2
92
+ Date:
93
+ - Tue, 28 Feb 2017 18:40:51 GMT
94
+ Content-Type:
95
+ - application/vnd.datacite.datacite+xml
96
+ Content-Length:
97
+ - '2601'
98
+ Connection:
99
+ - keep-alive
100
+ Cache-Control:
101
+ - no-transform, max-age=3600
102
+ Last-Modified:
103
+ - Sat, 04 Feb 2017 17:54:37 GMT
104
+ Vary:
105
+ - Accept
106
+ Access-Control-Allow-Origin:
107
+ - "*"
108
+ Access-Control-Allow-Methods:
109
+ - GET, POST, OPTIONS
110
+ body:
111
+ encoding: ASCII-8BIT
112
+ string: !binary |-
113
+ PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHJlc291
114
+ cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwt
115
+ MyIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVt
116
+ YS1pbnN0YW5jZSIgeG1sbnM6bWV0cz0iaHR0cDovL3d3dy5sb2MuZ292L01F
117
+ VFMvIiB4bWxuczpkc3BhY2U9Imh0dHA6Ly93d3cuZHNwYWNlLm9yZy94bWxu
118
+ cy9kc3BhY2UvZGltIiB4bWxuczpkaW09Imh0dHA6Ly93d3cuZHNwYWNlLm9y
119
+ Zy94bWxucy9kc3BhY2UvZGltIiB4bWxuczpkcnlhZD0iaHR0cDovL3B1cmwu
120
+ b3JnL2RyeWFkL3Rlcm1zLyIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8v
121
+ ZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtMyBodHRwOi8vc2NoZW1hLmRh
122
+ dGFjaXRlLm9yZy9tZXRhL2tlcm5lbC0zL21ldGFkYXRhLnhzZCI+ICA8aWRl
123
+ bnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC41MDYxL0RSWUFELjg1
124
+ MTU8L2lkZW50aWZpZXI+ICA8dmVyc2lvbj4xPC92ZXJzaW9uPiAgPGNyZWF0
125
+ b3JzPiAgICA8Y3JlYXRvcj4gICAgICA8Y3JlYXRvck5hbWU+T2xsb21vLCBC
126
+ ZW5qYW1pbjwvY3JlYXRvck5hbWU+ICAgIDwvY3JlYXRvcj4gICAgPGNyZWF0
127
+ b3I+ICAgICAgPGNyZWF0b3JOYW1lPkR1cmFuZCwgUGF0cmljazwvY3JlYXRv
128
+ ck5hbWU+ICAgIDwvY3JlYXRvcj4gICAgPGNyZWF0b3I+ICAgICAgPGNyZWF0
129
+ b3JOYW1lPlBydWdub2xsZSwgRnJhbmNrPC9jcmVhdG9yTmFtZT4gICAgPC9j
130
+ cmVhdG9yPiAgICA8Y3JlYXRvcj4gICAgICA8Y3JlYXRvck5hbWU+RG91emVy
131
+ eSwgRW1tYW51ZWwgSi4gUC48L2NyZWF0b3JOYW1lPiAgICA8L2NyZWF0b3I+
132
+ ICAgIDxjcmVhdG9yPiAgICAgIDxjcmVhdG9yTmFtZT5Bcm5hdGhhdSwgQ8Op
133
+ bGluZTwvY3JlYXRvck5hbWU+ICAgIDwvY3JlYXRvcj4gICAgPGNyZWF0b3I+
134
+ ICAgICAgPGNyZWF0b3JOYW1lPk5rb2doZSwgRGlldWRvbm7DqTwvY3JlYXRv
135
+ ck5hbWU+ICAgIDwvY3JlYXRvcj4gICAgPGNyZWF0b3I+ICAgICAgPGNyZWF0
136
+ b3JOYW1lPkxlcm95LCBFcmljPC9jcmVhdG9yTmFtZT4gICAgPC9jcmVhdG9y
137
+ PiAgICA8Y3JlYXRvcj4gICAgICA8Y3JlYXRvck5hbWU+UmVuYXVkLCBGcmFu
138
+ w6dvaXM8L2NyZWF0b3JOYW1lPiAgICA8L2NyZWF0b3I+ICA8L2NyZWF0b3Jz
139
+ PiAgPHRpdGxlcz4gICAgPHRpdGxlPkRhdGEgZnJvbTogQSBuZXcgbWFsYXJp
140
+ YSBhZ2VudCBpbiBBZnJpY2FuIGhvbWluaWRzLjwvdGl0bGU+ICA8L3RpdGxl
141
+ cz4gIDxwdWJsaXNoZXI+RHJ5YWQgRGlnaXRhbCBSZXBvc2l0b3J5PC9wdWJs
142
+ aXNoZXI+ICA8cHVibGljYXRpb25ZZWFyPjIwMTE8L3B1YmxpY2F0aW9uWWVh
143
+ cj4gIDxzdWJqZWN0cz4gICAgPHN1YmplY3Q+UGh5bG9nZW55PC9zdWJqZWN0
144
+ PiAgICA8c3ViamVjdD5NYWxhcmlhPC9zdWJqZWN0PiAgICA8c3ViamVjdD5Q
145
+ YXJhc2l0ZXM8L3N1YmplY3Q+ICAgIDxzdWJqZWN0PlRheG9ub215PC9zdWJq
146
+ ZWN0PiAgICA8c3ViamVjdD5NaXRvY2hvbmRyaWFsIGdlbm9tZTwvc3ViamVj
147
+ dD4gICAgPHN1YmplY3Q+QWZyaWNhPC9zdWJqZWN0PiAgICA8c3ViamVjdD5Q
148
+ bGFzbW9kaXVtPC9zdWJqZWN0PiAgPC9zdWJqZWN0cz4gIDxyZXNvdXJjZVR5
149
+ cGUgcmVzb3VyY2VUeXBlR2VuZXJhbD0iRGF0YXNldCI+RGF0YVBhY2thZ2U8
150
+ L3Jlc291cmNlVHlwZT4gIDxhbHRlcm5hdGVJZGVudGlmaWVycz4gICAgPGFs
151
+ dGVybmF0ZUlkZW50aWZpZXIgYWx0ZXJuYXRlSWRlbnRpZmllclR5cGU9ImNp
152
+ dGF0aW9uIj5PbGxvbW8gQiwgRHVyYW5kIFAsIFBydWdub2xsZSBGLCBEb3V6
153
+ ZXJ5IEVKUCwgQXJuYXRoYXUgQywgTmtvZ2hlIEQsIExlcm95IEUsIFJlbmF1
154
+ ZCBGICgyMDA5KSBBIG5ldyBtYWxhcmlhIGFnZW50IGluIEFmcmljYW4gaG9t
155
+ aW5pZHMuIFBMb1MgUGF0aG9nZW5zIDUoNSk6IGUxMDAwNDQ2LjwvYWx0ZXJu
156
+ YXRlSWRlbnRpZmllcj4gIDwvYWx0ZXJuYXRlSWRlbnRpZmllcnM+ICA8cmVs
157
+ YXRlZElkZW50aWZpZXJzPiAgICA8cmVsYXRlZElkZW50aWZpZXIgcmVsYXRl
158
+ ZElkZW50aWZpZXJUeXBlPSJET0kiIHJlbGF0aW9uVHlwZT0iSGFzUGFydCI+
159
+ MTAuNTA2MS9EUllBRC44NTE1LzE8L3JlbGF0ZWRJZGVudGlmaWVyPiAgICA8
160
+ cmVsYXRlZElkZW50aWZpZXIgcmVsYXRlZElkZW50aWZpZXJUeXBlPSJET0ki
161
+ IHJlbGF0aW9uVHlwZT0iSGFzUGFydCI+MTAuNTA2MS9EUllBRC44NTE1LzI8
162
+ L3JlbGF0ZWRJZGVudGlmaWVyPiAgICA8cmVsYXRlZElkZW50aWZpZXIgcmVs
163
+ YXRlZElkZW50aWZpZXJUeXBlPSJET0kiIHJlbGF0aW9uVHlwZT0iSXNSZWZl
164
+ cmVuY2VkQnkiPjEwLjEzNzEvSk9VUk5BTC5QUEFULjEwMDA0NDY8L3JlbGF0
165
+ ZWRJZGVudGlmaWVyPiAgICA8cmVsYXRlZElkZW50aWZpZXIgcmVsYXRlZElk
166
+ ZW50aWZpZXJUeXBlPSJQTUlEIiByZWxhdGlvblR5cGU9IklzUmVmZXJlbmNl
167
+ ZEJ5Ij4xOTQ3ODg3NzwvcmVsYXRlZElkZW50aWZpZXI+ICA8L3JlbGF0ZWRJ
168
+ ZGVudGlmaWVycz4gIDxyaWdodHNMaXN0PiAgICA8cmlnaHRzIHJpZ2h0c1VS
169
+ ST0iaHR0cDovL2NyZWF0aXZlY29tbW9ucy5vcmcvcHVibGljZG9tYWluL3pl
170
+ cm8vMS4wLyIvPiAgPC9yaWdodHNMaXN0PjwvcmVzb3VyY2U+
171
+ http_version:
172
+ recorded_at: Tue, 28 Feb 2017 18:40:51 GMT
173
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,155 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://doi.org/10.5438/4k3m-nyvg
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - Maremma - https://github.com/datacite/maremma
12
+ Accept:
13
+ - application/vnd.datacite.datacite+xml
14
+ response:
15
+ status:
16
+ code: 303
17
+ message: ''
18
+ headers:
19
+ Server:
20
+ - Apache-Coyote/1.1
21
+ Vary:
22
+ - Accept
23
+ Location:
24
+ - http://data.datacite.org/10.5438%2F4k3m-nyvg
25
+ Expires:
26
+ - Wed, 01 Mar 2017 09:51:10 GMT
27
+ Content-Type:
28
+ - text/html;charset=utf-8
29
+ Content-Length:
30
+ - '173'
31
+ Date:
32
+ - Wed, 01 Mar 2017 08:56:19 GMT
33
+ body:
34
+ encoding: UTF-8
35
+ string: |-
36
+ <html><head><title>Handle Redirect</title></head>
37
+ <body><a href="http://data.datacite.org/10.5438%2F4k3m-nyvg">http://data.datacite.org/10.5438%2F4k3m-nyvg</a></body></html>
38
+ http_version:
39
+ recorded_at: Wed, 01 Mar 2017 08:56:19 GMT
40
+ - request:
41
+ method: get
42
+ uri: http://data.datacite.org/10.5438%2F4k3m-nyvg
43
+ body:
44
+ encoding: US-ASCII
45
+ string: ''
46
+ headers:
47
+ User-Agent:
48
+ - Maremma - https://github.com/datacite/maremma
49
+ Accept:
50
+ - application/vnd.datacite.datacite+xml
51
+ response:
52
+ status:
53
+ code: 301
54
+ message: ''
55
+ headers:
56
+ Server:
57
+ - openresty/1.11.2.2
58
+ Date:
59
+ - Wed, 01 Mar 2017 08:56:19 GMT
60
+ Content-Type:
61
+ - text/html
62
+ Content-Length:
63
+ - '191'
64
+ Connection:
65
+ - keep-alive
66
+ Location:
67
+ - https://data.datacite.org/10.5438%2F4k3m-nyvg
68
+ body:
69
+ encoding: UTF-8
70
+ string: "<html>\r\n<head><title>301 Moved Permanently</title></head>\r\n<body
71
+ bgcolor=\"white\">\r\n<center><h1>301 Moved Permanently</h1></center>\r\n<hr><center>openresty/1.11.2.2</center>\r\n</body>\r\n</html>\r\n"
72
+ http_version:
73
+ recorded_at: Wed, 01 Mar 2017 08:56:19 GMT
74
+ - request:
75
+ method: get
76
+ uri: https://data.datacite.org/10.5438%2F4k3m-nyvg
77
+ body:
78
+ encoding: US-ASCII
79
+ string: ''
80
+ headers:
81
+ User-Agent:
82
+ - Maremma - https://github.com/datacite/maremma
83
+ Accept:
84
+ - application/vnd.datacite.datacite+xml
85
+ response:
86
+ status:
87
+ code: 200
88
+ message: ''
89
+ headers:
90
+ Server:
91
+ - openresty/1.11.2.2
92
+ Date:
93
+ - Wed, 01 Mar 2017 08:56:20 GMT
94
+ Content-Type:
95
+ - application/vnd.datacite.datacite+xml
96
+ Content-Length:
97
+ - '2045'
98
+ Connection:
99
+ - keep-alive
100
+ Cache-Control:
101
+ - no-transform, max-age=3600
102
+ Last-Modified:
103
+ - Mon, 09 Jan 2017 13:53:11 GMT
104
+ Vary:
105
+ - Accept
106
+ Access-Control-Allow-Origin:
107
+ - "*"
108
+ Access-Control-Allow-Methods:
109
+ - GET, POST, OPTIONS
110
+ body:
111
+ encoding: UTF-8
112
+ string: |
113
+ <?xml version="1.0" encoding="UTF-8"?>
114
+ <resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
115
+ <identifier identifierType="DOI">10.5438/4K3M-NYVG</identifier>
116
+ <creators>
117
+ <creator>
118
+ <creatorName>Fenner, Martin</creatorName>
119
+ <givenName>Martin</givenName>
120
+ <familyName>Fenner</familyName>
121
+ <nameIdentifier schemeURI="http://orcid.org/" nameIdentifierScheme="ORCID">0000-0003-1419-2405</nameIdentifier>
122
+ </creator>
123
+ </creators>
124
+ <titles>
125
+ <title>Eating your own Dog Food</title>
126
+ </titles>
127
+ <publisher>DataCite</publisher>
128
+ <publicationYear>2016</publicationYear>
129
+ <resourceType resourceTypeGeneral="Text">BlogPosting</resourceType>
130
+ <alternateIdentifiers>
131
+ <alternateIdentifier alternateIdentifierType="Local accession number">MS-49-3632-5083</alternateIdentifier>
132
+ </alternateIdentifiers>
133
+ <subjects>
134
+ <subject>datacite</subject>
135
+ <subject>doi</subject>
136
+ <subject>metadata</subject>
137
+ </subjects>
138
+ <dates>
139
+ <date dateType="Created">2016-12-20</date>
140
+ <date dateType="Issued">2016-12-20</date>
141
+ <date dateType="Updated">2016-12-20</date>
142
+ </dates>
143
+ <relatedIdentifiers>
144
+ <relatedIdentifier relatedIdentifierType="DOI" relationType="References">10.5438/0012</relatedIdentifier>
145
+ <relatedIdentifier relatedIdentifierType="DOI" relationType="References">10.5438/55E5-T5C0</relatedIdentifier>
146
+ <relatedIdentifier relatedIdentifierType="DOI" relationType="IsPartOf">10.5438/0000-00SS</relatedIdentifier>
147
+ </relatedIdentifiers>
148
+ <version>1.0</version>
149
+ <descriptions>
150
+ <description descriptionType="Abstract">Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...</description>
151
+ </descriptions>
152
+ </resource>
153
+ http_version:
154
+ recorded_at: Wed, 01 Mar 2017 08:56:20 GMT
155
+ recorded_with: VCR 3.0.3
@@ -11,17 +11,16 @@ describe Bolognese::SchemaOrg, vcr: true do
11
11
  expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
12
12
  expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
13
13
  expect(subject.type).to eq("BlogPosting")
14
- expect(subject.author).to eq("@type"=>"Person", "@id"=>"http://orcid.org/0000-0003-1419-2405", "givenName"=>"Martin", "familyName"=>"Fenner")
15
- expect(subject.name).to eq("Eating your own Dog Food")
14
+ expect(subject.author).to eq("type"=>"Person", "id"=>"http://orcid.org/0000-0003-1419-2405", "name"=>"Martin Fenner", "givenName"=>"Martin", "familyName"=>"Fenner")
15
+ expect(subject.title).to eq("Eating your own Dog Food")
16
16
  expect(subject.alternate_name).to eq("MS-49-3632-5083")
17
- expect(subject.description).to start_with("Eating your own dog food")
17
+ expect(subject.description["text"]).to start_with("Eating your own dog food")
18
18
  expect(subject.keywords).to eq("datacite, doi, metadata, featured")
19
19
  expect(subject.date_published).to eq("2016-12-20")
20
20
  expect(subject.date_modified).to eq("2016-12-20")
21
- expect(subject.is_part_of).to eq("@type"=>"Blog", "@id"=>"https://doi.org/10.5438/0000-00ss", "name"=>"DataCite Blog")
22
- expect(subject.citation).to eq([{"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/0012"},
23
- {"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/55e5-t5c0"}])
24
- expect(subject.publisher).to eq("@type"=>"Organization", "name"=>"DataCite")
21
+ expect(subject.is_part_of).to eq("id"=>"https://doi.org/10.5438/0000-00ss", "type"=>"Blog", "name"=>"DataCite Blog", "relationType"=>"IsPartOf")
22
+ expect(subject.references).to eq([{"id"=>"https://doi.org/10.5438/0012", "type"=>"CreativeWork", "relationType"=>"References"}, {"id"=>"https://doi.org/10.5438/55e5-t5c0", "type"=>"CreativeWork", "relationType"=>"References"}])
23
+ expect(subject.publisher).to eq("DataCite")
25
24
  end
26
25
 
27
26
  it "BlogPosting schema.org JSON" do
@@ -45,17 +44,21 @@ describe Bolognese::SchemaOrg, vcr: true do
45
44
  expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
46
45
  expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
47
46
  expect(subject.type).to eq("BlogPosting")
48
- expect(subject.author).to eq("@type"=>"Person", "@id"=>"http://orcid.org/0000-0003-1419-2405", "givenName"=>"Martin", "familyName"=>"Fenner")
49
- expect(subject.name).to eq("Eating your own Dog Food")
47
+ expect(subject.author).to eq("type"=>"Person", "id"=>"http://orcid.org/0000-0003-1419-2405", "name"=>"Martin Fenner", "givenName"=>"Martin", "familyName"=>"Fenner")
48
+ expect(subject.title).to eq("Eating your own Dog Food")
50
49
  expect(subject.alternate_name).to eq("MS-49-3632-5083")
51
- expect(subject.description).to start_with("Eating your own dog food")
50
+ expect(subject.description["text"]).to start_with("Eating your own dog food")
52
51
  expect(subject.keywords).to eq("datacite, doi, metadata, featured")
53
52
  expect(subject.date_published).to eq("2016-12-20")
54
53
  expect(subject.date_modified).to eq("2016-12-20")
55
- expect(subject.is_part_of).to eq("@type"=>"Blog", "@id"=>"https://doi.org/10.5438/0000-00ss", "name"=>"DataCite Blog")
56
- expect(subject.citation).to eq([{"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/0012"},
57
- {"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/55e5-t5c0"}])
58
- expect(subject.publisher).to eq("@type"=>"Organization", "name"=>"DataCite")
54
+ expect(subject.is_part_of).to eq("id"=>"https://doi.org/10.5438/0000-00ss", "type"=>"Blog", "name"=>"DataCite Blog", "relationType"=>"IsPartOf")
55
+ expect(subject.references).to eq([{"id"=>"https://doi.org/10.5438/0012",
56
+ "type"=>"CreativeWork",
57
+ "relationType"=>"References"},
58
+ {"id"=>"https://doi.org/10.5438/55e5-t5c0",
59
+ "type"=>"CreativeWork",
60
+ "relationType"=>"References"}])
61
+ expect(subject.publisher).to eq("DataCite")
59
62
  end
60
63
  end
61
64
 
data/spec/utils_spec.rb CHANGED
@@ -69,13 +69,43 @@ describe Bolognese::Crossref, vcr: true do
69
69
  it "doi" do
70
70
  ids = [{"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/0012"}, {"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/55E5-T5C0"}]
71
71
  response = subject.normalize_ids(ids)
72
- expect(response).to eq([{"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/0012"}, {"@type"=>"CreativeWork", "@id"=>"https://doi.org/10.5438/55e5-t5c0"}])
72
+ expect(response).to eq([{"id"=>"https://doi.org/10.5438/0012", "type"=>"CreativeWork", "relationType"=>"References"}, {"id"=>"https://doi.org/10.5438/55e5-t5c0", "type"=>"CreativeWork", "relationType"=>"References"}])
73
73
  end
74
74
 
75
75
  it "url" do
76
76
  ids = [{"@type"=>"CreativeWork", "@id"=>"https://blog.datacite.org/eating-your-own-dog-food/"}]
77
77
  response = subject.normalize_ids(ids)
78
- expect(response).to eq("@type"=>"CreativeWork", "@id"=>"https://blog.datacite.org/eating-your-own-dog-food")
78
+ expect(response).to eq("id"=>"https://blog.datacite.org/eating-your-own-dog-food", "type"=>"CreativeWork", "relationType"=>"References")
79
+ end
80
+ end
81
+
82
+ context "to_schema_org" do
83
+ it "with id" do
84
+ author = {"type"=>"Person", "id"=>"http://orcid.org/0000-0003-1419-2405", "givenName"=>"Martin", "familyName"=>"Fenner", "name"=>"Martin Fenner" }
85
+ response = subject.to_schema_org(author)
86
+ expect(response).to eq("givenName"=>"Martin", "familyName"=>"Fenner", "name"=>"Martin Fenner", "@type"=>"Person", "@id"=>"http://orcid.org/0000-0003-1419-2405")
87
+ end
88
+ end
89
+
90
+ context "from_schema_org" do
91
+ it "with @id" do
92
+ author = {"@type"=>"Person", "@id"=>"http://orcid.org/0000-0003-1419-2405", "givenName"=>"Martin", "familyName"=>"Fenner", "name"=>"Martin Fenner" }
93
+ response = subject.from_schema_org(author)
94
+ expect(response).to eq("givenName"=>"Martin", "familyName"=>"Fenner", "name"=>"Martin Fenner", "type"=>"Person", "id"=>"http://orcid.org/0000-0003-1419-2405")
95
+ end
96
+ end
97
+
98
+ context "sanitize" do
99
+ it 'should remove a tags' do
100
+ text = "In 1998 <strong>Tim Berners-Lee</strong> coined the term <a href=\"https://www.w3.org/Provider/Style/URI\">cool URIs</a>"
101
+ content = subject.sanitize(text)
102
+ expect(content).to eq("In 1998 <strong>Tim Berners-Lee</strong> coined the term cool URIs")
103
+ end
104
+
105
+ it 'should only keep specific tags' do
106
+ text = "In 1998 <strong>Tim Berners-Lee</strong> coined the term <a href=\"https://www.w3.org/Provider/Style/URI\">cool URIs</a>"
107
+ content = subject.sanitize(text, tags: ["a"])
108
+ expect(content).to eq("In 1998 Tim Berners-Lee coined the term <a href=\"https://www.w3.org/Provider/Style/URI\">cool URIs</a>")
79
109
  end
80
110
  end
81
111
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bolognese
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-25 00:00:00.000000000 Z
11
+ date: 2017-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: maremma
@@ -44,6 +44,26 @@ dependencies:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
46
  version: 1.6.8
47
+ - !ruby/object:Gem::Dependency
48
+ name: loofah
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.0'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 2.0.3
57
+ type: :runtime
58
+ prerelease: false
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: '2.0'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.0.3
47
67
  - !ruby/object:Gem::Dependency
48
68
  name: builder
49
69
  requirement: !ruby/object:Gem::Requirement
@@ -304,8 +324,8 @@ dependencies:
304
324
  - - "~>"
305
325
  - !ruby/object:Gem::Version
306
326
  version: 0.12.0
307
- description: Convert DOI metadata to and from Crossref and DataCite XML, as well as
308
- schema.org/JSON-LD
327
+ description: Ruby gem and command-line utility for conversion of DOI metadata from
328
+ and to different metadata formats, including schema.org.
309
329
  email: mfenner@datacite.org
310
330
  executables:
311
331
  - bolognese
@@ -322,13 +342,16 @@ files:
322
342
  - Rakefile
323
343
  - bin/bolognese
324
344
  - bolognese.gemspec
345
+ - codemeta.json
325
346
  - lib/bolognese.rb
347
+ - lib/bolognese/array.rb
326
348
  - lib/bolognese/author_utils.rb
327
349
  - lib/bolognese/bibtex.rb
328
350
  - lib/bolognese/cli.rb
329
351
  - lib/bolognese/codemeta.rb
330
352
  - lib/bolognese/crossref.rb
331
353
  - lib/bolognese/datacite.rb
354
+ - lib/bolognese/datacite_json.rb
332
355
  - lib/bolognese/datacite_utils.rb
333
356
  - lib/bolognese/date_utils.rb
334
357
  - lib/bolognese/doi_utils.rb
@@ -339,6 +362,7 @@ files:
339
362
  - lib/bolognese/string.rb
340
363
  - lib/bolognese/utils.rb
341
364
  - lib/bolognese/version.rb
365
+ - lib/bolognese/whitelist_scrubber.rb
342
366
  - resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd
343
367
  - resources/kernel-2.1/include/datacite-contributorType-v2.xsd
344
368
  - resources/kernel-2.1/include/datacite-dateType-v1.1.xsd
@@ -379,20 +403,24 @@ files:
379
403
  - resources/kernel-4/include/datacite-resourceType-v4.xsd
380
404
  - resources/kernel-4/include/datacite-titleType-v4.xsd
381
405
  - resources/kernel-4/metadata.xsd
406
+ - spec/array_spec.rb
382
407
  - spec/author_utils_spec.rb
383
408
  - spec/bibtex_spec.rb
384
409
  - spec/cli_spec.rb
385
410
  - spec/codemeta_spec.rb
386
411
  - spec/crossref_spec.rb
412
+ - spec/datacite_json_spec.rb
387
413
  - spec/datacite_spec.rb
388
414
  - spec/datacite_utils_spec.rb
389
415
  - spec/doi_utils_spec.rb
390
416
  - spec/fixtures/codemeta.json
391
417
  - spec/fixtures/crossref.bib
392
418
  - spec/fixtures/crossref.xml
419
+ - spec/fixtures/datacite.json
393
420
  - spec/fixtures/datacite.xml
394
421
  - spec/fixtures/datacite_dataset.xml
395
422
  - spec/fixtures/datacite_missing_creator.xml
423
+ - spec/fixtures/datacite_software.json
396
424
  - spec/fixtures/maremma/codemeta.json
397
425
  - spec/fixtures/schema_org.json
398
426
  - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/as_bibtex.yml
@@ -410,6 +438,7 @@ files:
410
438
  - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/default.yml
411
439
  - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_bibtex.yml
412
440
  - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_datacite.yml
441
+ - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_datacite_json.yml
413
442
  - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_schema_org.yml
414
443
  - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/schema_org/as_bibtex.yml
415
444
  - spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/schema_org/as_datacite.yml
@@ -425,6 +454,8 @@ files:
425
454
  - spec/fixtures/vcr_cassettes/Bolognese_Codemeta/get_metadata_as_bibtex/maremma.yml
426
455
  - spec/fixtures/vcr_cassettes/Bolognese_Codemeta/get_metadata_as_datacite_xml/maremma.yml
427
456
  - spec/fixtures/vcr_cassettes/Bolognese_Codemeta/get_metadata_as_schema_org_JSON/maremma.yml
457
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/author_from_schema_org/with_id.yml
458
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/author_to_schema_org/with_id.yml
428
459
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/authors_as_string/author.yml
429
460
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/authors_as_string/no_author.yml
430
461
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/authors_as_string/single_author.yml
@@ -433,6 +464,7 @@ files:
433
464
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml
434
465
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml
435
466
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml
467
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/from_schema_org/with_id.yml
436
468
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_ORCID_ID.yml
437
469
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml
438
470
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml
@@ -449,6 +481,19 @@ files:
449
481
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_datacite_xml/with_data_citation.yml
450
482
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_datacite_xml/with_editor.yml
451
483
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_string/DOI_with_data_citation.yml
484
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_name_identifier/has_ORCID.yml
485
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_name_identifier/has_no_ORCID.yml
486
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_familyName.yml
487
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_name_in_display-order.yml
488
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_name_in_display-order_with_ORCID.yml
489
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/has_name_in_sort-order.yml
490
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_one_author/is_organization.yml
491
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_comma.yml
492
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_family_name.yml
493
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_id.yml
494
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_no_info.yml
495
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_type_organization.yml
496
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/is_personal_name_/has_type_person.yml
452
497
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/SICI_doi.yml
453
498
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/doi.yml
454
499
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/doi_from_url_without_doi_proxy.yml
@@ -472,6 +517,9 @@ files:
472
517
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attributes/hash.yml
473
518
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attributes/nil.yml
474
519
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attributes/string.yml
520
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/sanitize/should_only_keep_specific_tags.yml
521
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/sanitize/should_remove_a_tags.yml
522
+ - spec/fixtures/vcr_cassettes/Bolognese_Crossref/to_schema_org/with_id.yml
475
523
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/validate_url/DOI.yml
476
524
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/validate_url/URL.yml
477
525
  - spec/fixtures/vcr_cassettes/Bolognese_Crossref/validate_url/string.yml
@@ -501,11 +549,13 @@ files:
501
549
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_publisher/insert.yml
502
550
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/insert.yml
503
551
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/rel_identifiers.yml
552
+ - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/related_identifier.yml
504
553
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_resource_type/insert.yml
505
554
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_rights_list/insert.yml
506
555
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_subjects/insert.yml
507
556
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_titles/insert.yml
508
557
  - spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_version/insert.yml
558
+ - spec/fixtures/vcr_cassettes/Bolognese_DataciteJson/get_metadata_as_bibtex/BlogPosting.yml
509
559
  - spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml
510
560
  - spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml
511
561
  - spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml