cirneco 0.8.12 → 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cb4a217d9d1a32ba1d92b3d505e53903ccd2b25b
4
- data.tar.gz: c51d52740d43affa17960e3e5382208b93ea0cfb
3
+ metadata.gz: 57b29c4bbd81d6692811c2e2c4f56887195f3c0a
4
+ data.tar.gz: 5758d8072bb5e32569859f06c7ad8e8b26d2f71e
5
5
  SHA512:
6
- metadata.gz: d18feee400aae01050747ff636fb13fd31b61f2c36e3cd9fa5fc4236fbc6d7d5d45e82aa0b64a2d4d2b53f679b115a1398ee7d0146f496a608b90b82e8ce8112
7
- data.tar.gz: 0a4a8efc2685ec9460eb9e42ab793ac65dfebab386289d8bd1104202ab0010fc1521e6063595e3b97915d60939d6ef7e10099fa64c2cec5f603f4413ea1ead20
6
+ metadata.gz: 1e08e2b24e7af2e0b41a37c68fee12b0309b640d61d8dadb6f4efdca8240b6b3185cc8ec877a893c3632c0226dc2130daa5f082df1c761a577cabce26d39cf98
7
+ data.tar.gz: 187f2c7707e0217fdefb9de490b02290cbaa4d00633c0723315d26e4064106f12d14b09345c9770b91d811d5f88b553c2469523225fe053bdf74670ae3214144
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cirneco (0.8.12)
4
+ cirneco (0.9)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  base32-crockford-checksum (~> 0.2.2)
7
7
  bergamasco (~> 0.3)
@@ -22,7 +22,7 @@ GEM
22
22
  tzinfo (~> 1.1)
23
23
  addressable (2.3.8)
24
24
  base32-crockford-checksum (0.2.3)
25
- bergamasco (0.3.6)
25
+ bergamasco (0.3.9)
26
26
  activesupport (~> 4.2, >= 4.2.5)
27
27
  addressable (~> 2.3.8)
28
28
  builder (~> 3.2, >= 3.2.2)
@@ -122,4 +122,4 @@ DEPENDENCIES
122
122
  webmock (~> 1.22, >= 1.22.3)
123
123
 
124
124
  BUNDLED WITH
125
- 1.13.6
125
+ 1.12.5
@@ -153,5 +153,18 @@ module Cirneco
153
153
  response = hide_dois_for_all_urls(url, options)
154
154
  puts response
155
155
  end
156
+
157
+ desc "write_jats DOCUMENT", "write_jats document"
158
+ method_option :sitepath, :default => ENV['SITE_SITEPATH']
159
+ method_option :authorpath, :default => ENV['SITE_AUTHORPATH']
160
+ method_option :referencespath, :default => ENV['SITE_REFERENCESPATH']
161
+ method_option :source_dir, :default => ENV['SOURCE_DIR']
162
+ method_option :build_dir, :default => ENV['BUILD_DIR']
163
+ method_option :posts_dir, :default => ENV['POSTS_DIR']
164
+ method_option :csl, :default => ENV['SITE_CSLPATH']
165
+ def write_jats(url)
166
+ response = generate_jats_for_all_urls(url, options)
167
+ puts response
168
+ end
156
169
  end
157
170
  end
@@ -279,8 +279,82 @@ module Cirneco
279
279
  work.delete_metadata(metadata["doi"], options)
280
280
  end
281
281
 
282
- def generate_jats(filepath, options={})
283
- Bergamasco::Pandoc.write_jats(filepath, options)
282
+ def generate_metadata_for_jats(url, options={})
283
+ doc = Nokogiri::HTML(open(url))
284
+ json = doc.at_xpath("//script[@type='application/ld+json']")
285
+ return "Error: no schema.org metadata found" unless json.present?
286
+
287
+ metadata = ActiveSupport::JSON.decode(json.text)
288
+
289
+ return "Error: required metadata missing" unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
290
+
291
+ # required metadata
292
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
293
+ uri = Addressable::URI.parse(metadata["@id"])
294
+ metadata["doi"] = uri.path[1..-1].upcase
295
+ end
296
+
297
+ metadata["title"] = metadata["name"]
298
+ metadata["author"] = format_authors(metadata["author"]).map do |a|
299
+ { "given_name" => a[:given_name],
300
+ "family_name" => a[:family_name],
301
+ "name" => a[:name],
302
+ "orcid" => a[:orcid] }.compact
303
+ end
304
+
305
+ metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
306
+ metadata["tags"] = metadata["keywords"].to_s.split(", ").select { |k| k != "featured" }
307
+ metadata["date"] = metadata.fetch("datePublished", "")
308
+ metadata["publication_year"] = metadata.fetch("date", "")[0..3].to_i
309
+ metadata["publication_month"] = metadata.fetch("date", "")[5..6].to_i
310
+ metadata["publication_day"] = metadata.fetch("date", "")[8..9].to_i
311
+
312
+ if metadata["description"].present?
313
+ metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
314
+ end
315
+
316
+ # use default version 1.0
317
+ metadata["version"] ||= "1.0"
318
+
319
+ # fetch reference metadata if available
320
+ metadata["related_identifiers"] = get_related_identifiers(metadata)
321
+
322
+ if metadata["license"].present?
323
+ metadata["license_name"] = LICENSES.fetch(metadata["license"], nil)
324
+ metadata["license_url"] = metadata["license"]
325
+ end
326
+
327
+ metadata = metadata.extract!(*%w(publisher doi tags title author date
328
+ publication_year publication_month publication_day license_name
329
+ license_url))
330
+ end
331
+
332
+ def generate_jats_for_url(url, options={})
333
+ filename, build_path, source_path = filepath_from_url(url, options)
334
+ metadata = generate_metadata_for_jats(build_path, options)
335
+ file = IO.read(source_path)
336
+ content = Bergamasco::Markdown.split_yaml_frontmatter(file).last
337
+ text = Bergamasco::Markdown.join_yaml_frontmatter(metadata, content)
338
+
339
+ xml = Bergamasco::Pandoc.convert_to_jats(text, options)
340
+
341
+ if metadata["doi"].present?
342
+ xmlname = metadata["doi"].split('/', 2).last
343
+ else
344
+ xmlname = filename.gsub(/\.html\.(erb|md)/, ".xml")
345
+ end
346
+
347
+ xmlpath = build_path.gsub("index.html", xmlname)
348
+ IO.write(xmlpath, xml)
349
+
350
+ "JATS XML written for #{filename}"
351
+ end
352
+
353
+ def generate_jats_for_all_urls(url, options={})
354
+ urls = get_urls_for_works(url)
355
+ urls.map do |u|
356
+ generate_jats_for_url(u, options)
357
+ end.join("\n")
284
358
  end
285
359
 
286
360
  def url_from_path(site_url, filepath)
@@ -292,10 +366,10 @@ module Cirneco
292
366
  orcid = orcid_from_url(author["@id"])
293
367
  name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
294
368
 
295
- { given_name: author["givenName"],
296
- family_name: author["familyName"],
297
- name: name,
298
- orcid: orcid }.compact
369
+ { :given_name => author["givenName"],
370
+ :family_name => author["familyName"],
371
+ :name => name,
372
+ :orcid => orcid }.compact
299
373
  end
300
374
  end
301
375
 
@@ -1,3 +1,3 @@
1
1
  module Cirneco
2
- VERSION = "0.8.12"
2
+ VERSION = "0.9"
3
3
  end
@@ -15,7 +15,7 @@ describe Cirneco::Doi do
15
15
  let(:username) { ENV['MDS_USERNAME'] }
16
16
  let(:password) { ENV['MDS_PASSWORD'] }
17
17
  let(:csl) { "spec/fixtures/apa.csl" }
18
- let(:bibliography) { "spec/fixtures/bibliography.yaml" }
18
+ let(:bibliography) { "spec/fixtures/references.yaml" }
19
19
  let(:api_options) { { username: username, password: password, sandbox: true } }
20
20
  let(:mint_options) { { username: username, password: password, sandbox: true, source_dir: "/spec/fixtures/", build_dir: "/spec/fixtures/", csl: csl, bibliography: bibliography } }
21
21
 
@@ -122,6 +122,14 @@ describe Cirneco::Doi do
122
122
  expect { subject.mint_and_hide filepath }.to output("DOI 10.5072/0000-03VC minted and hidden for cool-dois.html.md\nDOI 10.5072/0000-00SS minted and hidden for index.html.erb\n").to_stdout
123
123
  end
124
124
  end
125
+
126
+ context "jats" do
127
+ it 'writes jats for list of urls' do
128
+ filepath = fixture_path + 'index.html'
129
+ subject.options = mint_options
130
+ expect { subject.write_jats filepath }.to output("JATS XML written for cool-dois.html.md\nJATS XML written for index.html.erb\n").to_stdout
131
+ end
132
+ end
125
133
  end
126
134
 
127
135
 
@@ -194,14 +194,26 @@ describe Cirneco::DataCenter, vcr: true, :order => :defined do
194
194
  expect(response.body["data"]).to eq("OK")
195
195
  expect(response.status).to eq(200)
196
196
  end
197
+ end
197
198
 
198
- # it 'should generate jats xml' do
199
- # filepath = fixture_path + 'cool-dois.html.md'
200
- # number = 123
201
- # metadata = subject.generate_metadata_for_work(filepath)
202
- # xml_path = subject.generate_jats(filepath, options.merge(metadata: metadata))
203
- # expect(xml_path).to eq(fixture_path + 'cool-dois.xml')
204
- # end
199
+ context "jats" do
200
+ it 'should generate metadata for jats' do
201
+ filepath = fixture_path + 'cool-dois/index.html'
202
+ metadata = subject.generate_metadata_for_jats(filepath)
203
+ expect(metadata["author"]).to eq([{"given_name"=>"Martin", "family_name"=>"Fenner", "orcid"=>"0000-0003-1419-2405"}])
204
+ expect(metadata["license_url"]).to eq("https://creativecommons.org/licenses/by/4.0/")
205
+ end
206
+
207
+ it 'should generate jats xml' do
208
+ filepath = fixture_path + 'cool-dois/index.html'
209
+ expect(subject.generate_jats_for_url(filepath, options)).to eq("JATS XML written for cool-dois.html.md")
210
+ end
211
+
212
+ it 'should generate jats for all urls' do
213
+ filepath = fixture_path + 'index.html'
214
+ response = subject.generate_jats_for_all_urls(filepath, options)
215
+ expect(response).to eq("JATS XML written for cool-dois.html.md\nJATS XML written for index.html.erb")
216
+ end
205
217
  end
206
218
 
207
219
  context "get_related_identifiers" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cirneco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.12
4
+ version: '0.9'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner