bolognese 1.0.29 → 1.0.30

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/lib/bolognese/author_utils.rb +33 -56
  4. data/lib/bolognese/datacite_utils.rb +15 -7
  5. data/lib/bolognese/metadata.rb +13 -18
  6. data/lib/bolognese/metadata_utils.rb +41 -20
  7. data/lib/bolognese/readers/bibtex_reader.rb +21 -17
  8. data/lib/bolognese/readers/citeproc_reader.rb +28 -7
  9. data/lib/bolognese/readers/codemeta_reader.rb +21 -8
  10. data/lib/bolognese/readers/crossref_reader.rb +31 -23
  11. data/lib/bolognese/readers/datacite_json_reader.rb +1 -47
  12. data/lib/bolognese/readers/datacite_reader.rb +26 -16
  13. data/lib/bolognese/readers/ris_reader.rb +20 -13
  14. data/lib/bolognese/readers/schema_org_reader.rb +26 -22
  15. data/lib/bolognese/utils.rb +139 -39
  16. data/lib/bolognese/version.rb +1 -1
  17. data/lib/bolognese/writers/bibtex_writer.rb +7 -5
  18. data/lib/bolognese/writers/citation_writer.rb +1 -1
  19. data/lib/bolognese/writers/codemeta_writer.rb +2 -2
  20. data/lib/bolognese/writers/crosscite_writer.rb +1 -34
  21. data/lib/bolognese/writers/datacite_json_writer.rb +1 -29
  22. data/lib/bolognese/writers/jats_writer.rb +9 -9
  23. data/lib/bolognese/writers/ris_writer.rb +5 -5
  24. data/lib/bolognese/writers/schema_org_writer.rb +9 -9
  25. data/spec/author_utils_spec.rb +17 -47
  26. data/spec/cli_spec.rb +1 -1
  27. data/spec/fixtures/crosscite.json +10 -4
  28. data/spec/fixtures/datacite-example-polygon-v4.1.xml +163 -0
  29. data/spec/fixtures/datacite-seriesinformation.xml +41 -0
  30. data/spec/fixtures/datacite.json +12 -8
  31. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/only_title.yml +37 -0
  32. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_and_pages.yml +37 -0
  33. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_volume_and_pages.yml +37 -0
  34. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_volume_issue_and_pages.yml +37 -0
  35. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/is_personal_name_/has_orcid_id.yml +37 -0
  36. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/to_schema_org_identifiers/with_identifiers.yml +37 -0
  37. data/spec/readers/bibtex_reader_spec.rb +5 -5
  38. data/spec/readers/citeproc_reader_spec.rb +4 -4
  39. data/spec/readers/codemeta_reader_spec.rb +39 -18
  40. data/spec/readers/crosscite_reader_spec.rb +2 -2
  41. data/spec/readers/crossref_reader_spec.rb +26 -32
  42. data/spec/readers/datacite_json_reader_spec.rb +2 -2
  43. data/spec/readers/datacite_reader_spec.rb +156 -101
  44. data/spec/readers/ris_reader_spec.rb +7 -7
  45. data/spec/readers/schema_org_reader_spec.rb +34 -28
  46. data/spec/utils_spec.rb +37 -6
  47. data/spec/writers/bibtex_writer_spec.rb +1 -0
  48. data/spec/writers/codemeta_writer_spec.rb +1 -1
  49. data/spec/writers/crosscite_writer_spec.rb +13 -6
  50. data/spec/writers/datacite_json_writer_spec.rb +13 -5
  51. data/spec/writers/datacite_writer_spec.rb +18 -18
  52. data/spec/writers/schema_org_writer_spec.rb +40 -14
  53. metadata +10 -2
@@ -425,12 +425,12 @@ module Bolognese
425
425
  def parse_attributes(element, options={})
426
426
  content = options[:content] || "__content__"
427
427
 
428
- if element.is_a?(String)
429
- element
428
+ if element.is_a?(String) && options[:content].nil?
429
+ CGI.unescapeHTML(element)
430
430
  elsif element.is_a?(Hash)
431
- element.fetch(content, nil)
431
+ element.fetch( CGI.unescapeHTML(content), nil)
432
432
  elsif element.is_a?(Array)
433
- a = element.map { |e| e.is_a?(Hash) ? e.fetch(content, nil) : e }.uniq
433
+ a = element.map { |e| e.is_a?(Hash) ? e.fetch( CGI.unescapeHTML(content), nil) : e }.uniq
434
434
  a = options[:first] ? a.first : a.unwrap
435
435
  else
436
436
  nil
@@ -476,7 +476,7 @@ module Bolognese
476
476
  return nil unless orcid.present?
477
477
 
478
478
  # turn ORCID ID into URL
479
- "http://orcid.org/" + Addressable::URI.encode(orcid)
479
+ "https://orcid.org/" + Addressable::URI.encode(orcid)
480
480
  end
481
481
 
482
482
  def normalize_ids(ids: nil, relation_type: nil)
@@ -548,35 +548,42 @@ module Bolognese
548
548
  map_hash_keys(element: element, mapping: mapping)
549
549
  end
550
550
 
551
+ def to_schema_org_creators(element)
552
+ element = Array.wrap(element).map do |c|
553
+ c["affiliation"] = { "@type" => "Organization", "name" => c["affiliation"] } if c["affiliation"].present?
554
+ c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
555
+ c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
556
+ c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
557
+ c.except("nameIdentifiers", "nameType").compact
558
+ end.unwrap
559
+ end
560
+
561
+ def to_schema_org_contributors(element)
562
+ element = Array.wrap(element).map do |c|
563
+ c["affiliation"] = { "@type" => "Organization", "name" => c["affiliation"] } if c["affiliation"].present?
564
+ c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
565
+ c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
566
+ c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
567
+ c.except("nameIdentifiers", "nameType").compact
568
+ end.unwrap
569
+ end
570
+
551
571
  def to_schema_org_container(element, options={})
552
572
  return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))
553
573
 
554
574
  {
555
- "@id" => element["relatedIdentifier"],
575
+ "@id" => element["identifier"],
556
576
  "@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
557
- "name" => element["title"] || options[:container_title] }
577
+ "name" => element["title"] || options[:container_title] }.compact
558
578
  end
559
579
 
560
- def to_schema_org_identifier(element, options={})
561
- ident = {
562
- "@type" => "PropertyValue",
563
- "propertyID" => normalize_doi(element) ? "doi" : "url",
564
- "value" => element }
565
-
566
- if options[:alternate_identifiers].present?
567
- [ident] + Array.wrap(options[:alternate_identifiers]).map do |ai|
568
- if ai["alternateIdentifierType"].to_s.downcase == "url"
569
- ai["alternateIdentifier"]
570
- else
571
- {
572
- "@type" => "PropertyValue",
573
- "propertyID" => ai["alternateIdentifierType"],
574
- "value" => ai["alternateIdentifier"] }
575
- end
576
- end
577
- else
578
- ident
579
- end
580
+ def to_schema_org_identifiers(element, options={})
581
+ Array.wrap(element).map do |ai|
582
+ {
583
+ "@type" => "PropertyValue",
584
+ "propertyID" => ai["identifierType"],
585
+ "value" => ai["identifier"] }
586
+ end.unwrap
580
587
  end
581
588
 
582
589
  def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
@@ -611,19 +618,20 @@ module Bolognese
611
618
  def to_schema_org_spatial_coverage(geo_location)
612
619
  return nil unless geo_location.present?
613
620
 
614
- Array.wrap(geo_location).map do |gl|
621
+ Array.wrap(geo_location).reduce([]) do |sum, gl|
615
622
  if gl.fetch("geoLocationPoint", nil)
616
- {
623
+ sum << {
617
624
  "@type" => "Place",
618
625
  "geo" => {
619
626
  "@type" => "GeoCoordinates",
620
627
  "address" => gl["geoLocationPlace"],
621
628
  "latitude" => gl.dig("geoLocationPoint", "pointLatitude"),
622
- "longitude" => gl.dig("geoLocationPoint", "pointLongitude")
623
- }.compact
624
- }
625
- elsif gl.fetch("geoLocationBox", nil)
626
- {
629
+ "longitude" => gl.dig("geoLocationPoint", "pointLongitude") }
630
+ }.compact
631
+ end
632
+
633
+ if gl.fetch("geoLocationBox", nil)
634
+ sum << {
627
635
  "@type" => "Place",
628
636
  "geo" => {
629
637
  "@type" => "GeoShape",
@@ -631,11 +639,33 @@ module Bolognese
631
639
  "box" => [gl.dig("geoLocationBox", "southBoundLatitude"),
632
640
  gl.dig("geoLocationBox", "westBoundLongitude"),
633
641
  gl.dig("geoLocationBox", "northBoundLatitude"),
634
- gl.dig("geoLocationBox", "eastBoundLongitude")].join(" ")
635
- }.compact
642
+ gl.dig("geoLocationBox", "eastBoundLongitude")].compact.join(" ").presence }.compact
643
+ }.compact
644
+ end
645
+
646
+ if gl.fetch("geoLocationPolygon", nil)
647
+ sum << {
648
+ "@type" => "Place",
649
+ "geo" => {
650
+ "@type" => "GeoShape",
651
+ "address" => gl["geoLocationPlace"],
652
+ "polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
653
+ [glp.dig("polygonPoint", "pointLongitude"), glp.dig("polygonPoint", "pointLatitude")].compact
654
+ end.compact }
636
655
  }
637
656
  end
638
- end.compact.unwrap
657
+
658
+ if gl.fetch("geoLocationPlace", nil) && !gl.fetch("geoLocationPoint", nil) && !gl.fetch("geoLocationBox", nil) && !gl.fetch("geoLocationPolygon", nil)
659
+ sum << {
660
+ "@type" => "Place",
661
+ "geo" => {
662
+ "@type" => "GeoCoordinates",
663
+ "address" => gl["geoLocationPlace"] }
664
+ }.compact
665
+ end
666
+
667
+ sum
668
+ end.unwrap
639
669
  end
640
670
 
641
671
  def from_schema_org(element)
@@ -644,6 +674,22 @@ module Bolognese
644
674
  map_hash_keys(element: element, mapping: mapping)
645
675
  end
646
676
 
677
+ def from_schema_org_creators(element)
678
+ element = Array.wrap(element).map do |c|
679
+ c["nameIdentifier"] = [{ "__content__" => c["@id"], "nameIdentifierScheme" => "ORCID" }] if normalize_orcid(c["@id"])
680
+ c["creatorName"] = { "nameType" => c["@type"].present? ? c["@type"].titleize + "al" : nil, "__content__" => c["name"] }.compact
681
+ c.except("@id", "@type", "name")
682
+ end
683
+ end
684
+
685
+ def from_schema_org_contributors(element)
686
+ element = Array.wrap(element).map do |c|
687
+ c["nameIdentifier"] = [{ "__content__" => c["@id"], "nameIdentifierScheme" => "ORCID" }] if normalize_orcid(c["@id"])
688
+ c["contributorName"] = { "nameType" => c["@type"].present? ? c["@type"].titleize + "al" : nil, "__content__" => c["name"] }.compact
689
+ c.except("@id", "@type", "name")
690
+ end
691
+ end
692
+
647
693
  def map_hash_keys(element: nil, mapping: nil)
648
694
  Array.wrap(element).map do |a|
649
695
  a.map {|k, v| [mapping.fetch(k, k), v] }.reduce({}) do |hsh, (k, v)|
@@ -685,7 +731,7 @@ module Bolognese
685
731
  a["family"] = a["familyName"]
686
732
  a["given"] = a["givenName"]
687
733
  a["literal"] = a["name"] unless a["familyName"].present?
688
- a.except("type", "@type", "id", "@id", "name", "familyName", "givenName").compact
734
+ a.except("nameType", "type", "@type", "id", "@id", "name", "familyName", "givenName", "affiliation", "nameIdentifiers").compact
689
735
  end.presence
690
736
  end
691
737
 
@@ -816,7 +862,7 @@ module Bolognese
816
862
  end
817
863
 
818
864
  def get_date(dates, date_type)
819
- dd = dates.find { |d| d["dateType"] == date_type } || {}
865
+ dd = Array.wrap(dates).find { |d| d["dateType"] == date_type } || {}
820
866
  dd.fetch("date", nil)
821
867
  end
822
868
 
@@ -824,6 +870,60 @@ module Bolognese
824
870
  contributor.select { |c| c["contributorType"] == contributor_type }
825
871
  end
826
872
 
873
+ def get_identifier(identifiers, identifier_type)
874
+ id = Array.wrap(identifiers).find { |i| i["identifierType"] == identifier_type } || {}
875
+ id.fetch("identifier", nil)
876
+ end
877
+
878
+ def get_identifier_type(identifier_type)
879
+ identifierTypes = {
880
+ "ark" => "ARK",
881
+ "arxiv" => "arXiv",
882
+ "bibcode" => "bibcode",
883
+ "doi" => "DOI",
884
+ "ean13" => "EAN13",
885
+ "eissn" => "EISSN",
886
+ "handle" => "Handle",
887
+ "igsn" => "IGSN",
888
+ "isbn" => "ISBN",
889
+ "issn" => "ISSN",
890
+ "istc" => "ISTC",
891
+ "lissn" => "LISSN",
892
+ "lsid" => "LSID",
893
+ "pmid" => "PMID",
894
+ "purl" => "PURL",
895
+ "upc" => "UPC",
896
+ "url" => "URL",
897
+ "urn" => "URN",
898
+ "md5" => "md5",
899
+ "minid" => "minid",
900
+ "dataguid" => "dataguid"
901
+ }
902
+
903
+ identifierTypes[identifier_type.downcase] || identifier_type
904
+ end
905
+
906
+ def get_series_information(str)
907
+ return {} unless str.present?
908
+
909
+ str = str.split(",").map(&:strip)
910
+
911
+ title = str.first
912
+ volume_issue = str.length > 2 ? str[1].rpartition(/\(([^)]+)\)/) : nil
913
+ volume = volume_issue.present? ? volume_issue[0].presence || volume_issue[2].presence : nil
914
+ issue = volume_issue.present? ? volume_issue[1][1...-1].presence : nil
915
+ pages = str.length > 1 ? str.last : nil
916
+ first_page = pages.present? ? pages.split("-").map(&:strip).first : nil
917
+ last_page = pages.present? ? pages.split("-").map(&:strip).last : nil
918
+
919
+ {
920
+ "title" => title,
921
+ "volume" => volume,
922
+ "issue" => issue,
923
+ "firstPage" => first_page,
924
+ "lastPage" => last_page }.compact
925
+ end
926
+
827
927
  def jsonlint(json)
828
928
  return ["No JSON provided"] unless json.present?
829
929
 
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "1.0.29"
2
+ VERSION = "1.0.30"
3
3
  end
@@ -6,19 +6,21 @@ module Bolognese
6
6
  def bibtex
7
7
  return nil unless valid?
8
8
 
9
+ pages = container.to_h["firstPage"].present? ? [container["firstPage"], container["lastPage"]].join("-") : nil
10
+
9
11
  bib = {
10
12
  bibtex_type: types["bibtex"].presence || "misc",
11
- bibtex_key: identifier,
13
+ bibtex_key: id,
12
14
  doi: doi,
13
15
  url: url,
14
16
  author: authors_as_string(creators),
15
17
  keywords: subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.join(", ") : nil,
16
18
  language: language,
17
19
  title: parse_attributes(titles, content: "title", first: true),
18
- journal: periodical && periodical["title"],
19
- volume: volume,
20
- issue: issue,
21
- pages: [first_page, last_page].compact.join("-").presence,
20
+ journal: container && container["title"],
21
+ volume: container.to_h["volume"],
22
+ issue: container.to_h["issue"],
23
+ pages: pages,
22
24
  publisher: publisher,
23
25
  year: publication_year
24
26
  }.compact
@@ -6,7 +6,7 @@ module Bolognese
6
6
  def citation
7
7
  cp = CiteProc::Processor.new(style: style, locale: locale, format: 'html')
8
8
  cp.import Array.wrap(citeproc_hsh)
9
- bibliography = cp.render :bibliography, id: identifier
9
+ bibliography = cp.render :bibliography, id: id
10
10
  bibliography.first
11
11
  end
12
12
  end
@@ -9,8 +9,8 @@ module Bolognese
9
9
  hsh = {
10
10
  "@context" => id.present? ? "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" : nil,
11
11
  "@type" => types["schemaOrg"],
12
- "@id" => identifier,
13
- "identifier" => identifier,
12
+ "@id" => id,
13
+ "identifier" => to_schema_org_identifiers(identifiers),
14
14
  "codeRepository" => url,
15
15
  "title" => parse_attributes(titles, content: "title", first: true),
16
16
  "agents" => creators,
@@ -4,40 +4,7 @@ module Bolognese
4
4
  module Writers
5
5
  module CrossciteWriter
6
6
  def crosscite
7
- hsh = {
8
- "id" => identifier,
9
- "doi" => doi,
10
- "url" => url,
11
- "types" => types,
12
- "creators" => creators,
13
- "titles" => titles,
14
- "publisher" => publisher,
15
- "periodical" => periodical,
16
- "subjects" => subjects,
17
- "contributors" => contributors,
18
- "dates" => dates,
19
- "publication_year" => publication_year,
20
- "language" => language,
21
- "alternate_identifiers" => alternate_identifiers,
22
- "sizes" => sizes,
23
- "formats" => formats,
24
- "version" => version_info,
25
- "rights_list" => rights_list,
26
- "descriptions" => descriptions,
27
- "volume" => volume,
28
- "issue" => issue,
29
- "first_page" => first_page,
30
- "last_page" => last_page,
31
- "geo_locations" => geo_locations,
32
- "funding_references" => funding_references,
33
- "related_identifiers" => related_identifiers,
34
- "schema_version" => schema_version,
35
- "provider_id" => provider_id,
36
- "client_id" => client_id,
37
- "agency" => agency,
38
- "state" => state
39
- }.compact
40
- JSON.pretty_generate hsh.presence
7
+ JSON.pretty_generate crosscite_hsh.presence
41
8
  end
42
9
  end
43
10
  end
@@ -4,35 +4,7 @@ module Bolognese
4
4
  module Writers
5
5
  module DataciteJsonWriter
6
6
  def datacite_json
7
- hsh = {
8
- "id" => identifier,
9
- "doi" => doi,
10
- "url" => url,
11
- "creators" => creators,
12
- "titles" => titles,
13
- "publisher" => publisher,
14
- "periodical" => periodical,
15
- "types" => to_datacite_json(types, first: true),
16
- "subjects" => to_datacite_json(subjects),
17
- "contributors" => contributors,
18
- "dates" => to_datacite_json(dates),
19
- "publicationYear" => publication_year,
20
- "language" => language,
21
- "alternateIdentifiers" => to_datacite_json(alternate_identifiers),
22
- "relatedIdentifiers" => to_datacite_json(related_identifiers),
23
- "sizes" => sizes,
24
- "formats" => formats,
25
- "version" => version_info,
26
- "rightsList" => to_datacite_json(rights_list),
27
- "descriptions" => to_datacite_json(descriptions),
28
- "geoLocations" => to_datacite_json(geo_locations),
29
- "fundingReferences" => to_datacite_json(funding_references),
30
- "schemaVersion" => schema_version,
31
- "providerId" => provider_id,
32
- "clientIsd" => client_id,
33
- "agency" => agency
34
- }.compact
35
- JSON.pretty_generate hsh.presence
7
+ JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) } if crosscite_hsh.present?
36
8
  end
37
9
  end
38
10
  end
@@ -18,10 +18,10 @@ module Bolognese
18
18
  insert_source(xml)
19
19
  insert_publisher_name(xml) if publisher.present? && !is_data?
20
20
  insert_publication_date(xml)
21
- insert_volume(xml) if volume.present?
22
- insert_issue(xml) if issue.present?
23
- insert_fpage(xml) if first_page.present?
24
- insert_lpage(xml) if last_page.present?
21
+ insert_volume(xml) if container.to_h["volume"].present?
22
+ insert_issue(xml) if container.to_h["issue"].present?
23
+ insert_fpage(xml) if container.to_h["firstPage"].present?
24
+ insert_lpage(xml) if container.to_h["lastPage"].present?
25
25
  insert_version(xml) if version_info.present?
26
26
  insert_pub_id(xml)
27
27
  end
@@ -77,7 +77,7 @@ module Bolognese
77
77
 
78
78
  def insert_source(xml)
79
79
  if is_article? || is_data? || is_chapter?
80
- xml.source(periodical && periodical["title"] || publisher)
80
+ xml.source(container && container["title"] || publisher)
81
81
  else
82
82
  xml.source(parse_attributes(titles, content: "title", first: true))
83
83
  end
@@ -96,19 +96,19 @@ module Bolognese
96
96
  end
97
97
 
98
98
  def insert_volume(xml)
99
- xml.volume(volume)
99
+ xml.volume(container["volume"])
100
100
  end
101
101
 
102
102
  def insert_issue(xml)
103
- xml.issue(issue)
103
+ xml.issue(container["issue"])
104
104
  end
105
105
 
106
106
  def insert_fpage(xml)
107
- xml.fpage(first_page)
107
+ xml.fpage(container["firstPage"])
108
108
  end
109
109
 
110
110
  def insert_lpage(xml)
111
- xml.lpage(last_page)
111
+ xml.lpage(container["lastPage"])
112
112
  end
113
113
 
114
114
  def insert_version(xml)
@@ -7,7 +7,7 @@ module Bolognese
7
7
  {
8
8
  "TY" => types["ris"],
9
9
  "T1" => parse_attributes(titles, content: "title", first: true),
10
- "T2" => periodical && periodical["title"],
10
+ "T2" => container && container["title"],
11
11
  "AU" => to_ris(creators),
12
12
  "DO" => doi,
13
13
  "UR" => url,
@@ -16,10 +16,10 @@ module Bolognese
16
16
  "PY" => publication_year,
17
17
  "PB" => publisher,
18
18
  "LA" => language,
19
- "VL" => volume,
20
- "IS" => issue,
21
- "SP" => first_page,
22
- "EP" => last_page,
19
+ "VL" => container.to_h["volume"],
20
+ "IS" => container.to_h["issue"],
21
+ "SP" => container.to_h["firstPage"],
22
+ "EP" => container.to_h["lastPage"],
23
23
  "SN" => Array.wrap(related_identifiers).find { |ri| ri["relationType"] == "IsPartOf" }.to_h.fetch("relatedIdentifier", nil),
24
24
  "ER" => ""
25
25
  }.compact.map { |k, v| v.is_a?(Array) ? v.map { |vi| "#{k} - #{vi}" }.join("\r\n") : "#{k} - #{v}" }.join("\r\n")