briard 2.4.1 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.rubocop.yml +144 -620
  5. data/.rubocop_todo.yml +76 -0
  6. data/CHANGELOG.md +22 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +43 -6
  9. data/Rakefile +1 -1
  10. data/{bolognese.gemspec → briard.gemspec} +46 -38
  11. data/lib/briard/array.rb +2 -2
  12. data/lib/briard/author_utils.rb +79 -71
  13. data/lib/briard/cli.rb +12 -13
  14. data/lib/briard/crossref_utils.rb +73 -61
  15. data/lib/briard/datacite_utils.rb +132 -106
  16. data/lib/briard/doi_utils.rb +10 -10
  17. data/lib/briard/metadata.rb +96 -106
  18. data/lib/briard/metadata_utils.rb +87 -78
  19. data/lib/briard/readers/bibtex_reader.rb +65 -65
  20. data/lib/briard/readers/cff_reader.rb +88 -70
  21. data/lib/briard/readers/citeproc_reader.rb +90 -84
  22. data/lib/briard/readers/codemeta_reader.rb +68 -50
  23. data/lib/briard/readers/crosscite_reader.rb +2 -2
  24. data/lib/briard/readers/crossref_reader.rb +249 -210
  25. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  26. data/lib/briard/readers/datacite_reader.rb +225 -189
  27. data/lib/briard/readers/npm_reader.rb +49 -42
  28. data/lib/briard/readers/ris_reader.rb +82 -80
  29. data/lib/briard/readers/schema_org_reader.rb +182 -159
  30. data/lib/briard/string.rb +1 -1
  31. data/lib/briard/utils.rb +4 -4
  32. data/lib/briard/version.rb +3 -1
  33. data/lib/briard/whitelist_scrubber.rb +11 -4
  34. data/lib/briard/writers/bibtex_writer.rb +14 -8
  35. data/lib/briard/writers/cff_writer.rb +33 -26
  36. data/lib/briard/writers/codemeta_writer.rb +19 -15
  37. data/lib/briard/writers/csv_writer.rb +6 -4
  38. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  39. data/lib/briard/writers/jats_writer.rb +33 -28
  40. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  41. data/lib/briard/writers/ris_writer.rb +30 -18
  42. data/lib/briard/writers/turtle_writer.rb +1 -1
  43. data/lib/briard.rb +6 -6
  44. data/rubocop.sarif +0 -0
  45. data/spec/array_spec.rb +5 -5
  46. data/spec/author_utils_spec.rb +151 -132
  47. data/spec/datacite_utils_spec.rb +135 -83
  48. data/spec/doi_utils_spec.rb +168 -164
  49. data/spec/find_from_format_spec.rb +69 -69
  50. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  52. data/spec/metadata_spec.rb +91 -90
  53. data/spec/readers/bibtex_reader_spec.rb +43 -38
  54. data/spec/readers/cff_reader_spec.rb +165 -153
  55. data/spec/readers/citeproc_reader_spec.rb +45 -40
  56. data/spec/readers/codemeta_reader_spec.rb +128 -115
  57. data/spec/readers/crosscite_reader_spec.rb +34 -24
  58. data/spec/readers/crossref_reader_spec.rb +1098 -939
  59. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  60. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  61. data/spec/readers/npm_reader_spec.rb +48 -43
  62. data/spec/readers/ris_reader_spec.rb +53 -47
  63. data/spec/readers/schema_org_reader_spec.rb +329 -267
  64. data/spec/spec_helper.rb +6 -5
  65. data/spec/utils_spec.rb +371 -347
  66. data/spec/writers/bibtex_writer_spec.rb +143 -143
  67. data/spec/writers/cff_writer_spec.rb +96 -90
  68. data/spec/writers/citation_writer_spec.rb +34 -33
  69. data/spec/writers/citeproc_writer_spec.rb +226 -224
  70. data/spec/writers/codemeta_writer_spec.rb +18 -16
  71. data/spec/writers/crosscite_writer_spec.rb +91 -73
  72. data/spec/writers/crossref_writer_spec.rb +99 -91
  73. data/spec/writers/csv_writer_spec.rb +70 -70
  74. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  75. data/spec/writers/datacite_writer_spec.rb +417 -322
  76. data/spec/writers/jats_writer_spec.rb +177 -161
  77. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  78. data/spec/writers/ris_writer_spec.rb +162 -162
  79. data/spec/writers/turtle_writer_spec.rb +47 -47
  80. metadata +250 -160
  81. data/.github/workflows/release.yml +0 -47
data/lib/briard/readers/schema_org_reader.rb CHANGED
@@ -4,281 +4,304 @@ module Briard
  module Readers
  module SchemaOrgReader
  SO_TO_DC_RELATION_TYPES = {
- "citation" => "References",
- "isBasedOn" => "IsSupplementedBy",
- "sameAs" => "IsIdenticalTo",
- "isPartOf" => "IsPartOf",
- "hasPart" => "HasPart",
- "isPredecessor" => "IsPreviousVersionOf",
- "isSuccessor" => "IsNewVersionOf"
- }
+ 'citation' => 'References',
+ 'isBasedOn' => 'IsSupplementedBy',
+ 'sameAs' => 'IsIdenticalTo',
+ 'isPartOf' => 'IsPartOf',
+ 'hasPart' => 'HasPart',
+ 'isPredecessor' => 'IsPreviousVersionOf',
+ 'isSuccessor' => 'IsNewVersionOf'
+ }.freeze

  SO_TO_DC_REVERSE_RELATION_TYPES = {
- "citation" => "IsReferencedBy",
- "isBasedOn" => "IsSupplementTo",
- "sameAs" => "IsIdenticalTo",
- "isPartOf" => "HasPart",
- "hasPart" => "IsPartOf",
- "isPredecessor" => "IsNewVersionOf",
- "isSuccessor" => "IsPreviousVersionOf"
- }
-
- def get_schema_org(id: nil, **options)
- return { "string" => nil, "state" => "not_found" } unless id.present?
+ 'citation' => 'IsReferencedBy',
+ 'isBasedOn' => 'IsSupplementTo',
+ 'sameAs' => 'IsIdenticalTo',
+ 'isPartOf' => 'HasPart',
+ 'hasPart' => 'IsPartOf',
+ 'isPredecessor' => 'IsNewVersionOf',
+ 'isSuccessor' => 'IsPreviousVersionOf'
+ }.freeze
+
+ def get_schema_org(id: nil, **_options)
+ return { 'string' => nil, 'state' => 'not_found' } unless id.present?

  url = normalize_id(id)
  response = Maremma.get(url, raw: true)

  # some responses are returned as a hash
- if response.body["data"].is_a?(Hash)
- string = response.body.dig("data", "html", "head", "script", 1, "__content__")
+ if response.body['data'].is_a?(Hash)
+ string = response.body.dig('data', 'html', 'head', 'script', 1, '__content__')
  else
- doc = Nokogiri::XML(response.body.fetch("data", nil), nil, 'UTF-8')
-
+ doc = Nokogiri::XML(response.body.fetch('data', nil), nil, 'UTF-8')
+
  # workaround for xhtml documents
  nodeset = doc.at("script[type='application/ld+json']")
- hsh = JSON.parse(nodeset || "{}")
-
+ hsh = JSON.parse(nodeset || '{}')
+
  # workaround for doi as canonical_url but not included with schema.org
  link = doc.css("link[rel='canonical']")
- hsh.merge!({ "@id" => link[0]["href"] }) if link.present?
+ hsh['@id'] = link[0]['href'] if link.present?

  # workaround if license included but not with schema.org
  license = doc.at("meta[name='DCTERMS.license']")
- hsh.merge!({ "license" => license["content"] }) if license.present?
-
+ hsh['license'] = license['content'] if license.present?
+
  # workaround for html language attribute if no language is set via schema.org
  lang = doc.at('html')['lang']
- hsh.merge!({ "inLanguage" => lang }) if hsh["inLanguage"].blank?
+ hsh['inLanguage'] = lang if hsh['inLanguage'].blank?

  # workaround if issn not included with schema.org
  name = doc.at("meta[property='og:site_name']")
  issn = doc.at("meta[name='citation_issn']")
- hsh.merge!({ "isPartOf" => { "name" => name ? name["content"] : nil, "issn" => issn ? issn["content"] : nil }.compact })
+ hsh['isPartOf'] = { 'name' => name ? name['content'] : nil,
+ 'issn' => issn ? issn['content'] : nil }.compact

  string = hsh.to_json if hsh.present?
  end

- { "string" => string }
+ { 'string' => string }
  end

  def read_schema_org(string: nil, **options)
  if string.present?
  errors = jsonlint(string)
- return { "errors" => errors } if errors.present?
+ return { 'errors' => errors } if errors.present?
  end

- read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
+ read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
+ :sandbox, :validate, :ra))

  meta = string.present? ? Maremma.from_json(string) : {}

- identifiers = Array.wrap(meta.fetch("identifier", nil)).map do |r|
+ identifiers = Array.wrap(meta.fetch('identifier', nil)).map do |r|
  r = normalize_id(r) if r.is_a?(String)
- if r.is_a?(String) && !r.start_with?("https://doi.org")
- { "identifierType" => "URL", "identifier" => r }
+ if r.is_a?(String) && URI(r).host != 'doi.org'
+ { 'identifierType' => 'URL', 'identifier' => r }
  elsif r.is_a?(Hash)
- { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
+ { 'identifierType' => get_identifier_type(r['propertyID']), 'identifier' => r['value'] }
  end
  end.compact.uniq

  id = options[:doi]
- id = meta.fetch("@id", nil) if id.blank? && meta.fetch("@id", nil).to_s.start_with?("https://doi.org")
- id = meta.fetch("identifier", nil) if id.blank? # && meta.fetch("identifier", nil).to_s.start_with?("https://doi.org")#&& meta.fetch("@", nil).start_with?("https://doi.org")
+ id = meta.fetch('@id', nil) if id.blank? && URI(meta.fetch('@id', '')).host == 'doi.org'
+ id = meta.fetch('identifier', nil) if id.blank?
  id = normalize_id(id)

- schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
+ schema_org = meta.fetch('@type', nil) && meta.fetch('@type').camelcase
  resource_type_general = Briard::Utils::SO_TO_DC_TRANSLATIONS[schema_org]
  types = {
- "resourceTypeGeneral" => resource_type_general,
- "resourceType" => meta.fetch("additionalType", nil),
- "schemaOrg" => schema_org,
- "citeproc" => Briard::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article-journal",
- "bibtex" => Briard::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
- "ris" => Briard::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
+ 'resourceTypeGeneral' => resource_type_general,
+ 'resourceType' => meta.fetch('additionalType', nil),
+ 'schemaOrg' => schema_org,
+ 'citeproc' => Briard::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || 'article-journal',
+ 'bibtex' => Briard::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || 'misc',
+ 'ris' => Briard::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || 'GEN'
  }.compact
- authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
+ authors = meta.fetch('author', nil) || meta.fetch('creator', nil)
  # Authors should be an object, if it's just a plain string don't try and parse it.
- if not authors.is_a?(String)
+ unless authors.is_a?(String)
  creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
  end
- contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
- publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
+ contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch('editor',
+ nil))))
+ publisher = parse_attributes(meta.fetch('publisher', nil), content: 'name', first: true)

- ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
+ ct = schema_org == 'Dataset' ? 'includedInDataCatalog' : 'Periodical'
  container = if meta.fetch(ct, nil).present?
- url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
-
- {
- "type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical",
- "title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
- "identifier" => url,
- "identifierType" => url.present? ? "URL" : nil,
- "volume" => meta.fetch("volumeNumber", nil),
- "issue" => meta.fetch("issueNumber", nil),
- "firstPage" => meta.fetch("pageStart", nil),
- "lastPage" => meta.fetch("pageEnd", nil)
- }.compact
- elsif ["BlogPosting", "Article"].include?(schema_org)
- issn = meta.dig("isPartOf", "issn")
-
- {
- "type" => "Blog",
- "title" => meta.dig("isPartOf", "name"),
- "identifier" => issn,
- "identifierType" => issn.present? ? "ISSN" : nil
- }.compact
- else
- {}
- end
+ url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: 'url',
+ first: true)
+
+ {
+ 'type' => schema_org == 'Dataset' ? 'DataRepository' : 'Periodical',
+ 'title' => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: 'name',
+ first: true),
+ 'identifier' => url,
+ 'identifierType' => url.present? ? 'URL' : nil,
+ 'volume' => meta.fetch('volumeNumber', nil),
+ 'issue' => meta.fetch('issueNumber', nil),
+ 'firstPage' => meta.fetch('pageStart', nil),
+ 'lastPage' => meta.fetch('pageEnd', nil)
+ }.compact
+ elsif %w[BlogPosting Article].include?(schema_org)
+ issn = meta.dig('isPartOf', 'issn')
+
+ {
+ 'type' => 'Blog',
+ 'title' => meta.dig('isPartOf', 'name'),
+ 'identifier' => issn,
+ 'identifierType' => issn.present? ? 'ISSN' : nil
+ }.compact
+ else
+ {}
+ end

  related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
- Array.wrap(schema_org_is_part_of(meta)) +
- Array.wrap(schema_org_has_part(meta)) +
- Array.wrap(schema_org_is_previous_version_of(meta)) +
- Array.wrap(schema_org_is_new_version_of(meta)) +
- Array.wrap(schema_org_references(meta)) +
- Array.wrap(schema_org_is_referenced_by(meta)) +
- Array.wrap(schema_org_is_supplement_to(meta)) +
- Array.wrap(schema_org_is_supplemented_by(meta))
-
- rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
+ Array.wrap(schema_org_is_part_of(meta)) +
+ Array.wrap(schema_org_has_part(meta)) +
+ Array.wrap(schema_org_is_previous_version_of(meta)) +
+ Array.wrap(schema_org_is_new_version_of(meta)) +
+ Array.wrap(schema_org_references(meta)) +
+ Array.wrap(schema_org_is_referenced_by(meta)) +
+ Array.wrap(schema_org_is_supplement_to(meta)) +
+ Array.wrap(schema_org_is_supplemented_by(meta))
+
+ rights_list = Array.wrap(meta.fetch('license', nil)).compact.map do |rl|
  if rl.is_a?(String)
- hsh_to_spdx("rightsURI" => rl)
+ hsh_to_spdx('rightsURI' => rl)
  else
- hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"])
+ hsh_to_spdx('__content__' => rl['name'], 'rightsURI' => rl['id'])
  end
  end

- funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
- if fr["@id"].present?
+ funding_references = Array.wrap(meta.fetch('funder', nil)).compact.map do |fr|
+ if fr['@id'].present?
  {
- "funderName" => fr["name"],
- "funderIdentifier" => fr["@id"],
- "funderIdentifierType" => fr["@id"].to_s.start_with?("https://doi.org/10.13039") ? "Crossref Funder ID" : "Other" }.compact
+ 'funderName' => fr['name'],
+ 'funderIdentifier' => fr['@id'],
+ 'funderIdentifierType' => fr['@id'].to_s.start_with?('https://doi.org/10.13039') ? 'Crossref Funder ID' : 'Other'
+ }.compact
  else
- {
- "funderName" => fr["name"] }.compact
+ { 'funderName' => fr['name'] }.compact
  end
  end

  # strip milliseconds from iso8601, as edtf library doesn't handle them
  dates = []
- dates << { "date" => strip_milliseconds(meta.fetch("datePublished")), "dateType" => "Issued" } if Date.edtf(strip_milliseconds(meta.fetch("datePublished", nil))).present?
- dates << { "date" => strip_milliseconds(meta.fetch("dateCreated")), "dateType" => "Created" } if Date.edtf(strip_milliseconds(meta.fetch("dateCreated", nil))).present?
- dates << { "date" => strip_milliseconds(meta.fetch("dateModified")), "dateType" => "Updated" } if Date.edtf(strip_milliseconds(meta.fetch("dateModified", nil))).present?
- publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
-
- if meta.fetch("inLanguage", nil).is_a?(String)
- language = meta.fetch("inLanguage")
- elsif meta.fetch("inLanguage", nil).is_a?(Object)
- language = meta.dig("inLanguage", 'alternateName') || meta.dig("inLanguage", 'name')
- else
- language = nil
+ if Date.edtf(strip_milliseconds(meta.fetch('datePublished', nil))).present?
+ dates << { 'date' => strip_milliseconds(meta.fetch('datePublished')),
+ 'dateType' => 'Issued' }
  end
-
- state = meta.present? || read_options.present? ? "findable" : "not_found"
- geo_locations = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
- if gl.dig("geo", "box")
- s, w, n, e = gl.dig("geo", "box").split(" ", 4)
+ if Date.edtf(strip_milliseconds(meta.fetch('dateCreated', nil))).present?
+ dates << { 'date' => strip_milliseconds(meta.fetch('dateCreated')),
+ 'dateType' => 'Created' }
+ end
+ if Date.edtf(strip_milliseconds(meta.fetch('dateModified', nil))).present?
+ dates << { 'date' => strip_milliseconds(meta.fetch('dateModified')),
+ 'dateType' => 'Updated' }
+ end
+ publication_year = meta.fetch('datePublished')[0..3] if meta.fetch('datePublished',
+ nil).present?
+
+ language = case meta.fetch('inLanguage', nil)
+ when String
+ meta.fetch('inLanguage')
+ when Object
+ meta.dig('inLanguage', 'alternateName') || meta.dig('inLanguage', 'name')
+ end
+
+ state = meta.present? || read_options.present? ? 'findable' : 'not_found'
+ geo_locations = Array.wrap(meta.fetch('spatialCoverage', nil)).map do |gl|
+ if gl.dig('geo', 'box')
+ s, w, n, e = gl.dig('geo', 'box').split(' ', 4)
  geo_location_box = {
- "westBoundLongitude" => w,
- "eastBoundLongitude" => e,
- "southBoundLatitude" => s,
- "northBoundLatitude" => n,
+ 'westBoundLongitude' => w,
+ 'eastBoundLongitude' => e,
+ 'southBoundLatitude' => s,
+ 'northBoundLatitude' => n
  }.compact.presence
  else
  geo_location_box = nil
  end
- geo_location_point = { "pointLongitude" => gl.dig("geo", "longitude"), "pointLatitude" => gl.dig("geo", "latitude") }.compact.presence
+ geo_location_point = { 'pointLongitude' => gl.dig('geo', 'longitude'),
+ 'pointLatitude' => gl.dig('geo', 'latitude') }.compact.presence

  {
- "geoLocationPlace" => gl.dig("geo", "address"),
- "geoLocationPoint" => geo_location_point,
- "geoLocationBox" => geo_location_box
+ 'geoLocationPlace' => gl.dig('geo', 'address'),
+ 'geoLocationPoint' => geo_location_point,
+ 'geoLocationBox' => geo_location_box
  }.compact
  end

  # handle keywords as array and as comma-separated string
- subjects = meta.fetch("keywords", nil)
- subjects = subjects.to_s.downcase.split(", ") if subjects.is_a?(String)
+ subjects = meta.fetch('keywords', nil)
+ subjects = subjects.to_s.downcase.split(', ') if subjects.is_a?(String)
  subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
  sum += name_to_fos(subject)
  sum
  end

- { "id" => id,
- "types" => types,
- "doi" => validate_doi(id),
- "identifiers" => identifiers,
- "url" => normalize_id(meta.fetch("url", nil)),
- "content_url" => Array.wrap(meta.fetch("contentUrl", nil)),
- "sizes" => Array.wrap(meta.fetch("contenSize", nil)).presence,
- "formats" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)),
- "titles" => meta.fetch("name", nil).present? ? [{ "title" => meta.fetch("name", nil) }] : [{ "title" => meta.fetch("headline", nil) }],
- "creators" => creators,
- "contributors" => contributors,
- "publisher" => publisher,
- "agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
- "container" => container,
- "related_identifiers" => related_identifiers,
- "publication_year" => publication_year,
- "dates" => dates,
- "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
- "rights_list" => rights_list,
- "version_info" => meta.fetch("version", nil).to_s.presence,
- "subjects" => subjects,
- "language" => language,
- "state" => state,
- "schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
- "funding_references" => funding_references,
- "geo_locations" => geo_locations
- }.merge(read_options)
+ { 'id' => id,
+ 'types' => types,
+ 'doi' => validate_doi(id),
+ 'identifiers' => identifiers,
+ 'url' => normalize_id(meta.fetch('url', nil)),
+ 'content_url' => Array.wrap(meta.fetch('contentUrl', nil)),
+ 'sizes' => Array.wrap(meta.fetch('contenSize', nil)).presence,
+ 'formats' => Array.wrap(meta.fetch('encodingFormat',
+ nil) || meta.fetch('fileFormat', nil)),
+ 'titles' => if meta.fetch('name', nil).present?
+ [{ 'title' => meta.fetch('name', nil) }]
+ else
+ [{ 'title' => meta.fetch('headline', nil) }]
+ end,
+ 'creators' => creators,
+ 'contributors' => contributors,
+ 'publisher' => publisher,
+ 'agency' => parse_attributes(meta.fetch('provider', nil), content: 'name', first: true),
+ 'container' => container,
+ 'related_identifiers' => related_identifiers,
+ 'publication_year' => publication_year,
+ 'dates' => dates,
+ 'descriptions' => if meta.fetch('description', nil).present?
+ [{ 'description' => sanitize(meta.fetch('description')),
+ 'descriptionType' => 'Abstract' }]
+ end,
+ 'rights_list' => rights_list,
+ 'version_info' => meta.fetch('version', nil).to_s.presence,
+ 'subjects' => subjects,
+ 'language' => language,
+ 'state' => state,
+ 'schema_version' => meta.fetch('schemaVersion', nil).to_s.presence,
+ 'funding_references' => funding_references,
+ 'geo_locations' => geo_locations }.merge(read_options)
  end

  def schema_org_related_identifier(meta, relation_type: nil)
- normalize_ids(ids: meta.fetch(relation_type, nil), relation_type: SO_TO_DC_RELATION_TYPES[relation_type])
+ normalize_ids(ids: meta.fetch(relation_type, nil),
+ relation_type: SO_TO_DC_RELATION_TYPES[relation_type])
  end

  def schema_org_reverse_related_identifier(meta, relation_type: nil)
- normalize_ids(ids: meta.dig("@reverse", relation_type), relation_type: SO_TO_DC_REVERSE_RELATION_TYPES[relation_type])
+ normalize_ids(ids: meta.dig('@reverse', relation_type),
+ relation_type: SO_TO_DC_REVERSE_RELATION_TYPES[relation_type])
  end

  def schema_org_is_identical_to(meta)
- schema_org_related_identifier(meta, relation_type: "sameAs")
+ schema_org_related_identifier(meta, relation_type: 'sameAs')
  end

  def schema_org_is_part_of(meta)
- schema_org_related_identifier(meta, relation_type: "isPartOf")
+ schema_org_related_identifier(meta, relation_type: 'isPartOf')
  end

  def schema_org_has_part(meta)
- schema_org_related_identifier(meta, relation_type: "hasPart")
+ schema_org_related_identifier(meta, relation_type: 'hasPart')
  end

  def schema_org_is_previous_version_of(meta)
- schema_org_related_identifier(meta, relation_type: "PredecessorOf")
+ schema_org_related_identifier(meta, relation_type: 'PredecessorOf')
  end

  def schema_org_is_new_version_of(meta)
- schema_org_related_identifier(meta, relation_type: "SuccessorOf")
+ schema_org_related_identifier(meta, relation_type: 'SuccessorOf')
  end

  def schema_org_references(meta)
- schema_org_related_identifier(meta, relation_type: "citation")
+ schema_org_related_identifier(meta, relation_type: 'citation')
  end

  def schema_org_is_referenced_by(meta)
- schema_org_reverse_related_identifier(meta, relation_type: "citation")
+ schema_org_reverse_related_identifier(meta, relation_type: 'citation')
  end

  def schema_org_is_supplement_to(meta)
- schema_org_reverse_related_identifier(meta, relation_type: "isBasedOn")
+ schema_org_reverse_related_identifier(meta, relation_type: 'isBasedOn')
  end

  def schema_org_is_supplemented_by(meta)
- schema_org_related_identifier(meta, relation_type: "isBasedOn")
+ schema_org_related_identifier(meta, relation_type: 'isBasedOn')
  end
-
  end
  end
  end
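
Note on the identifier handling above: read_schema_org now classifies identifiers with URI(r).host != 'doi.org' instead of the old prefix test !r.start_with?("https://doi.org"). A minimal Ruby sketch of the difference, with contrived hostnames; normalize_id runs first in the reader, which should keep malformed URIs (on which URI() raises) out of this check:

require 'uri'

ids = [
  'https://doi.org/10.5438/4k3m-nyvg', # both versions agree: a doi.org URL
  'https://doi.orgx.example/record/1', # hypothetical host; the old prefix test matched it
  'https://example.org/record/1'       # both versions agree: a plain URL identifier
]

ids.each do |r|
  old_is_url = !r.start_with?('https://doi.org') # 2.4.1 check
  new_is_url = URI(r).host != 'doi.org'          # 2.6.0 check
  puts format('%-36s old: %-5s new: %s', r, old_is_url, new_is_url)
end
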
data/lib/briard/string.rb CHANGED
@@ -2,6 +2,6 @@

  class String
  def my_titleize
- self.gsub(/\b(['’]?[a-z])/) { "#{$1.capitalize}" }
+ gsub(/\b(['’]?[a-z])/) { ::Regexp.last_match(1).capitalize.to_s }
  end
  end
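
The ['’]? group makes an apostrophe-plus-letter pair a single match, and String#capitalize upcases only the first character of the match, so "'s".capitalize comes back as "'s" and letters after an apostrophe stay lowercase. A quick sketch, assuming the gem loads this monkey-patch:

require 'briard' # assumed to load the String#my_titleize patch shown above

puts 'speed of light'.my_titleize    # => "Speed Of Light"
puts "it's a dog's life".my_titleize # => "It's A Dog's Life", not "It'S A Dog'S Life"
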
data/lib/briard/utils.rb CHANGED
@@ -500,7 +500,7 @@ module Briard
  def find_from_format_by_id(id)
  id = normalize_id(id)

- if /\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(id)
+ if /\A(?:(http|https):\/(\/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(id)
  ra = get_doi_ra(id)
  %w(DataCite Crossref mEDRA KISTI JaLC OP).include?(ra) ? ra.downcase : nil
  elsif /\A(?:(http|https):\/(\/)?orcid\.org\/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z/.match(id)
@@ -537,7 +537,7 @@ module Briard
  "datacite"
  elsif options[:ext] == ".cff"
  "cff"
- elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
+ elsif options[:ext] == ".json" && URI(Maremma.from_json(string).to_h.fetch("@context", "")).host == "schema.org"
  "schema_org"
  elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
  "codemeta"
@@ -555,7 +555,7 @@ module Briard
  "crossref"
  elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
  "datacite"
- elsif Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
+ elsif URI(Maremma.from_json(string).to_h.fetch("@context", "")).host == "schema.org"
  "schema_org"
  elsif Maremma.from_json(string).to_h.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
  "codemeta"
@@ -940,7 +940,7 @@ module Briard
  end

  # alternatively find the nameIdentifier in the sameAs attribute
- c["@id"] = c["sameAs"].first if Array(c["sameAs"]).find { |item| item.start_with?("https://orcid.org") }
+ c["@id"] = c["sameAs"].first if Array(c["sameAs"]).find { |item| URI(item).host == "orcid.org" }

  c["nameIdentifier"] = [{ "__content__" => c["@id"], "nameIdentifierScheme" => "ORCID", "schemeUri" => "https://orcid.org" }] if normalize_orcid(c["@id"])
  c["@type"] = c["@type"].find { |t| %w(Person Organization).include?(t) } if c["@type"].is_a?(Array)
data/lib/briard/version.rb CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module Briard
- VERSION = "2.4.1"
+ VERSION = '2.6.0'
  end
data/lib/briard/whitelist_scrubber.rb CHANGED
@@ -4,7 +4,7 @@

  module Briard
  class WhitelistScrubber < Loofah::Scrubber
- def initialize(options={})
+ def initialize(options = {})
  @direction = :bottom_up
  @tags = options[:tags]
  @attributes = options[:attributes]
@@ -12,6 +12,7 @@ module Briard

  def scrub(node)
  scrub_node_attributes(node) and return CONTINUE if node_allowed?(node)
+
  node.before node.children
  node.remove
  end
@@ -19,14 +20,17 @@ module Briard
  private

  def scrub_node_attributes(node)
- fallback_scrub_node_attributes(node) and return true unless @attributes.present? && @attributes.respond_to?(:include?)
+ unless @attributes.present? && @attributes.respond_to?(:include?)
+ fallback_scrub_node_attributes(node) and return true
+ end
+
  node.attribute_nodes.each do |attr_node|
  attr_node.remove unless @attributes.include?(attr_node.name)
  end
  end

  def allowed_not_element_node_types
- [ Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE ]
+ [Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE]
  end

  def fallback_scrub_node_attributes(node)
@@ -38,9 +42,12 @@ module Briard
  end

  def node_allowed?(node)
- return fallback_allowed_element_detection(node) unless @tags.present? && @tags.respond_to?(:include?)
+ unless @tags.present? && @tags.respond_to?(:include?)
+ return fallback_allowed_element_detection(node)
+ end
  return true if allowed_not_element_node_types.include?(node.type)
  return false unless node.type == Nokogiri::XML::Node::ELEMENT_NODE
+
  @tags.include? node.name
  end
  end
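
The scrubber's behavior is unchanged by the style rewrite above: whitelisted nodes keep only whitelisted attributes, and any other element is unwrapped in place (node.before node.children, then node.remove). A hedged usage sketch with Loofah; the tag and attribute lists here are hypothetical, not necessarily the defaults the gem's sanitize helper passes in:

require 'loofah'
require 'briard'

scrubber = Briard::WhitelistScrubber.new(tags: %w[b i sub sup], attributes: %w[class])
html = '<p class="x">Kept <b class="y">bold</b>, <a href="/y">link text</a></p>'

# p and a are not in the tag list, so they are unwrapped and their children kept:
puts Loofah.fragment(html).scrub!(scrubber).to_s
# => Kept <b class="y">bold</b>, link text
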
data/lib/briard/writers/bibtex_writer.rb CHANGED
@@ -6,24 +6,30 @@ module Briard
  def bibtex
  return nil unless valid?

- pages = container.to_h["firstPage"].present? ? [container["firstPage"], container["lastPage"]].compact.join("-") : nil
+ pages = if container.to_h['firstPage'].present?
+ [container['firstPage'], container['lastPage']].compact.join('-')
+ end

  bib = {
- bibtex_type: types["bibtex"].presence || "misc",
+ bibtex_type: types['bibtex'].presence || 'misc',
  bibtex_key: normalize_doi(doi),
  doi: doi,
  url: url,
  author: authors_as_string(creators),
- keywords: subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.join(", ") : nil,
+ keywords: if subjects.present?
+ Array.wrap(subjects).map do |k|
+ parse_attributes(k, content: 'subject', first: true)
+ end.join(', ')
+ end,
  language: language,
- title: parse_attributes(titles, content: "title", first: true),
- journal: container && container["title"],
- volume: container.to_h["volume"],
- issue: container.to_h["issue"],
+ title: parse_attributes(titles, content: 'title', first: true),
+ journal: container && container['title'],
+ volume: container.to_h['volume'],
+ issue: container.to_h['issue'],
  pages: pages,
  publisher: publisher,
  year: publication_year,
- copyright: Array.wrap(rights_list).map { |l| l["rights"] }.first,
+ copyright: Array.wrap(rights_list).map { |l| l['rights'] }.first
  }.compact
  BibTeX::Entry.new(bib).to_s
  end
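
For context, this writer is reached through the metadata class, mirroring the bolognese API the gem was forked from; a sketch of assumed typical usage (the DOI is only an example input, and resolving it requires network access):

require 'briard'

metadata = Briard::Metadata.new(input: '10.7554/eLife.01567')
puts metadata.bibtex
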