relaton-bipm 1.14.1 → 1.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +6 -0
- data/README.adoc +32 -12
- data/grammars/basicdoc.rng +0 -1
- data/grammars/biblio.rng +12 -2
- data/lib/relaton_bipm/bipm_bibliography.rb +12 -310
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +8 -4
- data/lib/relaton_bipm/comment_periond.rb +1 -1
- data/lib/relaton_bipm/data_fetcher.rb +17 -5
- data/lib/relaton_bipm/data_outcomes_parser.rb +68 -29
- data/lib/relaton_bipm/id_parser.rb +134 -0
- data/lib/relaton_bipm/processor.rb +5 -4
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +311 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +176 -0
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm.rb +5 -1
- data/relaton_bipm.gemspec +2 -6
- metadata +26 -80
- data/lib/relaton_bipm/index.rb +0 -68
| @@ -7,13 +7,21 @@ module RelatonBipm | |
| 7 7 | 
             
                  "Statement" => "DECL",
         | 
| 8 8 | 
             
                }.freeze
         | 
| 9 9 |  | 
| 10 | 
            +
                TRANSLATIONS = {
         | 
| 11 | 
            +
                  "Déclaration" => "Declaration",
         | 
| 12 | 
            +
                  "Réunion" => "Meeting",
         | 
| 13 | 
            +
                  "Recommandation" => "Recommendation",
         | 
| 14 | 
            +
                  "Résolution" => "Resolution",
         | 
| 15 | 
            +
                  "Décision" => "Decision",
         | 
| 16 | 
            +
                }.freeze
         | 
| 17 | 
            +
             | 
| 10 18 | 
             
                #
         | 
| 11 19 | 
             
                # Create data-outcomes parser
         | 
| 12 20 | 
             
                #
         | 
| 13 21 | 
             
                # @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
         | 
| 14 22 | 
             
                #
         | 
| 15 23 | 
             
                def initialize(data_fetcher)
         | 
| 16 | 
            -
                  @data_fetcher = data_fetcher
         | 
| 24 | 
            +
                  @data_fetcher = WeakRef.new data_fetcher
         | 
| 17 25 | 
             
                end
         | 
| 18 26 |  | 
| 19 27 | 
             
                #
         | 
| @@ -68,21 +76,11 @@ module RelatonBipm | |
| 68 76 | 
             
                # @param [String] dir output directory
         | 
| 69 77 | 
             
                #
         | 
| 70 78 | 
             
                def fetch_meeting(en_file, body, type, dir) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
         | 
| 71 | 
            -
                  en =  | 
| 72 | 
            -
                  en_md = en | 
| 73 | 
            -
                   | 
| 74 | 
            -
                  fr = RelatonBib.parse_yaml File.read(fr_file, encoding: "UTF-8"), [Date]
         | 
| 75 | 
            -
                  fr_md = fr["metadata"]
         | 
| 76 | 
            -
                  gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
         | 
| 77 | 
            -
                  src_en = gh_src + en_file.split("/")[-3..].unshift("main").join("/")
         | 
| 78 | 
            -
                  src_fr = gh_src + fr_file.split("/")[-3..].unshift("main").join("/")
         | 
| 79 | 
            -
                  src = [
         | 
| 80 | 
            -
                    { type: "src", content: src_en, language: "en", script: "Latn" },
         | 
| 81 | 
            -
                    { type: "src", content: src_fr, language: "fr", script: "Latn" },
         | 
| 82 | 
            -
                  ]
         | 
| 79 | 
            +
                  _, en, fr_file, fr = read_files en_file
         | 
| 80 | 
            +
                  en_md, fr_md, num, part = meeting_md en, fr
         | 
| 81 | 
            +
                  src = meeting_links en_file, fr_file
         | 
| 83 82 |  | 
| 84 | 
            -
                   | 
| 85 | 
            -
                  file = "#{num}.yaml"
         | 
| 83 | 
            +
                  file = "#{num}.#{@data_fetcher.ext}"
         | 
| 86 84 | 
             
                  path = File.join dir, file
         | 
| 87 85 | 
             
                  hash = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
         | 
| 88 86 | 
             
                  if @data_fetcher.files.include?(path) && part
         | 
| @@ -92,13 +90,13 @@ module RelatonBipm | |
| 92 90 | 
             
                    has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
         | 
| 93 91 | 
             
                    has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
         | 
| 94 92 | 
             
                    @data_fetcher.write_file path, has_part_item, warn_duplicate: false
         | 
| 95 | 
            -
                    path = File.join dir, "#{num}-#{part}. | 
| 93 | 
            +
                    path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
         | 
| 96 94 | 
             
                  elsif part
         | 
| 97 95 | 
             
                    hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") }
         | 
| 98 96 | 
             
                    h = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
         | 
| 99 97 | 
             
                    add_part h, part
         | 
| 100 98 | 
             
                    part_item = RelatonBipm::BipmBibliographicItem.new(**h)
         | 
| 101 | 
            -
                    part_item_path = File.join dir, "#{num}-#{part}. | 
| 99 | 
            +
                    part_item_path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
         | 
| 102 100 | 
             
                    @data_fetcher.write_file part_item_path, part_item
         | 
| 103 101 | 
             
                    add_to_index part_item, part_item_path
         | 
| 104 102 | 
             
                    hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)]
         | 
| @@ -111,6 +109,40 @@ module RelatonBipm | |
| 111 109 | 
             
                  fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
         | 
| 112 110 | 
             
                end
         | 
| 113 111 |  | 
| 112 | 
            +
                #
         | 
| 113 | 
            +
                # Read English and French files
         | 
| 114 | 
            +
                #
         | 
| 115 | 
            +
                # @param [String] en_file Path to English file
         | 
| 116 | 
            +
                #
         | 
| 117 | 
            +
                # @return [Array<String, Hash, nil>] English path, English data, French path, French data (path and data are nil when the file is missing)
         | 
| 118 | 
            +
                #
         | 
| 119 | 
            +
                def read_files(en_file)
                  # NOTE(review): `sub` rewrites only the first "en" found in the path; this
                  # assumes the first occurrence is the language marker - confirm with callers.
                  fr_file = en_file.sub "en", "fr"
                  # flat_map unwraps exactly one level, so the result is always the four
                  # values [en_path, en_data, fr_path, fr_data], whatever shape the parsed
                  # data has (a recursive flatten would splice array data into the tuple).
                  [en_file, fr_file].flat_map do |file|
                    # A missing file contributes a nil path and nil data.
                    next [nil, nil] unless File.exist? file

                    [file, RelatonBib.parse_yaml(File.read(file, encoding: "UTF-8"), [Date])]
                  end
                end
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                #
                # Pick the metadata sections and split the English identifier
                #
                # @param [Hash] eng parsed English data (must contain "metadata")
                # @param [Hash, nil] frn parsed French data, nil when absent
                #
                # @return [Array] English metadata, French metadata (or nil),
                #   identifier part before the first "-", part after it (or nil)
                #
                def meeting_md(eng, frn)
                  english = eng["metadata"]
                  french = frn&.dig("metadata")
                  pieces = english["identifier"].to_s.split("-")
                  [english, french, pieces[0], pieces[1]]
                end
         | 
| 135 | 
            +
             | 
| 136 | 
            +
                #
                # Build "src" links to the raw files on GitHub
                #
                # @param [String, nil] en_file path to the English file
                # @param [String, nil] fr_file path to the French file
                #
                # @return [Array<Hash>] one link per given file; nil files are skipped
                #
                def meeting_links(en_file, fr_file)
                  base = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
                  [["en", en_file], ["fr", fr_file]].each_with_object([]) do |(lang, file), links|
                    next unless file

                    # Only the last three path segments are kept, prefixed with "main".
                    tail = file.split("/")[-3..].unshift("main")
                    links << { type: "src", content: base + tail.join("/"), language: lang, script: "Latn" }
                  end
                end
         | 
| 145 | 
            +
             | 
| 114 146 | 
             
                #
         | 
| 115 147 | 
             
                # Parse BIPM resolutions and write them to YAML files
         | 
| 116 148 | 
             
                #
         | 
| @@ -146,9 +178,7 @@ module RelatonBipm | |
| 146 178 | 
             
                    hash[:contributor] = contributors date, args[:body]
         | 
| 147 179 | 
             
                    hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: num
         | 
| 148 180 | 
             
                    item = RelatonBipm::BipmBibliographicItem.new(**hash)
         | 
| 149 | 
            -
                    file = year
         | 
| 150 | 
            -
                    file += "-#{num_justed}" # if num.size < 4
         | 
| 151 | 
            -
                    file += ".yaml"
         | 
| 181 | 
            +
                    file = "#{year}-#{num_justed}.#{@data_fetcher.ext}"
         | 
| 152 182 | 
             
                    out_dir = File.join args[:dir], r["type"].downcase
         | 
| 153 183 | 
             
                    FileUtils.mkdir_p out_dir
         | 
| 154 184 | 
             
                    path = File.join out_dir, file
         | 
| @@ -209,6 +239,9 @@ module RelatonBipm | |
| 209 239 | 
             
                  end
         | 
| 210 240 | 
             
                  key << item.docidentifier.detect { |i| i.language == "fr" }.id
         | 
| 211 241 | 
             
                  @data_fetcher.index[key] = path
         | 
| 242 | 
            +
                  @data_fetcher.index_new.add_or_update key, path
         | 
| 243 | 
            +
                  key2 = Id.new(item.docnumber).normalized_hash
         | 
| 244 | 
            +
                  @data_fetcher.index2.add_or_update key2, path
         | 
| 212 245 | 
             
                end
         | 
| 213 246 |  | 
| 214 247 | 
             
                #
         | 
| @@ -344,8 +377,7 @@ module RelatonBipm | |
| 344 377 | 
             
                  docnum = create_docnum args[:body], args[:type], args[:num], args[:en]["date"]
         | 
| 345 378 | 
             
                  hash = { title: [], type: "proceedings", doctype: args[:type],
         | 
| 346 379 | 
             
                           place: [RelatonBib::Place.new(city: "Paris")] }
         | 
| 347 | 
            -
                  hash[:title]  | 
| 348 | 
            -
                  hash[:title] << create_title(args[:fr]["title"], "fr") if args[:fr]["title"]
         | 
| 380 | 
            +
                  hash[:title] = create_titles args.slice(:en, :fr)
         | 
| 349 381 | 
             
                  hash[:date] = [{ type: "published", on: args[:en]["date"] }]
         | 
| 350 382 | 
             
                  hash[:docid] = create_docids docnum
         | 
| 351 383 | 
             
                  hash[:docnumber] = docnum # .sub(" --", "").sub(/\s\(\d{4}\)/, "")
         | 
| @@ -358,6 +390,12 @@ module RelatonBipm | |
| 358 390 | 
             
                  hash
         | 
| 359 391 | 
             
                end
         | 
| 360 392 |  | 
| 393 | 
            +
                #
                # Create titles for every language that has one
                #
                # @param [Hash] data language key => metadata (or nil)
                #
                # @return [Array] results of create_title, in the order of +data+
                #
                def create_titles(data)
                  titled = data.select { |_lang, md| md && md["title"] }
                  titled.map { |lang, md| create_title(md["title"], lang.to_s) }
                end
         | 
| 398 | 
            +
             | 
| 361 399 | 
             
                #
         | 
| 362 400 | 
             
                # Create links
         | 
| 363 401 | 
             
                #
         | 
| @@ -366,12 +404,13 @@ module RelatonBipm | |
| 366 404 | 
             
                # @return [Array<Hash>] Array of links
         | 
| 367 405 | 
             
                #
         | 
| 368 406 | 
             
                def create_links(**args)
         | 
| 369 | 
            -
                  links = [
         | 
| 370 | 
            -
                     | 
| 371 | 
            -
             | 
| 372 | 
            -
             | 
| 407 | 
            +
                  links = args.slice(:en, :fr).each_with_object([]) do |(lang, md), mem|
         | 
| 408 | 
            +
                    next unless md && md["url"]
         | 
| 409 | 
            +
             | 
| 410 | 
            +
                    mem << { type: "citation", content: md["url"], language: lang.to_s, script: "Latn" }
         | 
| 411 | 
            +
                  end
         | 
| 373 412 | 
             
                  RelatonBib.array(args[:pdf]).each { |pdf| links << { type: "pdf", content: pdf } }
         | 
| 374 | 
            -
                  links += args[:src] if args[:src] | 
| 413 | 
            +
                  links += args[:src] if args[:src]
         | 
| 375 414 | 
             
                  links
         | 
| 376 415 | 
             
                end
         | 
| 377 416 |  | 
| @@ -456,8 +495,8 @@ module RelatonBipm | |
| 456 495 | 
             
                # @return [RelatonBib::DocumentIdentifier] french document ID
         | 
| 457 496 | 
             
                #
         | 
| 458 497 | 
             
                def create_docid_fr(en_id)
         | 
| 459 | 
            -
                  tr =  | 
| 460 | 
            -
                  id = en_id.sub | 
| 498 | 
            +
                  tr = TRANSLATIONS.detect { |_, v| en_id.include? v }
         | 
| 499 | 
            +
                  id = tr ? en_id.sub(tr[1], tr[0]) : en_id
         | 
| 461 500 | 
             
                  make_docid(id: id, type: "BIPM", primary: true, language: "fr", script: "Latn")
         | 
| 462 501 | 
             
                end
         | 
| 463 502 |  | 
| @@ -0,0 +1,134 @@ | |
| 1 | 
            +
            module RelatonBipm
              #
              # BIPM document identifier: parses an identifier string into its parts
              # and compares identifiers by their normalized parts.
              #
              class Id
                #
                # Parslet grammar for outcome, SI Brochure and Metrologia identifiers.
                # Captured parts: :group, :type, :number, :year, :lang.
                #
                class Parser < Parslet::Parser
                  # NOTE(review): in a double-quoted Ruby string "\s" is the space
                  # character, so this presumably matches spaces only - confirm that
                  # tabs need not be accepted.
                  rule(:space) { match("\s").repeat(1) }
                  rule(:space?) { space.maybe }
                  rule(:comma) { str(",") >> space? }
                  rule(:lparen) { str("(") }
                  rule(:rparen) { str(")") }
                  rule(:slash) { str("/") }

                  rule(:delimeter) { str("--") >> space }
                  rule(:delimeter?) { delimeter.maybe }

                  # Two upper-case letters after a comma.
                  rule(:lang) { comma >> match["A-Z"].repeat(2, 2).as(:lang) }
                  rule(:lang?) { lang.maybe }

                  rule(:number) { match["0-9-"].repeat(1).as(:number) >> space? }
                  rule(:number?) { number.maybe }

                  # Accepted number/year layouts: "N (YYYY[, LL])", "YYYY-N" or just "N".
                  rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
                  rule(:year_paren) { lparen >> year >> lang? >> rparen }
                  rule(:num_year) { number? >> year_paren }
                  rule(:year_num) { year >> str("-") >> number }
                  rule(:num_and_year) { num_year | year_num | number }

                  rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
                  rule(:suff) { match["a-zA-Z-"].repeat(1) }
                  rule(:cgmp) { str("CGPM") }
                  rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
                  rule(:cc) { str("CC") >> suff >> sect.maybe }
                  rule(:jc) { str("JC") >> suff }
                  rule(:cec) { str("CEC") }
                  rule(:wgms) { str("WG-MS") }
                  rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }

                  rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }

                  # An outcome is either "<group> [-- ]<type> <number/year>" or
                  # "<type> <group>/<number/year>".
                  rule(:type_group) { type >> group >> slash >> num_and_year }
                  rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
                  rule(:outcome) { group_type | type_group }

                  rule(:append) { comma >> str("Appendix") >> space >> number }
                  rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> append.maybe }

                  rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }

                  rule(:result) { outcome | brochure | metrologia }

                  root :result
                end

                # Type names as parsed => token used in the normalized hash.
                # Names missing from this table are kept unchanged by #normalized_type.
                TYPES = {
                  "Resolution" => "RES",
                  "Résolution" => "RES",
                  "Recommendation" => "REC",
                  "Recommandation" => "REC",
                  "Decision" => "DECN",
                  "Décision" => "DECN",
                  "Declaration" => "Déclaration",
                  "Réunion" => "Meeting",
                }.freeze

                # @return [Hash] the parsed id components
                attr_accessor :id

                #
                # Create a new Id object
                #
                # @param [String] id id string
                #
                # @raise [RelatonBib::RequestError] if the string does not match the grammar
                #
                def initialize(id)
                  @id = Parser.new.parse(id)
                rescue Parslet::ParseFailed => e
                  warn "[relaton-bipm] Incorrect reference: #{id}"
                  # warn "[relaton-bipm] #{e.parse_failure_cause.ascii_tree}"
                  raise RelatonBib::RequestError, e
                end

                #
                # Compare two Id objects
                #
                # The year and the language take part in the comparison only when
                # both sides have them.
                #
                # @param [RelatonBipm::Id, Hash] other the other Id object or a normalized hash
                #
                # @return [Boolean] true if the two Id objects are equal
                #
                def ==(other)
                  other_hash = other.is_a?(Id) ? other.normalized_hash : other
                  # Work on copies: normalized_hash is memoized and other_hash belongs
                  # to the caller, so deleting keys in place would change the result
                  # of every later comparison.
                  own = normalized_hash.dup
                  theirs = other_hash.dup
                  %i[year lang].each do |key|
                    next if own[key] && theirs[key]

                    own.delete(key)
                    theirs.delete(key)
                  end
                  own == theirs
                end

                #
                # Transform ID parts.
                # Translate type into abbreviation, remove leading zeros from number
                #
                # The result is memoized; the group has "CCDS" replaced by "CCTF".
                #
                # @return [Hash] the normalized ID parts
                #
                def normalized_hash # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
                  @normalized_hash ||= begin
                    hash = { group: id[:group].to_s.sub("CCDS", "CCTF") }
                    hash[:type] = normalized_type if id[:type]
                    norm_num = normalized_number
                    # A number made only of zeros normalizes to "" and is left out.
                    hash[:number] = norm_num unless norm_num.nil? || norm_num.empty?
                    hash[:year] = id[:year].to_s if id[:year]
                    hash[:lang] = id[:lang].to_s if id[:lang]
                    hash
                  end
                end

                #
                # Translate type into abbreviation
                #
                # @return [String] the normalized type
                #
                def normalized_type
                  TYPES[id[:type].to_s] || id[:type].to_s
                end

                #
                # Remove leading zeros from number
                #
                # @return [String, nil] the normalized number
                #
                def normalized_number
                  return unless id[:number]

                  id[:number].to_s.sub(/^0+/, "")
                end
              end
            end
         | 
| @@ -9,7 +9,7 @@ module RelatonBipm | |
| 9 9 | 
             
                  @prefix = "BIPM"
         | 
| 10 10 | 
             
                  @defaultprefix = %r{^(?:BIPM|CCTF|CCDS|CGPM|CIPM)(?!\w)}
         | 
| 11 11 | 
             
                  @idtype = "BIPM"
         | 
| 12 | 
            -
                  @datasets = %w[bipm-data-outcomes bipm-si-brochure]
         | 
| 12 | 
            +
                  @datasets = %w[bipm-data-outcomes bipm-si-brochure rawdata-bipm-metrologia]
         | 
| 13 13 | 
             
                end
         | 
| 14 14 |  | 
| 15 15 | 
             
                # @param code [String]
         | 
| @@ -21,10 +21,11 @@ module RelatonBipm | |
| 21 21 | 
             
                end
         | 
| 22 22 |  | 
| 23 23 | 
             
                #
         | 
| 24 | 
            -
                # Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes
         | 
| 25 | 
            -
                #    | 
| 24 | 
            +
                # Fetch all the documents from https://github.com/metanorma/bipm-data-outcomes,
         | 
| 25 | 
            +
                #   https://github.com/metanorma/bipm-si-brochure, https://github.com/relaton/rawdata-bipm-metrologia
         | 
| 26 26 | 
             
                #
         | 
| 27 | 
            -
                # @param [String] source source name
         | 
| 27 | 
            +
                # @param [String] source source name (bipm-data-outcomes, bipm-si-brochure,
         | 
| 28 | 
            +
                #   rawdata-bipm-metrologia)
         | 
| 28 29 | 
             
                # @param [Hash] opts
         | 
| 29 30 | 
             
                # @option opts [String] :output directory to output documents
         | 
| 30 31 | 
             
                # @option opts [String] :format
         | 
| @@ -0,0 +1,311 @@ | |
| 1 | 
            +
            module RelatonBipm
         | 
| 2 | 
            +
              module RawdataBipmMetrologia
         | 
| 3 | 
            +
                class ArticleParser
         | 
| 4 | 
            +
                  ATTRS = %i[docid title contributor date copyright abstract relation series
         | 
| 5 | 
            +
                             extent type doctype].freeze
         | 
| 6 | 
            +
                  #
         | 
| 7 | 
            +
                  # Create new parser and parse document
         | 
| 8 | 
            +
                  #
         | 
| 9 | 
            +
                  # @param [Nokogiri::XML::Element] doc document XML element
         | 
| 10 | 
            +
                  #
         | 
| 11 | 
            +
                  # @return [RelatonBipm::BipmBibliographicItem] document
         | 
| 12 | 
            +
                  #
         | 
| 13 | 
            +
                  def self.parse(doc)
                    # Build a parser for the element, then run it.
                    parser = new(doc)
                    parser.parse
                  end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                  #
         | 
| 18 | 
            +
                  # Initialize parser
         | 
| 19 | 
            +
                  #
         | 
| 20 | 
            +
                  # @param [Nokogiri::XML::Element] doc XML document
         | 
| 21 | 
            +
                  #
         | 
| 22 | 
            +
                  def initialize(doc)
                    # Keep the article-meta node at hand; most parse methods read from it.
                    @meta = doc.at("./front/article-meta")
                    @doc = doc
                  end
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                  #
         | 
| 28 | 
            +
                  # Create new document
         | 
| 29 | 
            +
                  #
         | 
| 30 | 
            +
                  # @return [RelatonBipm::BipmBibliographicItem] document
         | 
| 31 | 
            +
                  #
         | 
| 32 | 
            +
                  def parse
                    # Each attribute in ATTRS is produced by the matching parse_<attribute> method.
                    attrs = ATTRS.each_with_object({}) do |attr, acc|
                      acc[attr] = send("parse_#{attr}")
                    end
                    BipmBibliographicItem.new(**attrs)
                  end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                  #
         | 
| 38 | 
            +
                  # Parse docid
         | 
| 39 | 
            +
                  #
         | 
| 40 | 
            +
                  # @return [Array<RelatonBib::DocumentIdentifier>] array of document identifiers
         | 
| 41 | 
            +
                  #
         | 
| 42 | 
            +
                  def parse_docid
                    # The primary id is "<journal title> <volume> <issue> <article>".
                    primary = create_docid "#{journal_title} #{volume_issue_article}", "BIPM", true
                    # Every DOI article-id becomes an extra, non-primary identifier.
                    dois = @meta.xpath("./article-id[@pub-id-type='doi']").map do |node|
                      create_docid(node.text, node["pub-id-type"])
                    end
                    [primary, *dois]
                  end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                  #
         | 
| 52 | 
            +
                  # Build the volume, issue and article number part of the identifier
         | 
| 53 | 
            +
                  #
         | 
| 54 | 
            +
                  # @return [String] volume, issue and article number joined by spaces
         | 
| 55 | 
            +
                  #
         | 
| 56 | 
            +
                  def volume_issue_article
                    # page = @doc.at("./front/article-meta/fpage")&.text || manuscript
                    pieces = [@meta.at("./volume"), @meta.at("./issue")].map(&:text)
                    pieces << article
                    pieces.join(" ")
                  end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                  #
                  # Extract the article number from the manuscript article-id
                  #
                  # @return [String] the trailing run of non-underscore characters of the
                  #   manuscript id, or an empty string when there is none
                  #
                  def article
                    manuscript_id = @meta.at("./article-id[@pub-id-type='manuscript']").text
                    manuscript_id[/[^_]+$/].to_s
                  end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                  #
         | 
| 68 | 
            +
                  # Parse journal title
         | 
| 69 | 
            +
                  #
         | 
| 70 | 
            +
                  # @return [String] journal title
         | 
| 71 | 
            +
                  #
         | 
| 72 | 
            +
                  def journal_title
                    # The title lives in the journal-meta part of the document front matter.
                    title_node = @doc.at("./front/journal-meta/journal-title-group/journal-title")
                    title_node.text
                  end
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                  #
         | 
| 77 | 
            +
                  # Create document identifier
         | 
| 78 | 
            +
                  #
         | 
| 79 | 
            +
                  # @param [String] id document id
         | 
| 80 | 
            +
                  # @param [String] type id type
         | 
| 81 | 
            +
                  # @param [Boolean, nil] primary is primary id
         | 
| 82 | 
            +
                  #
         | 
| 83 | 
            +
                  # @return [RelatonBib::DocumentIdentifier] document identifier
         | 
| 84 | 
            +
                  #
         | 
| 85 | 
            +
                  def create_docid(id, type, primary = nil)
                    # All three values are forwarded unchanged as keyword arguments.
                    attrs = { id: id, type: type, primary: primary }
                    RelatonBib::DocumentIdentifier.new(**attrs)
                  end
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                  #
         | 
| 90 | 
            +
                  # Parse title
         | 
| 91 | 
            +
                  #
         | 
| 92 | 
            +
                  # @return [Array<RelatonBib::TypedTitleString>] array of title strings
         | 
| 93 | 
            +
                  #
         | 
| 94 | 
            +
                  def parse_title
                    # Titles with empty text are dropped before any object is built.
                    filled = @meta.xpath("./title-group/article-title").reject { |node| node.text.empty? }
                    filled.map do |node|
                      RelatonBib::TypedTitleString.new(
                        content: node.text, language: node[:"xml:lang"], script: "Latn",
                      )
                    end
                  end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                  #
         | 
| 103 | 
            +
                  # Parse contributor
         | 
| 104 | 
            +
                  #
         | 
| 105 | 
            +
                  # @return [Array<RelatonBib::Contributor>] array of contributors
         | 
| 106 | 
            +
                  #
         | 
| 107 | 
            +
                  def parse_contributor
                    contributors = @meta.xpath("./contrib-group/contrib").map do |c|
                      # A contributor is described either by a personal <name> or by a
                      # <collab>. With neither there is nothing to build an entity from
                      # (create_organization would call #text on nil), so skip it rather
                      # than abort the whole article.
                      next unless c.at("./name") || c.at("./collab")

                      # Prefer a person; fall back to an organization.
                      entity = create_person(c) || create_organization(c)
                      RelatonBib::ContributionInfo.new(entity: entity, role: [type: c[:"contrib-type"]])
                    end
                    contributors.compact
                  end
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                  #
                  # Create a person from a contributor element
                  #
                  # @param [Nokogiri::XML::Element] contrib contributor element
                  #
                  # @return [RelatonBib::Person, nil] person, or nil when the contributor
                  #   has no name element
                  #
                  def create_person(contrib)
                    name_node = contrib.at("./name")
                    if name_node
                      RelatonBib::Person.new(name: fullname(name_node), affiliation: affiliation(contrib))
                    end
                  end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                  #
                  # Create an organization from a contributor element
                  #
                  # @param [Nokogiri::XML::Element] contrib contributor element
                  #
                  # @return [RelatonBib::Organization] organization named after the
                  #   text of the contributor's collab element
                  #
                  def create_organization(contrib)
                    collab_name = contrib.at("./collab").text
                    RelatonBib::Organization.new(name: collab_name)
                  end
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                  #
         | 
| 126 | 
            +
                  # Parse affiliations
         | 
| 127 | 
            +
                  #
         | 
| 128 | 
            +
                  # @param [Nokogiri::XML::Element] contrib contributor element
         | 
| 129 | 
            +
                  #
         | 
| 130 | 
            +
                  # @return [Array<RelatonBib::Affiliation>] array of affiliations
         | 
| 131 | 
            +
                  #
         | 
| 132 | 
            +
                  def affiliation(contrib) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
                    affiliations = contrib.xpath("./xref[@ref-type='aff']").map do |x|
                      a = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']/label/following-sibling::node()")
                      # The xref may point at an <aff> that is absent or has nothing after
                      # its <label>; skip it instead of failing on nil.
                      next unless a

                      text = a.text
                      # Blank text would only produce an organization without a name.
                      next if text.strip.empty?

                      parts = text.split(", ")
                      address = []
                      if parts.size > 2
                        # "<organization ...>, <city>, <country>": the last two parts are
                        # taken as the address, everything before them as the name.
                        orgname = parts[0..-3].join(", ")
                        city, country = parts[-2..]
                        address << RelatonBib::Address.new(city: city, country: country)
                      else
                        # Too few parts to split off a city and a country without leaving
                        # the name empty: keep the whole text as the organization name.
                        orgname = text
                      end
                      org = RelatonBib::Organization.new name: orgname, contact: address
                      RelatonBib::Affiliation.new organization: org
                    end
                    affiliations.compact
                  end
         | 
| 144 | 
            +
             | 
| 145 | 
            +
                  #
         | 
| 146 | 
            +
                  # Create full name
         | 
| 147 | 
            +
                  #
         | 
| 148 | 
            +
                  # @param [Nokogiri::XML::Element] name name element of a contributor
         | 
| 149 | 
            +
                  #
         | 
| 150 | 
            +
                  # @return [RelatonBib::FullName] full name
         | 
| 151 | 
            +
                  #
         | 
| 152 | 
            +
                  def fullname(name)
                    # Forenames are derived first, then the surname is wrapped as an
                    # English/Latin localized string.
                    given = forename(name.at("./given-names"))
                    family = RelatonBib::LocalizedString.new(name.at("./surname").text, "en", "Latn")
                    RelatonBib::FullName.new(surname: family, forename: given)
                  end
         | 
| 158 | 
            +
             | 
| 159 | 
            +
                  #
         | 
| 160 | 
            +
                  # Parse forename
         | 
| 161 | 
            +
                  #
         | 
| 162 | 
            +
                  # @param [Nokogiri::XML::Element, nil] given_name given-names element
         | 
| 163 | 
            +
                  #
         | 
| 164 | 
            +
                  # @return [Array<RelatonBib::Forename>] array of forenames
         | 
| 165 | 
            +
                  #
         | 
| 166 | 
            +
                  def forename(given_name) # rubocop:disable Metrics/MethodLength
                    return [] unless given_name

                    build = lambda do |name, init|
                      RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
                    end
                    # Each scan hit is a word optionally followed by a single-letter word.
                    given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).flat_map do |nm, int|
                      if nm.size == 1
                        # The word itself is an initial. When a second initial was
                        # captured with it ("J A"), emit one forename per initial so
                        # the second one is not lost.
                        [nm, int].compact.map { |initial| build.call(nil, initial) }
                      else
                        # A full forename, possibly with a trailing initial.
                        [build.call(nm, int)]
                      end
                    end
                  end
         | 
| 180 | 
            +
             | 
| 181 | 
            +
                  #
         | 
| 182 | 
            +
                  # Parse date
         | 
| 183 | 
            +
                  #
         | 
| 184 | 
            +
                  # @return [Array<RelatonBib::BibliographicDate>] array of dates
         | 
| 185 | 
            +
                  #
         | 
| 186 | 
            +
                  def parse_date
                    # The earliest of the publication dates is reported as "published".
                    on = dates.min
                    # Without any pub-date there is no date to report; return an empty
                    # list rather than a date entry built around nil.
                    return [] unless on

                    [RelatonBib::BibliographicDate.new(type: "published", on: on)]
                  end
         | 
| 190 | 
            +
             | 
| 191 | 
            +
                  #
         | 
| 192 | 
            +
                  # Collect the publication dates, optionally passing each one to a block
         | 
| 193 | 
            +
                  #
         | 
| 194 | 
            +
                  # @yield [date, type] date and type
         | 
| 195 | 
            +
                  #
         | 
| 196 | 
            +
                  # @return [Array<String, Object>] string date or whatever block returns
         | 
| 197 | 
            +
                  #
         | 
| 198 | 
            +
                  def dates
                    @meta.xpath("./pub-date").map do |pub|
                      # Month and day come back zero-padded, "01" when absent.
                      mm = date_part(pub, "month")
                      dd = date_part(pub, "day")
                      iso = [pub.at("./year").text, mm, dd].join("-")
                      if block_given?
                        # Hand the date and the element's pub-type to the caller's block
                        # and collect whatever it returns.
                        yield(iso, pub[:"pub-type"])
                      else
                        iso
                      end
                    end
                  end
         | 
| 206 | 
            +
             | 
| 207 | 
            +
                  #
                  # Read one component of a date element as a two-character string
                  #
                  # @param [Nokogiri::XML::Element] date date element
                  # @param [String] type name of the child element to read
                  #
                  # @return [String] the child's text left-padded with "0" to two
                  #   characters, or "01" when the child is missing or empty
                  #
                  def date_part(date, type)
                    node = date.at("./#{type}")
                    value = node ? node.text : nil
                    if value.nil? || value.empty?
                      "01"
                    else
                      value.rjust(2, "0")
                    end
                  end
         | 
| 213 | 
            +
             | 
| 214 | 
            +
                  #
         | 
| 215 | 
            +
                  # Parse copyright
         | 
| 216 | 
            +
                  #
         | 
| 217 | 
            +
                  # @return [Array<RelatonBib::CopyrightAssociation>] array of copyright associations
         | 
| 218 | 
            +
                  #
         | 
| 219 | 
            +
                  def parse_copyright
                    @meta.xpath("./permissions").each_with_object([]) do |l, m|
                      # Permissions without a copyright year are ignored.
                      from = l.at("./copyright-year")
                      next unless from

                      statement = l.at("./copyright-statement")
                      # A missing statement yields no owners instead of failing on nil.
                      holders = statement ? statement.text.split(" & ") : []
                      owner = holders.map do |c|
                        # Take the first run of space-separated words. \p{L} matches
                        # letters only, accented ones included; an [A-z] range would also
                        # accept "[", "\", "]", "^", "_" and "`", which sit between "Z"
                        # and "a" in ASCII.
                        name = c[/\p{L}+(?:\s\p{L}+)*/]
                        next unless name

                        org = RelatonBib::Organization.new name: name
                        RelatonBib::ContributionInfo.new(entity: org)
                      end.compact
                      # With no recognizable owner there is nothing to attach the year to.
                      next if owner.empty?

                      m << RelatonBib::CopyrightAssociation.new(owner: owner, from: from.text)
                    end
                  end
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                  #
         | 
| 234 | 
            +
                  # Parse abstract
         | 
| 235 | 
            +
                  #
         | 
| 236 | 
            +
                  # @return [Array<RelatonBib::FormattedString>] array of abstracts
         | 
| 237 | 
            +
                  #
         | 
| 238 | 
            +
                  def parse_abstract
                    # Each abstract keeps its inner markup and is labelled as HTML.
                    @meta.xpath("./abstract").each_with_object([]) do |node, acc|
                      acc << RelatonBib::FormattedString.new(
                        content: node.inner_html,
                        language: node[:"xml:lang"],
                        script: ["Latn"],
                        format: "text/html",
                      )
                    end
                  end
         | 
| 245 | 
            +
             | 
| 246 | 
            +
                  #
         | 
| 247 | 
            +
                  # Parse relation
         | 
| 248 | 
            +
                  #
         | 
| 249 | 
            +
                  # @return [Array<RelatonBib::DocumentRelation>] array of document relations
         | 
| 250 | 
            +
                  #
         | 
| 251 | 
            +
                  def parse_relation
                    # One "hasManifestation" relation per publication date; #dates
                    # collects the block results into the returned array.
                    dates do |date, pub_type|
                      manifestation = bibitem(date, pub_type)
                      RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: manifestation)
                    end
                  end
         | 
| 256 | 
            +
             | 
| 257 | 
            +
                  #
         | 
| 258 | 
            +
                  # Create bibitem
         | 
| 259 | 
            +
                  #
         | 
| 260 | 
            +
                  # @param [String] date
         | 
| 261 | 
            +
                  # @param [String] type date type
         | 
| 262 | 
            +
                  #
         | 
| 263 | 
            +
                  # @return [RelatonBipm::BipmBibliographicItem] bibitem
         | 
| 264 | 
            +
                  #
         | 
| 265 | 
            +
                  def bibitem(date, type)
                    published = RelatonBib::BibliographicDate.new(type: type, on: date)
                    # "epub" dates describe the online carrier; every other type is
                    # treated as print.
                    carrier =
                      if type == "epub"
                        "online"
                      else
                        "print"
                      end
                    medium = RelatonBib::Medium.new(carrier: carrier)
                    BipmBibliographicItem.new(title: parse_title, date: [published], medium: medium)
                  end
         | 
| 271 | 
            +
             | 
| 272 | 
            +
                  #
         | 
| 273 | 
            +
                  # Parse series
         | 
| 274 | 
            +
                  #
         | 
| 275 | 
            +
                  # @return [Array<RelatonBib::Series>] array of series
         | 
| 276 | 
            +
                  #
         | 
| 277 | 
            +
                  def parse_series
                    # The journal is the only series; its title is tagged English/Latin.
                    series_title = RelatonBib::TypedTitleString.new(
                      content: journal_title,
                      language: ["en"],
                      script: ["Latn"],
                    )
                    [RelatonBib::Series.new(title: series_title)]
                  end
         | 
| 283 | 
            +
             | 
| 284 | 
            +
                  #
         | 
| 285 | 
            +
                  # Parse extent
         | 
| 286 | 
            +
                  #
         | 
| 287 | 
            +
                  # @return [Array<RelatonBib::Extent>] array of extents
         | 
| 288 | 
            +
                  #
         | 
| 289 | 
            +
                  def parse_extent
                    @meta.xpath("./volume|./issue|./fpage").map do |node|
                      if node.name == "fpage"
                        # The first page becomes a "page" locality that runs up to the
                        # last page when one is present.
                        last_page = @meta.at("./lpage")&.text
                        RelatonBib::Locality.new("page", node.text, last_page)
                      else
                        # Volume and issue use the element name as the type and have no end.
                        RelatonBib::Locality.new(node.name, node.text, nil)
                      end
                    end
                  end
         | 
| 303 | 
            +
             | 
| 304 | 
            +
                  #
                  # Parse type
                  #
                  # @return [String] always "article"
                  #
                  def parse_type
                    "article"
                  end
         | 
| 307 | 
            +
             | 
| 308 | 
            +
                  # parse_doctype is the same method as parse_type under a second name.
                  alias_method(:parse_doctype, :parse_type)
         | 
| 309 | 
            +
                end
         | 
| 310 | 
            +
              end
         | 
| 311 | 
            +
            end
         |