relaton-w3c 1.12.0 → 1.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +6 -3
- data/lib/relaton_w3c/data_fetcher.rb +102 -34
- data/lib/relaton_w3c/data_index.rb +31 -13
- data/lib/relaton_w3c/data_parser.rb +47 -22
- data/lib/relaton_w3c/processor.rb +3 -3
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/workgroups.yaml +285 -148
- metadata +2 -27
- data/data/reference.W3C.CR-rdf-schema.xml +0 -16
- data/data/reference.W3C.DSig-label.xml +0 -32
- data/data/reference.W3C.P3P-rdfschema.xml +0 -26
- data/data/reference.W3C.P3P.xml +0 -38
- data/data/reference.W3C.PICS-labels.xml +0 -43
- data/data/reference.W3C.PICS-rules.xml +0 -38
- data/data/reference.W3C.PICS-services.xml +0 -37
- data/data/reference.W3C.REC-RUBY.xml +0 -22
- data/data/reference.W3C.REC-XHTML.xml +0 -12
- data/data/reference.W3C.REC-rdf-syntax.xml +0 -31
- data/data/reference.W3C.REC-xml-1998.xml +0 -20
- data/data/reference.W3C.REC-xml-names.xml +0 -28
- data/data/reference.W3C.REC-xml.xml +0 -35
- data/data/reference.W3C.REC-xmlenc-core.xml +0 -23
- data/data/reference.W3C.REC-xmlschema-1.xml +0 -23
- data/data/reference.W3C.REC-xmlschema-2.xml +0 -17
- data/data/reference.W3C.daml-oil-reference.xml +0 -39
- data/data/reference.W3C.soap11.xml +0 -56
- data/data/reference.W3C.soap12-part1.xml +0 -38
- data/data/reference.W3C.soap12-part2.xml +0 -38
- data/data/reference.W3C.xkms.xml +0 -50
- data/data/reference.W3C.xml-c14n.xml +0 -15
- data/data/reference.W3C.xmldsig-core.xml +0 -26
- data/data/reference.W3C.xmlenc-core.xml +0 -20
- data/data/reference.W3C.xpath.xml +0 -22
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b0e1e914bfe536b10f9d3b9e48f946e9d86a66df1db0233e1b6d3f67b29beaa6
         | 
| 4 | 
            +
              data.tar.gz: 635cd920c1b8ff93678083790518eade6ce7051653e190eb8294a25306e3e69a
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: bcdce1b3f1e10def99be7e5d51d98acb868d9d94884211d17814b22ffc64b5d2b4cfdee45bebe784fe02672d1af0cabb1a7dc7f3671433bd63c296a9ed106016
         | 
| 7 | 
            +
              data.tar.gz: 81457b4d1f0f59a6b869a7089bd9173164abd126ed17a923f5f02f1766e7d732ba421c61284cfd35b1ca006b75c0e8e1ff7e80d55f58d7af09f64460bd8e1d8f
         | 
    
        data/README.adoc
    CHANGED
    
    | @@ -151,14 +151,17 @@ RelatonW3c::W3cBibliographicItem.new **bib_hash | |
| 151 151 |  | 
| 152 152 | 
             
            === Fetch data
         | 
| 153 153 |  | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
            The method `RelatonW3c::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the dataset and save them to the `./data` folder in YAML format.
         | 
| 154 | 
            +
            The method `RelatonW3c::DataFetcher.fetch(source, output: "data", format: "yaml")` converts all the documents from the dataset and saves them to the `./data` folder in YAML format.
         | 
| 157 155 | 
             
            Arguments:
         | 
| 158 156 |  | 
| 157 | 
            +
            - `source` - name of dataset (`w3c-rdf` or `w3c-tr-archive`)
         | 
| 159 158 | 
             
            - `output` - folder to save documents (default './data').
         | 
| 160 159 | 
             
            - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
         | 
| 161 160 |  | 
| 161 | 
            +
            The available datasets are:
         | 
| 162 | 
            +
            - `w3c-rdf` - The dataset is fetched from http://www.w3.org/2002/01/tr-automation/tr.rdf.
         | 
| 163 | 
            +
            - `w3c-tr-archive` - The archive dataset files should be downloaded from the https://github.com/relaton/w3c-tr-archive repository and placed into the `w3c-tr-archive` folder.
         | 
| 164 | 
            +
             | 
| 162 165 | 
             
            [source,ruby]
         | 
| 163 166 | 
             
            ----
         | 
| 164 167 | 
             
            RelatonW3c::DataFetcher.fetch
         | 
| @@ -20,22 +20,21 @@ module RelatonW3c | |
| 20 20 | 
             
                  @ext = format.sub(/^bib/, "")
         | 
| 21 21 | 
             
                  dir = File.dirname(File.expand_path(__FILE__))
         | 
| 22 22 | 
             
                  @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
         | 
| 23 | 
            -
                  @ | 
| 24 | 
            -
                  @files = []
         | 
| 25 | 
            -
                  @index = DataIndex.new
         | 
| 23 | 
            +
                  @index = DataIndex.create_from_file
         | 
| 26 24 | 
             
                end
         | 
| 27 25 |  | 
| 28 26 | 
             
                #
         | 
| 29 27 | 
             
                # Initialize fetcher and run fetch
         | 
| 30 28 | 
             
                #
         | 
| 29 | 
            +
                # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
         | 
| 31 30 | 
             
                # @param [String] output directory to save files, default: "data"
         | 
| 32 31 | 
             
                # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
         | 
| 33 32 | 
             
                #
         | 
| 34 | 
            -
                def self.fetch(output: "data", format: "yaml")
         | 
| 33 | 
            +
                def self.fetch(source, output: "data", format: "yaml")
         | 
| 35 34 | 
             
                  t1 = Time.now
         | 
| 36 35 | 
             
                  puts "Started at: #{t1}"
         | 
| 37 | 
            -
                  FileUtils.mkdir_p output | 
| 38 | 
            -
                  new(output, format).fetch
         | 
| 36 | 
            +
                  FileUtils.mkdir_p output
         | 
| 37 | 
            +
                  new(output, format).fetch source
         | 
| 39 38 | 
             
                  t2 = Time.now
         | 
| 40 39 | 
             
                  puts "Stopped at: #{t2}"
         | 
| 41 40 | 
             
                  puts "Done in: #{(t2 - t1).round} sec."
         | 
| @@ -44,47 +43,113 @@ module RelatonW3c | |
| 44 43 | 
             
                #
         | 
| 45 44 | 
             
                # Parse documents
         | 
| 46 45 | 
             
                #
         | 
| 47 | 
            -
                 | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
                   | 
| 51 | 
            -
                     | 
| 52 | 
            -
             | 
| 46 | 
            +
                # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
         | 
| 47 | 
            +
                #
         | 
| 48 | 
            +
                def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
         | 
| 49 | 
            +
                  each_dataset(source) do |rdf|
         | 
| 50 | 
            +
                    %i[versioned unversioned].each do |type|
         | 
| 51 | 
            +
                      send("query_#{type}_docs", rdf).each do |sl|
         | 
| 52 | 
            +
                        bib = DataParser.parse(rdf, sl, self)
         | 
| 53 | 
            +
                        add_has_edition_relation(bib) if type == :unversioned
         | 
| 54 | 
            +
                        save_doc bib
         | 
| 55 | 
            +
                      rescue StandardError => e
         | 
| 56 | 
            +
                        link = sl.respond_to?(:link) ? sl.link : sl.version_of
         | 
| 57 | 
            +
                        warn "Error: document #{link} #{e.message}"
         | 
| 58 | 
            +
                        warn e.backtrace.join("\n")
         | 
| 59 | 
            +
                      end
         | 
| 60 | 
            +
                    end
         | 
| 53 61 | 
             
                  end
         | 
| 54 | 
            -
                   | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 62 | 
            +
                  @index.sort!.save
         | 
| 63 | 
            +
                end
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                #
         | 
| 66 | 
            +
                # Add hasEdition relations from the previously parsed document
         | 
| 67 | 
            +
                #
         | 
| 68 | 
            +
                # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
         | 
| 69 | 
            +
                #
         | 
| 70 | 
            +
                def add_has_edition_relation(bib) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
         | 
| 71 | 
            +
                  file = file_name bib.docnumber
         | 
| 72 | 
            +
                  return unless File.exist? file
         | 
| 73 | 
            +
             | 
| 74 | 
            +
                  b = case @format
         | 
| 75 | 
            +
                      when "xml" then XMLParser.from_xml(File.read(file, encoding: "UTF-8"))
         | 
| 76 | 
            +
                      when "yaml"
         | 
| 77 | 
            +
                        hash = YAML.load_file(file)
         | 
| 78 | 
            +
                        W3cBibliographicItem.from_hash(hash)
         | 
| 79 | 
            +
                      when "bibxml" then BibXMLParser.parse File.read(file, encoding: "UTF-8")
         | 
| 80 | 
            +
                      end
         | 
| 81 | 
            +
                  b.relation.each do |r|
         | 
| 82 | 
            +
                    same_edition = bib.relation.detect { |r2| same_edition?(r, r2) }
         | 
| 83 | 
            +
                    bib.relation << r unless same_edition
         | 
| 59 84 | 
             
                  end
         | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 85 | 
            +
                end
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                #
         | 
| 88 | 
            +
                # Compare two relations
         | 
| 89 | 
            +
                #
         | 
| 90 | 
            +
                # @param [RelatonW3c::W3cBibliographicItem] rel1 relation 1
         | 
| 91 | 
            +
                # @param [RelatonW3c::W3cBibliographicItem] rel2 relation 2
         | 
| 92 | 
            +
                #
         | 
| 93 | 
            +
                # @return [Boolean] true if relations are same
         | 
| 94 | 
            +
                #
         | 
| 95 | 
            +
                def same_edition?(rel1, rel2)
         | 
| 96 | 
            +
                  return false unless rel1.type == "hasEdition" && rel1.type == rel2.type
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                  ids1 = rel1.bibitem.docidentifier.map(&:id)
         | 
| 99 | 
            +
                  ids2 = rel2.bibitem.docidentifier.map(&:id)
         | 
| 100 | 
            +
                  (ids1 & ids2).any?
         | 
| 101 | 
            +
                end
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                #
         | 
| 104 | 
            +
                # Yield fetching for each dataset
         | 
| 105 | 
            +
                #
         | 
| 106 | 
            +
                # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
         | 
| 107 | 
            +
                #
         | 
| 108 | 
            +
                # @yield [RDF::Repository] RDF repository
         | 
| 109 | 
            +
                #
         | 
| 110 | 
            +
                def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
         | 
| 111 | 
            +
                  case source
         | 
| 112 | 
            +
                  when "w3c-tr-archive"
         | 
| 113 | 
            +
                    Dir["w3c-tr-archive/*.rdf"].map do |f|
         | 
| 114 | 
            +
                      @files = []
         | 
| 115 | 
            +
                      yield RDF::Repository.load(f)
         | 
| 116 | 
            +
                    end
         | 
| 117 | 
            +
                  when "w3c-rdf"
         | 
| 118 | 
            +
                    @files = []
         | 
| 119 | 
            +
                    rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
         | 
| 120 | 
            +
                    yield rdf
         | 
| 121 | 
            +
                    # parse_static_dataset
         | 
| 66 122 | 
             
                  end
         | 
| 67 | 
            -
                  @index.sort!.save
         | 
| 68 123 | 
             
                end
         | 
| 69 124 |  | 
| 125 | 
            +
                #
         | 
| 126 | 
            +
                # Parse static dataset
         | 
| 127 | 
            +
                #
         | 
| 128 | 
            +
                # def parse_static_dataset
         | 
| 129 | 
            +
                #   Dir[File.expand_path("../../data/*", __dir__)].each do |file|
         | 
| 130 | 
            +
                #     xml = File.read file, encoding: "UTF-8"
         | 
| 131 | 
            +
                #     save_doc BibXMLParser.parse(xml), warn_duplicate: false
         | 
| 132 | 
            +
                #   rescue StandardError => e
         | 
| 133 | 
            +
                #     warn "Error: document #{file} #{e.message}"
         | 
| 134 | 
            +
                #     warn e.backtrace.join("\n")
         | 
| 135 | 
            +
                #   end
         | 
| 136 | 
            +
                # end
         | 
| 137 | 
            +
             | 
| 70 138 | 
             
                #
         | 
| 71 139 | 
             
                # Query RDF source for versioned documents
         | 
| 72 140 | 
             
                #
         | 
| 73 141 | 
             
                # @return [RDF::Query::Solutions] query results
         | 
| 74 142 | 
             
                #
         | 
| 75 | 
            -
                def query_versioned_docs | 
| 143 | 
            +
                def query_versioned_docs(rdf)
         | 
| 76 144 | 
             
                  sse = SPARQL.parse(%(
         | 
| 77 145 | 
             
                    PREFIX : <http://www.w3.org/2001/02pd/rec54#>
         | 
| 78 146 | 
             
                    PREFIX dc: <http://purl.org/dc/elements/1.1/>
         | 
| 79 147 | 
             
                    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
         | 
| 80 | 
            -
                    # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
         | 
| 81 148 | 
             
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
         | 
| 82 | 
            -
                    SELECT ?link ?title ?date | 
| 83 | 
            -
                    WHERE {
         | 
| 84 | 
            -
                      ?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
         | 
| 85 | 
            -
                    }
         | 
| 149 | 
            +
                    SELECT ?link ?title ?date
         | 
| 150 | 
            +
                    WHERE { ?link dc:title ?title ; dc:date ?date . }
         | 
| 86 151 | 
             
                  ))
         | 
| 87 | 
            -
                   | 
| 152 | 
            +
                  rdf.query sse
         | 
| 88 153 | 
             
                end
         | 
| 89 154 |  | 
| 90 155 | 
             
                #
         | 
| @@ -92,13 +157,16 @@ module RelatonW3c | |
| 92 157 | 
             
                #
         | 
| 93 158 | 
             
                # @return [Array<RDF::Query::Solution>] query results
         | 
| 94 159 | 
             
                #
         | 
| 95 | 
            -
                def query_unversioned_docs
         | 
| 160 | 
            +
                def query_unversioned_docs(rdf)
         | 
| 96 161 | 
             
                  sse = SPARQL.parse(%(
         | 
| 97 162 | 
             
                    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
         | 
| 98 163 | 
             
                    SELECT ?version_of
         | 
| 99 | 
            -
                    WHERE { | 
| 164 | 
            +
                    WHERE {
         | 
| 165 | 
            +
                      ?link doc:versionOf ?version_of .
         | 
| 166 | 
            +
                      FILTER ( isURI(?link) && isURI(?version_of) && ?link != ?version_of )
         | 
| 167 | 
            +
                    }
         | 
| 100 168 | 
             
                  ))
         | 
| 101 | 
            -
                   | 
| 169 | 
            +
                  rdf.query(sse).uniq { |s| s.version_of.to_s.sub(/^https?:\/\//, "").sub(/\/$/, "") }
         | 
| 102 170 | 
             
                end
         | 
| 103 171 |  | 
| 104 172 | 
             
                #
         | 
| @@ -14,15 +14,24 @@ module RelatonW3c | |
| 14 14 | 
             
                end
         | 
| 15 15 |  | 
| 16 16 | 
             
                #
         | 
| 17 | 
            -
                # Add document to index
         | 
| 17 | 
            +
                # Add document to index or update it if already exists
         | 
| 18 18 | 
             
                #
         | 
| 19 19 | 
             
                # @param [String] docnumber document number
         | 
| 20 20 | 
             
                # @param [String] file path to document file
         | 
| 21 21 | 
             
                #
         | 
| 22 | 
            -
                def add(docnumber, file)
         | 
| 22 | 
            +
                def add(docnumber, file) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
         | 
| 23 23 | 
             
                  dnparts = self.class.docnumber_to_parts docnumber
         | 
| 24 | 
            -
                   | 
| 25 | 
            -
                   | 
| 24 | 
            +
                  rec = @index.detect { |i| i[:file] == file }
         | 
| 25 | 
            +
                  if rec
         | 
| 26 | 
            +
                    rec[:code] = dnparts[:code]
         | 
| 27 | 
            +
                    dnparts[:stage] ? rec[:stage] = dnparts[:stage] : rec.delete(:stage)
         | 
| 28 | 
            +
                    dnparts[:type] ? rec[:type] = dnparts[:type] : rec.delete(:type)
         | 
| 29 | 
            +
                    dnparts[:date] ? rec[:date] = dnparts[:date] : rec.delete(:date)
         | 
| 30 | 
            +
                    dnparts[:suff] ? rec[:suff] = dnparts[:suff] : rec.delete(:suff)
         | 
| 31 | 
            +
                  else
         | 
| 32 | 
            +
                    dnparts[:file] = file
         | 
| 33 | 
            +
                    @index << dnparts
         | 
| 34 | 
            +
                  end
         | 
| 26 35 | 
             
                end
         | 
| 27 36 |  | 
| 28 37 | 
             
                #
         | 
| @@ -111,18 +120,27 @@ module RelatonW3c | |
| 111 120 | 
             
                  #
         | 
| 112 121 | 
             
                  # @return [RelatonW3c::DataIndex] data index
         | 
| 113 122 | 
             
                  #
         | 
| 114 | 
            -
                  def create_from_repo | 
| 115 | 
            -
                     | 
| 123 | 
            +
                  def create_from_repo
         | 
| 124 | 
            +
                    uri = URI("#{W3cBibliography::SOURCE}index-w3c.zip").open
         | 
| 125 | 
            +
                    resp = Zip::InputStream.new uri
         | 
| 116 126 | 
             
                    zip = resp.get_next_entry
         | 
| 127 | 
            +
                    index = RelatonBib.parse_yaml(zip.get_input_stream.read, [Symbol])
         | 
| 128 | 
            +
                    new index: index
         | 
| 129 | 
            +
                  end
         | 
| 117 130 |  | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 131 | 
            +
                  #
         | 
| 132 | 
            +
                  # Create index from a file
         | 
| 133 | 
            +
                  #
         | 
| 134 | 
            +
                  # @param [String] index_file path to index file
         | 
| 135 | 
            +
                  #
         | 
| 136 | 
            +
                  # @return [RelatonW3c::DataIndex] data index
         | 
| 137 | 
            +
                  #
         | 
| 138 | 
            +
                  def create_from_file(index_file = "index-w3c.yaml")
         | 
| 139 | 
            +
                    index = if File.exist?(index_file)
         | 
| 140 | 
            +
                              RelatonBib.parse_yaml(File.read(index_file), [Symbol])
         | 
| 141 | 
            +
                            else []
         | 
| 123 142 | 
             
                            end
         | 
| 124 | 
            -
             | 
| 125 | 
            -
                    DataIndex.new index: index
         | 
| 143 | 
            +
                    new index_file: index_file, index: index
         | 
| 126 144 | 
             
                  end
         | 
| 127 145 |  | 
| 128 146 | 
             
                  #
         | 
| @@ -25,7 +25,8 @@ module RelatonW3c | |
| 25 25 | 
             
                # @param [RDF::Query::Solution] sol entry from the SPARQL query
         | 
| 26 26 | 
             
                # @param [RelatonW3c::DataFetcher] fetcher data fetcher
         | 
| 27 27 | 
             
                #
         | 
| 28 | 
            -
                def initialize(sol, fetcher)
         | 
| 28 | 
            +
                def initialize(rdf, sol, fetcher)
         | 
| 29 | 
            +
                  @rdf = rdf
         | 
| 29 30 | 
             
                  @sol = sol
         | 
| 30 31 | 
             
                  @fetcher = fetcher
         | 
| 31 32 | 
             
                end
         | 
| @@ -38,8 +39,8 @@ module RelatonW3c | |
| 38 39 | 
             
                #
         | 
| 39 40 | 
             
                # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
         | 
| 40 41 | 
             
                #
         | 
| 41 | 
            -
                def self.parse(sol, fetcher)
         | 
| 42 | 
            -
                  new(sol, fetcher).parse
         | 
| 42 | 
            +
                def self.parse(rdf, sol, fetcher)
         | 
| 43 | 
            +
                  new(rdf, sol, fetcher).parse
         | 
| 43 44 | 
             
                end
         | 
| 44 45 |  | 
| 45 46 | 
             
                #
         | 
| @@ -100,7 +101,7 @@ module RelatonW3c | |
| 100 101 | 
             
                #
         | 
| 101 102 | 
             
                def parse_link
         | 
| 102 103 | 
             
                  link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
         | 
| 103 | 
            -
                  [RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
         | 
| 104 | 
            +
                  [RelatonBib::TypedUri.new(type: "src", content: link.to_s.strip)]
         | 
| 104 105 | 
             
                end
         | 
| 105 106 |  | 
| 106 107 | 
             
                #
         | 
| @@ -131,7 +132,7 @@ module RelatonW3c | |
| 131 132 | 
             
                #
         | 
| 132 133 | 
             
                def identifier(link = nil)
         | 
| 133 134 | 
             
                  url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
         | 
| 134 | 
            -
                  self.class.parse_identifier(url.to_s)
         | 
| 135 | 
            +
                  self.class.parse_identifier(url.to_s.strip)
         | 
| 135 136 | 
             
                end
         | 
| 136 137 |  | 
| 137 138 | 
             
                #
         | 
| @@ -142,7 +143,7 @@ module RelatonW3c | |
| 142 143 | 
             
                # @return [String] identifier
         | 
| 143 144 | 
             
                #
         | 
| 144 145 | 
             
                def self.parse_identifier(url)
         | 
| 145 | 
            -
                  if /.+\/(\w+( | 
| 146 | 
            +
                  if /.+\/(\w+(?:[-+][\w.]+)+(?:\/\w+)?)/ =~ url.to_s
         | 
| 146 147 | 
             
                    $1.to_s
         | 
| 147 148 | 
             
                  else url.to_s.split("/").last
         | 
| 148 149 | 
             
                  end
         | 
| @@ -184,10 +185,10 @@ module RelatonW3c | |
| 184 185 | 
             
                      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
         | 
| 185 186 | 
             
                      SELECT ?type
         | 
| 186 187 | 
             
                      WHERE {
         | 
| 187 | 
            -
                        { <#{@sol.link}> rdf:type ?type }
         | 
| 188 | 
            +
                        { <#{@sol.link.to_s.strip}> rdf:type ?type }
         | 
| 188 189 | 
             
                      }
         | 
| 189 190 | 
             
                    ))
         | 
| 190 | 
            -
                    @ | 
| 191 | 
            +
                    @rdf.query(sse).map { |s| s.type.to_s.split("#").last }
         | 
| 191 192 | 
             
                  end
         | 
| 192 193 | 
             
                end
         | 
| 193 194 |  | 
| @@ -220,7 +221,7 @@ module RelatonW3c | |
| 220 221 | 
             
                  if @sol.respond_to?(:link)
         | 
| 221 222 | 
             
                    relations + editor_drafts
         | 
| 222 223 | 
             
                  else
         | 
| 223 | 
            -
                    document_versions.map { |r| create_relation(r.link.to_s, "hasEdition") }
         | 
| 224 | 
            +
                    document_versions.map { |r| create_relation(r.link.to_s.strip, "hasEdition") }
         | 
| 224 225 | 
             
                  end
         | 
| 225 226 | 
             
                end
         | 
| 226 227 |  | 
| @@ -253,9 +254,9 @@ module RelatonW3c | |
| 253 254 | 
             
                    PREFIX : <http://www.w3.org/2001/02pd/rec54#>
         | 
| 254 255 | 
             
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
         | 
| 255 256 | 
             
                    SELECT ?rel
         | 
| 256 | 
            -
                    WHERE { <#{@sol.link}> :ED ?rel . }
         | 
| 257 | 
            +
                    WHERE { <#{@sol.link.to_s.strip}> :ED ?rel . }
         | 
| 257 258 | 
             
                  ))
         | 
| 258 | 
            -
                  @ | 
| 259 | 
            +
                  @rdf.query(sse).map do |s|
         | 
| 259 260 | 
             
                    create_relation(s.rel.to_s, "hasDraft", "Editor's draft")
         | 
| 260 261 | 
             
                  end
         | 
| 261 262 | 
             
                end
         | 
| @@ -273,30 +274,53 @@ module RelatonW3c | |
| 273 274 | 
             
                    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
         | 
| 274 275 | 
             
                    PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
         | 
| 275 276 | 
             
                    SELECT ?rel
         | 
| 276 | 
            -
                    WHERE { <#{@sol.link}> #{predicate} ?rel . }
         | 
| 277 | 
            +
                    WHERE { <#{@sol.link.to_s.strip}> #{predicate} ?rel . }
         | 
| 277 278 | 
             
                  ))
         | 
| 278 | 
            -
                  @ | 
| 279 | 
            +
                  @rdf.query(sse).order_by(:rel)
         | 
| 279 280 | 
             
                end
         | 
| 280 281 |  | 
| 281 282 | 
             
                #
         | 
| 282 283 | 
             
                # Query document versions relations
         | 
| 283 284 | 
             
                #
         | 
| 284 | 
            -
                # @return [RDF::Query:: | 
| 285 | 
            +
                # @return [Array<RDF::Query::Solution>] query results
         | 
| 285 286 | 
             
                #
         | 
| 286 287 | 
             
                def document_versions # rubocop:disable Metrics/MethodLength
         | 
| 287 | 
            -
                  @document_versions ||=  | 
| 288 | 
            +
                  @document_versions ||= version_of.each_with_object([]) do |s, acc|
         | 
| 288 289 | 
             
                    sse = SPARQL.parse(%(
         | 
| 289 290 | 
             
                      PREFIX : <http://www.w3.org/2001/02pd/rec54#>
         | 
| 290 291 | 
             
                      PREFIX dc: <http://purl.org/dc/elements/1.1/>
         | 
| 291 292 | 
             
                      PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
         | 
| 292 293 | 
             
                      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
         | 
| 293 294 | 
             
                      SELECT ?link ?title ?date
         | 
| 294 | 
            -
                      WHERE { | 
| 295 | 
            +
                      WHERE {
         | 
| 296 | 
            +
                        ?link doc:versionOf <#{s.version_of}> ;
         | 
| 297 | 
            +
                        dc:title ?title ;
         | 
| 298 | 
            +
                        dc:date ?date .
         | 
| 299 | 
            +
                      }
         | 
| 295 300 | 
             
                    ))
         | 
| 296 | 
            -
                    @ | 
| 301 | 
            +
                    @rdf.query(sse).each { |r| acc << r }
         | 
| 297 302 | 
             
                  end
         | 
| 298 303 | 
             
                end
         | 
| 299 304 |  | 
| 305 | 
            +
                #
         | 
| 306 | 
            +
                # Query for document versions
         | 
| 307 | 
            +
                #
         | 
| 308 | 
            +
                # @return [RDF::Query::Solutions] query results
         | 
| 309 | 
            +
                #
         | 
| 310 | 
            +
                def version_of
         | 
| 311 | 
            +
                  return [@sol] unless @sol.respond_to?(:link)
         | 
| 312 | 
            +
             | 
| 313 | 
            +
                  sse = SPARQL.parse(%(
         | 
| 314 | 
            +
                    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
         | 
| 315 | 
            +
                    SELECT ?version_of
         | 
| 316 | 
            +
                    WHERE {
         | 
| 317 | 
            +
                      <#{@sol.link.to_s.strip}> doc:versionOf ?version_of .
         | 
| 318 | 
            +
                      FILTER ( isURI(?version_of) && <#{@sol.link.to_s.strip}> != str(?version_of) )
         | 
| 319 | 
            +
                    }
         | 
| 320 | 
            +
                  ))
         | 
| 321 | 
            +
                  @rdf.query(sse)
         | 
| 322 | 
            +
                end
         | 
| 323 | 
            +
             | 
| 300 324 | 
             
                #
         | 
| 301 325 | 
             
                # Create relation
         | 
| 302 326 | 
             
                #
         | 
| @@ -339,10 +363,10 @@ module RelatonW3c | |
| 339 363 | 
             
                    PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
         | 
| 340 364 | 
             
                    SELECT ?full_name
         | 
| 341 365 | 
             
                    WHERE {
         | 
| 342 | 
            -
                      <#{@sol.link}> :editor/contact:fullName ?full_name
         | 
| 366 | 
            +
                      <#{@sol.link.to_s.strip}> :editor/contact:fullName ?full_name
         | 
| 343 367 | 
             
                    }
         | 
| 344 368 | 
             
                  ))
         | 
| 345 | 
            -
                  @ | 
| 369 | 
            +
                  @rdf.query(sse).order_by(:full_name).map do |ed|
         | 
| 346 370 | 
             
                    cn = RelatonBib::LocalizedString.new(ed.full_name.to_s, "en", "Latn")
         | 
| 347 371 | 
             
                    n = RelatonBib::FullName.new completename: cn
         | 
| 348 372 | 
             
                    p = RelatonBib::Person.new name: n
         | 
| @@ -363,12 +387,13 @@ module RelatonW3c | |
| 363 387 | 
             
                    PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
         | 
| 364 388 | 
             
                    SELECT ?home_page
         | 
| 365 389 | 
             
                    WHERE {
         | 
| 366 | 
            -
                      <#{@sol.link}> org:deliveredBy/contact:homePage ?home_page
         | 
| 390 | 
            +
                      <#{@sol.link.to_s.strip}> org:deliveredBy/contact:homePage ?home_page
         | 
| 367 391 | 
             
                    }
         | 
| 368 392 | 
             
                  ))
         | 
| 369 | 
            -
                  res = @ | 
| 393 | 
            +
                  res = @rdf.query(sse).order_by(:home_page)
         | 
| 370 394 | 
             
                  tc = res.each_with_object([]) do |edg, obj|
         | 
| 371 | 
            -
                     | 
| 395 | 
            +
                    group_path = edg.home_page.to_s.sub(/^https?:\/\//, "").sub(/\/$/, "")
         | 
| 396 | 
            +
                    wg = @fetcher.group_names[group_path]
         | 
| 372 397 | 
             
                    if wg
         | 
| 373 398 | 
             
                      rwg = RelatonBib::WorkGroup.new name: wg["name"]
         | 
| 374 399 | 
             
                      obj << RelatonBib::TechnicalCommittee.new(rwg)
         | 
| @@ -9,7 +9,7 @@ module RelatonW3c | |
| 9 9 | 
             
                  @prefix = "W3C"
         | 
| 10 10 | 
             
                  @defaultprefix = %r{^W3C\s}
         | 
| 11 11 | 
             
                  @idtype = "W3C"
         | 
| 12 | 
            -
                  @datasets = %w[w3c-rdf]
         | 
| 12 | 
            +
                  @datasets = %w[w3c-rdf w3c-tr-archive]
         | 
| 13 13 | 
             
                end
         | 
| 14 14 |  | 
| 15 15 | 
             
                # @param code [String]
         | 
| @@ -28,8 +28,8 @@ module RelatonW3c | |
| 28 28 | 
             
                # @option opts [String] :output directory to output documents
         | 
| 29 29 | 
             
                # @option opts [String] :format
         | 
| 30 30 | 
             
                #
         | 
| 31 | 
            -
                def fetch_data( | 
| 32 | 
            -
                  DataFetcher.fetch(**opts)
         | 
| 31 | 
            +
                def fetch_data(source, opts)
         | 
| 32 | 
            +
                  DataFetcher.fetch(source, **opts)
         | 
| 33 33 | 
             
                end
         | 
| 34 34 |  | 
| 35 35 | 
             
                # @param xml [String]
         | 
    
        data/lib/relaton_w3c/version.rb
    CHANGED