relaton-w3c 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 29e39d8865d85f66950a933d07c2606dae58d379635cd3abb62215c3110db90c
4
+ data.tar.gz: 1e170849762c44614f050a9faffa89b15d80b55e1c679d5aadcaf9449a5ec92e
5
+ SHA512:
6
+ metadata.gz: 99c2c28235d23eb86ac7d365fe6cc57394bab3d7712985164bc395355665664dda8f976d93170aec7a2f06965f35aad8b70f5f47e2b783a70cbe1a6f13fcfc1c
7
+ data.tar.gz: 9376b168be74974acbbac842b0a4819ea9f03ae8c07c5ddb1dace03753a4e049b3e0cbb07cdf239b168831787106b7a511106335d20bb4789250b5d93590d55f
@@ -0,0 +1,38 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
+ name: macos
4
+
5
+ on:
6
+ push:
7
+ branches: [ master ]
8
+ pull_request:
9
+ branches: [ '**' ]
10
+
11
+ jobs:
12
+ test-macos:
13
+ name: Test on Ruby ${{ matrix.ruby }} macOS
14
+ runs-on: macos-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ ruby: [ '2.6', '2.5', '2.4' ]
19
+ steps:
20
+ - uses: actions/checkout@master
21
+ - name: Checkout submodules
22
+ shell: bash
23
+ run: |
24
+ auth_header="$(git config --local --get http.https://github.com/.extraheader)"
25
+ git submodule sync --recursive
26
+ git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
27
+ - name: Use Ruby
28
+ uses: actions/setup-ruby@v1
29
+ with:
30
+ ruby-version: ${{ matrix.ruby }}
31
+ architecture: 'x64'
32
+ - name: Update gems
33
+ run: |
34
+ sudo gem install bundler --force
35
+ bundle install --jobs 4 --retry 3
36
+ - name: Run specs
37
+ run: |
38
+ bundle exec rake
@@ -0,0 +1,38 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
+ name: ubuntu
4
+
5
+ on:
6
+ push:
7
+ branches: [ master ]
8
+ pull_request:
9
+ branches: [ '**' ]
10
+
11
+ jobs:
12
+ test-linux:
13
+ name: Test on Ruby ${{ matrix.ruby }} Ubuntu
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ ruby: [ '2.6', '2.5', '2.4' ]
19
+ steps:
20
+ - uses: actions/checkout@master
21
+ - name: Checkout submodules
22
+ shell: bash
23
+ run: |
24
+ auth_header="$(git config --local --get http.https://github.com/.extraheader)"
25
+ git submodule sync --recursive
26
+ git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
27
+ - name: Use Ruby
28
+ uses: actions/setup-ruby@v1
29
+ with:
30
+ ruby-version: ${{ matrix.ruby }}
31
+ architecture: 'x64'
32
+ - name: Update gems
33
+ run: |
34
+ gem install bundler
35
+ bundle install --jobs 4 --retry 3
36
+ - name: Run specs
37
+ run: |
38
+ bundle exec rake
@@ -0,0 +1,41 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
+ name: windows
4
+
5
+ on:
6
+ push:
7
+ branches: [ master ]
8
+ pull_request:
9
+ branches: [ '**' ]
10
+
11
+ jobs:
12
+ test-windows:
13
+ name: Test on Ruby ${{ matrix.ruby }} Windows
14
+ runs-on: windows-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ ruby: [ '2.6', '2.5', '2.4' ]
19
+ steps:
20
+ - uses: actions/checkout@master
21
+ - name: Checkout submodules
22
+ shell: bash
23
+ run: |
24
+ auth_header="$(git config --local --get http.https://github.com/.extraheader)"
25
+ git submodule sync --recursive
26
+ git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
27
+ - name: Use Ruby
28
+ uses: actions/setup-ruby@v1
29
+ with:
30
+ ruby-version: ${{ matrix.ruby }}
31
+ architecture: 'x64'
32
+ - name: Update gems
33
+ shell: pwsh
34
+ run: |
35
+ gem install bundler
36
+ bundle config --local path vendor/bundle
37
+ bundle update
38
+ bundle install --jobs 4 --retry 3
39
+ - name: Run specs
40
+ run: |
41
+ bundle exec rake
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ Gemfile.lock
13
+ .rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-master-ci-rubocop-yml
14
+ .vscode/
@@ -0,0 +1,2 @@
1
+ rubocop:
2
+ config_file: .rubocop.yml
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,10 @@
1
+ # This project follows the Ribose OSS style guide.
2
+ # https://github.com/riboseinc/oss-guides
3
+ # All project-specific additions and overrides should be specified in this file.
4
+
5
+ inherit_from:
6
+ - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
+ AllCops:
8
+ TargetRubyVersion: 2.3
9
+ Rails:
10
+ Enabled: true
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in relaton_w3c.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Andrei Kislichenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,147 @@
1
+ = RelatonW3c
2
+
3
+ RelatonW3c is a Ruby gem that implements the https://github.com/metanorma/metanorma-model-iso#iso-bibliographic-item[IsoBibliographicItem model].
4
+
5
+ You can use it to retrieve metadata of W3C Standards from https://w3.org, and access such metadata through the `W3cBibliographicItem` object.
6
+
7
+ == Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ [source,ruby]
12
+ ----
13
+ gem 'relaton-w3c'
14
+ ----
15
+
16
+ And then execute:
17
+
18
+ $ bundle install
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install relaton-w3c
23
+
24
+ == Usage
25
+
26
+ === Search for a standard using keywords
27
+
28
+ ----
29
+ require 'relaton_w3c'
30
+
31
+ hits = RelatonW3c::W3cBibliography.search("W3C JSON-LD 1.1")
32
+ => <RelatonW3c::HitCollection:0x007f93b5e4ff48 @ref=W3C JSON-LD 1.1 @fetched=false>
33
+
34
+ item = hits[0].fetch
35
+ => #<RelatonW3c::W3cBibliographicItem:0x007f93a58a0670
36
+ ...
37
+ ----
38
+
39
+ === XML serialization
40
+
41
+ ----
42
+ item.to_xml
43
+ => "<bibitem type="standard">
44
+ <fetched>2020-04-07</fetched>
45
+ <title type="main" format="text/plain" language="en" script="Latn">JSON-LD 1.1</title>
46
+ <title format="text/plain" language="en" script="Latn">A JSON-based Serialization for Linked Data</title>
47
+ <uri type="src">https://www.w3.org/TR/2020/CR-json-ld11-20200316/</uri>
48
+ <date type="published">
49
+ <on>2020</on>
50
+ </date>
51
+ ...
52
+ </bibitem>"
53
+ ----
54
+
55
+ With argument `bibdata: true` it outputs XML wrapped in a `bibdata` element and adds the flavor-specific `ext` element.
56
+
57
+ ----
58
+ item.to_xml bibdata: true
59
+ => "<bibdata type="standard">
60
+ <fetched>2020-04-07</fetched>
61
+ <title type="main" format="text/plain" language="en" script="Latn">JSON-LD 1.1</title>
62
+ <title format="text/plain" language="en" script="Latn">A JSON-based Serialization for Linked Data</title>
63
+ <uri type="src">https://www.w3.org/TR/2020/CR-json-ld11-20200316/</uri>
64
+ <date type="published">
65
+ <on>2020</on>
66
+ </date>
67
+ ...
68
+ <ext>
69
+ <doctype>candidateRecommendation</doctype>
70
+ </ext>
71
+ </bibdata>"
72
+ ----
73
+
74
+ === Get document by title
75
+ ----
76
+ RelatonW3c::W3cBibliography.get "W3C JSON-LD 1.1"
77
+ [relaton-w3c] ("W3C JSON-LD 1.1") fetching...
78
+ [relaton-w3c] ("W3C JSON-LD 1.1") found JSON-LD 1.1
79
+ => #<RelatonW3c::W3cBibliographicItem:0x007f93b655bb48
80
+ ...
81
+ ----
82
+
83
+ === Get document by title and type
84
+ ----
85
+ RelatonW3c::W3cBibliography.get "W3C Candidate Recommendation JSON-LD 1.1"
86
+ [relaton-w3c] ("W3C Candidate Recommendation JSON-LD 1.1") fetching...
87
+ [relaton-w3c] ("W3C Candidate Recommendation JSON-LD 1.1") found JSON-LD 1.1
88
+ => #<RelatonW3c::W3cBibliographicItem:0x007f9357199b90
89
+ ...
90
+ ----
91
+
92
+ === Get document by title and short type
93
+ ----
94
+ RelatonW3c::W3cBibliography.get "W3C CR JSON-LD 1.1"
95
+ [relaton-w3c] ("W3C CR JSON-LD 1.1") fetching...
96
+ [relaton-w3c] ("W3C CR JSON-LD 1.1") found JSON-LD 1.1
97
+ => #<RelatonW3c::W3cBibliographicItem:0x007f93b6d37dd0
98
+ ...
99
+ ----
100
+
101
+ === Get document by title, type, and date
102
+ ----
103
+ RelatonW3c::W3cBibliography.get "W3C WD JSON-LD 1.1 2019-10-18"
104
+ [relaton-w3c] ("W3C WD JSON-LD 1.1 2019-10-18") fetching...
105
+ [relaton-w3c] ("W3C WD JSON-LD 1.1 2019-10-18") found JSON-LD 1.1
106
+ => #<RelatonW3c::W3cBibliographicItem:0x007f9345a198b8
107
+ ...
108
+ ----
109
+
110
+ === Create bibliographic item from XML
111
+ ----
112
+ RelatonW3c::XMLParser.from_xml File.read('spec/fixtures/cr_json_ld11.xml')
113
+ => #<RelatonW3c::W3cBibliographicItem:0x007f9381efce98
114
+ ...
115
+ ----
116
+
117
+ === Create bibliographic item from YAML
118
+ ----
119
+ hash = YAML.load_file 'spec/fixtures/cr_json_ld11.yml'
120
+ => {"title"=>
121
+ [{"type"=>"main", "content"=>"JSON-LD 1.1", "language"=>"en", "script"=>"Latn", "format"=>"text/plain"},
122
+ ...
123
+
124
+ bib_hash = RelatonW3c::HashConverter.hash_to_bib hash
125
+ => {:title=>
126
+ [{:type=>"main", :content=>"JSON-LD 1.1", :language=>"en", :script=>"Latn", :format=>"text/plain"},
127
+ ...
128
+
129
+ RelatonW3c::W3cBibliographicItem.new bib_hash
130
+ => #<RelatonW3c::W3cBibliographicItem:0x007f9381ec6a00
131
+ ...
132
+ ----
133
+
134
+ == Development
135
+
136
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
137
+
138
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to https://rubygems.org[rubygems.org].
139
+
140
+ == Contributing
141
+
142
+ Bug reports and pull requests are welcome on GitHub at https://github.com/relaton/relaton-w3c.
143
+
144
+
145
+ == License
146
+
147
+ The gem is available as open source under the terms of the https://opensource.org/licenses/MIT[MIT License].
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "relaton_w3c"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rspec' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rspec-core", "rspec")
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,22 @@
1
+ require "relaton_bib"
2
+ require "relaton_w3c/version"
3
+ require "relaton_w3c/w3c_bibliography"
4
+ require "relaton_w3c/w3c_bibliographic_item"
5
+ require "relaton_w3c/hit_collection"
6
+ require "relaton_w3c/hit"
7
+ require "relaton_w3c/scrapper"
8
+ require "relaton_w3c/xml_parser"
9
+ require "relaton_w3c/hash_converter"
10
+
11
+ module RelatonW3c
12
+ class Error < StandardError; end
13
+
14
+ # Returns hash of XML grammar
15
+ # @return [String]
16
+ def self.grammar_hash
17
+ gem_path = File.expand_path "..", __dir__
18
+ grammars_path = File.join gem_path, "grammars", "*"
19
+ grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
20
+ Digest::MD5.hexdigest grammars
21
+ end
22
+ end
@@ -0,0 +1,4 @@
1
+ module RelatonW3c
2
+ class HashConverter < RelatonBib::HashConverter
3
+ end
4
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RelatonW3c
4
+ # Hit.
5
+ class Hit < RelatonBib::Hit
6
+ #
7
+ # Parse page.
8
+ #
9
+ # @param lang [String, NilClass]
10
+ # @return [RelatonW3c::W3cBibliographicItem]
11
+ def fetch(_lang = nil)
12
+ @fetch ||= Scrapper.parse_page hit
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,164 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RelatonW3c
4
+ # Page of hit collection.
5
+ class HitCollection < RelatonBib::HitCollection
6
+ TYPES = {
7
+ "CR" => "Candidate Recommendation",
8
+ "NOTE" => "Group Note",
9
+ "PER" => "Proposed Edited Recommendation",
10
+ "PR" => "Proposed Recommendation",
11
+ "REC" => "Recommendation",
12
+ "RET" => "Retired",
13
+ "WD" => "Working Draft",
14
+ }.freeze
15
+ DOMAIN = "https://www.w3.org"
16
+ DATADIR = File.expand_path(".relaton/w3c", Dir.home).freeze
17
+ DATAFILE = File.expand_path("bibliograhy.yml", DATADIR).freeze
18
+
19
+ # @param ref [String] reference to search
20
+ def initialize(ref)
21
+ %r{
22
+ ^(W3C\s)?
23
+ (?<type>(CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
24
+ Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
25
+ Recommendation|Retired|Working\sDraft))? # type
26
+ \s?
27
+ (?<title_date>.+) # title_date
28
+ }x =~ ref
29
+ super
30
+ @array = from_yaml title_date, type
31
+ end
32
+
33
+ private
34
+
35
+ #
36
+ # Fetch data from yaml
37
+ #
38
+ # @param title_date [String]
39
+ # @param type [String]
40
+ # @return [Array<Hash>]
41
+ def from_yaml(title_date, type)
42
+ /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
43
+ title ||= title_date
44
+ result = data.select do |hit|
45
+ hit["title"] == title && type_date_filter(hit, type, date)
46
+ end
47
+ result.map { |h| Hit.new(h, self) }
48
+ end
49
+
50
+ # @param hit [Hash]
51
+ # @param type [String]
52
+ # @param date [String]
53
+ # @return [TrueClass, FalseClass]
54
+ def type_date_filter(hit, type, date)
55
+ if type && hit["type"] != short_type(type) || date && hit["date"] != date
56
+ history = get_history hit, type, date
57
+ return false unless history.any?
58
+
59
+ hit["type"] = short_type type
60
+ hit["datepub"] = history.first.at("td").text
61
+ hit["link"] = history.first.at("a")[:href]
62
+ end
63
+ true
64
+ end
65
+
66
+ # @param hit [Hash]
67
+ # @param type [String]
68
+ # @param date [String]
69
+ # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
70
+ def get_history(hit, type, date)
71
+ resp = Net::HTTP.get URI.parse(HitCollection::DOMAIN + hit["history"])
72
+ history_doc = Nokogiri::HTML resp
73
+ history = history_doc.xpath(
74
+ "//table//a[contains(.,'#{long_type(type)}')]/../..",
75
+ )
76
+ return filter_history_by_date(history, history_doc, type, date) if date
77
+
78
+ history
79
+ end
80
+
81
+ # @param history [Nokogiri::XML::NodeSet]
82
+ # @param history_doc [Nokogiri::HTML::NodeSet]
83
+ # @param type [String]
84
+ # @param date [String]
85
+ # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
86
+ def filter_history_by_date(history, history_doc, type, date)
87
+ if type
88
+ history.select do |h|
89
+ h.at("td[@class='table_datecol']").text == date
90
+ end
91
+ else
92
+ history_doc.xpath(
93
+ "//table//td[@class='table_datecol'][.='#{date}']/..",
94
+ )
95
+ end
96
+ end
97
+
98
+ #
99
+ # Convetr long type name to short
100
+ #
101
+ # @param type [String]
102
+ # @return [String]
103
+ def short_type(type)
104
+ tp = TYPES.select { |k,v| v == type }.keys
105
+ tp.first || type
106
+ end
107
+
108
+ #
109
+ # Convert short type name to long
110
+ #
111
+ # @param type [String] short code (a TYPES key) or any other string
112
+ # @return [String] the TYPES value for the code, or the argument unchanged
113
+ def long_type(type)
114
+ TYPES[type] || type
115
+ end
116
+
117
+ #
118
+ # Returns the document index, refetching it when the cache file is missing or older than today
119
+ # NOTE(review): File.read raises if DATAFILE is still absent after a failed fetch -- confirm intended
120
+ # @return [Array<Hash>] index entries as built by fetch_hit
121
+ def data
122
+ FileUtils.mkdir_p DATADIR
123
+ ctime = File.ctime DATAFILE if File.exist? DATAFILE
124
+ fetch_data if !ctime || ctime.to_date < Date.today
125
+ @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
126
+ end
127
+
128
+ #
129
+ # Fetch the document index from the server and save it to the cache file.
130
+ #
131
+ def fetch_data
132
+ resp = Net::HTTP.get_response URI.parse(DOMAIN + "/TR/")
133
+ # leave @data and the cache file untouched unless the server answered 200
134
+ return unless resp.code == "200"
135
+
136
+ doc = Nokogiri::HTML resp.body
137
+ @data = doc.xpath("//ul[@id='container']/li").map do |h_el|
138
+ link = h_el.at("h2/a")
139
+ pubdetails = h_el.at("p[@class='pubdetails']")
140
+ fetch_hit h_el, link, pubdetails
141
+ end
142
+ File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
143
+ end
144
+
145
+ # @param h_el [Nokogiri::XML::Element]
146
+ # @param link [Nokogiri::XML::Element]
147
+ # @param pubdetails [Nokogiri::XML::Element]
148
+ def fetch_hit(h_el, link, pubdetails)
149
+ datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
150
+ editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
151
+ keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
152
+ {
153
+ "title" => link.text.gsub("\u00a0", " "),
154
+ "link" => link[:href],
155
+ "type" => h_el.at("div").text.upcase,
156
+ "workgroup" => h_el.xpath("p[@class='deliverer']").map(&:text),
157
+ "datepub" => datepub,
158
+ "history" => pubdetails.at("a[text()='History']")[:href],
159
+ "editor" => editor,
160
+ "keyword" => keyword,
161
+ }
162
+ end
163
+ end
164
+ end
@@ -0,0 +1,41 @@
1
+ require "relaton/processor"
2
+
3
+ module RelatonW3c
4
+ class Processor < Relaton::Processor
5
+ attr_reader :idtype
6
+
7
+ def initialize
8
+ @short = :relaton_w3c
9
+ @prefix = "W3C"
10
+ @defaultprefix = %r{^W3C\s}
11
+ @idtype = "W3C"
12
+ end
13
+
14
+ # @param code [String]
15
+ # @param date [String, NilClass] year
16
+ # @param opts [Hash]
17
+ # @return [RelatonW3c::W3cBibliographicItem]
18
+ def get(code, date, opts)
19
+ ::RelatonW3c::W3cBibliography.get(code, date, opts)
20
+ end
21
+
22
+ # @param xml [String]
23
+ # @return [RelatonW3c::W3cBibliographicItem, NilClass]
24
+ def from_xml(xml)
25
+ ::RelatonW3c::XMLParser.from_xml xml
26
+ end
27
+
28
+ # @param hash [Hash]
29
+ # @return [RelatonW3c::W3cBibliographicItem]
30
+ def hash_to_bib(hash)
31
+ item_hash = ::RelatonW3c::HashConverter.hash_to_bib(hash)
32
+ ::RelatonW3c::W3cBibliographicItem.new item_hash
33
+ end
34
+
35
+ # Returns hash of XML grammar
36
+ # @return [String]
37
+ def grammar_hash
38
+ @grammar_hash ||= ::RelatonW3c.grammar_hash
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,183 @@
1
+ module RelatonW3c
2
+ class Scrapper
3
+ class << self
4
+ DOCTYPES = {
5
+ "CR" => "candidateRecommendation",
6
+ "NOTE" => "groupNote",
7
+ "PER" => "proposedEditedRecommendation",
8
+ "PR" => "proposedRecommendation",
9
+ "REC" => "recommendation",
10
+ "RET" => "retired",
11
+ "WD" => "workingDraft",
12
+ }.freeze
13
+
14
+ # @param hit [Hash]
15
+ # @return [RelatonW3c::W3cBibliographicItem]
16
+ def parse_page(hit)
17
+ resp = Net::HTTP.get_response URI.parse(hit["link"])
18
+ doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
19
+ W3cBibliographicItem.new(
20
+ type: "standard",
21
+ fetched: Date.today.to_s,
22
+ language: ["en"],
23
+ script: ["Latn"],
24
+ title: fetch_title(hit, doc),
25
+ abstract: fetch_abstract(doc),
26
+ link: fetch_link(hit),
27
+ date: fetch_date(hit, doc),
28
+ doctype: fetch_doctype(hit, doc),
29
+ contributor: fetch_contributor(hit, doc),
30
+ relation: fetch_relation(doc),
31
+ keyword: hit["keyword"],
32
+ )
33
+ end
34
+
35
+ private
36
+
37
+ # @param hit [Hash]
38
+ # @param doc [Nokogiri::HTML::Document]
39
+ # @return [Array<RelatonBib::TypedTitleString>]
40
+ def fetch_title(hit, doc)
41
+ titles = []
42
+ if doc
43
+ title = doc.at("//h1[@id='title']").text
44
+ titles << { content: title, type: "main" }
45
+ subtitle = doc.at("//h2[@id='subtitle']").text
46
+ titles << { content: subtitle, tipe: "subtitle" }
47
+ else
48
+ titles << { content: hit["title"], type: "main" }
49
+ end
50
+ titles.map do |t|
51
+ title = RelatonBib::FormattedString.new(
52
+ content: t[:content], language: "en", script: "Latn",
53
+ )
54
+ RelatonBib::TypedTitleString.new(type: t[:type], title: title)
55
+ end
56
+ end
57
+
58
+ # @param doc [Nokogiri::HTML::Document, NilClass]
59
+ # @return [Array<RelatonBib::FormattedString>]
60
+ def fetch_abstract(doc)
61
+ return [] unless doc
62
+
63
+ content = doc.at("//h2[.='Abstract']/following-sibling::p").text
64
+ [RelatonBib::FormattedString.new(content: content, language: "en",
65
+ script: "Latn")]
66
+ end
67
+
68
+ # @param hit [Hash]
69
+ # @return [Array<RelatonBib::TypedUri>]
70
+ def fetch_link(hit)
71
+ [RelatonBib::TypedUri.new(type: "src", content: hit["link"])]
72
+ end
73
+
74
+ # @param hit [Hash]
75
+ # @param doc [Nokogiri::HTML::Document, NilClass]
76
+ # @return [Array<RelatonBib::BibliographicDate>]
77
+ def fetch_date(hit, doc)
78
+ on = hit["datepub"] || doc && doc.at("//h2/time[@datetime]")[:datetime]
79
+ [RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
80
+ end
81
+
82
+ # @param hit [Hash]
83
+ # @param doc [Nokogiri::HTML::Document, NilClass]
84
+ # @return [String]
85
+ def fetch_doctype(hit, doc)
86
+ if hit["type"]
87
+ DOCTYPES[hit["type"]]
88
+ elsif doc
89
+ type = HitCollection::TYPES.detect do |_k, v|
90
+ doc.at("//h2[contains(., '#{v}')]/time[@datetime]")
91
+ end
92
+ DOCTYPES[type&.first]
93
+ end
94
+ end
95
+
96
+ # @param hit [Hash]
97
+ # @param doc [Nokogiri::HTML::Document, NilClass]
98
+ # @return [Array<RelatonBib::ContributionInfo>]
99
+ def fetch_contributor(hit, doc)
100
+ if doc
101
+ editors = find_contribs(doc, "Editors").map do |ed|
102
+ parse_contrib ed, "editor"
103
+ end
104
+ contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, athr|
105
+ ed = mem.detect { |e| e[:id] && e[:id] == athr["data-editor-id"] }
106
+ if ed
107
+ ed[:role] << { type: "author" }
108
+ else
109
+ mem << parse_contrib(athr, "author")
110
+ end
111
+ mem
112
+ end
113
+ contribs.map { |c| contrib_info c }
114
+ else
115
+ hit["editor"].map do |ed|
116
+ contrib_info name: ed, role: [{ type: "editor" }]
117
+ end
118
+ end
119
+ end
120
+
121
+ # @param doc [Nokogiri::HTML::Document]
122
+ # @param type [String]
123
+ # @return [Array<Nokogiri::XML::Element>]
124
+ def find_contribs(doc, type)
125
+ doc.xpath("//dt[contains(.,'#{type}')]/following-sibling::dd"\
126
+ "[preceding-sibling::dt[1][contains(.,'#{type}')]]")
127
+ end
128
+
129
+ # @param element [Nokogiri::XML::Element]
130
+ # @param type [String]
131
+ # @return [Hash]
132
+ def parse_contrib(element, type)
133
+ p = element.at("a")
134
+ contrib = {
135
+ name: p.text,
136
+ url: p[:href],
137
+ role: [{ type: type }],
138
+ id: element["data-editor-id"],
139
+ }
140
+ org = element.at("a[2]")
141
+ contrib[:org] = { name: org.text, url: org[:href] } if org
142
+ contrib
143
+ end
144
+
145
+ # @param name [String]
146
+ # @param url [String, NilClass]
147
+ # @param role [Array<Hash>]
148
+ # @param org [Hash]
149
+ # @return [RelatonBib::ContributionInfo]
150
+ def contrib_info(**args)
151
+ completename = RelatonBib::LocalizedString.new(args[:name])
152
+ name = RelatonBib::FullName.new completename: completename
153
+ af = []
154
+ if args[:org]
155
+ org = RelatonBib::Organization.new args[:org]
156
+ af << RelatonBib::Affiliation.new(organization: org)
157
+ end
158
+ en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
159
+ RelatonBib::ContributionInfo.new entity: en, role: args[:role]
160
+ end
161
+
162
+ # @param doc [Nokogiri::HTML::Document]
163
+ # @return [Array<RelatonBib::DocumentRelation>]
164
+ def fetch_relation(doc)
165
+ return [] unless doc && (link = recommendation_link(doc))
166
+
167
+ hit = { "link" => link }
168
+ item = parse_page hit
169
+ [RelatonBib::DocumentRelation.new(type: "obsoleted", bibitem: item)]
170
+ end
171
+
172
+ # @param doc [Nokogiri::HTML::Document]
173
+ # @return [String, NilClass]
174
+ def recommendation_link(doc)
175
+ recom = doc.at("//dt[.='Latest Recommendation:']",
176
+ "//dt[.='Previous Recommendation:']")
177
+ return unless recom
178
+
179
+ recom.at("./following-sibling::dd/a")[:href]
180
+ end
181
+ end
182
+ end
183
+ end
@@ -0,0 +1,3 @@
1
+ module RelatonW3c
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,39 @@
1
+ module RelatonW3c
2
+ class W3cBibliographicItem < RelatonBib::BibliographicItem
3
+ TYPES = %w[
4
+ candidateRecommendation groupNote proposedEditedRecommendation
5
+ proposedRecommendation recommendation retired workingDraft
6
+ ].freeze
7
+
8
+ attr_reader :doctype
9
+
10
+ # @param doctype [String]
11
+ def initialize(**args)
12
+ if args[:doctype] && !TYPES.include?(args[:doctype])
13
+ warn "[relaton-w3c] invalid document type: #{args[:doctype]}"
14
+ end
15
+ @doctype = args.delete :doctype
16
+ super **args
17
+ end
18
+
19
+ # @param builder [Nokogiri::XML::Builder, NilClass]
20
+ # @param opts [Hash]
21
+ # @option opts [TrueClass, FalseClass, NilClass] bibdata
22
+ def to_xml(builder = nil, **opts)
23
+ super builder, **opts do |b|
24
+ if opts[:bibdata] && doctype
25
+ b.ext do |e|
26
+ e.doctype doctype if doctype
27
+ end
28
+ end
29
+ end
30
+ end
31
+
32
+ # @return [Hash]
33
+ def to_hash
34
+ hash = super
35
+ hash["doctype"] = doctype if doctype
36
+ hash
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RelatonW3c
4
+ # Class methods for search W3C standards.
5
+ class W3cBibliography
6
+ class << self
7
+ # @param text [String]
8
+ # @return [RelatonW3c::HitCollection]
9
+ def search(text)
10
+ HitCollection.new text
11
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
12
+ EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
13
+ Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
14
+ raise RelatonBib::RequestError,
15
+ "Could not access #{HitCollection::DOMAIN}"
16
+ end
17
+
18
+ # @param ref [String] the W3C standard Code to look up
19
+ # @param year [String, NilClass] not used
20
+ # @param opts [Hash] options
21
+ # @return [RelatonW3c::W3cBibliographicItem]
22
+ def get(ref, _year = nil, _opts = {})
23
+ warn "[relaton-w3c] (\"#{ref}\") fetching..."
24
+ result = search(ref)
25
+ return unless result.any?
26
+
27
+ ret = result.first.fetch
28
+ warn "[relaton-w3c] (\"#{ref}\") found #{ret.title.first.title.content}"
29
+ ret
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,32 @@
1
+ module RelatonW3c
2
+ class XMLParser < RelatonBib::XMLParser
3
+ class << self
4
+ # @param xml [String]
5
+ # @return [RelatonW3c::W3cBibliographicItem, NilClass]
6
+ def from_xml(xml)
7
+ doc = Nokogiri::XML xml
8
+ doc.remove_namespaces!
9
+ item = doc.at("/bibitem|/bibdata")
10
+ if item
11
+ W3cBibliographicItem.new(item_data(item))
12
+ else
13
+ warn "[relaton-w3c] can't find bibitem or bibdata element in the XML"
14
+ end
15
+ end
16
+
17
+ private
18
+
19
+ # Override RelatonBib::XMLParser.item_data method.
20
+ # @param item [Nokogiri::XML::Element]
21
+ # @return [Hash]
22
+ def item_data(item)
23
+ data = super
24
+ ext = item.at "./ext"
25
+ return data unless ext
26
+
27
+ data[:doctype] = ext.at("./doctype")&.text
28
+ data
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,43 @@
1
+ lib = File.expand_path("lib", __dir__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require "relaton_w3c/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "relaton-w3c"
7
+ spec.version = RelatonW3c::VERSION
8
+ spec.authors = ["Ribose Inc."]
9
+ spec.email = ["open.source@ribose.com"]
10
+
11
+ spec.summary = "RelatonIso: retrieve W3C Standards for bibliographic "\
12
+ "use using the IsoBibliographicItem model"
13
+ spec.description = "RelatonIso: retrieve W3C Standards for bibliographic "\
14
+ "use using the IsoBibliographicItem model"
15
+ spec.homepage = "https://github.com/relaton/relaton-wc3"
16
+ spec.license = "BSD-2-Clause"
17
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
18
+
19
+ # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
+
21
+ spec.metadata["homepage_uri"] = spec.homepage
22
+ # spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
23
+ # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
24
+
25
+ # Specify which files should be added to the gem when it is released.
26
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
27
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
28
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
29
+ end
30
+ spec.bindir = "exe"
31
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
+ spec.require_paths = ["lib"]
33
+
34
+ spec.add_development_dependency "debase"
35
+ spec.add_development_dependency "equivalent-xml", "~> 0.6"
36
+ spec.add_development_dependency "ruby-debug-ide"
37
+ spec.add_development_dependency "ruby-jing"
38
+ spec.add_development_dependency "simplecov"
39
+ spec.add_development_dependency "vcr"
40
+ spec.add_development_dependency "webmock"
41
+
42
+ spec.add_dependency "relaton-bib", ">= 0.9.2"
43
+ end
metadata ADDED
@@ -0,0 +1,183 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: relaton-w3c
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-04-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: debase
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: equivalent-xml
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-debug-ide
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: ruby-jing
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: relaton-bib
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: 0.9.2
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: 0.9.2
125
+ description: 'RelatonW3c: retrieve W3C Standards for bibliographic use using the BibliographicItem
126
+ model'
127
+ email:
128
+ - open.source@ribose.com
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".github/workflows/macos.yml"
134
+ - ".github/workflows/ubuntu.yml"
135
+ - ".github/workflows/windows.yml"
136
+ - ".gitignore"
137
+ - ".hound.yml"
138
+ - ".rspec"
139
+ - ".rubocop.yml"
140
+ - Gemfile
141
+ - LICENSE.txt
142
+ - README.adoc
143
+ - Rakefile
144
+ - bin/console
145
+ - bin/rspec
146
+ - bin/setup
147
+ - lib/relaton_w3c.rb
148
+ - lib/relaton_w3c/hash_converter.rb
149
+ - lib/relaton_w3c/hit.rb
150
+ - lib/relaton_w3c/hit_collection.rb
151
+ - lib/relaton_w3c/processor.rb
152
+ - lib/relaton_w3c/scrapper.rb
153
+ - lib/relaton_w3c/version.rb
154
+ - lib/relaton_w3c/w3c_bibliographic_item.rb
155
+ - lib/relaton_w3c/w3c_bibliography.rb
156
+ - lib/relaton_w3c/xml_parser.rb
157
+ - relaton_w3c.gemspec
158
+ homepage: https://github.com/relaton/relaton-w3c
159
+ licenses:
160
+ - BSD-2-Clause
161
+ metadata:
162
+ homepage_uri: https://github.com/relaton/relaton-w3c
163
+ post_install_message:
164
+ rdoc_options: []
165
+ require_paths:
166
+ - lib
167
+ required_ruby_version: !ruby/object:Gem::Requirement
168
+ requirements:
169
+ - - ">="
170
+ - !ruby/object:Gem::Version
171
+ version: 2.4.0
172
+ required_rubygems_version: !ruby/object:Gem::Requirement
173
+ requirements:
174
+ - - ">="
175
+ - !ruby/object:Gem::Version
176
+ version: '0'
177
+ requirements: []
178
+ rubygems_version: 3.0.6
179
+ signing_key:
180
+ specification_version: 4
181
+ summary: 'RelatonW3c: retrieve W3C Standards for bibliographic use using the BibliographicItem
182
+ model'
183
+ test_files: []