relaton-w3c 1.0.1 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ubuntu.yml +1 -0
- data/.rubocop.yml +2 -2
- data/grammars/biblio.rng +36 -6
- data/lib/relaton_w3c/hash_converter.rb +7 -0
- data/lib/relaton_w3c/hit_collection.rb +3 -0
- data/lib/relaton_w3c/scrapper.rb +39 -16
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliographic_item.rb +0 -23
- data/lib/relaton_w3c/w3c_bibliography.rb +3 -1
- data/lib/relaton_w3c/xml_parser.rb +6 -13
- data/relaton_w3c.gemspec +3 -3
- metadata +9 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8591c4405cfb771a9cf41e2c74a7d36f21f66e02776389efdf670e639818524a
|
4
|
+
data.tar.gz: be0e0ea3effefd0ab0a48d72cb42eb5da6c62537deaa6dca876d3ee000cc12c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de33be8c4b6a7d9b3ee165454f6988044f5e614239d35e5c9bfa4b8dfc724430f6366dfb90d9bd1c648c78ef1ef7a3701e4ac4461eff9d82be2e2502e588f8bf
|
7
|
+
data.tar.gz: fa7c426893f99844fa702dcbd349bcc141800994e167da25657ea1d5143de442aa5be4be03242221635edf9836d4c1c678e4a522f52c964ddd50f851fdda8d4a
|
data/.rubocop.yml
CHANGED
data/grammars/biblio.rng
CHANGED
@@ -88,7 +88,7 @@
|
|
88
88
|
<text/>
|
89
89
|
</element>
|
90
90
|
</define>
|
91
|
-
<define name="LocalizedString">
|
91
|
+
<define name="LocalizedString1">
|
92
92
|
<optional>
|
93
93
|
<!-- multiple languages and scripts possible: comma delimit them if so -->
|
94
94
|
<attribute name="language"/>
|
@@ -98,6 +98,16 @@
|
|
98
98
|
</optional>
|
99
99
|
<text/>
|
100
100
|
</define>
|
101
|
+
<define name="LocalizedString">
|
102
|
+
<choice>
|
103
|
+
<ref name="LocalizedString1"/>
|
104
|
+
<oneOrMore>
|
105
|
+
<element name="variant">
|
106
|
+
<ref name="LocalizedString1"/>
|
107
|
+
</element>
|
108
|
+
</oneOrMore>
|
109
|
+
</choice>
|
110
|
+
</define>
|
101
111
|
<!--
|
102
112
|
Unlike UML, change type to format: type is overloaded
|
103
113
|
Would be need if plain were default value and could omit the attribute
|
@@ -121,7 +131,7 @@
|
|
121
131
|
</optional>
|
122
132
|
<ref name="LocalizedStringOrXsAny"/>
|
123
133
|
</define>
|
124
|
-
<define name="LocalizedStringOrXsAny">
|
134
|
+
<define name="LocalizedStringOrXsAny1">
|
125
135
|
<optional>
|
126
136
|
<!-- multiple languages and scripts possible: comma delimit them if so -->
|
127
137
|
<attribute name="language"/>
|
@@ -136,6 +146,16 @@
|
|
136
146
|
</choice>
|
137
147
|
</oneOrMore>
|
138
148
|
</define>
|
149
|
+
<define name="LocalizedStringOrXsAny">
|
150
|
+
<choice>
|
151
|
+
<ref name="LocalizedStringOrXsAny1"/>
|
152
|
+
<oneOrMore>
|
153
|
+
<element name="variant">
|
154
|
+
<ref name="LocalizedStringOrXsAny1"/>
|
155
|
+
</element>
|
156
|
+
</oneOrMore>
|
157
|
+
</choice>
|
158
|
+
</define>
|
139
159
|
<define name="contributor">
|
140
160
|
<element name="contributor">
|
141
161
|
<zeroOrMore>
|
@@ -512,7 +532,7 @@
|
|
512
532
|
</define>
|
513
533
|
<define name="LocalityType">
|
514
534
|
<data type="string">
|
515
|
-
<param name="pattern">section|clause|part|paragraph|chapter|page|whole|table|annex|figure|note|list|example|volume|issue|time|locality:[a-zA-Z0-9_]+</param>
|
535
|
+
<param name="pattern">section|clause|part|paragraph|chapter|page|whole|table|annex|figure|note|list|example|volume|issue|time|anchor|locality:[a-zA-Z0-9_]+</param>
|
516
536
|
</data>
|
517
537
|
</define>
|
518
538
|
<define name="referenceFrom">
|
@@ -641,9 +661,9 @@
|
|
641
661
|
<optional>
|
642
662
|
<ref name="status"/>
|
643
663
|
</optional>
|
644
|
-
<optional>
|
664
|
+
<zeroOrMore>
|
645
665
|
<ref name="copyright"/>
|
646
|
-
</optional>
|
666
|
+
</zeroOrMore>
|
647
667
|
<zeroOrMore>
|
648
668
|
<ref name="docrelation"/>
|
649
669
|
</zeroOrMore>
|
@@ -1001,7 +1021,17 @@
|
|
1001
1021
|
<optional>
|
1002
1022
|
<ref name="to"/>
|
1003
1023
|
</optional>
|
1004
|
-
<ref name="owner"/>
|
1024
|
+
<oneOrMore>
|
1025
|
+
<ref name="owner"/>
|
1026
|
+
</oneOrMore>
|
1027
|
+
<optional>
|
1028
|
+
<ref name="copyright_scope"/>
|
1029
|
+
</optional>
|
1030
|
+
</element>
|
1031
|
+
</define>
|
1032
|
+
<define name="copyright_scope">
|
1033
|
+
<element name="scope">
|
1034
|
+
<text/>
|
1005
1035
|
</element>
|
1006
1036
|
</define>
|
1007
1037
|
<define name="from">
|
data/lib/relaton_w3c/scrapper.rb
CHANGED
@@ -13,7 +13,7 @@ module RelatonW3c
|
|
13
13
|
|
14
14
|
# @param hit [Hash]
|
15
15
|
# @return [RelatonW3c::W3cBibliographicItem]
|
16
|
-
def parse_page(hit)
|
16
|
+
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
17
17
|
resp = Net::HTTP.get_response URI.parse(hit["link"])
|
18
18
|
doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
|
19
19
|
W3cBibliographicItem.new(
|
@@ -28,7 +28,7 @@ module RelatonW3c
|
|
28
28
|
doctype: fetch_doctype(hit, doc),
|
29
29
|
contributor: fetch_contributor(hit, doc),
|
30
30
|
relation: fetch_relation(doc),
|
31
|
-
keyword: hit["keyword"]
|
31
|
+
keyword: hit["keyword"]
|
32
32
|
)
|
33
33
|
end
|
34
34
|
|
@@ -37,19 +37,21 @@ module RelatonW3c
|
|
37
37
|
# @param hit [Hash]
|
38
38
|
# @param doc [Nokogiri::HTML::Document]
|
39
39
|
# @return [Array<RelatonBib::TypedTitleString>]
|
40
|
-
def fetch_title(hit, doc)
|
40
|
+
def fetch_title(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
41
41
|
titles = []
|
42
42
|
if doc
|
43
|
-
title = doc.at("//h1[@id
|
44
|
-
titles << { content: title, type: "main" }
|
45
|
-
subtitle = doc.at(
|
46
|
-
|
47
|
-
|
43
|
+
title = doc.at("//h1[contains(@id, 'title')]")&.text
|
44
|
+
titles << { content: title, type: "main" } if title
|
45
|
+
subtitle = doc.at(
|
46
|
+
"//h2[@id='subtitle']|//p[contains(@class, 'subline')]"
|
47
|
+
)&.text
|
48
|
+
titles << { content: subtitle, tipe: "subtitle" } if subtitle
|
49
|
+
elsif hit["title"]
|
48
50
|
titles << { content: hit["title"], type: "main" }
|
49
51
|
end
|
50
52
|
titles.map do |t|
|
51
53
|
title = RelatonBib::FormattedString.new(
|
52
|
-
content: t[:content], language: "en", script: "Latn"
|
54
|
+
content: t[:content], language: "en", script: "Latn"
|
53
55
|
)
|
54
56
|
RelatonBib::TypedTitleString.new(type: t[:type], title: title)
|
55
57
|
end
|
@@ -75,10 +77,27 @@ module RelatonW3c
|
|
75
77
|
# @param doc [Nokogiri::HTML::Document, NilClass]
|
76
78
|
# @return [Array<RelatonBib::BibliographicDate>]
|
77
79
|
def fetch_date(hit, doc)
|
78
|
-
on = hit["datepub"] || doc
|
80
|
+
on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
|
81
|
+
on ||= fetch_date1(doc) || fetch_date2(doc)
|
79
82
|
[RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
|
80
83
|
end
|
81
84
|
|
85
|
+
# @param doc [Nokogiri::HTML::Document, NilClass]
|
86
|
+
# @return [String]
|
87
|
+
def fetch_date1(doc)
|
88
|
+
d = doc&.at("//h2[@property='dc:issued']")&.attr(:content)
|
89
|
+
d&.match(/\d{4}-\d{2}-\d{2}/)&.to_s
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param doc [Nokogiri::HTML::Document, NilClass]
|
93
|
+
# @return [String]
|
94
|
+
def fetch_date2(doc)
|
95
|
+
d = doc&.at("//h2[contains(@id, 'w3c-recommendation')]")
|
96
|
+
return unless d
|
97
|
+
|
98
|
+
Date.parse(d.attr(:id.match(/\d{2}-\w+-\d{4}/).to_s)).to_s
|
99
|
+
end
|
100
|
+
|
82
101
|
# @param hit [Hash]
|
83
102
|
# @param doc [Nokogiri::HTML::Document, NilClass]
|
84
103
|
# @return [String]
|
@@ -96,17 +115,19 @@ module RelatonW3c
|
|
96
115
|
# @param hit [Hash]
|
97
116
|
# @param doc [Nokogiri::HTML::Document, NilClass]
|
98
117
|
# @return [Array<RelatonBib::ContributionInfo>]
|
99
|
-
def fetch_contributor(hit, doc)
|
118
|
+
def fetch_contributor(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
100
119
|
if doc
|
101
|
-
editors = find_contribs(doc, "Editors").
|
102
|
-
parse_contrib ed, "editor"
|
120
|
+
editors = find_contribs(doc, "Editors").reduce([]) do |mem, ed|
|
121
|
+
c = parse_contrib ed, "editor"
|
122
|
+
mem << c if c
|
123
|
+
mem
|
103
124
|
end
|
104
|
-
contribs = find_contribs(doc, "Authors").reduce(editors) do |mem,
|
105
|
-
ed = mem.detect { |e| e[:id] && e[:id] ==
|
125
|
+
contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, ath|
|
126
|
+
ed = mem.detect { |e| e[:id] && e[:id] == ath["data-editor-id"] }
|
106
127
|
if ed
|
107
128
|
ed[:role] << { type: "author" }
|
108
129
|
else
|
109
|
-
mem << parse_contrib(
|
130
|
+
mem << parse_contrib(ath, "author")
|
110
131
|
end
|
111
132
|
mem
|
112
133
|
end
|
@@ -131,6 +152,8 @@ module RelatonW3c
|
|
131
152
|
# @return [Hash]
|
132
153
|
def parse_contrib(element, type)
|
133
154
|
p = element.at("a")
|
155
|
+
return unless p
|
156
|
+
|
134
157
|
contrib = {
|
135
158
|
name: p.text,
|
136
159
|
url: p[:href],
|
data/lib/relaton_w3c/version.rb
CHANGED
data/lib/relaton_w3c/w3c_bibliographic_item.rb
CHANGED
@@ -5,35 +5,12 @@ module RelatonW3c
|
|
5
5
|
proposedRecommendation recommendation retired workingDraft
|
6
6
|
].freeze
|
7
7
|
|
8
|
-
attr_reader :doctype
|
9
|
-
|
10
8
|
# @param doctype [String]
|
11
9
|
def initialize(**args)
|
12
10
|
if args[:doctype] && !TYPES.include?(args[:doctype])
|
13
11
|
warn "[relaton-w3c] invalid document type: #{args[:doctype]}"
|
14
12
|
end
|
15
|
-
@doctype = args.delete :doctype
|
16
13
|
super **args
|
17
14
|
end
|
18
|
-
|
19
|
-
# @param builder [Nokogiri::XML::Builder, NilClass]
|
20
|
-
# @param opts [Hash]
|
21
|
-
# @option opts [TrueClass, FalseClass, NilClass] bibdata
|
22
|
-
def to_xml(builder = nil, **opts)
|
23
|
-
super builder, **opts do |b|
|
24
|
-
if opts[:bibdata] && doctype
|
25
|
-
b.ext do |e|
|
26
|
-
e.doctype doctype if doctype
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
# @return [Hash]
|
33
|
-
def to_hash
|
34
|
-
hash = super
|
35
|
-
hash["doctype"] = doctype if doctype
|
36
|
-
hash
|
37
|
-
end
|
38
15
|
end
|
39
16
|
end
|
data/lib/relaton_w3c/w3c_bibliography.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "net/http"
|
4
|
+
|
3
5
|
module RelatonW3c
|
4
6
|
# Class methods for search W3C standards.
|
5
7
|
class W3cBibliography
|
@@ -10,7 +12,7 @@ module RelatonW3c
|
|
10
12
|
HitCollection.new text
|
11
13
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
12
14
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
13
|
-
Net::ProtocolError,
|
15
|
+
Net::ProtocolError, Errno::ETIMEDOUT
|
14
16
|
raise RelatonBib::RequestError,
|
15
17
|
"Could not access #{HitCollection::DOMAIN}"
|
16
18
|
end
|
data/lib/relaton_w3c/xml_parser.rb
CHANGED
@@ -1,19 +1,6 @@
|
|
1
1
|
module RelatonW3c
|
2
2
|
class XMLParser < RelatonBib::XMLParser
|
3
3
|
class << self
|
4
|
-
# @param xml [String]
|
5
|
-
# @return [RelatonW3c::W3cBibliographicItem, NilClass]
|
6
|
-
def from_xml(xml)
|
7
|
-
doc = Nokogiri::XML xml
|
8
|
-
doc.remove_namespaces!
|
9
|
-
item = doc.at("/bibitem|/bibdata")
|
10
|
-
if item
|
11
|
-
W3cBibliographicItem.new(item_data(item))
|
12
|
-
else
|
13
|
-
warn "[relaton-w3c] can't find bibitem or bibdata element in the XML"
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
4
|
private
|
18
5
|
|
19
6
|
# Override RelatonBib::XMLParser.item_data method.
|
@@ -27,6 +14,12 @@ module RelatonW3c
|
|
27
14
|
data[:doctype] = ext.at("./doctype")&.text
|
28
15
|
data
|
29
16
|
end
|
17
|
+
|
18
|
+
# @param item_hash [Hash]
|
19
|
+
# @return [RelatonBib::BibliographicItem]
|
20
|
+
def bib_item(item_hash)
|
21
|
+
W3cBibliographicItem.new item_hash
|
22
|
+
end
|
30
23
|
end
|
31
24
|
end
|
32
25
|
end
|
data/relaton_w3c.gemspec
CHANGED
@@ -9,9 +9,9 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["open.source@ribose.com"]
|
10
10
|
|
11
11
|
spec.summary = "RelatonIso: retrieve W3C Standards for bibliographic "\
|
12
|
-
"
|
12
|
+
"using the IsoBibliographicItem model"
|
13
13
|
spec.description = "RelatonIso: retrieve W3C Standards for bibliographic "\
|
14
|
-
"
|
14
|
+
"using the IsoBibliographicItem model"
|
15
15
|
spec.homepage = "https://github.com/relaton/relaton-wc3"
|
16
16
|
spec.license = "BSD-2-Clause"
|
17
17
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
|
@@ -39,5 +39,5 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency "vcr"
|
40
40
|
spec.add_development_dependency "webmock"
|
41
41
|
|
42
|
-
spec.add_dependency "relaton-bib", ">= 1.0
|
42
|
+
spec.add_dependency "relaton-bib", ">= 1.3.0"
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -114,15 +114,15 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 1.0
|
117
|
+
version: 1.3.0
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 1.0
|
125
|
-
description: 'RelatonIso: retrieve W3C Standards for bibliographic
|
124
|
+
version: 1.3.0
|
125
|
+
description: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
126
126
|
model'
|
127
127
|
email:
|
128
128
|
- open.source@ribose.com
|
@@ -162,7 +162,7 @@ licenses:
|
|
162
162
|
- BSD-2-Clause
|
163
163
|
metadata:
|
164
164
|
homepage_uri: https://github.com/relaton/relaton-wc3
|
165
|
-
post_install_message:
|
165
|
+
post_install_message:
|
166
166
|
rdoc_options: []
|
167
167
|
require_paths:
|
168
168
|
- lib
|
@@ -178,8 +178,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
178
|
version: '0'
|
179
179
|
requirements: []
|
180
180
|
rubygems_version: 3.0.6
|
181
|
-
signing_key:
|
181
|
+
signing_key:
|
182
182
|
specification_version: 4
|
183
|
-
summary: 'RelatonIso: retrieve W3C Standards for bibliographic
|
183
|
+
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
184
184
|
model'
|
185
185
|
test_files: []
|