relaton-w3c 1.7.1 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,205 @@
1
+ module RelatonW3c
2
+ class DataParser
3
+ #
4
+ # Document parser initialization
5
+ #
6
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
7
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
8
+ #
9
+ def initialize(sol, fetcher)
10
+ @sol = sol
11
+ @fetcher = fetcher
12
+ end
13
+
14
+ #
15
+ # Initialize document parser and run it
16
+ #
17
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
18
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
19
+ #
20
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
21
+ #
22
+ def self.parse(sol, fetcher)
23
+ new(sol, fetcher).parse
24
+ end
25
+
26
+ #
27
+ # Parse document
28
+ #
29
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
30
+ #
31
+ def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
32
+ return unless @fetcher.class::USED_TYPES.include? type
33
+
34
+ RelatonW3c::W3cBibliographicItem.new(
35
+ type: "standard",
36
+ doctype: parse_doctype,
37
+ fetched: Date.today.to_s,
38
+ language: ["en"],
39
+ script: ["Latn"],
40
+ title: parse_title,
41
+ link: parse_link,
42
+ docid: parse_docid,
43
+ docnumber: identifier(@sol.link.to_s),
44
+ series: parse_series,
45
+ date: parse_date,
46
+ relation: parse_relation,
47
+ contributor: parse_contrib,
48
+ editorialgroup: parse_editorialgroup,
49
+ )
50
+ end
51
+
52
+ #
53
+ # Parse title
54
+ #
55
+ # @return [RelatonBib::TypedTitleStringCollection] title
56
+ #
57
+ def parse_title
58
+ t = RelatonBib::TypedTitleString.new title: @sol.title.to_s
59
+ RelatonBib::TypedTitleStringCollection.new [t]
60
+ end
61
+
62
+ #
63
+ # Parse link
64
+ #
65
+ # @return [Array<RelatonBib::TypedUri>] link
66
+ #
67
+ def parse_link
68
+ [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
69
+ end
70
+
71
+ #
72
+ # Parse docidentifier
73
+ #
74
+ # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
75
+ #
76
+ def parse_docid
77
+ id = pub_id(@sol.link.to_s)
78
+ [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id)]
79
+ end
80
+
81
+ #
82
+ # Generate PubID
83
+ #
84
+ # @param [String] url url
85
+ #
86
+ # @return [String] PubID
87
+ #
88
+ def pub_id(url)
89
+ "W3C #{identifier(url)}"
90
+ end
91
+
92
+ def identifier(url)
93
+ /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/.match(url)[1].to_s
94
+ end
95
+
96
+ #
97
+ # Parse series
98
+ #
99
+ # @return [Array<RelatonBib::Series>] series
100
+ #
101
+ def parse_series
102
+ title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
103
+ [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
104
+ end
105
+
106
+ def type # rubocop:disable Metrics/MethodLength
107
+ @type ||= begin
108
+ sse = SPARQL.parse(%(
109
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
110
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
111
+ SELECT ?type
112
+ WHERE {
113
+ { <#{@sol.link}> rdf:type ?type }
114
+ }
115
+ ))
116
+ tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
117
+ tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
118
+ end
119
+ end
120
+
121
+ #
122
+ # Parse doctype
123
+ #
124
+ # @return [String] doctype
125
+ #
126
+ def parse_doctype
127
+ Scrapper::DOCTYPES[type]
128
+ end
129
+
130
+ def parse_date
131
+ [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
132
+ end
133
+
134
+ #
135
+ # Parse relation
136
+ #
137
+ # @return [Array<RelatonBib::DocumentRelation>] relation
138
+ #
139
+ def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
140
+ sse = SPARQL.parse(%(
141
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
142
+ SELECT ?obsoletes
143
+ WHERE {
144
+ VALUES ?p { doc:obsoletes }
145
+ { <#{@sol.link}> ?p ?obsoletes }
146
+ }
147
+ ))
148
+ @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
149
+ tp, url = r.to_h.first
150
+ fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
151
+ bib = W3cBibliographicItem.new formattedref: fr
152
+ RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
153
+ end
154
+ end
155
+
156
+ #
157
+ # Parse contributor
158
+ #
159
+ # @return [Array<RelatonBib::ContributionInfo>] contributor
160
+ #
161
+ def parse_contrib # rubocop:disable Metrics/MethodLength
162
+ sse = SPARQL.parse(%(
163
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
164
+ PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
165
+ SELECT ?full_name
166
+ WHERE {
167
+ <#{@sol.link}> :editor/contact:fullName ?full_name
168
+ }
169
+ ))
170
+ @fetcher.data.query(sse).order_by(:full_name).map do |ed|
171
+ cn = RelatonBib::LocalizedString.new(ed.full_name.to_s, "en", "Latn")
172
+ n = RelatonBib::FullName.new completename: cn
173
+ p = RelatonBib::Person.new name: n
174
+ RelatonBib::ContributionInfo.new entity: p, role: [type: "editor"]
175
+ end
176
+ end
177
+
178
+ #
179
+ # Parse editorialgroup
180
+ #
181
+ # @return [RelatonBib::EditorialGroup] editorialgroup
182
+ #
183
+ def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
184
+ sse = SPARQL.parse(%(
185
+ PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
186
+ PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
187
+ SELECT ?home_page
188
+ WHERE {
189
+ <#{@sol.link}> org:deliveredBy/contact:homePage ?home_page
190
+ }
191
+ ))
192
+ res = @fetcher.data.query(sse).order_by(:home_page)
193
+ tc = res.each_with_object([]) do |edg, obj|
194
+ wg = @fetcher.group_names[edg.home_page.to_s.sub(/\/$/, "")]
195
+ if wg
196
+ rwg = RelatonBib::WorkGroup.new name: wg["name"]
197
+ obj << RelatonBib::TechnicalCommittee.new(rwg)
198
+ else
199
+ warn "Working group name not found for #{edg.home_page}"
200
+ end
201
+ end
202
+ RelatonBib::EditorialGroup.new tc
203
+ end
204
+ end
205
+ end
@@ -4,7 +4,7 @@ module RelatonW3c
4
4
  # @param item_hash [Hash]
5
5
  # @return [RelatonW3c::W3cBibliographicItem]
6
6
  def bib_item(item_hash)
7
- W3cBibliographicItem.new **item_hash
7
+ W3cBibliographicItem.new(**item_hash)
8
8
  end
9
9
  end
10
10
  end
@@ -22,8 +22,8 @@ module RelatonW3c
22
22
  # @param ref [String] reference to search
23
23
  def initialize(ref)
24
24
  %r{
25
- ^(W3C\s)?
26
- (?<type>(CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
25
+ ^(?:W3C\s)?
26
+ (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
27
  Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
28
  Recommendation|Retired|Working\sDraft))? # type
29
29
  \s?
@@ -41,7 +41,7 @@ module RelatonW3c
41
41
  # @param title_date [String]
42
42
  # @param type [String]
43
43
  # @return [Array<Hash>]
44
- def from_yaml(title_date, type)
44
+ def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
45
  /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
46
  title ||= title_date
47
47
  result = data.select do |hit|
@@ -50,7 +50,7 @@ module RelatonW3c
50
50
  type_date_filter(hit, type, date)
51
51
  end
52
52
  if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match? /#{title}/ }
53
+ result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
54
  end
55
55
  result.map { |h| Hit.new(h, self) }
56
56
  end
@@ -109,7 +109,7 @@ module RelatonW3c
109
109
  # @param type [String]
110
110
  # @return [String]
111
111
  def short_type(type)
112
- tp = TYPES.select { |k,v| v == type }.keys
112
+ tp = TYPES.select { |_, v| v == type }.keys
113
113
  tp.first || type
114
114
  end
115
115
 
@@ -137,7 +137,7 @@ module RelatonW3c
137
137
  # fetch data from the server and save it to a file.
138
138
  #
139
139
  def fetch_data
140
- resp = Net::HTTP.get_response URI.parse(DOMAIN + "/TR/")
140
+ resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
141
  # return if there aren't any changes since last fetching
142
142
  return unless resp.code == "200"
143
143
 
@@ -153,7 +153,7 @@ module RelatonW3c
153
153
  # @param h_el [Nokogiri::XML::Element]
154
154
  # @param link [Nokogiri::XML::Element]
155
155
  # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails)
156
+ def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
157
  datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
158
  editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
159
  keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
@@ -4,11 +4,12 @@ module RelatonW3c
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_w3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
+ @datasets = %w[w3c-rdf]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonW3c
19
20
  ::RelatonW3c::W3cBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from http://www.w3.org/2002/01/tr-automation/tr.rdf
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonCalconnect::CcBibliographicItem]
24
37
  def from_xml(xml)
@@ -29,7 +42,7 @@ module RelatonW3c
29
42
  # @return [RelatonW3c::W3cBibliographicItem]
30
43
  def hash_to_bib(hash)
31
44
  item_hash = ::RelatonW3c::HashConverter.hash_to_bib(hash)
32
- ::RelatonW3c::W3cBibliographicItem.new item_hash
45
+ ::RelatonW3c::W3cBibliographicItem.new(**item_hash)
33
46
  end
34
47
 
35
48
  # Returns hash of XML grammar
@@ -1,16 +1,16 @@
1
1
  module RelatonW3c
2
2
  class Scrapper
3
- class << self
4
- DOCTYPES = {
5
- "CR" => "candidateRecommendation",
6
- "NOTE" => "groupNote",
7
- "PER" => "proposedEditedRecommendation",
8
- "PR" => "proposedRecommendation",
9
- "REC" => "recommendation",
10
- "RET" => "retired",
11
- "WD" => "workingDraft",
12
- }.freeze
3
+ DOCTYPES = {
4
+ "CR" => "candidateRecommendation",
5
+ "NOTE" => "groupNote",
6
+ "PER" => "proposedEditedRecommendation",
7
+ "PR" => "proposedRecommendation",
8
+ "REC" => "recommendation",
9
+ "RET" => "retired",
10
+ "WD" => "workingDraft",
11
+ }.freeze
13
12
 
13
+ class << self
14
14
  # @param hit [Hash]
15
15
  # @return [RelatonW3c::W3cBibliographicItem]
16
16
  def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
@@ -29,7 +29,7 @@ module RelatonW3c
29
29
  doctype: fetch_doctype(hit, doc),
30
30
  contributor: fetch_contributor(hit, doc),
31
31
  relation: fetch_relation(doc),
32
- keyword: hit["keyword"]
32
+ keyword: hit["keyword"],
33
33
  )
34
34
  end
35
35
 
@@ -53,7 +53,7 @@ module RelatonW3c
53
53
  titles << { content: title.gsub(/\n/, " "), type: "main" }
54
54
  end
55
55
  subtitle = doc.at(
56
- "//h2[@id='subtitle']|//p[contains(@class, 'subline')]"
56
+ "//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
57
57
  )&.text
58
58
  titles << { content: subtitle, tipe: "subtitle" } if subtitle
59
59
  end
@@ -62,7 +62,7 @@ module RelatonW3c
62
62
  end
63
63
  titles.map do |t|
64
64
  title = RelatonBib::FormattedString.new(
65
- content: t[:content], language: "en", script: "Latn"
65
+ content: t[:content], language: "en", script: "Latn",
66
66
  )
67
67
  RelatonBib::TypedTitleString.new(type: t[:type], title: title)
68
68
  end
@@ -88,7 +88,7 @@ module RelatonW3c
88
88
  # @param hit [Hash]
89
89
  # @param doc [Nokogiri::HTML::Document, NilClass]
90
90
  # @return [Array<RelatonBib::BibliographicDate>]
91
- def fetch_date(hit, doc)
91
+ def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
92
92
  on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
93
93
  on ||= fetch_date1(doc) || fetch_date2(doc)
94
94
  [RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
@@ -143,7 +143,7 @@ module RelatonW3c
143
143
  end
144
144
  mem
145
145
  end
146
- contribs.map { |c| contrib_info **c }
146
+ contribs.map { |c| contrib_info(**c) }
147
147
  else
148
148
  hit["editor"].map do |ed|
149
149
  contrib_info name: ed, role: [{ type: "editor" }]
@@ -162,7 +162,7 @@ module RelatonW3c
162
162
  # @param element [Nokogiri::XML::Element]
163
163
  # @param type [String]
164
164
  # @return [Hash]
165
- def parse_contrib(element, type)
165
+ def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
166
166
  p = element.at("a")
167
167
  return unless p
168
168
 
@@ -187,7 +187,7 @@ module RelatonW3c
187
187
  name = RelatonBib::FullName.new completename: completename
188
188
  af = []
189
189
  if args[:org]
190
- org = RelatonBib::Organization.new **args[:org]
190
+ org = RelatonBib::Organization.new(**args[:org])
191
191
  af << RelatonBib::Affiliation.new(organization: org)
192
192
  end
193
193
  en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.7.1".freeze
2
+ VERSION = "1.9.1".freeze
3
3
  end
@@ -10,7 +10,7 @@ module RelatonW3c
10
10
  if args[:doctype] && !TYPES.include?(args[:doctype])
11
11
  warn "[relaton-w3c] invalid document type: #{args[:doctype]}"
12
12
  end
13
- super **args
13
+ super
14
14
  end
15
15
  end
16
16
  end