relaton-w3c 1.7.2 → 1.9.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,205 @@
1
+ module RelatonW3c
2
+ class DataParser
3
+ #
4
+ # Document parser initialization
5
+ #
6
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
7
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
8
+ #
9
+ def initialize(sol, fetcher)
10
+ @sol = sol
11
+ @fetcher = fetcher
12
+ end
13
+
14
+ #
15
+ # Initialize document parser and run it
16
+ #
17
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
18
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
19
+ #
20
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
21
+ #
22
+ def self.parse(sol, fetcher)
23
+ new(sol, fetcher).parse
24
+ end
25
+
26
+ #
27
+ # Parse document
28
+ #
29
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
30
+ #
31
+ def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
32
+ return unless @fetcher.class::USED_TYPES.include? type
33
+
34
+ RelatonW3c::W3cBibliographicItem.new(
35
+ type: "standard",
36
+ doctype: parse_doctype,
37
+ fetched: Date.today.to_s,
38
+ language: ["en"],
39
+ script: ["Latn"],
40
+ title: parse_title,
41
+ link: parse_link,
42
+ docid: parse_docid,
43
+ docnumber: identifier(@sol.link.to_s),
44
+ series: parse_series,
45
+ date: parse_date,
46
+ relation: parse_relation,
47
+ contributor: parse_contrib,
48
+ editorialgroup: parse_editorialgroup,
49
+ )
50
+ end
51
+
52
+ #
53
+ # Parse title
54
+ #
55
+ # @return [RelatonBib::TypedTitleStringCollection] title
56
+ #
57
+ def parse_title
58
+ t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
59
+ RelatonBib::TypedTitleStringCollection.new [t]
60
+ end
61
+
62
+ #
63
+ # Parse link
64
+ #
65
+ # @return [Array<RelatonBib::TypedUri>] link
66
+ #
67
+ def parse_link
68
+ [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
69
+ end
70
+
71
+ #
72
+ # Parse docidentifier
73
+ #
74
+ # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
75
+ #
76
+ def parse_docid
77
+ id = pub_id(@sol.link.to_s)
78
+ [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id)]
79
+ end
80
+
81
+ #
82
+ # Generate PubID
83
+ #
84
+ # @param [String] url url
85
+ #
86
+ # @return [String] PubID
87
+ #
88
+ def pub_id(url)
89
+ "W3C #{identifier(url)}"
90
+ end
91
+
92
+ def identifier(url)
93
+ /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/.match(url)[1].to_s
94
+ end
95
+
96
+ #
97
+ # Parse series
98
+ #
99
+ # @return [Array<RelatonBib::Series>] series
100
+ #
101
+ def parse_series
102
+ title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
103
+ [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
104
+ end
105
+
106
+ def type # rubocop:disable Metrics/MethodLength
107
+ @type ||= begin
108
+ sse = SPARQL.parse(%(
109
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
110
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
111
+ SELECT ?type
112
+ WHERE {
113
+ { <#{@sol.link}> rdf:type ?type }
114
+ }
115
+ ))
116
+ tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
117
+ tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
118
+ end
119
+ end
120
+
121
+ #
122
+ # Parse doctype
123
+ #
124
+ # @return [String] doctype
125
+ #
126
+ def parse_doctype
127
+ Scrapper::DOCTYPES[type]
128
+ end
129
+
130
+ def parse_date
131
+ [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
132
+ end
133
+
134
+ #
135
+ # Parse relation
136
+ #
137
+ # @return [Array<RelatonBib::DocumentRelation>] relation
138
+ #
139
+ def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
140
+ sse = SPARQL.parse(%(
141
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
142
+ SELECT ?obsoletes
143
+ WHERE {
144
+ VALUES ?p { doc:obsoletes }
145
+ { <#{@sol.link}> ?p ?obsoletes }
146
+ }
147
+ ))
148
+ @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
149
+ tp, url = r.to_h.first
150
+ fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
151
+ bib = W3cBibliographicItem.new formattedref: fr
152
+ RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
153
+ end
154
+ end
155
+
156
+ #
157
+ # Parse contributor
158
+ #
159
+ # @return [Array<RelatonBib::ContributionInfo>] contributor
160
+ #
161
+ def parse_contrib # rubocop:disable Metrics/MethodLength
162
+ sse = SPARQL.parse(%(
163
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
164
+ PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
165
+ SELECT ?full_name
166
+ WHERE {
167
+ <#{@sol.link}> :editor/contact:fullName ?full_name
168
+ }
169
+ ))
170
+ @fetcher.data.query(sse).order_by(:full_name).map do |ed|
171
+ cn = RelatonBib::LocalizedString.new(ed.full_name.to_s, "en", "Latn")
172
+ n = RelatonBib::FullName.new completename: cn
173
+ p = RelatonBib::Person.new name: n
174
+ RelatonBib::ContributionInfo.new entity: p, role: [type: "editor"]
175
+ end
176
+ end
177
+
178
+ #
179
+ # Parse editorialgroup
180
+ #
181
+ # @return [RelatonBib::EditorialGroup] editorialgroup
182
+ #
183
+ def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
184
+ sse = SPARQL.parse(%(
185
+ PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
186
+ PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
187
+ SELECT ?home_page
188
+ WHERE {
189
+ <#{@sol.link}> org:deliveredBy/contact:homePage ?home_page
190
+ }
191
+ ))
192
+ res = @fetcher.data.query(sse).order_by(:home_page)
193
+ tc = res.each_with_object([]) do |edg, obj|
194
+ wg = @fetcher.group_names[edg.home_page.to_s.sub(/\/$/, "")]
195
+ if wg
196
+ rwg = RelatonBib::WorkGroup.new name: wg["name"]
197
+ obj << RelatonBib::TechnicalCommittee.new(rwg)
198
+ else
199
+ warn "Working group name not found for #{edg.home_page}"
200
+ end
201
+ end
202
+ RelatonBib::EditorialGroup.new tc
203
+ end
204
+ end
205
+ end
@@ -4,7 +4,7 @@ module RelatonW3c
4
4
  # @param item_hash [Hash]
5
5
  # @return [RelatonW3c::W3cBibliographicItem]
6
6
  def bib_item(item_hash)
7
- W3cBibliographicItem.new **item_hash
7
+ W3cBibliographicItem.new(**item_hash)
8
8
  end
9
9
  end
10
10
  end
@@ -22,8 +22,8 @@ module RelatonW3c
22
22
  # @param ref [String] reference to search
23
23
  def initialize(ref)
24
24
  %r{
25
- ^(W3C\s)?
26
- (?<type>(CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
25
+ ^(?:W3C\s)?
26
+ (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
27
  Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
28
  Recommendation|Retired|Working\sDraft))? # type
29
29
  \s?
@@ -41,7 +41,7 @@ module RelatonW3c
41
41
  # @param title_date [String]
42
42
  # @param type [String]
43
43
  # @return [Array<Hash>]
44
- def from_yaml(title_date, type)
44
+ def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
45
  /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
46
  title ||= title_date
47
47
  result = data.select do |hit|
@@ -50,7 +50,7 @@ module RelatonW3c
50
50
  type_date_filter(hit, type, date)
51
51
  end
52
52
  if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match? /#{title}/ }
53
+ result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
54
  end
55
55
  result.map { |h| Hit.new(h, self) }
56
56
  end
@@ -109,7 +109,7 @@ module RelatonW3c
109
109
  # @param type [String]
110
110
  # @return [String]
111
111
  def short_type(type)
112
- tp = TYPES.select { |k,v| v == type }.keys
112
+ tp = TYPES.select { |_, v| v == type }.keys
113
113
  tp.first || type
114
114
  end
115
115
 
@@ -137,7 +137,7 @@ module RelatonW3c
137
137
  # fetch data form server and save it to file.
138
138
  #
139
139
  def fetch_data
140
- resp = Net::HTTP.get_response URI.parse(DOMAIN + "/TR/")
140
+ resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
141
  # return if there aren't any changes since last fetching
142
142
  return unless resp.code == "200"
143
143
 
@@ -153,7 +153,7 @@ module RelatonW3c
153
153
  # @param h_el [Nokogiri::XML::Element]
154
154
  # @param link [Nokogiri::XML::Element]
155
155
  # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails)
156
+ def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
157
  datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
158
  editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
159
  keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
@@ -4,11 +4,12 @@ module RelatonW3c
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_w3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
+ @datasets = %w[w3c-rdf]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonW3c
19
20
  ::RelatonW3c::W3cBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from http://www.w3.org/2002/01/tr-automation/tr.rdf
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonCalconnect::CcBibliographicItem]
24
37
  def from_xml(xml)
@@ -29,7 +42,7 @@ module RelatonW3c
29
42
  # @return [RelatonW3c::W3cBibliographicItem]
30
43
  def hash_to_bib(hash)
31
44
  item_hash = ::RelatonW3c::HashConverter.hash_to_bib(hash)
32
- ::RelatonW3c::W3cBibliographicItem.new **item_hash
45
+ ::RelatonW3c::W3cBibliographicItem.new(**item_hash)
33
46
  end
34
47
 
35
48
  # Returns hash of XML grammar
@@ -1,16 +1,16 @@
1
1
  module RelatonW3c
2
2
  class Scrapper
3
- class << self
4
- DOCTYPES = {
5
- "CR" => "candidateRecommendation",
6
- "NOTE" => "groupNote",
7
- "PER" => "proposedEditedRecommendation",
8
- "PR" => "proposedRecommendation",
9
- "REC" => "recommendation",
10
- "RET" => "retired",
11
- "WD" => "workingDraft",
12
- }.freeze
3
+ DOCTYPES = {
4
+ "CR" => "candidateRecommendation",
5
+ "NOTE" => "groupNote",
6
+ "PER" => "proposedEditedRecommendation",
7
+ "PR" => "proposedRecommendation",
8
+ "REC" => "recommendation",
9
+ "RET" => "retired",
10
+ "WD" => "workingDraft",
11
+ }.freeze
13
12
 
13
+ class << self
14
14
  # @param hit [Hash]
15
15
  # @return [RelatonW3c::W3cBibliographicItem]
16
16
  def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
@@ -29,7 +29,7 @@ module RelatonW3c
29
29
  doctype: fetch_doctype(hit, doc),
30
30
  contributor: fetch_contributor(hit, doc),
31
31
  relation: fetch_relation(doc),
32
- keyword: hit["keyword"]
32
+ keyword: hit["keyword"],
33
33
  )
34
34
  end
35
35
 
@@ -53,7 +53,7 @@ module RelatonW3c
53
53
  titles << { content: title.gsub(/\n/, " "), type: "main" }
54
54
  end
55
55
  subtitle = doc.at(
56
- "//h2[@id='subtitle']|//p[contains(@class, 'subline')]"
56
+ "//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
57
57
  )&.text
58
58
  titles << { content: subtitle, tipe: "subtitle" } if subtitle
59
59
  end
@@ -62,7 +62,7 @@ module RelatonW3c
62
62
  end
63
63
  titles.map do |t|
64
64
  title = RelatonBib::FormattedString.new(
65
- content: t[:content], language: "en", script: "Latn"
65
+ content: t[:content], language: "en", script: "Latn",
66
66
  )
67
67
  RelatonBib::TypedTitleString.new(type: t[:type], title: title)
68
68
  end
@@ -88,7 +88,7 @@ module RelatonW3c
88
88
  # @param hit [Hash]
89
89
  # @param doc [Nokogiri::HTML::Document, NilClass]
90
90
  # @return [Array<RelatonBib::BibliographicDate>]
91
- def fetch_date(hit, doc)
91
+ def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
92
92
  on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
93
93
  on ||= fetch_date1(doc) || fetch_date2(doc)
94
94
  [RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
@@ -143,7 +143,7 @@ module RelatonW3c
143
143
  end
144
144
  mem
145
145
  end
146
- contribs.map { |c| contrib_info **c }
146
+ contribs.map { |c| contrib_info(**c) }
147
147
  else
148
148
  hit["editor"].map do |ed|
149
149
  contrib_info name: ed, role: [{ type: "editor" }]
@@ -162,7 +162,7 @@ module RelatonW3c
162
162
  # @param element [Nokogiri::XML::Element]
163
163
  # @param type [String]
164
164
  # @return [Hash]
165
- def parse_contrib(element, type)
165
+ def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
166
166
  p = element.at("a")
167
167
  return unless p
168
168
 
@@ -187,7 +187,7 @@ module RelatonW3c
187
187
  name = RelatonBib::FullName.new completename: completename
188
188
  af = []
189
189
  if args[:org]
190
- org = RelatonBib::Organization.new **args[:org]
190
+ org = RelatonBib::Organization.new(**args[:org])
191
191
  af << RelatonBib::Affiliation.new(organization: org)
192
192
  end
193
193
  en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.7.2".freeze
2
+ VERSION = "1.9.2".freeze
3
3
  end
@@ -10,7 +10,7 @@ module RelatonW3c
10
10
  if args[:doctype] && !TYPES.include?(args[:doctype])
11
11
  warn "[relaton-w3c] invalid document type: #{args[:doctype]}"
12
12
  end
13
- super **args
13
+ super
14
14
  end
15
15
  end
16
16
  end