relaton-w3c 1.11.2 → 1.11.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,10 +9,10 @@ module RelatonW3c
9
9
 
10
10
  class << self
11
11
  # @param text [String]
12
- # @return [RelatonW3c::HitCollection]
13
- def search(text) # rubocop:disable Metrics/MethodLength
12
+ # @return [RelatonW3c::W3cBibliographicItem]
13
+ def search(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
14
14
  ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
15
- file = DataIndex.create_from_repo.search(ref)
15
+ file = DataIndex.create_from_repo.search ref.gsub(" ", "-").squeeze("-")
16
16
  return unless file
17
17
 
18
18
  url = "#{SOURCE}#{file}"
data/lib/relaton_w3c.rb CHANGED
@@ -2,9 +2,6 @@ require "relaton_bib"
2
2
  require "relaton_w3c/version"
3
3
  require "relaton_w3c/w3c_bibliography"
4
4
  require "relaton_w3c/w3c_bibliographic_item"
5
- # require "relaton_w3c/hit_collection"
6
- # require "relaton_w3c/hit"
7
- # require "relaton_w3c/scrapper"
8
5
  require "relaton_w3c/xml_parser"
9
6
  require "relaton_w3c/bibxml_parser"
10
7
  require "relaton_w3c/hash_converter"
data/relaton_w3c.gemspec CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
14
14
  "using the IsoBibliographicItem model"
15
15
  spec.homepage = "https://github.com/relaton/relaton-wc3"
16
16
  spec.license = "BSD-2-Clause"
17
- spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
17
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.6.0")
18
18
 
19
19
  # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
20
 
@@ -39,10 +39,10 @@ Gem::Specification.new do |spec|
39
39
 
40
40
  spec.add_dependency "linkeddata", "~> 3.1.0"
41
41
  spec.add_dependency "mechanize", "~> 2.8.0"
42
- # spec.add_dependency "picky"
43
- spec.add_dependency "rdf", "~> 3.1.0"
44
- spec.add_dependency "rdf-normalize", "~> 0.4.0"
42
+ spec.add_dependency "rdf", "~> 3.2.0"
43
+ spec.add_dependency "rdf-normalize", "~> 0.5.0"
45
44
  spec.add_dependency "relaton-bib", "~> 1.11.0"
45
+ spec.add_dependency "rubyzip", "~> 2.3.0"
46
46
  spec.add_dependency "shex", "~> 0.6.0"
47
47
  spec.add_dependency "sparql", "~> 3.1.0"
48
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.2
4
+ version: 1.11.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-20 00:00:00.000000000 Z
11
+ date: 2022-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -114,28 +114,28 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: 3.1.0
117
+ version: 3.2.0
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: 3.1.0
124
+ version: 3.2.0
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: rdf-normalize
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: 0.4.0
131
+ version: 0.5.0
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: 0.4.0
138
+ version: 0.5.0
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: relaton-bib
141
141
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: 1.11.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubyzip
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 2.3.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 2.3.0
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: shex
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -233,10 +247,7 @@ files:
233
247
  - lib/relaton_w3c/data_index.rb
234
248
  - lib/relaton_w3c/data_parser.rb
235
249
  - lib/relaton_w3c/hash_converter.rb
236
- - lib/relaton_w3c/hit.rb
237
- - lib/relaton_w3c/hit_collection.rb
238
250
  - lib/relaton_w3c/processor.rb
239
- - lib/relaton_w3c/scrapper.rb
240
251
  - lib/relaton_w3c/version.rb
241
252
  - lib/relaton_w3c/w3c_bibliographic_item.rb
242
253
  - lib/relaton_w3c/w3c_bibliography.rb
@@ -256,14 +267,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
256
267
  requirements:
257
268
  - - ">="
258
269
  - !ruby/object:Gem::Version
259
- version: 2.5.0
270
+ version: 2.6.0
260
271
  required_rubygems_version: !ruby/object:Gem::Requirement
261
272
  requirements:
262
273
  - - ">="
263
274
  - !ruby/object:Gem::Version
264
275
  version: '0'
265
276
  requirements: []
266
- rubygems_version: 3.3.7
277
+ rubygems_version: 3.2.3
267
278
  signing_key:
268
279
  specification_version: 4
269
280
  summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RelatonW3c
4
- # Hit.
5
- class Hit < RelatonBib::Hit
6
- #
7
- # Parse page.
8
- #
9
- # @param lang [String, NilClass]
10
- # @return [RelatonW3c::W3cBibliographicItem]
11
- def fetch(_lang = nil)
12
- @fetch ||= Scrapper.parse_page hit
13
- end
14
- end
15
- end
@@ -1,172 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "fileutils"
4
- require "yaml"
5
-
6
- module RelatonW3c
7
- # Page of hit collection.
8
- class HitCollection < RelatonBib::HitCollection
9
- TYPES = {
10
- "CR" => "Candidate Recommendation",
11
- "NOTE" => "Group Note",
12
- "PER" => "Proposed Edited Recommendation",
13
- "PR" => "Proposed Recommendation",
14
- "REC" => "Recommendation",
15
- "RET" => "Retired",
16
- "WD" => "Working Draft",
17
- }.freeze
18
- DOMAIN = "https://www.w3.org"
19
- DATADIR = File.expand_path(".relaton/w3c", Dir.home).freeze
20
- DATAFILE = File.expand_path("bibliography.yml", DATADIR).freeze
21
-
22
- # @param ref [String] reference to search
23
- def initialize(ref)
24
- %r{
25
- ^(?:W3C\s)?
26
- (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
- Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
- Recommendation|Retired|Working\sDraft))? # type
29
- \s?
30
- (?<title_date>.+) # title_date
31
- }x =~ ref
32
- super
33
- @array = from_yaml title_date, type
34
- end
35
-
36
- private
37
-
38
- #
39
- # Fetch data form yaml
40
- #
41
- # @param title_date [String]
42
- # @param type [String]
43
- # @return [Array<Hash>]
44
- def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
- /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
- title ||= title_date
47
- result = data.select do |hit|
48
- (hit["title"].casecmp?(title) ||
49
- hit["link"].split("/").last.match?(/-#{title}-/)) &&
50
- type_date_filter(hit, type, date)
51
- end
52
- if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
- end
55
- result.map { |h| Hit.new(h, self) }
56
- end
57
-
58
- # @param hit [Hash]
59
- # @param type [String]
60
- # @param date [String]
61
- # @return [TrueClass, FalseClass]
62
- def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
63
- if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
64
- history = get_history hit, type, date
65
- return false unless history.any?
66
-
67
- hit["type"] = short_type type
68
- hit["datepub"] = history.first.at("td").text
69
- hit["link"] = history.first.at("a")[:href]
70
- end
71
- true
72
- end
73
-
74
- # @param hit [Hash]
75
- # @param type [String]
76
- # @param date [String]
77
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
78
- def get_history(hit, type, date)
79
- resp = Net::HTTP.get URI.parse(HitCollection::DOMAIN + hit["history"])
80
- history_doc = Nokogiri::HTML resp
81
- history = history_doc.xpath(
82
- "//table//a[contains(.,'#{long_type(type)}')]/../..",
83
- )
84
- return filter_history_by_date(history, history_doc, type, date) if date
85
-
86
- history
87
- end
88
-
89
- # @param history [Nokogiri::XML::NodeSet]
90
- # @param history_doc [Nokogiri::HTML::NodeSet]
91
- # @param type [String]
92
- # @param date [String]
93
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
94
- def filter_history_by_date(history, history_doc, type, date)
95
- if type
96
- history.select do |h|
97
- h.at("td[@class='table_datecol']").text == date
98
- end
99
- else
100
- history_doc.xpath(
101
- "//table//td[@class='table_datecol'][.='#{date}']/..",
102
- )
103
- end
104
- end
105
-
106
- #
107
- # Convetr long type name to short
108
- #
109
- # @param type [String]
110
- # @return [String]
111
- def short_type(type)
112
- tp = TYPES.select { |_, v| v == type }.keys
113
- tp.first || type
114
- end
115
-
116
- #
117
- # Convert shot type name to long
118
- #
119
- # @param [String]
120
- # @return [String]
121
- def long_type(type)
122
- TYPES[type] || type
123
- end
124
-
125
- #
126
- # Fetches YAML data
127
- #
128
- # @return [Hash]
129
- def data
130
- FileUtils.mkdir_p DATADIR
131
- ctime = File.ctime DATAFILE if File.exist? DATAFILE
132
- fetch_data if !ctime || ctime.to_date < Date.today
133
- @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
134
- end
135
-
136
- #
137
- # fetch data form server and save it to file.
138
- #
139
- def fetch_data
140
- resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
- # return if there aren't any changes since last fetching
142
- return unless resp.code == "200"
143
-
144
- doc = Nokogiri::HTML resp.body
145
- @data = doc.xpath("//ul[@id='container']/li").map do |h_el|
146
- link = h_el.at("h2/a")
147
- pubdetails = h_el.at("p[@class='pubdetails']")
148
- fetch_hit h_el, link, pubdetails
149
- end
150
- File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
151
- end
152
-
153
- # @param h_el [Nokogiri::XML::Element]
154
- # @param link [Nokogiri::XML::Element]
155
- # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
- datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
- editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
- keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
160
- {
161
- "title" => link.text.gsub("\u00a0", " "),
162
- "link" => link[:href],
163
- "type" => h_el.at("div").text.upcase,
164
- "workgroup" => h_el.xpath("p[@class='deliverer']").map(&:text),
165
- "datepub" => datepub,
166
- "history" => pubdetails.at("a[text()='History']")[:href],
167
- "editor" => editor,
168
- "keyword" => keyword,
169
- }
170
- end
171
- end
172
- end
@@ -1,218 +0,0 @@
1
- module RelatonW3c
2
- class Scrapper
3
- DOCTYPES = {
4
- "CR" => "candidateRecommendation",
5
- "NOTE" => "groupNote",
6
- "PER" => "proposedEditedRecommendation",
7
- "PR" => "proposedRecommendation",
8
- "REC" => "recommendation",
9
- "RET" => "retired",
10
- "WD" => "workingDraft",
11
- }.freeze
12
-
13
- class << self
14
- # @param hit [Hash]
15
- # @return [RelatonW3c::W3cBibliographicItem]
16
- def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
17
- resp = Net::HTTP.get_response URI.parse(hit["link"])
18
- doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
19
- W3cBibliographicItem.new(
20
- type: "standard",
21
- docid: fetch_docid(hit),
22
- fetched: Date.today.to_s,
23
- language: ["en"],
24
- script: ["Latn"],
25
- title: fetch_title(hit, doc),
26
- abstract: fetch_abstract(doc),
27
- link: fetch_link(hit),
28
- date: fetch_date(hit, doc),
29
- doctype: fetch_doctype(hit, doc),
30
- contributor: fetch_contributor(hit, doc),
31
- relation: fetch_relation(doc),
32
- keyword: hit["keyword"],
33
- )
34
- end
35
-
36
- private
37
-
38
- # @param hit [Hash]
39
- # @return [Array<RelatonBib::DocumentIdentifier>]
40
- def fetch_docid(hit)
41
- id = hit["link"].split("/").last
42
- [RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)]
43
- end
44
-
45
- # @param hit [Hash]
46
- # @param doc [Nokogiri::HTML::Document]
47
- # @return [Array<RelatonBib::TypedTitleString>]
48
- def fetch_title(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
49
- titles = []
50
- if doc
51
- title = doc.at("//*[contains(@id, 'title')]")&.text
52
- if title && !title.empty?
53
- titles << { content: title.gsub(/\n/, " "), type: "main" }
54
- end
55
- subtitle = doc.at(
56
- "//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
57
- )&.text
58
- titles << { content: subtitle, tipe: "subtitle" } if subtitle
59
- end
60
- if titles.empty? && hit["title"]
61
- titles << { content: hit["title"], type: "main" }
62
- end
63
- titles.map do |t|
64
- title = RelatonBib::FormattedString.new(
65
- content: t[:content], language: "en", script: "Latn",
66
- )
67
- RelatonBib::TypedTitleString.new(type: t[:type], title: title)
68
- end
69
- end
70
-
71
- # @param doc [Nokogiri::HTML::Document, NilClass]
72
- # @return [Array<RelatonBib::FormattedString>]
73
- def fetch_abstract(doc)
74
- return [] unless doc
75
-
76
- content = doc.at("//h2[.='Abstract']/following-sibling::p",
77
- "//div[@class='abstract']/p").text
78
- [RelatonBib::FormattedString.new(content: content, language: "en",
79
- script: "Latn")]
80
- end
81
-
82
- # @param hit [Hash]
83
- # @return [Array<RelatonBib::TypedUri>]
84
- def fetch_link(hit)
85
- [RelatonBib::TypedUri.new(type: "src", content: hit["link"])]
86
- end
87
-
88
- # @param hit [Hash]
89
- # @param doc [Nokogiri::HTML::Document, NilClass]
90
- # @return [Array<RelatonBib::BibliographicDate>]
91
- def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
92
- on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
93
- on ||= fetch_date1(doc) || fetch_date2(doc)
94
- [RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
95
- end
96
-
97
- # @param doc [Nokogiri::HTML::Document, NilClass]
98
- # @return [String]
99
- def fetch_date1(doc)
100
- d = doc&.at("//h2[@property='dc:issued']")&.attr(:content)
101
- d&.match(/\d{4}-\d{2}-\d{2}/)&.to_s
102
- end
103
-
104
- # @param doc [Nokogiri::HTML::Document, NilClass]
105
- # @return [String]
106
- def fetch_date2(doc)
107
- d = doc&.at("//h2[contains(@id, 'w3c-recommendation')]")
108
- return unless d
109
-
110
- Date.parse(d.attr(:id.match(/\d{2}-\w+-\d{4}/).to_s)).to_s
111
- end
112
-
113
- # @param hit [Hash]
114
- # @param doc [Nokogiri::HTML::Document, NilClass]
115
- # @return [String]
116
- def fetch_doctype(hit, doc)
117
- if hit["type"]
118
- DOCTYPES[hit["type"]]
119
- elsif doc
120
- type = HitCollection::TYPES.detect do |_k, v|
121
- doc.at("//h2[contains(., '#{v}')]/time[@datetime]")
122
- end
123
- DOCTYPES[type&.first]
124
- end
125
- end
126
-
127
- # @param hit [Hash]
128
- # @param doc [Nokogiri::HTML::Document, NilClass]
129
- # @return [Array<RelatonBib::ContributionInfo>]
130
- def fetch_contributor(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
131
- if doc
132
- editors = find_contribs(doc, "Editors").reduce([]) do |mem, ed|
133
- c = parse_contrib ed, "editor"
134
- mem << c if c
135
- mem
136
- end
137
- contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, ath|
138
- ed = mem.detect { |e| e[:id] && e[:id] == ath["data-editor-id"] }
139
- if ed
140
- ed[:role] << { type: "author" }
141
- else
142
- mem << parse_contrib(ath, "author")
143
- end
144
- mem
145
- end
146
- contribs.map { |c| contrib_info(**c) }
147
- else
148
- hit["editor"].map do |ed|
149
- contrib_info name: ed, role: [{ type: "editor" }]
150
- end
151
- end
152
- end
153
-
154
- # @param doc [Nokogiri::NTML::Document]
155
- # @param type [String]
156
- # @return [Array<Nokogiri::XML::Element]
157
- def find_contribs(doc, type)
158
- doc.xpath("//dt[contains(.,'#{type}')]/following-sibling::dd"\
159
- "[preceding-sibling::dt[1][contains(.,'#{type}')]]")
160
- end
161
-
162
- # @param element [Nokogiri::XML::Element]
163
- # @param type [String]
164
- # @return [Hash]
165
- def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
166
- p = element.at("a")
167
- return unless p
168
-
169
- contrib = {
170
- name: p.text,
171
- url: p[:href],
172
- role: [{ type: type }],
173
- id: element["data-editor-id"],
174
- }
175
- org = element.at("a[2]")
176
- contrib[:org] = { name: org.text, url: org[:href] } if org
177
- contrib
178
- end
179
-
180
- # @param name [String]
181
- # @param url [String, NilClass]
182
- # @param role [Array<Hash>]
183
- # @parma org [Hash]
184
- # @return [RelatonBib::ContributionInfo]
185
- def contrib_info(**args)
186
- completename = RelatonBib::LocalizedString.new(args[:name])
187
- name = RelatonBib::FullName.new completename: completename
188
- af = []
189
- if args[:org]
190
- org = RelatonBib::Organization.new(**args[:org])
191
- af << RelatonBib::Affiliation.new(organization: org)
192
- end
193
- en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
194
- RelatonBib::ContributionInfo.new entity: en, role: args[:role]
195
- end
196
-
197
- # @param doc [Nokogiri::HTML::Document]
198
- # @return [Array<RelatonBib::DocumentRelation>]
199
- def fetch_relation(doc)
200
- return [] unless doc && (link = recommendation_link(doc))
201
-
202
- hit = { "link" => link }
203
- item = parse_page hit
204
- [RelatonBib::DocumentRelation.new(type: "obsoletedBy", bibitem: item)]
205
- end
206
-
207
- # @param doc [Nokogiri::HTML::Document]
208
- # @return [String, NilClass]
209
- def recommendation_link(doc)
210
- recom = doc.at("//dt[.='Latest Recommendation:']",
211
- "//dt[.='Previous Recommendation:']")
212
- return unless recom
213
-
214
- recom.at("./following-sibling::dd/a")[:href]
215
- end
216
- end
217
- end
218
- end