relaton-w3c 1.11.2 → 1.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,10 +9,10 @@ module RelatonW3c
9
9
 
10
10
  class << self
11
11
  # @param text [String]
12
- # @return [RelatonW3c::HitCollection]
13
- def search(text) # rubocop:disable Metrics/MethodLength
12
+ # @return [RelatonW3c::W3cBibliographicItem]
13
+ def search(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
14
14
  ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
15
- file = DataIndex.create_from_repo.search(ref)
15
+ file = DataIndex.create_from_repo.search ref.gsub(" ", "-").squeeze("-")
16
16
  return unless file
17
17
 
18
18
  url = "#{SOURCE}#{file}"
data/lib/relaton_w3c.rb CHANGED
@@ -2,9 +2,6 @@ require "relaton_bib"
2
2
  require "relaton_w3c/version"
3
3
  require "relaton_w3c/w3c_bibliography"
4
4
  require "relaton_w3c/w3c_bibliographic_item"
5
- # require "relaton_w3c/hit_collection"
6
- # require "relaton_w3c/hit"
7
- # require "relaton_w3c/scrapper"
8
5
  require "relaton_w3c/xml_parser"
9
6
  require "relaton_w3c/bibxml_parser"
10
7
  require "relaton_w3c/hash_converter"
data/relaton_w3c.gemspec CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
14
14
  "using the IsoBibliographicItem model"
15
15
  spec.homepage = "https://github.com/relaton/relaton-wc3"
16
16
  spec.license = "BSD-2-Clause"
17
- spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
17
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.6.0")
18
18
 
19
19
  # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
20
 
@@ -39,10 +39,10 @@ Gem::Specification.new do |spec|
39
39
 
40
40
  spec.add_dependency "linkeddata", "~> 3.1.0"
41
41
  spec.add_dependency "mechanize", "~> 2.8.0"
42
- # spec.add_dependency "picky"
43
- spec.add_dependency "rdf", "~> 3.1.0"
44
- spec.add_dependency "rdf-normalize", "~> 0.4.0"
42
+ spec.add_dependency "rdf", "~> 3.2.0"
43
+ spec.add_dependency "rdf-normalize", "~> 0.5.0"
45
44
  spec.add_dependency "relaton-bib", "~> 1.11.0"
45
+ spec.add_dependency "rubyzip", "~> 2.3.0"
46
46
  spec.add_dependency "shex", "~> 0.6.0"
47
47
  spec.add_dependency "sparql", "~> 3.1.0"
48
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.2
4
+ version: 1.11.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-20 00:00:00.000000000 Z
11
+ date: 2022-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -114,28 +114,28 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: 3.1.0
117
+ version: 3.2.0
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: 3.1.0
124
+ version: 3.2.0
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: rdf-normalize
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: 0.4.0
131
+ version: 0.5.0
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: 0.4.0
138
+ version: 0.5.0
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: relaton-bib
141
141
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: 1.11.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubyzip
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 2.3.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 2.3.0
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: shex
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -233,10 +247,7 @@ files:
233
247
  - lib/relaton_w3c/data_index.rb
234
248
  - lib/relaton_w3c/data_parser.rb
235
249
  - lib/relaton_w3c/hash_converter.rb
236
- - lib/relaton_w3c/hit.rb
237
- - lib/relaton_w3c/hit_collection.rb
238
250
  - lib/relaton_w3c/processor.rb
239
- - lib/relaton_w3c/scrapper.rb
240
251
  - lib/relaton_w3c/version.rb
241
252
  - lib/relaton_w3c/w3c_bibliographic_item.rb
242
253
  - lib/relaton_w3c/w3c_bibliography.rb
@@ -256,14 +267,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
256
267
  requirements:
257
268
  - - ">="
258
269
  - !ruby/object:Gem::Version
259
- version: 2.5.0
270
+ version: 2.6.0
260
271
  required_rubygems_version: !ruby/object:Gem::Requirement
261
272
  requirements:
262
273
  - - ">="
263
274
  - !ruby/object:Gem::Version
264
275
  version: '0'
265
276
  requirements: []
266
- rubygems_version: 3.3.7
277
+ rubygems_version: 3.2.3
267
278
  signing_key:
268
279
  specification_version: 4
269
280
  summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RelatonW3c
4
- # Hit.
5
- class Hit < RelatonBib::Hit
6
- #
7
- # Parse page.
8
- #
9
- # @param lang [String, NilClass]
10
- # @return [RelatonW3c::W3cBibliographicItem]
11
- def fetch(_lang = nil)
12
- @fetch ||= Scrapper.parse_page hit
13
- end
14
- end
15
- end
@@ -1,172 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "fileutils"
4
- require "yaml"
5
-
6
- module RelatonW3c
7
- # Page of hit collection.
8
- class HitCollection < RelatonBib::HitCollection
9
- TYPES = {
10
- "CR" => "Candidate Recommendation",
11
- "NOTE" => "Group Note",
12
- "PER" => "Proposed Edited Recommendation",
13
- "PR" => "Proposed Recommendation",
14
- "REC" => "Recommendation",
15
- "RET" => "Retired",
16
- "WD" => "Working Draft",
17
- }.freeze
18
- DOMAIN = "https://www.w3.org"
19
- DATADIR = File.expand_path(".relaton/w3c", Dir.home).freeze
20
- DATAFILE = File.expand_path("bibliography.yml", DATADIR).freeze
21
-
22
- # @param ref [String] reference to search
23
- def initialize(ref)
24
- %r{
25
- ^(?:W3C\s)?
26
- (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
- Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
- Recommendation|Retired|Working\sDraft))? # type
29
- \s?
30
- (?<title_date>.+) # title_date
31
- }x =~ ref
32
- super
33
- @array = from_yaml title_date, type
34
- end
35
-
36
- private
37
-
38
- #
39
- # Fetch data form yaml
40
- #
41
- # @param title_date [String]
42
- # @param type [String]
43
- # @return [Array<Hash>]
44
- def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
- /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
- title ||= title_date
47
- result = data.select do |hit|
48
- (hit["title"].casecmp?(title) ||
49
- hit["link"].split("/").last.match?(/-#{title}-/)) &&
50
- type_date_filter(hit, type, date)
51
- end
52
- if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
- end
55
- result.map { |h| Hit.new(h, self) }
56
- end
57
-
58
- # @param hit [Hash]
59
- # @param type [String]
60
- # @param date [String]
61
- # @return [TrueClass, FalseClass]
62
- def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
63
- if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
64
- history = get_history hit, type, date
65
- return false unless history.any?
66
-
67
- hit["type"] = short_type type
68
- hit["datepub"] = history.first.at("td").text
69
- hit["link"] = history.first.at("a")[:href]
70
- end
71
- true
72
- end
73
-
74
- # @param hit [Hash]
75
- # @param type [String]
76
- # @param date [String]
77
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
78
- def get_history(hit, type, date)
79
- resp = Net::HTTP.get URI.parse(HitCollection::DOMAIN + hit["history"])
80
- history_doc = Nokogiri::HTML resp
81
- history = history_doc.xpath(
82
- "//table//a[contains(.,'#{long_type(type)}')]/../..",
83
- )
84
- return filter_history_by_date(history, history_doc, type, date) if date
85
-
86
- history
87
- end
88
-
89
- # @param history [Nokogiri::XML::NodeSet]
90
- # @param history_doc [Nokogiri::HTML::NodeSet]
91
- # @param type [String]
92
- # @param date [String]
93
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
94
- def filter_history_by_date(history, history_doc, type, date)
95
- if type
96
- history.select do |h|
97
- h.at("td[@class='table_datecol']").text == date
98
- end
99
- else
100
- history_doc.xpath(
101
- "//table//td[@class='table_datecol'][.='#{date}']/..",
102
- )
103
- end
104
- end
105
-
106
- #
107
- # Convetr long type name to short
108
- #
109
- # @param type [String]
110
- # @return [String]
111
- def short_type(type)
112
- tp = TYPES.select { |_, v| v == type }.keys
113
- tp.first || type
114
- end
115
-
116
- #
117
- # Convert shot type name to long
118
- #
119
- # @param [String]
120
- # @return [String]
121
- def long_type(type)
122
- TYPES[type] || type
123
- end
124
-
125
- #
126
- # Fetches YAML data
127
- #
128
- # @return [Hash]
129
- def data
130
- FileUtils.mkdir_p DATADIR
131
- ctime = File.ctime DATAFILE if File.exist? DATAFILE
132
- fetch_data if !ctime || ctime.to_date < Date.today
133
- @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
134
- end
135
-
136
- #
137
- # fetch data form server and save it to file.
138
- #
139
- def fetch_data
140
- resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
- # return if there aren't any changes since last fetching
142
- return unless resp.code == "200"
143
-
144
- doc = Nokogiri::HTML resp.body
145
- @data = doc.xpath("//ul[@id='container']/li").map do |h_el|
146
- link = h_el.at("h2/a")
147
- pubdetails = h_el.at("p[@class='pubdetails']")
148
- fetch_hit h_el, link, pubdetails
149
- end
150
- File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
151
- end
152
-
153
- # @param h_el [Nokogiri::XML::Element]
154
- # @param link [Nokogiri::XML::Element]
155
- # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
- datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
- editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
- keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
160
- {
161
- "title" => link.text.gsub("\u00a0", " "),
162
- "link" => link[:href],
163
- "type" => h_el.at("div").text.upcase,
164
- "workgroup" => h_el.xpath("p[@class='deliverer']").map(&:text),
165
- "datepub" => datepub,
166
- "history" => pubdetails.at("a[text()='History']")[:href],
167
- "editor" => editor,
168
- "keyword" => keyword,
169
- }
170
- end
171
- end
172
- end
@@ -1,218 +0,0 @@
1
- module RelatonW3c
2
- class Scrapper
3
- DOCTYPES = {
4
- "CR" => "candidateRecommendation",
5
- "NOTE" => "groupNote",
6
- "PER" => "proposedEditedRecommendation",
7
- "PR" => "proposedRecommendation",
8
- "REC" => "recommendation",
9
- "RET" => "retired",
10
- "WD" => "workingDraft",
11
- }.freeze
12
-
13
- class << self
14
- # @param hit [Hash]
15
- # @return [RelatonW3c::W3cBibliographicItem]
16
- def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
17
- resp = Net::HTTP.get_response URI.parse(hit["link"])
18
- doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
19
- W3cBibliographicItem.new(
20
- type: "standard",
21
- docid: fetch_docid(hit),
22
- fetched: Date.today.to_s,
23
- language: ["en"],
24
- script: ["Latn"],
25
- title: fetch_title(hit, doc),
26
- abstract: fetch_abstract(doc),
27
- link: fetch_link(hit),
28
- date: fetch_date(hit, doc),
29
- doctype: fetch_doctype(hit, doc),
30
- contributor: fetch_contributor(hit, doc),
31
- relation: fetch_relation(doc),
32
- keyword: hit["keyword"],
33
- )
34
- end
35
-
36
- private
37
-
38
- # @param hit [Hash]
39
- # @return [Array<RelatonBib::DocumentIdentifier>]
40
- def fetch_docid(hit)
41
- id = hit["link"].split("/").last
42
- [RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)]
43
- end
44
-
45
- # @param hit [Hash]
46
- # @param doc [Nokogiri::HTML::Document]
47
- # @return [Array<RelatonBib::TypedTitleString>]
48
- def fetch_title(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
49
- titles = []
50
- if doc
51
- title = doc.at("//*[contains(@id, 'title')]")&.text
52
- if title && !title.empty?
53
- titles << { content: title.gsub(/\n/, " "), type: "main" }
54
- end
55
- subtitle = doc.at(
56
- "//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
57
- )&.text
58
- titles << { content: subtitle, tipe: "subtitle" } if subtitle
59
- end
60
- if titles.empty? && hit["title"]
61
- titles << { content: hit["title"], type: "main" }
62
- end
63
- titles.map do |t|
64
- title = RelatonBib::FormattedString.new(
65
- content: t[:content], language: "en", script: "Latn",
66
- )
67
- RelatonBib::TypedTitleString.new(type: t[:type], title: title)
68
- end
69
- end
70
-
71
- # @param doc [Nokogiri::HTML::Document, NilClass]
72
- # @return [Array<RelatonBib::FormattedString>]
73
- def fetch_abstract(doc)
74
- return [] unless doc
75
-
76
- content = doc.at("//h2[.='Abstract']/following-sibling::p",
77
- "//div[@class='abstract']/p").text
78
- [RelatonBib::FormattedString.new(content: content, language: "en",
79
- script: "Latn")]
80
- end
81
-
82
- # @param hit [Hash]
83
- # @return [Array<RelatonBib::TypedUri>]
84
- def fetch_link(hit)
85
- [RelatonBib::TypedUri.new(type: "src", content: hit["link"])]
86
- end
87
-
88
- # @param hit [Hash]
89
- # @param doc [Nokogiri::HTML::Document, NilClass]
90
- # @return [Array<RelatonBib::BibliographicDate>]
91
- def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
92
- on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
93
- on ||= fetch_date1(doc) || fetch_date2(doc)
94
- [RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
95
- end
96
-
97
- # @param doc [Nokogiri::HTML::Document, NilClass]
98
- # @return [String]
99
- def fetch_date1(doc)
100
- d = doc&.at("//h2[@property='dc:issued']")&.attr(:content)
101
- d&.match(/\d{4}-\d{2}-\d{2}/)&.to_s
102
- end
103
-
104
- # @param doc [Nokogiri::HTML::Document, NilClass]
105
- # @return [String]
106
- def fetch_date2(doc)
107
- d = doc&.at("//h2[contains(@id, 'w3c-recommendation')]")
108
- return unless d
109
-
110
- Date.parse(d.attr(:id.match(/\d{2}-\w+-\d{4}/).to_s)).to_s
111
- end
112
-
113
- # @param hit [Hash]
114
- # @param doc [Nokogiri::HTML::Document, NilClass]
115
- # @return [String]
116
- def fetch_doctype(hit, doc)
117
- if hit["type"]
118
- DOCTYPES[hit["type"]]
119
- elsif doc
120
- type = HitCollection::TYPES.detect do |_k, v|
121
- doc.at("//h2[contains(., '#{v}')]/time[@datetime]")
122
- end
123
- DOCTYPES[type&.first]
124
- end
125
- end
126
-
127
- # @param hit [Hash]
128
- # @param doc [Nokogiri::HTML::Document, NilClass]
129
- # @return [Array<RelatonBib::ContributionInfo>]
130
- def fetch_contributor(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
131
- if doc
132
- editors = find_contribs(doc, "Editors").reduce([]) do |mem, ed|
133
- c = parse_contrib ed, "editor"
134
- mem << c if c
135
- mem
136
- end
137
- contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, ath|
138
- ed = mem.detect { |e| e[:id] && e[:id] == ath["data-editor-id"] }
139
- if ed
140
- ed[:role] << { type: "author" }
141
- else
142
- mem << parse_contrib(ath, "author")
143
- end
144
- mem
145
- end
146
- contribs.map { |c| contrib_info(**c) }
147
- else
148
- hit["editor"].map do |ed|
149
- contrib_info name: ed, role: [{ type: "editor" }]
150
- end
151
- end
152
- end
153
-
154
- # @param doc [Nokogiri::NTML::Document]
155
- # @param type [String]
156
- # @return [Array<Nokogiri::XML::Element]
157
- def find_contribs(doc, type)
158
- doc.xpath("//dt[contains(.,'#{type}')]/following-sibling::dd"\
159
- "[preceding-sibling::dt[1][contains(.,'#{type}')]]")
160
- end
161
-
162
- # @param element [Nokogiri::XML::Element]
163
- # @param type [String]
164
- # @return [Hash]
165
- def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
166
- p = element.at("a")
167
- return unless p
168
-
169
- contrib = {
170
- name: p.text,
171
- url: p[:href],
172
- role: [{ type: type }],
173
- id: element["data-editor-id"],
174
- }
175
- org = element.at("a[2]")
176
- contrib[:org] = { name: org.text, url: org[:href] } if org
177
- contrib
178
- end
179
-
180
- # @param name [String]
181
- # @param url [String, NilClass]
182
- # @param role [Array<Hash>]
183
- # @parma org [Hash]
184
- # @return [RelatonBib::ContributionInfo]
185
- def contrib_info(**args)
186
- completename = RelatonBib::LocalizedString.new(args[:name])
187
- name = RelatonBib::FullName.new completename: completename
188
- af = []
189
- if args[:org]
190
- org = RelatonBib::Organization.new(**args[:org])
191
- af << RelatonBib::Affiliation.new(organization: org)
192
- end
193
- en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
194
- RelatonBib::ContributionInfo.new entity: en, role: args[:role]
195
- end
196
-
197
- # @param doc [Nokogiri::HTML::Document]
198
- # @return [Array<RelatonBib::DocumentRelation>]
199
- def fetch_relation(doc)
200
- return [] unless doc && (link = recommendation_link(doc))
201
-
202
- hit = { "link" => link }
203
- item = parse_page hit
204
- [RelatonBib::DocumentRelation.new(type: "obsoletedBy", bibitem: item)]
205
- end
206
-
207
- # @param doc [Nokogiri::HTML::Document]
208
- # @return [String, NilClass]
209
- def recommendation_link(doc)
210
- recom = doc.at("//dt[.='Latest Recommendation:']",
211
- "//dt[.='Previous Recommendation:']")
212
- return unless recom
213
-
214
- recom.at("./following-sibling::dd/a")[:href]
215
- end
216
- end
217
- end
218
- end