relaton-cen 1.8.pre1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,198 @@
1
+ # frozen_string_literal: true
2
+
3
module RelatonCen
  # Scrapper.
  #
  # Converts the web page of a single CEN search hit into a
  # RelatonIsoBib::IsoBibliographicItem. The helpers read the fetched page
  # only through its `at` / `xpath` query methods and tolerate rows that are
  # missing from the page where a sensible empty result exists.
  module Scrapper
    # Parent committees looked for in the page heading:
    # heading fragment => full committee name.
    COMMITTEES = {
      "TC 459" =>
        "ECISS - European Committee for Iron and Steel Standardization",
    }.freeze

    class << self
      # Parse page.
      #
      # Fetches the hit's URL with the hit collection's agent and builds the
      # bibliographic item from the returned page.
      #
      # @param hit [RelatonCen::Hit]
      # @return [RelatonBib::BibliographicItem]
      def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        doc = hit.hit_collection.agent.get hit.hit[:url]
        RelatonIsoBib::IsoBibliographicItem.new(
          fetched: Date.today.to_s,
          type: "standard",
          docid: fetch_docid(hit.hit[:code]),
          language: ["en"],
          script: ["Latn"],
          title: fetch_titles(doc),
          doctype: "international-standard",
          docstatus: fetch_status(doc),
          ics: fetch_ics(doc),
          date: fetch_dates(doc),
          # contributor: fetch_contributors(doc),
          editorialgroup: fetch_editorialgroup(doc),
          structuredidentifier: fetch_structuredid(hit.hit),
          abstract: fetch_abstract(doc),
          copyright: fetch_copyright(doc),
          link: fetch_link(doc.uri.to_s),
          relation: fetch_relations(doc),
          # NOTE(review): hard-coded place; confirm "London" is intended here.
          place: ["London"],
        )
      end

      private

      # Fetch ICS codes.
      #
      # Each text node of the "ICS" row contributes its first
      # whitespace-delimited token as the code.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<RelatonIsoBib::Ics>]
      def fetch_ics(doc)
        doc.xpath("//tr[th[.='ICS']]/td/text()").each_with_object([]) do |node, list|
          code = node.text[/\S+/]
          # Whitespace-only text nodes carry no code; skip them rather than
          # building an Ics from an empty string.
          list << RelatonIsoBib::Ics.new(code) if code
        end
      end

      # Fetch abstracts.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>] empty when the page has no "Abstract/Scope" row
      def fetch_abstract(doc)
        content = doc.at("//tr[th[.='Abstract/Scope']]/td")
        return [] unless content

        [{ content: content.text, language: "en", script: "Latn" }]
      end

      # Fetch docid.
      #
      # @param ref [String]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      def fetch_docid(ref)
        [RelatonBib::DocumentIdentifier.new(type: "CEN", id: ref)]
      end

      # Fetch status.
      #
      # @param doc [Mechanize::Page]
      # @return [RelatonBib::DocumentStatus, NilClass] nil when the page has
      #   no "Status" row
      def fetch_status(doc)
        s = doc.at("//tr[th[.='Status']]/td")
        return unless s

        RelatonBib::DocumentStatus.new(stage: s.text.strip)
      end

      # Fetch workgroup.
      #
      # The group code is the text of the page's <h1>, its name the <span>
      # inside that heading.
      #
      # @param doc [Mechanize::Page]
      # @return [RelatonIsoBib::EditorialGroup, NilClass] nil when the page
      #   has no heading to read the group from
      def fetch_editorialgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
        heading = doc.at("//tr/td/h1/text()")
        caption = doc.at("//tr/td/h1/span")
        return unless heading && caption

        code = heading.text
        title = caption.text
        # A regexp literal on the left of =~ assigns its named groups to the
        # locals `type` and `num`; both are nil when the heading does not match.
        %r{/(?<type>\w+)(?:\s(?<num>[^/]+))?$} =~ code
        tc = COMMITTEES.each_with_object([]) do |(key, name), groups|
          next unless code.include? key

          t, n = key.split " "
          groups << RelatonIsoBib::IsoSubgroup.new(name: name, type: t, number: n)
        end
        own = RelatonIsoBib::IsoSubgroup.new(name: title, type: type, number: num)
        sc = []
        # With a known parent committee the page's own group is filed as a
        # subcommittee; otherwise it is the technical committee itself.
        (tc.any? ? sc : tc) << own
        RelatonIsoBib::EditorialGroup.new(technical_committee: tc,
                                          subcommittee: sc)
      end

      # Fetch structured identifier.
      #
      # Takes the first number in the code as project number, and up to two
      # following "-<digits>" groups as part and subpart.
      #
      # @param hit [Hash] hit data; only hit[:code] is read
      # @return [RelatonIsoBib::StructuredIdentifier]
      def fetch_structuredid(hit)
        %r{(?<pnum>\d+)(?:-(?<part>\d+))?(?:-(?<subpart>\d+))?} =~ hit[:code]
        RelatonIsoBib::StructuredIdentifier.new(
          project_number: pnum, part: part, subpart: subpart, type: "CEN",
        )
      end

      # Fetch relations.
      #
      # Reads every row of the relations table except "Sales Points". The row
      # heading gives the relation type ("Supersedes" => "obsoletes",
      # "Normative reference" => "cites", anything else lower-cased) and each
      # link in the row becomes one related item.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>]
      def fetch_relations(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
        doc.xpath(
          "//div[@id='DASHBOARD_LISTRELATIONS']/table/tr[th[.!='Sales Points']]",
        ).each_with_object([]) do |rt, a|
          t = rt.at("th").text
          type = case t
                 when "Supersedes" then "obsoletes"
                 when /Normative reference/ then "cites"
                 else t.downcase
                 end
          rt.xpath("td/a").each do |r|
            fref = RelatonBib::FormattedRef.new(content: r.text, language: "en",
                                                script: "Latn")
            link = fetch_link HitCollection::DOMAIN + r[:href]
            bibitem = RelatonBib::BibliographicItem.new(
              formattedref: fref, type: "standard", link: link,
            )
            a << { type: type, bibitem: bibitem }
          end
        end
      end

      # Fetch titles.
      #
      # @param doc [Mechanize::Page]
      # @return [RelatonBib::TypedTitleStringCollection]
      def fetch_titles(doc)
        te = doc.at("//tr[th[.='Title']]/td").text.strip
        RelatonBib::TypedTitleString.from_string te, "en", "Latn"
      end

      # Fetch dates.
      #
      # Reads the implementation-dates table. The abbreviation in the row
      # heading selects the date type; rows without a value are skipped.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>]
      def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
        doc.xpath("//div[@id='DASHBOARD_LISTIMPLEMENTATIONDATES']/table/tr")
          .each_with_object([]) do |d, a|
          # A row may lack a <td> (or hold only whitespace); treat both as
          # "no date" instead of failing on nil or emitting a blank date.
          on = d.at("td")&.text.to_s.strip
          next if on.empty?

          t = d.at("th")&.text.to_s
          type = case t
                 when /DOR/ then "adapted"
                 when /DAV/ then "issued"
                 when /DOA/ then "announced"
                 when /DOP/ then "published"
                 when /DOW/ then "obsoleted"
                 else t.downcase
                 end
          a << { type: type, on: on }
        end
      end

      # Fetch contributors
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>]
      # def fetch_contributors(doc)
      #   contrib = { role: [type: "publisher"] }
      #   contrib[:entity] = owner_entity doc
      #   [contrib]
      # end

      # Fetch links.
      #
      # @param url [String]
      # @return [Array<Hash>]
      def fetch_link(url)
        [{ type: "src", content: url }]
      end

      # Fetch copyright.
      #
      # The copyright year is the leading four digits of the
      # "date of Availability (DAV)" row.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>] empty when the page has no DAV row or the row
      #   does not start with a year
      def fetch_copyright(doc)
        dav = doc.at("//tr[th[.='date of Availability (DAV)']]/td")
        return [] unless dav

        from = dav.text.strip[/\A\d{4}/]
        return [] unless from

        [{ owner: [owner_entity], from: from }]
      end

      # Organization recorded as copyright owner.
      #
      # @return [Hash]
      def owner_entity
        {
          abbreviation: "CEN",
          name: "European Committee for Standardization",
          url: "https://cen.eu",
        }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RelatonCen
4
+ VERSION = "1.8.pre1" # Gem version string; the gemspec assigns it to spec.version.
5
+ end
@@ -0,0 +1,28 @@
1
+ require "nokogiri"
2
+
3
+ module RelatonCen
4
+ class XMLParser < RelatonIsoBib::XMLParser # Subclass that currently adds nothing: every override below is commented out.
5
+ class << self
6
+ private
7
+
8
+ # Disabled override of RelatonBib::XMLParser#item_data, kept for reference.
9
+ # @param isoitem [Nokogiri::XML::Element]
10
+ # @return [Hash]
11
+ # def item_data(isoitem)
12
+ # data = super
13
+ # ext = isoitem.at "./ext"
14
+ # return data unless ext
15
+
16
+ # data[:price_code] = ext.at("./price-code")&.text
17
+ # data[:cen_processing] = ext.at("./cen-processing")&.text
18
+ # data
19
+ # end
20
+
21
+ # Disabled override of bib_item, kept for reference.
22
+ # @param item_hash [Hash]
23
+ # @return [RelatonBib::BibliographicItem]
24
+ # def bib_item(item_hash)
25
+ # RelatonIsoBib::IsoBibliographicItem.new **item_hash
26
+ # end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/relaton_cen/version"
4
+
5
# Gem specification for relaton-cen.
Gem::Specification.new do |spec|
  spec.name          = "relaton-cen"
  spec.version       = RelatonCen::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  # The summary and description previously named "RelatonItu" and "Cenelec",
  # left over from another gem; this gem is relaton-cen and covers CEN.
  spec.summary       = "RelatonCen: retrieve CEN Standards for bibliographic use "\
                       "using the BibliographicItem model"
  spec.description   = "RelatonCen: retrieve CEN Standards for bibliographic use "\
                       "using the BibliographicItem model"
  spec.homepage      = "https://github.com/metanorma/relaton-cen"
  spec.license       = "BSD-2-Clause"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files tracked by git; test, spec and feature
  # directories are left out of the packaged gem.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "ruby-jing"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr", "~> 5.0.0"
  spec.add_development_dependency "webmock"

  spec.add_dependency "mechanize"
  spec.add_dependency "relaton-iso-bib", "~> 1.8.0"

  # For more information and examples about making a new gem, checkout our
  # guide at: https://bundler.io/guides/creating_gem.html
end
metadata ADDED
@@ -0,0 +1,185 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: relaton-cen
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.8.pre1
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-06-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: equivalent-xml
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry-byebug
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-jing
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: simplecov
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: vcr
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 5.0.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 5.0.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: webmock
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mechanize
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: relaton-iso-bib
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 1.8.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.8.0
125
+ description: 'RelatonItu: retrieve Cenelec Standards for bibliographic use using the
126
+ BibliographicItem model'
127
+ email:
128
+ - open.source@ribose.com
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".github/workflows/rake.yml"
134
+ - ".gitignore"
135
+ - ".rspec"
136
+ - ".rubocop.yml"
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.adoc
140
+ - Rakefile
141
+ - bin/console
142
+ - bin/rspec
143
+ - bin/setup
144
+ - grammars/basicdoc.rng
145
+ - grammars/biblio.rng
146
+ - grammars/isodoc.rng
147
+ - grammars/isostandard.rng
148
+ - grammars/reqt.rng
149
+ - lib/relaton_cen.rb
150
+ - lib/relaton_cen/cen_bibliography.rb
151
+ - lib/relaton_cen/committees.yaml
152
+ - lib/relaton_cen/hit.rb
153
+ - lib/relaton_cen/hit_collection.rb
154
+ - lib/relaton_cen/processor.rb
155
+ - lib/relaton_cen/scrapper.rb
156
+ - lib/relaton_cen/version.rb
157
+ - lib/relaton_cen/xml_parser.rb
158
+ - relaton_cen.gemspec
159
+ homepage: https://github.com/metanorma/relaton-cen
160
+ licenses:
161
+ - BSD-2-Clause
162
+ metadata:
163
+ homepage_uri: https://github.com/metanorma/relaton-cen
164
+ source_code_uri: https://github.com/metanorma/relaton-cen
165
+ post_install_message:
166
+ rdoc_options: []
167
+ require_paths:
168
+ - lib
169
+ required_ruby_version: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: 2.4.0
174
+ required_rubygems_version: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">"
177
+ - !ruby/object:Gem::Version
178
+ version: 1.3.1
179
+ requirements: []
180
+ rubygems_version: 3.2.3
181
+ signing_key:
182
+ specification_version: 4
183
+ summary: 'RelatonItu: retrieve Cenelec Standards for bibliographic use using the BibliographicItem
184
+ model'
185
+ test_files: []