relaton-bsi 1.7.pre1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RelatonBsi
4
+ # Scrapper.
5
+ module Scrapper
6
+ class << self
7
# Parse page.
# Downloads the page referenced by the hit and assembles a bibliographic
# item from the fields scraped off it.
# @param hit [RelatonBsi::Hit]
# @return [RelatonBsi::BsiBibliographicItem]
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  path = hit.hit[:url]
  page = hit.hit_collection.agent.get path
  attrs = {
    fetched: Date.today.to_s,
    type: "standard",
    docid: fetch_docid(page),
    language: ["en"],
    script: ["Latn"],
    title: fetch_titles(page),
    # doctype: hit_data[:type],
    docstatus: fetch_status(page),
    ics: fetch_ics(page),
    date: fetch_dates(hit),
    contributor: fetch_contributors(page),
    editorialgroup: fetch_editorialgroup(page),
    abstract: fetch_abstract(page),
    copyright: fetch_copyright(page, hit),
    # The page is requested by its relative URL; the stored link is absolute.
    link: fetch_link(HitCollection::DOMAIN + path),
    relation: fetch_relations(page),
    place: ["London"]
  }
  BsiBibliographicItem.new(**attrs)
end
32
+
33
+ private
34
+
35
# Fetch ICS.
# Reads the cell of the "ICS" table row. A page without that row yields an
# empty array instead of raising NoMethodError on nil.
# @param doc [Mechanize::Page]
# @return [Array<RelatonIsoBib::Ics>]
def fetch_ics(doc)
  ics = doc.at("//tr[th='ICS']/td")
  return [] unless ics

  [RelatonIsoBib::Ics.new(ics.text)]
end
41
+
42
# Fetch abstracts.
# The text of the "Descriptors" table row is used as the abstract content.
# A page without that row yields an empty array instead of raising
# NoMethodError on nil.
# @param doc [Mechanize::Page]
# @return [Array<Hash>]
def fetch_abstract(doc)
  content = doc.at("//tr[th='Descriptors']/td")
  return [] unless content

  [{ content: content.text, language: "en", script: "Latn" }]
end
49
+
50
# Fetch docid.
# Builds one identifier per table row that is present on the page: the
# "Standard Number" row becomes a "BSI" identifier and the "ISBN" row an
# "ISBN" identifier. A row missing from the page is skipped instead of
# raising NoMethodError on nil.
# @param doc [Mechanize::Page]
# @return [Array<RelatonBib::DocumentIdentifier>]
def fetch_docid(doc)
  rows = { "BSI" => "Standard Number", "ISBN" => "ISBN" }
  rows.each_with_object([]) do |(type, label), docids|
    id = doc.at("//tr[th[.='#{label}']]/td")&.text
    docids << RelatonBib::DocumentIdentifier.new(type: type, id: id) if id
  end
end
61
+
62
# Fetch status.
# Wraps the text of the "Status" table row; nil when the page has no such row.
# @param doc [Mechanize::Page]
# @return [RelatonBib::DocumentStatus, NilClass]
def fetch_status(doc)
  status = doc.at("//tr[th='Status']/td")
  RelatonBib::DocumentStatus.new(stage: status.text) if status
end
71
+
72
# Fetch workgroup.
# The text of the "Committee" table row becomes the single technical
# committee of the editorial group. A page without that row yields nil
# instead of raising NoMethodError on nil.
# @param doc [Mechanize::Page]
# @return [RelatonIsoBib::EditorialGroup, NilClass]
def fetch_editorialgroup(doc)
  wg = doc.at("//tr[th='Committee']/td")
  return unless wg

  tc = RelatonIsoBib::IsoSubgroup.new name: wg.text
  RelatonIsoBib::EditorialGroup.new technical_committee: [tc]
end
80
+
81
# Fetch relations.
# Each link in the "Replaces" table row becomes a relation to a minimal
# bibliographic item carrying the link text as its formatted reference.
# NOTE(review): links are read from the "Replaces" row but emitted with
# relation type "complements" — confirm this is the intended type.
# @param doc [Mechanize::Page]
# @return [Array<Hash>]
def fetch_relations(doc)
  anchors = doc.xpath("//tr[th='Replaces']/td/a")
  anchors.map do |anchor|
    ref = RelatonBib::FormattedRef.new(
      content: anchor.text, language: "en", script: "Latn"
    )
    item = BsiBibliographicItem.new(
      formattedref: ref, type: "standard", link: fetch_link(anchor[:href])
    )
    { type: "complements", bibitem: item }
  end
end
92
+
93
# Fetch titles.
# The page heading supplies the English title; the "Title in French" and
# "Title in German" table rows, when present, are appended in that order.
# @param doc [Mechanize::Page]
# @return whatever RelatonBib::TypedTitleString.from_string returns, with the
#   optional translations appended via +
def fetch_titles(doc)
  english = doc.at("//div[@id='title']/h2").text.strip
  titles = RelatonBib::TypedTitleString.from_string english, "en", "Latn"
  { "French" => "fr", "German" => "de" }.each do |language, code|
    node = doc.at("//tr[th[.='Title in #{language}']]/td")
    next unless node

    titles += RelatonBib::TypedTitleString.from_string node.text.strip, code, "Latn"
  end
  titles
end
109
+
110
# Fetch dates
# Reports the hit's :date value, converted to a string, as the publication
# date.
# @param hit [RelatonBsi::Hit]
# @return [Array<Hash>]
def fetch_dates(hit)
  published_on = hit.hit[:date].to_s
  [{ type: "published", on: published_on }]
end
116
+
117
# Fetch contributors
# Produces a single contributor: the page's publisher entity in the
# "publisher" role.
# @param doc [Mechanize::Page]
# @return [Array<Hash>]
def fetch_contributors(doc)
  publisher = { role: [{ type: "publisher" }], entity: owner_entity(doc) }
  [publisher]
end
125
+
126
# Fetch links.
# Wraps a URL in a one-element list of "src" link hashes.
# @param url [String]
# @return [Array<Hash>]
def fetch_link(url)
  source = { type: "src", content: url }
  [source]
end
132
+
133
# Fetch copyright.
# The owner is the page's publisher entity; the start year is taken from
# the hit's :date value.
# @param doc [Mechanize::Page]
# @param hit [RelatonBsi::Hit]
# @return [Array<Hash>]
def fetch_copyright(doc, hit)
  holder = owner_entity doc
  [{ owner: [holder], from: hit.hit[:date].year.to_s }]
end
142
+
143
# Build the organization hash for the text of the "Publisher" table row.
# "BSI" is expanded to the full organization record; any other value is
# used as the organization name as-is.
# @param doc [Mechanize::Page]
# @return [Hash]
def owner_entity(doc)
  publisher = doc.at("//tr[th='Publisher']/td").text
  return { name: publisher } unless publisher == "BSI"

  {
    abbreviation: publisher,
    name: "British Standards Institution",
    url: "https://www.bsigroup.com/"
  }
end
154
+ end
155
+ end
156
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the relaton-bsi gem.
module RelatonBsi
  # Version string of this gem; the gemspec reads it for spec.version.
  VERSION = "1.7.pre1"
end
@@ -0,0 +1,28 @@
1
+ require "nokogiri"
2
+
3
+ module RelatonBsi
4
+ class XMLParser < RelatonIsoBib::XMLParser
5
+ class << self
6
+ private
7
+
8
# Override of the parent parser's item_data method: adds the values of the
# "price-code" and "cen-processing" children of the item's "ext" element to
# the hash built by the parent.
# NOTE(review): the previous comment named RelatonBib::XMLParser as the
# overridden class, while this class inherits from RelatonIsoBib::XMLParser —
# confirm where item_data is actually defined.
# @param isoitem [Nokogiri::XML::Element]
# @return [Hash]
def item_data(isoitem)
  attrs = super
  ext_node = isoitem.at "./ext"
  if ext_node
    # Either child may be absent, in which case nil is stored under its key.
    attrs[:price_code] = ext_node.at("./price-code")&.text
    attrs[:cen_processing] = ext_node.at("./cen-processing")&.text
  end
  attrs
end
20
+
21
# Build a BSI bibliographic item from a hash of attributes.
# The hash is expanded into keyword arguments of the item's constructor.
# @param item_hash [Hash]
# @return [RelatonBsi::BsiBibliographicItem]
def bib_item(item_hash)
  BsiBibliographicItem.new(**item_hash)
end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/relaton_bsi/version"
4
+
5
# Gem specification for relaton-bsi.
Gem::Specification.new do |spec|
  # Summary and description share the same text.
  blurb = "RelatonBsi: retrieve BSI Standards for bibliographic use " \
          "using the BibliographicItem model"

  spec.name = "relaton-bsi"
  spec.version = RelatonBsi::VERSION
  spec.authors = ["Ribose Inc."]
  spec.email = ["open.source@ribose.com"]

  spec.summary = blurb
  spec.description = blurb
  spec.homepage = "https://github.com/metanorma/relaton-bsi"
  spec.license = "BSD-2-Clause"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  # Both metadata URIs point at the project homepage.
  %w[homepage_uri source_code_uri].each do |key|
    spec.metadata[key] = spec.homepage
  end

  # Package every file tracked by git except those under the test, spec and
  # features directories.
  excluded_dirs = %w[test/ spec/ features/]
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject do |path|
      path.start_with?(*excluded_dirs)
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Development dependencies.
  # NOTE(review): rake, rspec and ruby-debug-ide were only present as
  # commented-out declarations — presumably supplied elsewhere (e.g. the
  # Gemfile); confirm.
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "ruby-jing"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr", "~> 5.0.0"
  spec.add_development_dependency "webmock"

  # Runtime dependencies.
  spec.add_dependency "mechanize"
  spec.add_dependency "relaton-iso-bib", "~> 1.7.0"
end
metadata ADDED
@@ -0,0 +1,187 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: relaton-bsi
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.7.pre1
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-04-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: equivalent-xml
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry-byebug
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-jing
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: simplecov
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: vcr
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 5.0.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 5.0.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: webmock
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mechanize
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: relaton-iso-bib
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 1.7.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.7.0
125
+ description: 'RelatonBsi: retrieve BSI Standards for bibliographic use using the BibliographicItem
126
+ model'
127
+ email:
128
+ - open.source@ribose.com
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".github/workflows/rake.yml"
134
+ - ".gitignore"
135
+ - ".rspec"
136
+ - ".rubocop.yml"
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.adoc
140
+ - Rakefile
141
+ - bin/console
142
+ - bin/rspec
143
+ - bin/setup
144
+ - grammars/basicdoc.rng
145
+ - grammars/biblio.rng
146
+ - grammars/bsi.rng
147
+ - grammars/isodoc.rng
148
+ - grammars/isostandard.rng
149
+ - grammars/reqt.rng
150
+ - lib/relaton_bsi.rb
151
+ - lib/relaton_bsi/bsi_bibliographic_item.rb
152
+ - lib/relaton_bsi/bsi_bibliography.rb
153
+ - lib/relaton_bsi/hash_converter.rb
154
+ - lib/relaton_bsi/hit.rb
155
+ - lib/relaton_bsi/hit_collection.rb
156
+ - lib/relaton_bsi/processor.rb
157
+ - lib/relaton_bsi/scrapper.rb
158
+ - lib/relaton_bsi/version.rb
159
+ - lib/relaton_bsi/xml_parser.rb
160
+ - relaton_bsi.gemspec
161
+ homepage: https://github.com/metanorma/relaton-bsi
162
+ licenses:
163
+ - BSD-2-Clause
164
+ metadata:
165
+ homepage_uri: https://github.com/metanorma/relaton-bsi
166
+ source_code_uri: https://github.com/metanorma/relaton-bsi
167
+ post_install_message:
168
+ rdoc_options: []
169
+ require_paths:
170
+ - lib
171
+ required_ruby_version: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: 2.4.0
176
+ required_rubygems_version: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">"
179
+ - !ruby/object:Gem::Version
180
+ version: 1.3.1
181
+ requirements: []
182
+ rubygems_version: 3.0.6
183
+ signing_key:
184
+ specification_version: 4
185
+ summary: 'RelatonBsi: retrieve BSI Standards for bibliographic use using the BibliographicItem
186
+ model'
187
+ test_files: []