relaton-bsi 1.7.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
module RelatonBsi
  # Scrapper: turns a BSI product page (fetched through the hit collection's
  # agent) into a BsiBibliographicItem by reading rows of the page's details
  # table, each located by the text of its <th> header.
  module Scrapper
    class << self
      # Fetch the page a search hit points to and build a bibliographic item.
      #
      # @param hit [RelatonBsi::Hit]
      # @return [RelatonBsi::BsiBibliographicItem]
      def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
        doc = hit.hit_collection.agent.get hit.hit[:url]
        BsiBibliographicItem.new(
          fetched: Date.today.to_s,
          type: "standard",
          docid: fetch_docid(doc),
          language: ["en"],
          script: ["Latn"],
          title: fetch_titles(doc),
          # doctype: hit_data[:type],
          docstatus: fetch_status(doc),
          ics: fetch_ics(doc),
          date: fetch_dates(hit),
          contributor: fetch_contributors(doc),
          editorialgroup: fetch_editorialgroup(doc),
          abstract: fetch_abstract(doc),
          copyright: fetch_copyright(doc, hit),
          link: fetch_link(HitCollection::DOMAIN + hit.hit[:url]),
          relation: fetch_relations(doc),
          place: ["London"]
        )
      end

      private

      # Fetch ICS classification.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<RelatonIsoBib::Ics>] empty when the page has no ICS row
      def fetch_ics(doc)
        ics = doc.at("//tr[th='ICS']/td")
        return [] unless ics

        [RelatonIsoBib::Ics.new(ics.text)]
      end

      # Fetch abstracts. The text of the "Descriptors" row is used as the
      # abstract content.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>] empty when the page has no Descriptors row
      def fetch_abstract(doc)
        content = doc.at("//tr[th='Descriptors']/td")
        return [] unless content

        [{ content: content.text, language: "en", script: "Latn" }]
      end

      # Fetch docid: the BSI standard number plus, when present, the ISBN.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      def fetch_docid(doc)
        # The standard number is treated as mandatory: a page without that
        # row still raises NoMethodError here, exactly as before.
        docid = doc.at("//tr[th[.='Standard Number']]/td").text
        docids = [RelatonBib::DocumentIdentifier.new(type: "BSI", id: docid)]
        # The ISBN row is only added when it exists, so a page without one no
        # longer aborts the whole parse.
        isbn = doc.at("//tr[th[.='ISBN']]/td")
        docids << RelatonBib::DocumentIdentifier.new(type: "ISBN", id: isbn.text) if isbn
        docids
      end

      # Fetch status.
      #
      # @param doc [Mechanize::Page]
      # @return [RelatonBib::DocumentStatus, NilClass] nil when there is no Status row
      def fetch_status(doc)
        s = doc.at("//tr[th='Status']/td")
        return unless s

        RelatonBib::DocumentStatus.new(stage: s.text)
      end

      # Fetch workgroup from the "Committee" row.
      #
      # @param doc [Mechanize::Page]
      # @return [RelatonIsoBib::EditorialGroup, NilClass] nil when there is no
      #   Committee row (assumes BsiBibliographicItem accepts a nil
      #   editorialgroup - TODO confirm against relaton-iso-bib)
      def fetch_editorialgroup(doc)
        wg = doc.at("//tr[th='Committee']/td")
        return unless wg

        tc = RelatonIsoBib::IsoSubgroup.new name: wg.text
        RelatonIsoBib::EditorialGroup.new technical_committee: [tc]
      end

      # Fetch relations: one entry per link found in the "Replaces" row.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>] hashes with :type and :bibitem keys
      def fetch_relations(doc)
        doc.xpath("//tr[th='Replaces']/td/a").map do |r|
          fref = RelatonBib::FormattedRef.new(content: r.text, language: "en", script: "Latn")
          link = fetch_link r[:href]
          bibitem = BsiBibliographicItem.new(formattedref: fref, type: "standard", link: link)
          # NOTE(review): links taken from the "Replaces" row are emitted with
          # relation type "complements" - confirm this is the intended type.
          { type: "complements", bibitem: bibitem }
        end
      end

      # Fetch titles: the English page heading plus the optional French and
      # German title rows.
      #
      # @param doc [Mechanize::Page]
      # @return [Object] the titles as produced by
      #   RelatonBib::TypedTitleString.from_string, concatenated with +
      def fetch_titles(doc)
        te = doc.at("//div[@id='title']/h2").text.strip
        ttls = RelatonBib::TypedTitleString.from_string te, "en", "Latn"
        { "fr" => "French", "de" => "German" }.each do |lang, lang_name|
          node = doc.at("//tr[th[.='Title in #{lang_name}']]/td")
          next unless node

          ttls += RelatonBib::TypedTitleString.from_string node.text.strip, lang, "Latn"
        end
        ttls
      end

      # Fetch dates. The publication date comes from the search hit, not
      # from the page.
      #
      # @param hit [RelatonBsi::Hit]
      # @return [Array<Hash>]
      def fetch_dates(hit)
        [{ type: "published", on: hit.hit[:date].to_s }]
      end

      # Fetch contributors: a single publisher entry.
      #
      # @param doc [Mechanize::Page]
      # @return [Array<Hash>]
      def fetch_contributors(doc)
        [{ role: [{ type: "publisher" }], entity: owner_entity(doc) }]
      end

      # Fetch links.
      #
      # @param url [String]
      # @return [Array<Hash>]
      def fetch_link(url)
        [{ type: "src", content: url }]
      end

      # Fetch copyright: the publisher as owner, the year of the hit's date
      # as start year.
      #
      # @param doc [Mechanize::Page]
      # @param hit [RelatonBsi::Hit]
      # @return [Array<Hash>]
      def fetch_copyright(doc, hit)
        owner = owner_entity doc
        from = hit.hit[:date].year.to_s
        [{ owner: [owner], from: from }]
      end

      # Build the organization hash for the page's "Publisher" row. "BSI" is
      # expanded to the full organization record; any other value is used
      # verbatim as the name.
      #
      # @param doc [Mechanize::Page]
      # @return [Hash]
      def owner_entity(doc)
        abbrev = doc.at("//tr[th='Publisher']/td").text
        case abbrev
        when "BSI"
          { abbreviation: abbrev, name: "British Standards Institution", url: "https://www.bsigroup.com/" }
        else
          { name: abbrev }
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the relaton-bsi gem.
module RelatonBsi
  # Release identifier of the gem; the gemspec assigns it to spec.version.
  VERSION = "1.7.pre1"
end
@@ -0,0 +1,28 @@
1
+ require "nokogiri"
2
+
3
module RelatonBsi
  # XML parser for BSI bibliographic items. Inherits the parsing from
  # RelatonIsoBib::XMLParser and adds the BSI-specific `ext` fields.
  class XMLParser < RelatonIsoBib::XMLParser
    class << self
      private

      # Override the parent parser's item_data: take the hash it builds and,
      # when the item has an `ext` child, add the BSI price code and CEN
      # processing values read from it.
      #
      # @param isoitem [Nokogiri::XML::Element]
      # @return [Hash]
      def item_data(isoitem)
        data = super
        ext = isoitem.at "./ext"
        return data unless ext

        # Either key is set to nil when its element is absent from `ext`.
        data[:price_code] = ext.at("./price-code")&.text
        data[:cen_processing] = ext.at("./cen-processing")&.text
        data
      end

      # Build the BSI item from the parsed attributes.
      #
      # @param item_hash [Hash]
      # @return [RelatonBsi::BsiBibliographicItem]
      def bib_item(item_hash)
        # Parenthesised so the double splat cannot be read as the `**`
        # operator; the previously unused local assignment is gone.
        BsiBibliographicItem.new(**item_hash)
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/relaton_bsi/version"
4
+
5
# Gem specification for relaton-bsi: identity, packaged files and
# dependencies.
Gem::Specification.new do |spec|
  # The same sentence serves as both summary and description.
  blurb = "RelatonBsi: retrieve BSI Standards for bibliographic use "\
          "using the BibliographicItem model"

  spec.name          = "relaton-bsi"
  spec.version       = RelatonBsi::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  spec.summary       = blurb
  spec.description   = blurb
  spec.homepage      = "https://github.com/metanorma/relaton-bsi"
  spec.license       = "BSD-2-Clause"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Package every file tracked by git except those under test/, spec/ or
  # features/. `git ls-files -z` is run from the gemspec's own directory.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Development-only dependencies, registered in this order; an entry
  # without a constraint accepts any version.
  [
    ["equivalent-xml", "~> 0.6"],
    ["pry-byebug"],
    ["ruby-jing"],
    ["simplecov"],
    ["vcr", "~> 5.0.0"],
    ["webmock"],
  ].each do |gem_name, *constraint|
    spec.add_development_dependency gem_name, *constraint
  end

  # Runtime dependencies.
  spec.add_dependency "mechanize"
  spec.add_dependency "relaton-iso-bib", "~> 1.7.0"
end
metadata ADDED
@@ -0,0 +1,187 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: relaton-bsi
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.7.pre1
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-04-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: equivalent-xml
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry-byebug
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-jing
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: simplecov
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: vcr
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 5.0.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 5.0.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: webmock
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mechanize
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: relaton-iso-bib
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 1.7.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.7.0
125
+ description: 'RelatonBsi: retrieve BSI Standards for bibliographic use using the BibliographicItem
126
+ model'
127
+ email:
128
+ - open.source@ribose.com
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".github/workflows/rake.yml"
134
+ - ".gitignore"
135
+ - ".rspec"
136
+ - ".rubocop.yml"
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.adoc
140
+ - Rakefile
141
+ - bin/console
142
+ - bin/rspec
143
+ - bin/setup
144
+ - grammars/basicdoc.rng
145
+ - grammars/biblio.rng
146
+ - grammars/bsi.rng
147
+ - grammars/isodoc.rng
148
+ - grammars/isostandard.rng
149
+ - grammars/reqt.rng
150
+ - lib/relaton_bsi.rb
151
+ - lib/relaton_bsi/bsi_bibliographic_item.rb
152
+ - lib/relaton_bsi/bsi_bibliography.rb
153
+ - lib/relaton_bsi/hash_converter.rb
154
+ - lib/relaton_bsi/hit.rb
155
+ - lib/relaton_bsi/hit_collection.rb
156
+ - lib/relaton_bsi/processor.rb
157
+ - lib/relaton_bsi/scrapper.rb
158
+ - lib/relaton_bsi/version.rb
159
+ - lib/relaton_bsi/xml_parser.rb
160
+ - relaton_bsi.gemspec
161
+ homepage: https://github.com/metanorma/relaton-bsi
162
+ licenses:
163
+ - BSD-2-Clause
164
+ metadata:
165
+ homepage_uri: https://github.com/metanorma/relaton-bsi
166
+ source_code_uri: https://github.com/metanorma/relaton-bsi
167
+ post_install_message:
168
+ rdoc_options: []
169
+ require_paths:
170
+ - lib
171
+ required_ruby_version: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: 2.4.0
176
+ required_rubygems_version: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">"
179
+ - !ruby/object:Gem::Version
180
+ version: 1.3.1
181
+ requirements: []
182
+ rubygems_version: 3.0.6
183
+ signing_key:
184
+ specification_version: 4
185
+ summary: 'RelatonBsi: retrieve BSI Standards for bibliographic use using the BibliographicItem
186
+ model'
187
+ test_files: []