relaton-bsi 1.8.4 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,82 +1,151 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "graphql/client"
4
+ require "graphql/client/http"
5
+
6
+ Encoding.default_internal = Encoding::UTF_8
7
+ Encoding.default_external = Encoding::UTF_8
8
+
3
9
  module RelatonBsi
4
10
  # Scrapper.
5
11
  module Scrapper
12
+ HTTP = GraphQL::Client::HTTP.new "https://shop-bsi.myshopify.com/api/2021-04/graphql.json" do
13
+ def headers(_context)
14
+ { "x-shopify-storefront-access-token": "c935c196c0b7d1d86bfb5139006cfd46" }
15
+ end
16
+ end
17
+
18
+ Schema = GraphQL::Client.load_schema File.join(__dir__, "schema.json")
19
+
20
+ Client = GraphQL::Client.new(schema: Schema, execute: HTTP)
21
+
22
+ Product = Client.parse <<~'GRAPHQL'
23
+ fragment ProductFragment on Product {
24
+ createdAt
25
+ publishedAt
26
+ updatedAt
27
+ productType
28
+ committee: metafield(namespace: "global", key: "committee") {
29
+ value
30
+ }
31
+ designated: metafield(namespace: "global", key: "designatedStandard") {
32
+ value
33
+ }
34
+ packContents: metafield(namespace: "global", key: "packContents") {
35
+ value
36
+ }
37
+ summary: metafield(namespace: "global", key: "summary") {
38
+ value
39
+ }
40
+ corrigendumHandle: metafield(namespace: "global", key: "corrigendumHandle") {
41
+ value
42
+ }
43
+ variants(first: 250) {
44
+ edges {
45
+ node {
46
+ version: metafield(namespace: "global", key: "version") {
47
+ value
48
+ }
49
+ isbn: metafield(namespace: "global", key: "isbn") {
50
+ value
51
+ }
52
+ }
53
+ }
54
+ }
55
+ description
56
+ }
57
+ GRAPHQL
58
+
59
+ Query = Client.parse <<~GRAPHQL
60
+ query GetProducts($h0: String!) {
61
+ productByHandle(handle: $h0) {
62
+ ...RelatonBsi::Scrapper::Product::ProductFragment
63
+ }
64
+ }
65
+ GRAPHQL
66
+
6
67
  class << self
7
68
  # Parse page.
8
69
  # @param hit [RelatonBsi::Hit]
9
70
  # @return [Hash]
10
71
  def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
11
- doc = hit.hit_collection.agent.get hit.hit[:url]
72
+ # doc = hit.hit_collection.agent.get hit.hit[:url]
73
+ result = Client.query(Query::GetProducts, variables: { h0: hit.hit[:url] })
74
+ data = result.data.product_by_handle.to_h
12
75
  BsiBibliographicItem.new(
13
76
  fetched: Date.today.to_s,
14
77
  type: "standard",
15
- docid: fetch_docid(doc),
78
+ docid: fetch_docid(hit.hit[:code], data),
16
79
  language: ["en"],
17
80
  script: ["Latn"],
18
- title: fetch_titles(doc),
19
- doctype: "specification",
20
- docstatus: fetch_status(doc),
21
- ics: fetch_ics(doc),
81
+ title: fetch_titles(hit.hit[:title]),
82
+ doctype: hit.hit[:doctype],
83
+ docstatus: fetch_status(hit.hit[:status]),
84
+ ics: fetch_ics(hit.hit[:ics]),
22
85
  date: fetch_dates(hit),
23
- contributor: fetch_contributors(doc),
24
- editorialgroup: fetch_editorialgroup(doc),
86
+ contributor: fetch_contributors(hit),
87
+ editorialgroup: fetch_editorialgroup(data),
25
88
  structuredidentifier: fetch_structuredid(hit),
26
- abstract: fetch_abstract(doc),
27
- copyright: fetch_copyright(doc, hit),
28
- link: fetch_link(HitCollection::DOMAIN + hit.hit[:url]),
29
- relation: fetch_relations(doc),
30
- place: ["London"]
89
+ abstract: fetch_abstract(data),
90
+ copyright: fetch_copyright(hit),
91
+ link: fetch_link(hit.hit[:url]),
92
+ # relation: fetch_relations(doc),
93
+ place: ["London"],
31
94
  )
32
95
  end
33
96
 
34
97
  private
35
98
 
36
- # @param doc [Mechanize::Page]
99
+ # @param ics [Array<String>]
37
100
  # @return [Array<RelatonIsoBib::Ics>]
38
- def fetch_ics(doc)
39
- doc.xpath("//tr[th='ICS']/td/node()").map(&:text).reject { |a| a.empty? }.map do |ics|
40
- RelatonIsoBib::Ics.new(ics)
101
+ def fetch_ics(ics)
102
+ ics.map do |s|
103
+ code, = s.split
104
+ RelatonIsoBib::Ics.new(code)
41
105
  end
42
106
  end
43
107
 
44
108
  # Fetch abstracts.
45
- # @param doc [Mechanize::Page]
109
+ # @param data [Hash]
46
110
  # @return [Array<Hash>]
47
- def fetch_abstract(doc)
48
- content = doc.at("//tr[th='Descriptors']/td")
49
- [{ content: content.text, language: "en", script: "Latn", }]
111
+ def fetch_abstract(data)
112
+ return [] unless data["description"]
113
+
114
+ [{ content: data["description"], language: "en", script: "Latn" }]
50
115
  end
51
116
 
52
117
  # Fetch docid.
53
- # @param doc [Mechanize::Page]
54
- # @return [Array<Hash>]
55
- def fetch_docid(doc)
56
- docids = []
57
- docid = doc.at("//tr[th[.='Standard Number']]/td").text
58
- docids << RelatonBib::DocumentIdentifier.new(type: "BSI", id: docid)
59
- isbn = doc.at("//tr[th[.='ISBN']]/td").text
60
- docids << RelatonBib::DocumentIdentifier.new(type: "ISBN", id: isbn)
61
- docids
118
+ # @param docid [String]
119
+ # @param data [Hash]
120
+ # @return [Array<RelatonBib::DocumentIdentifier>]
121
+ def fetch_docid(docid, data)
122
+ ids = [{ type: "BSI", id: docid }]
123
+ if data.any?
124
+ isbn = data["variants"]["edges"][0]["node"]["isbn"]["value"]
125
+ ids << { type: "ISBN", id: isbn }
126
+ end
127
+ ids.map do |did|
128
+ RelatonBib::DocumentIdentifier.new(type: did[:type], id: did[:id])
129
+ end
62
130
  end
63
131
 
64
132
  # Fetch status.
65
- # @param doc [Mechanize::Page]
66
- # @return [RelatonBib::DocumentStatus, NilClass]
67
- def fetch_status(doc)
68
- s = doc.at("//tr[th='Status']/td")
69
- return unless s
133
+ # @param status [String]
134
+ # @return [RelatonBib::DocumentStatus, nil]
135
+ def fetch_status(status)
136
+ return unless status
70
137
 
71
- RelatonBib::DocumentStatus.new(stage: s.text)
138
+ RelatonBib::DocumentStatus.new(stage: status)
72
139
  end
73
140
 
74
141
  # Fetch workgroup.
75
- # @param doc [Mechanize::Page]
142
+ # @param data [Hash]
76
143
  # @return [RelatonIsoBib::EditorialGroup, nil]
77
- def fetch_editorialgroup(doc)
78
- wg = doc.at("//tr[th='Committee']/td")
79
- tc = RelatonIsoBib::IsoSubgroup.new name: wg.text
144
+ def fetch_editorialgroup(data)
145
+ wg = data["committee"]&.fetch("value")
146
+ return unless wg
147
+
148
+ tc = RelatonBib::WorkGroup.new name: wg
80
149
  RelatonIsoBib::EditorialGroup.new technical_committee: [tc]
81
150
  end
82
151
 
@@ -89,74 +158,63 @@ module RelatonBsi
89
158
  # Fetch relations.
90
159
  # @param doc [Mechanize::Page]
91
160
  # @return [Array<Hash>]
92
- def fetch_relations(doc)
93
- doc.xpath("//tr[th='Replaces']/td/a").map do |r|
94
- fref = RelatonBib::FormattedRef.new(content: r.text, language: "en", script: "Latn")
95
- link = fetch_link r[:href]
96
- bibitem = BsiBibliographicItem.new(formattedref: fref, type: "standard", link: link)
97
- { type: "complements", bibitem: bibitem }
98
- end
99
- end
161
+ # def fetch_relations(doc)
162
+ # doc.xpath("//tr[th='Replaces']/td/a").map do |r|
163
+ # fref = RelatonBib::FormattedRef.new(content: r.text, language: "en", script: "Latn")
164
+ # link = fetch_link r[:href]
165
+ # bibitem = BsiBibliographicItem.new(formattedref: fref, type: "standard", link: link)
166
+ # { type: "complements", bibitem: bibitem }
167
+ # end
168
+ # end
100
169
 
101
170
  # Fetch titles.
102
- # @param doc [Mechanize::Page]
103
- # @return [Array<Hash>]
104
- def fetch_titles(doc)
105
- te = doc.at("//div[@id='title']/h2").text.strip
106
- ttls = RelatonBib::TypedTitleString.from_string te, "en", "Latn"
107
- tf = doc.at("//tr[th[.='Title in French']]/td")
108
- if tf
109
- ttls += RelatonBib::TypedTitleString.from_string tf.text.strip, "fr", "Latn"
110
- end
111
- tf = doc.at("//tr[th[.='Title in German']]/td")
112
- if tf
113
- ttls += RelatonBib::TypedTitleString.from_string tf.text.strip, "de", "Latn"
114
- end
115
- ttls
171
+ # @param title [String]
172
+ # @return [RelatonBib::TypedTitleStringCollection]
173
+ def fetch_titles(title)
174
+ RelatonBib::TypedTitleString.from_string title, "en", "Latn"
116
175
  end
117
176
 
118
177
  # Fetch dates
119
178
  # @param hit [RelatonBsi::Hit]
120
179
  # @return [Array<Hash>]
121
180
  def fetch_dates(hit)
122
- [{ type: "published", on: hit.hit[:date].to_s }]
181
+ [{ type: "published", on: hit.hit[:date] }]
123
182
  end
124
183
 
125
184
  # Fetch contributors
126
- # @param doc [Mechanize::Page]
185
+ # @param hit [RelatonBsi::Hit]
127
186
  # @return [Array<Hash>]
128
- def fetch_contributors(doc)
187
+ def fetch_contributors(hit)
129
188
  contrib = { role: [type: "publisher"] }
130
- contrib[:entity] = owner_entity doc
189
+ contrib[:entity] = owner_entity hit
131
190
  [contrib]
132
191
  end
133
192
 
134
193
  # Fetch links.
135
- # @param url [String]
194
+ # @param path [String]
136
195
  # @return [Array<Hash>]
137
- def fetch_link(url)
196
+ def fetch_link(path)
197
+ url = "#{HitCollection::DOMAIN}/products/#{path}"
138
198
  [{ type: "src", content: url }]
139
199
  end
140
200
 
141
201
  # Fetch copyright.
142
- # @param doc [Mechanize::Page]
143
202
  # @param hit [RelatonBsi::Hit]
144
203
  # @return [Array<Hash>]
145
- def fetch_copyright(doc, hit)
146
- owner = owner_entity doc
147
- from = hit.hit[:date].year.to_s
204
+ def fetch_copyright(hit)
205
+ owner = owner_entity hit
206
+ from = Date.parse(hit.hit[:date]).year.to_s
148
207
  [{ owner: [owner], from: from }]
149
208
  end
150
209
 
151
- # @param doc [Mechanize::Page]
210
+ # @param hit [RelatonBsi::Hit]
152
211
  # @return [Hash]
153
- def owner_entity(doc)
154
- abbrev = doc.at("//tr[th='Publisher']/td").text
155
- case abbrev
212
+ def owner_entity(hit)
213
+ case hit.hit[:publisher]
156
214
  when "BSI"
157
- { abbreviation: abbrev, name: "British Standards Institution", url: "https://www.bsigroup.com/" }
215
+ { abbreviation: hit.hit[:publisher], name: "British Standards Institution", url: "https://www.bsigroup.com/" }
158
216
  else
159
- { name: abbrev }
217
+ { name: hit.hit[:publisher] }
160
218
  end
161
219
  end
162
220
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonBsi
4
- VERSION = "1.8.4"
4
+ VERSION = "1.9.0"
5
5
  end
@@ -13,15 +13,13 @@ module RelatonBsi
13
13
  ext = isoitem.at "./ext"
14
14
  return data unless ext
15
15
 
16
- data[:price_code] = ext.at("./price-code")&.text
17
- data[:cen_processing] = ext.at("./cen-processing")&.text
18
16
  data
19
17
  end
20
18
 
21
19
  # @param item_hash [Hash]
22
20
  # @return [RelatonBsi::BsiBibliographicItem]
23
21
  def bib_item(item_hash)
24
- hash = BsiBibliographicItem.new **item_hash
22
+ BsiBibliographicItem.new(**item_hash)
25
23
  end
26
24
  end
27
25
  end
data/relaton_bsi.gemspec CHANGED
@@ -29,21 +29,15 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
 
32
- # Uncomment to register a new dependency of your gem
33
- # spec.add_dependency "example-gem", "~> 1.0"
34
-
35
- # For more information and examples about making a new gem, checkout our
36
- # guide at: https://bundler.io/guides/creating_gem.html
37
32
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
38
33
  spec.add_development_dependency "pry-byebug"
39
- # spec.add_development_dependency "rake", "~> 13.0"
40
- # spec.add_development_dependency "rspec", "~> 3.0"
41
- # spec.add_development_dependency "ruby-debug-ide"
42
34
  spec.add_development_dependency "ruby-jing"
43
35
  spec.add_development_dependency "simplecov"
44
36
  spec.add_development_dependency "vcr", "~> 5.0.0"
45
37
  spec.add_development_dependency "webmock"
46
38
 
47
- spec.add_dependency "mechanize", "~> 2.8.0"
48
- spec.add_dependency "relaton-iso-bib", "~> 1.8.0"
39
+ spec.add_dependency "algolia", "~> 2.1.1"
40
+ spec.add_dependency "graphql-client", "~> 0.16.0"
41
+ # spec.add_dependency "mechanize", "~> 2.8.0"
42
+ spec.add_dependency "relaton-iso-bib", "~> 1.9.0"
49
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-bsi
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.4
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-01 00:00:00.000000000 Z
11
+ date: 2021-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -95,33 +95,47 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: mechanize
98
+ name: algolia
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 2.8.0
103
+ version: 2.1.1
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 2.8.0
110
+ version: 2.1.1
111
+ - !ruby/object:Gem::Dependency
112
+ name: graphql-client
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.16.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.16.0
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: relaton-iso-bib
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
129
  - - "~>"
116
130
  - !ruby/object:Gem::Version
117
- version: 1.8.0
131
+ version: 1.9.0
118
132
  type: :runtime
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
- version: 1.8.0
138
+ version: 1.9.0
125
139
  description: 'RelatonBsi: retrieve BSI Standards for bibliographic use using the BibliographicItem
126
140
  model'
127
141
  email:
@@ -154,6 +168,7 @@ files:
154
168
  - lib/relaton_bsi/hit.rb
155
169
  - lib/relaton_bsi/hit_collection.rb
156
170
  - lib/relaton_bsi/processor.rb
171
+ - lib/relaton_bsi/schema.json
157
172
  - lib/relaton_bsi/scrapper.rb
158
173
  - lib/relaton_bsi/version.rb
159
174
  - lib/relaton_bsi/xml_parser.rb