relaton-bsi 1.8.4 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/grammars/biblio.rng +1 -0
- data/grammars/bsi.rng +30 -18
- data/grammars/isodoc.rng +246 -10
- data/grammars/isostandard.rng +19 -3
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_bsi/bsi_bibliographic_item.rb +13 -14
- data/lib/relaton_bsi/bsi_bibliography.rb +3 -5
- data/lib/relaton_bsi/hit_collection.rb +24 -13
- data/lib/relaton_bsi/schema.json +24882 -0
- data/lib/relaton_bsi/scrapper.rb +136 -78
- data/lib/relaton_bsi/version.rb +1 -1
- data/lib/relaton_bsi/xml_parser.rb +1 -3
- data/relaton_bsi.gemspec +4 -10
- metadata +22 -7
data/lib/relaton_bsi/scrapper.rb
CHANGED
@@ -1,82 +1,151 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "graphql/client"
|
4
|
+
require "graphql/client/http"
|
5
|
+
|
6
|
+
Encoding.default_internal = Encoding::UTF_8
|
7
|
+
Encoding.default_external = Encoding::UTF_8
|
8
|
+
|
3
9
|
module RelatonBsi
|
4
10
|
# Scrapper.
|
5
11
|
module Scrapper
|
12
|
+
HTTP = GraphQL::Client::HTTP.new "https://shop-bsi.myshopify.com/api/2021-04/graphql.json" do
|
13
|
+
def headers(_context)
|
14
|
+
{ "x-shopify-storefront-access-token": "c935c196c0b7d1d86bfb5139006cfd46" }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
Schema = GraphQL::Client.load_schema File.join(__dir__, "schema.json")
|
19
|
+
|
20
|
+
Client = GraphQL::Client.new(schema: Schema, execute: HTTP)
|
21
|
+
|
22
|
+
Product = Client.parse <<~'GRAPHQL'
|
23
|
+
fragment ProductFragment on Product {
|
24
|
+
createdAt
|
25
|
+
publishedAt
|
26
|
+
updatedAt
|
27
|
+
productType
|
28
|
+
committee: metafield(namespace: "global", key: "committee") {
|
29
|
+
value
|
30
|
+
}
|
31
|
+
designated: metafield(namespace: "global", key: "designatedStandard") {
|
32
|
+
value
|
33
|
+
}
|
34
|
+
packContents: metafield(namespace: "global", key: "packContents") {
|
35
|
+
value
|
36
|
+
}
|
37
|
+
summary: metafield(namespace: "global", key: "summary") {
|
38
|
+
value
|
39
|
+
}
|
40
|
+
corrigendumHandle: metafield(namespace: "global", key: "corrigendumHandle") {
|
41
|
+
value
|
42
|
+
}
|
43
|
+
variants(first: 250) {
|
44
|
+
edges {
|
45
|
+
node {
|
46
|
+
version: metafield(namespace: "global", key: "version") {
|
47
|
+
value
|
48
|
+
}
|
49
|
+
isbn: metafield(namespace: "global", key: "isbn") {
|
50
|
+
value
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}
|
54
|
+
}
|
55
|
+
description
|
56
|
+
}
|
57
|
+
GRAPHQL
|
58
|
+
|
59
|
+
Query = Client.parse <<~GRAPHQL
|
60
|
+
query GetProducts($h0: String!) {
|
61
|
+
productByHandle(handle: $h0) {
|
62
|
+
...RelatonBsi::Scrapper::Product::ProductFragment
|
63
|
+
}
|
64
|
+
}
|
65
|
+
GRAPHQL
|
66
|
+
|
6
67
|
class << self
|
7
68
|
# Parse page.
|
8
69
|
# @param hit [RelatonBsi::Hit]
|
9
70
|
# @return [Hash]
|
10
71
|
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
11
|
-
doc = hit.hit_collection.agent.get hit.hit[:url]
|
72
|
+
# doc = hit.hit_collection.agent.get hit.hit[:url]
|
73
|
+
result = Client.query(Query::GetProducts, variables: { h0: hit.hit[:url] })
|
74
|
+
data = result.data.product_by_handle.to_h
|
12
75
|
BsiBibliographicItem.new(
|
13
76
|
fetched: Date.today.to_s,
|
14
77
|
type: "standard",
|
15
|
-
docid: fetch_docid(
|
78
|
+
docid: fetch_docid(hit.hit[:code], data),
|
16
79
|
language: ["en"],
|
17
80
|
script: ["Latn"],
|
18
|
-
title: fetch_titles(
|
19
|
-
doctype:
|
20
|
-
docstatus: fetch_status(
|
21
|
-
ics: fetch_ics(
|
81
|
+
title: fetch_titles(hit.hit[:title]),
|
82
|
+
doctype: hit.hit[:doctype],
|
83
|
+
docstatus: fetch_status(hit.hit[:status]),
|
84
|
+
ics: fetch_ics(hit.hit[:ics]),
|
22
85
|
date: fetch_dates(hit),
|
23
|
-
contributor: fetch_contributors(
|
24
|
-
editorialgroup: fetch_editorialgroup(
|
86
|
+
contributor: fetch_contributors(hit),
|
87
|
+
editorialgroup: fetch_editorialgroup(data),
|
25
88
|
structuredidentifier: fetch_structuredid(hit),
|
26
|
-
abstract: fetch_abstract(
|
27
|
-
copyright: fetch_copyright(
|
28
|
-
link: fetch_link(
|
29
|
-
relation: fetch_relations(doc),
|
30
|
-
place: ["London"]
|
89
|
+
abstract: fetch_abstract(data),
|
90
|
+
copyright: fetch_copyright(hit),
|
91
|
+
link: fetch_link(hit.hit[:url]),
|
92
|
+
# relation: fetch_relations(doc),
|
93
|
+
place: ["London"],
|
31
94
|
)
|
32
95
|
end
|
33
96
|
|
34
97
|
private
|
35
98
|
|
36
|
-
# @param
|
99
|
+
# @param ics [Array<String>]
|
37
100
|
# @return [Array<RelatonIsoBib::Ics>]
|
38
|
-
def fetch_ics(
|
39
|
-
|
40
|
-
|
101
|
+
def fetch_ics(ics)
|
102
|
+
ics.map do |s|
|
103
|
+
code, = s.split
|
104
|
+
RelatonIsoBib::Ics.new(code)
|
41
105
|
end
|
42
106
|
end
|
43
107
|
|
44
108
|
# Fetch abstracts.
|
45
|
-
# @param
|
109
|
+
# @param data [Hash]
|
46
110
|
# @return [Array<Hash>]
|
47
|
-
def fetch_abstract(
|
48
|
-
|
49
|
-
|
111
|
+
def fetch_abstract(data)
|
112
|
+
return [] unless data["description"]
|
113
|
+
|
114
|
+
[{ content: data["description"], language: "en", script: "Latn" }]
|
50
115
|
end
|
51
116
|
|
52
117
|
# Fetch docid.
|
53
|
-
# @param
|
54
|
-
# @
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
118
|
+
# @param docid [String]
|
119
|
+
# @param data [Hash]
|
120
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
121
|
+
def fetch_docid(docid, data)
|
122
|
+
ids = [{ type: "BSI", id: docid }]
|
123
|
+
if data.any?
|
124
|
+
isbn = data["variants"]["edges"][0]["node"]["isbn"]["value"]
|
125
|
+
ids << { type: "ISBN", id: isbn }
|
126
|
+
end
|
127
|
+
ids.map do |did|
|
128
|
+
RelatonBib::DocumentIdentifier.new(type: did[:type], id: did[:id])
|
129
|
+
end
|
62
130
|
end
|
63
131
|
|
64
132
|
# Fetch status.
|
65
|
-
# @param
|
66
|
-
# @return [RelatonBib::DocumentStatus,
|
67
|
-
def fetch_status(
|
68
|
-
|
69
|
-
return unless s
|
133
|
+
# @param status [String]
|
134
|
+
# @return [RelatonBib::DocumentStatus, nil]
|
135
|
+
def fetch_status(status)
|
136
|
+
return unless status
|
70
137
|
|
71
|
-
RelatonBib::DocumentStatus.new(stage:
|
138
|
+
RelatonBib::DocumentStatus.new(stage: status)
|
72
139
|
end
|
73
140
|
|
74
141
|
# Fetch workgroup.
|
75
|
-
# @param
|
142
|
+
# @param data [Hash]
|
76
143
|
# @return [RelatonIsoBib::EditorialGroup, nil]
|
77
|
-
def fetch_editorialgroup(
|
78
|
-
wg =
|
79
|
-
|
144
|
+
def fetch_editorialgroup(data)
|
145
|
+
wg = data["committee"]&.fetch("value")
|
146
|
+
return unless wg
|
147
|
+
|
148
|
+
tc = RelatonBib::WorkGroup.new name: wg
|
80
149
|
RelatonIsoBib::EditorialGroup.new technical_committee: [tc]
|
81
150
|
end
|
82
151
|
|
@@ -89,74 +158,63 @@ module RelatonBsi
|
|
89
158
|
# Fetch relations.
|
90
159
|
# @param doc [Mechanize::Page]
|
91
160
|
# @return [Array<Hash>]
|
92
|
-
def fetch_relations(doc)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
end
|
161
|
+
# def fetch_relations(doc)
|
162
|
+
# doc.xpath("//tr[th='Replaces']/td/a").map do |r|
|
163
|
+
# fref = RelatonBib::FormattedRef.new(content: r.text, language: "en", script: "Latn")
|
164
|
+
# link = fetch_link r[:href]
|
165
|
+
# bibitem = BsiBibliographicItem.new(formattedref: fref, type: "standard", link: link)
|
166
|
+
# { type: "complements", bibitem: bibitem }
|
167
|
+
# end
|
168
|
+
# end
|
100
169
|
|
101
170
|
# Fetch titles.
|
102
|
-
# @param
|
103
|
-
# @return [
|
104
|
-
def fetch_titles(
|
105
|
-
|
106
|
-
ttls = RelatonBib::TypedTitleString.from_string te, "en", "Latn"
|
107
|
-
tf = doc.at("//tr[th[.='Title in French']]/td")
|
108
|
-
if tf
|
109
|
-
ttls += RelatonBib::TypedTitleString.from_string tf.text.strip, "fr", "Latn"
|
110
|
-
end
|
111
|
-
tf = doc.at("//tr[th[.='Title in German']]/td")
|
112
|
-
if tf
|
113
|
-
ttls += RelatonBib::TypedTitleString.from_string tf.text.strip, "de", "Latn"
|
114
|
-
end
|
115
|
-
ttls
|
171
|
+
# @param title [String]
|
172
|
+
# @return [RelatonBib::TypedTitleStringCollection]
|
173
|
+
def fetch_titles(title)
|
174
|
+
RelatonBib::TypedTitleString.from_string title, "en", "Latn"
|
116
175
|
end
|
117
176
|
|
118
177
|
# Fetch dates
|
119
178
|
# @param hit [RelatonBsi::Hit]
|
120
179
|
# @return [Array<Hash>]
|
121
180
|
def fetch_dates(hit)
|
122
|
-
[{ type: "published", on: hit.hit[:date]
|
181
|
+
[{ type: "published", on: hit.hit[:date] }]
|
123
182
|
end
|
124
183
|
|
125
184
|
# Fetch contributors
|
126
|
-
# @param
|
185
|
+
# @param hit [RelatonBsi::Hit]
|
127
186
|
# @return [Array<Hash>]
|
128
|
-
def fetch_contributors(
|
187
|
+
def fetch_contributors(hit)
|
129
188
|
contrib = { role: [type: "publisher"] }
|
130
|
-
contrib[:entity] = owner_entity
|
189
|
+
contrib[:entity] = owner_entity hit
|
131
190
|
[contrib]
|
132
191
|
end
|
133
192
|
|
134
193
|
# Fetch links.
|
135
|
-
# @param
|
194
|
+
# @param path [String]
|
136
195
|
# @return [Array<Hash>]
|
137
|
-
def fetch_link(
|
196
|
+
def fetch_link(path)
|
197
|
+
url = "#{HitCollection::DOMAIN}/products/#{path}"
|
138
198
|
[{ type: "src", content: url }]
|
139
199
|
end
|
140
200
|
|
141
201
|
# Fetch copyright.
|
142
|
-
# @param doc [Mechanize::Page]
|
143
202
|
# @param hit [RelatonBsi::Hit]
|
144
203
|
# @return [Array<Hash>]
|
145
|
-
def fetch_copyright(
|
146
|
-
owner = owner_entity
|
147
|
-
from = hit.hit[:date].year.to_s
|
204
|
+
def fetch_copyright(hit)
|
205
|
+
owner = owner_entity hit
|
206
|
+
from = Date.parse(hit.hit[:date]).year.to_s
|
148
207
|
[{ owner: [owner], from: from }]
|
149
208
|
end
|
150
209
|
|
151
|
-
# @param
|
210
|
+
# @param hit [RelatonBsi::Hit]
|
152
211
|
# @return [Hash]
|
153
|
-
def owner_entity(
|
154
|
-
|
155
|
-
case abbrev
|
212
|
+
def owner_entity(hit)
|
213
|
+
case hit.hit[:publisher]
|
156
214
|
when "BSI"
|
157
|
-
{ abbreviation:
|
215
|
+
{ abbreviation: hit.hit[:publisher], name: "British Standards Institution", url: "https://www.bsigroup.com/" }
|
158
216
|
else
|
159
|
-
{ name:
|
217
|
+
{ name: hit.hit[:publisher] }
|
160
218
|
end
|
161
219
|
end
|
162
220
|
end
|
data/lib/relaton_bsi/xml_parser.rb
CHANGED
@@ -13,15 +13,13 @@ module RelatonBsi
|
|
13
13
|
ext = isoitem.at "./ext"
|
14
14
|
return data unless ext
|
15
15
|
|
16
|
-
data[:price_code] = ext.at("./price-code")&.text
|
17
|
-
data[:cen_processing] = ext.at("./cen-processing")&.text
|
18
16
|
data
|
19
17
|
end
|
20
18
|
|
21
19
|
# @param item_hash [Hash]
|
22
20
|
# @return [RelatonBsi::BsiBibliographicItem]
|
23
21
|
def bib_item(item_hash)
|
24
|
-
|
22
|
+
BsiBibliographicItem.new(**item_hash)
|
25
23
|
end
|
26
24
|
end
|
27
25
|
end
|
data/relaton_bsi.gemspec
CHANGED
@@ -29,21 +29,15 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
-
# Uncomment to register a new dependency of your gem
|
33
|
-
# spec.add_dependency "example-gem", "~> 1.0"
|
34
|
-
|
35
|
-
# For more information and examples about making a new gem, checkout our
|
36
|
-
# guide at: https://bundler.io/guides/creating_gem.html
|
37
32
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
38
33
|
spec.add_development_dependency "pry-byebug"
|
39
|
-
# spec.add_development_dependency "rake", "~> 13.0"
|
40
|
-
# spec.add_development_dependency "rspec", "~> 3.0"
|
41
|
-
# spec.add_development_dependency "ruby-debug-ide"
|
42
34
|
spec.add_development_dependency "ruby-jing"
|
43
35
|
spec.add_development_dependency "simplecov"
|
44
36
|
spec.add_development_dependency "vcr", "~> 5.0.0"
|
45
37
|
spec.add_development_dependency "webmock"
|
46
38
|
|
47
|
-
spec.add_dependency "
|
48
|
-
spec.add_dependency "
|
39
|
+
spec.add_dependency "algolia", "~> 2.1.1"
|
40
|
+
spec.add_dependency "graphql-client", "~> 0.16.0"
|
41
|
+
# spec.add_dependency "mechanize", "~> 2.8.0"
|
42
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.9.0"
|
49
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-bsi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -95,33 +95,47 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: algolia
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 2.
|
103
|
+
version: 2.1.1
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 2.
|
110
|
+
version: 2.1.1
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: graphql-client
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.16.0
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.16.0
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: relaton-iso-bib
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
114
128
|
requirements:
|
115
129
|
- - "~>"
|
116
130
|
- !ruby/object:Gem::Version
|
117
|
-
version: 1.
|
131
|
+
version: 1.9.0
|
118
132
|
type: :runtime
|
119
133
|
prerelease: false
|
120
134
|
version_requirements: !ruby/object:Gem::Requirement
|
121
135
|
requirements:
|
122
136
|
- - "~>"
|
123
137
|
- !ruby/object:Gem::Version
|
124
|
-
version: 1.
|
138
|
+
version: 1.9.0
|
125
139
|
description: 'RelatonBsi: retrieve BSI Standards for bibliographic use using the BibliographicItem
|
126
140
|
model'
|
127
141
|
email:
|
@@ -154,6 +168,7 @@ files:
|
|
154
168
|
- lib/relaton_bsi/hit.rb
|
155
169
|
- lib/relaton_bsi/hit_collection.rb
|
156
170
|
- lib/relaton_bsi/processor.rb
|
171
|
+
- lib/relaton_bsi/schema.json
|
157
172
|
- lib/relaton_bsi/scrapper.rb
|
158
173
|
- lib/relaton_bsi/version.rb
|
159
174
|
- lib/relaton_bsi/xml_parser.rb
|