relaton-iso 1.7.4 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +1 -1
- data/README.adoc +108 -1
- data/bin/rackup +29 -0
- data/bin/rubocop +29 -0
- data/bin/ruby-parse +29 -0
- data/bin/ruby-rewrite +29 -0
- data/lib/relaton_iso/hit.rb +9 -5
- data/lib/relaton_iso/hit_collection.rb +75 -35
- data/lib/relaton_iso/iso_bibliography.rb +52 -51
- data/lib/relaton_iso/scrapper.rb +66 -51
- data/lib/relaton_iso/version.rb +1 -1
- data/relaton_iso.gemspec +7 -3
- metadata +57 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29eb84a194e2b30d8c1c96245b684aa095db3e318bad16f2b79b5a5ac82d3b94
|
4
|
+
data.tar.gz: 9ebd46ccc359db4e25d963569d489f2e8f9ae50e6c76fc3fb77e09ce947525cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7f9ed212717dbf26fd10f8c041d4a182afa4212f9d961c1a7d8dfde3b41d6ad28123feddf2efb405223ae5aac3ec5e07ec12563c597ed2f03c5288270608fd5
|
7
|
+
data.tar.gz: 117fb353efb5922529713e747a4ec6a1de144693ba5af0063b46a2b1ab7814631211a00659705ebc106a8d12b0a2fe338a5c0911ab6fee3b7b48bb41b4e6f727
|
data/.github/workflows/rake.yml
CHANGED
@@ -16,19 +16,9 @@ jobs:
|
|
16
16
|
strategy:
|
17
17
|
fail-fast: false
|
18
18
|
matrix:
|
19
|
-
ruby: [ '
|
19
|
+
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
20
|
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
21
|
experimental: [ false ]
|
22
|
-
include:
|
23
|
-
- ruby: '3.0'
|
24
|
-
os: 'ubuntu-latest'
|
25
|
-
experimental: true
|
26
|
-
- ruby: '3.0'
|
27
|
-
os: 'windows-latest'
|
28
|
-
experimental: true
|
29
|
-
- ruby: '3.0'
|
30
|
-
os: 'macos-latest'
|
31
|
-
experimental: true
|
32
22
|
steps:
|
33
23
|
- uses: actions/checkout@v2
|
34
24
|
with:
|
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -31,7 +31,7 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
== Usage
|
33
33
|
|
34
|
-
=== Search for
|
34
|
+
=== Search for standards using keywords
|
35
35
|
|
36
36
|
[source,ruby]
|
37
37
|
----
|
@@ -50,6 +50,7 @@ item = hit_collection[2].fetch
|
|
50
50
|
[#<RelatonBib::FormattedString:0x007fa5dca88458
|
51
51
|
@content=
|
52
52
|
"ISO/TS 19115-3:2016 defines an integrated XML implementation of ISO 19115‑1, ..."
|
53
|
+
...
|
53
54
|
|
54
55
|
item.docidentifier
|
55
56
|
=> [#<RelatonBib::DocumentIdentifier:0x007fd9ce9c6878 @id="ISO/TS 19115-3:2016", @scope=nil, @type="ISO">,
|
@@ -59,6 +60,100 @@ item.docidentifier
|
|
59
60
|
=> "urn:iso:std:iso-ts:ts:19115:-3:stage-90.92:ed-1:en,fr"
|
60
61
|
----
|
61
62
|
|
63
|
+
=== Fetch document by reference and year
|
64
|
+
|
65
|
+
[source,ruby]
|
66
|
+
----
|
67
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
68
|
+
[relaton-iso] ("ISO 19115:2003") fetching...
|
69
|
+
[relaton-iso] ("ISO 19115:2003") found ISO 19115:2003
|
70
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83429e30
|
71
|
+
...
|
72
|
+
|
73
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
|
74
|
+
[relaton-iso] ("ISO 19115") fetching...
|
75
|
+
[relaton-iso] ("ISO 19115") found ISO 19115:2003
|
76
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c828d3180
|
77
|
+
...
|
78
|
+
|
79
|
+
item.docidentifier[0].id
|
80
|
+
=> "ISO 19115:2003"
|
81
|
+
----
|
82
|
+
|
83
|
+
=== Fetch non-part document
|
84
|
+
|
85
|
+
[source,ruby]
|
86
|
+
----
|
87
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115"
|
88
|
+
[relaton-iso] ("ISO 19115") fetching...
|
89
|
+
[relaton-iso] ("ISO 19115") found ISO 19115:2003
|
90
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830275a8
|
91
|
+
...
|
92
|
+
|
93
|
+
item.docidentifier[0].id
|
94
|
+
=> "ISO 19115:2003"
|
95
|
+
----
|
96
|
+
|
97
|
+
=== Fetch part document
|
98
|
+
|
99
|
+
[source,ruby]
|
100
|
+
----
|
101
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115-1"
|
102
|
+
[relaton-iso] ("ISO 19115-1") fetching...
|
103
|
+
[relaton-iso] ("ISO 19115-1") found ISO 19115-1:2014
|
104
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83408af0
|
105
|
+
...
|
106
|
+
|
107
|
+
item.docidentifier[0].id
|
108
|
+
=> "ISO 19115-1:2014"
|
109
|
+
----
|
110
|
+
|
111
|
+
=== Fetch all-parts document
|
112
|
+
|
113
|
+
[source,ruby]
|
114
|
+
----
|
115
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
|
116
|
+
[relaton-iso] ("ISO 19115") fetching...
|
117
|
+
[relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
|
118
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
|
119
|
+
...
|
120
|
+
|
121
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
|
122
|
+
[relaton-iso] ("ISO 19115") fetching...
|
123
|
+
[relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
|
124
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
|
125
|
+
...
|
126
|
+
|
127
|
+
item.docidentifier[0].id
|
128
|
+
=> "ISO 19115 (all parts)"
|
129
|
+
|
130
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
|
131
|
+
[relaton-iso] ("ISO 19115-1") fetching...
|
132
|
+
[relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
|
133
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
|
134
|
+
|
135
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
|
136
|
+
[relaton-iso] ("ISO 19115-1") fetching...
|
137
|
+
[relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
|
138
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
|
139
|
+
...
|
140
|
+
|
141
|
+
item.docidentifier[0].id
|
142
|
+
=> "ISO 19115 (all parts)"
|
143
|
+
----
|
144
|
+
|
145
|
+
=== Search for ISO/IEC Directives
|
146
|
+
|
147
|
+
The ISO/IEC Directives are stored in a static cache in the relaton gem, so the relaton gem must be used to fetch them. The following references can be fetched:
|
148
|
+
|
149
|
+
- ISO/IEC DIR 1 - Procedures for the technical work
|
150
|
+
- ISO/IEC DIR 1 IEC SUP - Procedures for the technical work – Procedures specific to IEC
|
151
|
+
- ISO/IEC DIR 1 ISO SUP - Consolidated ISO Supplement -- Procedures specific to ISO
|
152
|
+
- ISO/IEC DIR 2 IEC - Principles and rules for the structure and drafting of ISO and IEC documents
|
153
|
+
- ISO/IEC DIR 2 ISO - Principles and rules for the structure and drafting of ISO and IEC documents
|
154
|
+
- ISO/IEC DIR IEC SUP - Procedures specific to IEC
|
155
|
+
- ISO/IEC DIR JTC 1 SUP - Procedures specific to JTC 1
|
156
|
+
|
62
157
|
=== XML serialization
|
63
158
|
|
64
159
|
Possible options:
|
@@ -164,6 +259,18 @@ item.title lang: 'fr'
|
|
164
259
|
@script=["Latn"]>
|
165
260
|
----
|
166
261
|
|
262
|
+
=== Typed links
|
263
|
+
|
264
|
+
Each ISO document has a `src` type link and optional `obp`, `rss`, and `pub` type links.
|
265
|
+
|
266
|
+
[source,ruby]
|
267
|
+
----
|
268
|
+
item.link
|
269
|
+
=> [#<RelatonBib::TypedUri:0x00007ffdf001eb90 @content=#<Addressable::URI:0xaa0 URI:https://www.iso.org/standard/53798.html>, @type="src">,
|
270
|
+
#<RelatonBib::TypedUri:0x00007ffdf001e960 @content=#<Addressable::URI:0xab4 URI:https://www.iso.org/obp/ui/#!iso:std:53798:en>, @type="obp">,
|
271
|
+
#<RelatonBib::TypedUri:0x00007ffdf001e7a8 @content=#<Addressable::URI:0xac8 URI:https://www.iso.org/contents/data/standard/05/37/53798.detail.rss>, @type="rss">]
|
272
|
+
----
|
273
|
+
|
167
274
|
== Development
|
168
275
|
|
169
276
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/bin/rackup
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rackup' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rack", "rackup")
|
data/bin/rubocop
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rubocop' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rubocop", "rubocop")
|
data/bin/ruby-parse
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'ruby-parse' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("parser", "ruby-parse")
|
data/bin/ruby-rewrite
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'ruby-rewrite' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("parser", "ruby-rewrite")
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
module RelatonIso
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
7
|
+
attr_writer :fetch
|
8
|
+
|
6
9
|
# Parse page.
|
7
10
|
# @param lang [String, NilClass]
|
8
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
@@ -12,11 +15,12 @@ module RelatonIso
|
|
12
15
|
|
13
16
|
# @return [Integer]
|
14
17
|
def sort_weight
|
15
|
-
case hit[
|
16
|
-
when "
|
17
|
-
when "
|
18
|
-
when "
|
19
|
-
|
18
|
+
case hit[:status] # && hit["publicationStatus"]["key"]
|
19
|
+
when "Published" then 0
|
20
|
+
when "Under development" then 1
|
21
|
+
when "Withdrawn" then 2
|
22
|
+
when "Deleted" then 3
|
23
|
+
else 4
|
20
24
|
end
|
21
25
|
end
|
22
26
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "algolia"
|
3
4
|
require "relaton_iso/hit"
|
4
5
|
|
5
6
|
module RelatonIso
|
@@ -10,42 +11,28 @@ module RelatonIso
|
|
10
11
|
# @param text [String] reference to search
|
11
12
|
def initialize(text)
|
12
13
|
super
|
13
|
-
|
14
|
-
http = Net::HTTP.new "www.iso.org", 443
|
15
|
-
http.use_ssl = true
|
16
|
-
search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
17
|
-
search << "docNumber=#{num}"
|
18
|
-
search << "docPartNo=#{part}" if part
|
19
|
-
q = search.join "&"
|
20
|
-
resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
21
|
-
"Accept" => "application/json, text/plain, */*")
|
22
|
-
return if resp.body.empty?
|
23
|
-
|
24
|
-
json = JSON.parse resp.body
|
25
|
-
@array = json["standards"].map { |h| Hit.new h, self }.sort! do |a, b|
|
26
|
-
if a.sort_weight == b.sort_weight
|
27
|
-
(parse_date(b.hit) - parse_date(a.hit)).to_i
|
28
|
-
else
|
29
|
-
a.sort_weight - b.sort_weight
|
30
|
-
end
|
31
|
-
end
|
14
|
+
@array = text.match?(/^ISO\sTC\s184\/SC\s?4/) ? fetch_github : fetch_iso
|
32
15
|
end
|
33
16
|
|
34
17
|
# @param lang [String, NilClass]
|
35
18
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
36
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
37
|
-
parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
38
|
-
hit =
|
20
|
+
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
|
+
hit = @array.min_by do |h|
|
22
|
+
IsoBibliography.ref_components(h.hit[:title])[1].to_i
|
23
|
+
end
|
39
24
|
return @array.first.fetch lang unless hit
|
40
25
|
|
41
26
|
bibitem = hit.fetch lang
|
42
27
|
all_parts_item = bibitem.to_all_parts
|
43
|
-
|
28
|
+
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
29
|
+
%r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
|
30
|
+
(?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
|
44
31
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
45
|
-
formattedref: RelatonBib::FormattedRef.new(content:
|
32
|
+
formattedref: RelatonBib::FormattedRef.new(content: fr),
|
46
33
|
)
|
47
34
|
all_parts_item.relation << RelatonBib::DocumentRelation.new(
|
48
|
-
type: "instance", bibitem: isobib
|
35
|
+
type: "instance", bibitem: isobib,
|
49
36
|
)
|
50
37
|
end
|
51
38
|
all_parts_item
|
@@ -54,18 +41,71 @@ module RelatonIso
|
|
54
41
|
|
55
42
|
private
|
56
43
|
|
57
|
-
#
|
58
|
-
#
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
44
|
+
#
|
45
|
+
# Fetch document from GitHub repository
|
46
|
+
#
|
47
|
+
# @return [Array<RelatonIso::Hit>]
|
48
|
+
#
|
49
|
+
def fetch_github # rubocop:disable Metrics/AbcSize
|
50
|
+
ref = text.gsub(/[\s\/]/, "_").upcase
|
51
|
+
url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/main/data/#{ref}.yaml"
|
52
|
+
resp = Net::HTTP.get_response URI(url)
|
53
|
+
return [] unless resp.code == "200"
|
54
|
+
|
55
|
+
hash = YAML.safe_load resp.body
|
56
|
+
bib_hash = RelatonIsoBib::HashConverter.hash_to_bib hash
|
57
|
+
bib = RelatonIsoBib::IsoBibliographicItem.new(**bib_hash)
|
58
|
+
hit = Hit.new({ title: text }, self)
|
59
|
+
hit.fetch = bib
|
60
|
+
[hit]
|
61
|
+
end
|
62
|
+
|
63
|
+
#
|
64
|
+
# Fetch hits from iso.org
|
65
|
+
#
|
66
|
+
# @return [Array<RelatonIso::Hit>]
|
67
|
+
#
|
68
|
+
def fetch_iso # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
69
|
+
# %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
|
70
|
+
# http = Net::HTTP.new "www.iso.org", 443
|
71
|
+
# http.use_ssl = true
|
72
|
+
# search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
73
|
+
# search << "docNumber=#{num}"
|
74
|
+
# search << "docPartNo=#{part}" if part
|
75
|
+
# q = search.join "&"
|
76
|
+
# resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
77
|
+
# "Accept" => "application/json, text/plain, */*")
|
78
|
+
config = Algolia::Search::Config.new(application_id: "JCL49WV5AR", api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0")
|
79
|
+
client = Algolia::Search::Client.new config, logger: ::Logger.new($stderr)
|
80
|
+
index = client.init_index "all_en"
|
81
|
+
resp = index.search text, hitsPerPage: 100, filters: "category:standard"
|
82
|
+
# return [] if resp.body.empty?
|
83
|
+
|
84
|
+
# json = JSON.parse resp.body
|
85
|
+
# json["standards"]
|
86
|
+
resp[:hits].map { |h| Hit.new h, self }.sort! do |a, b|
|
87
|
+
if a.sort_weight == b.sort_weight && b.hit[:year] = a.hit[:year]
|
88
|
+
a.hit[:title] <=> b.hit[:title]
|
89
|
+
elsif a.sort_weight == b.sort_weight
|
90
|
+
b.hit[:year] - a.hit[:year]
|
91
|
+
else
|
92
|
+
a.sort_weight - b.sort_weight
|
93
|
+
end
|
68
94
|
end
|
69
95
|
end
|
96
|
+
|
97
|
+
# @param hit [Hash]
|
98
|
+
# @return [Date]
|
99
|
+
# def parse_date(hit)
|
100
|
+
# if hit["publicationDate"]
|
101
|
+
# Date.strptime(hit["publicationDate"], "%Y-%m")
|
102
|
+
# elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
|
103
|
+
# Date.strptime(year, "%Y")
|
104
|
+
# elsif hit["newProjectDate"]
|
105
|
+
# Date.parse hit["newProjectDate"]
|
106
|
+
# else
|
107
|
+
# Date.new 0
|
108
|
+
# end
|
109
|
+
# end
|
70
110
|
end
|
71
111
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
# require 'relaton_iso/iso_bibliographic_item'
|
4
4
|
require "relaton_iso/scrapper"
|
5
5
|
require "relaton_iso/hit_collection"
|
6
|
-
require "relaton_iec"
|
6
|
+
# require "relaton_iec"
|
7
7
|
|
8
8
|
module RelatonIso
|
9
9
|
# Class methods for search ISO standards.
|
@@ -15,8 +15,9 @@ module RelatonIso
|
|
15
15
|
HitCollection.new text.gsub(/\u2013/, "-")
|
16
16
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
17
17
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
18
|
-
Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
19
|
-
|
18
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
|
19
|
+
Algolia::AlgoliaUnreachableHostError => e
|
20
|
+
raise RelatonBib::RequestError, e.message
|
20
21
|
end
|
21
22
|
|
22
23
|
# @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
|
@@ -25,44 +26,39 @@ module RelatonIso
|
|
25
26
|
# reference is required, :keep_year if undated reference should
|
26
27
|
# return actual reference with year
|
27
28
|
# @return [String] Relaton XML serialisation of reference
|
28
|
-
def get(ref, year = nil, opts = {})
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
}
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
/^(?<code1>[^\s]+(\s\w+)?\s[\d-]+)(:(?<year1>\d{4}))?(?<code2>\s\w+)?/ =~ code
|
41
|
-
/:(?<year2>\d{4})/ =~ corr
|
42
|
-
unless code1.nil?
|
43
|
-
code = code1 + code2.to_s
|
44
|
-
year = year2 || year1
|
45
|
-
end
|
46
|
-
end
|
47
|
-
%r{\s(?<num>\d+)(-(?<part>[\d-]+))?} =~ code
|
48
|
-
opts[:part] = part
|
49
|
-
opts[:num] = num
|
50
|
-
opts[:corr] = corr
|
51
|
-
opts[:all_parts] ||= !part && opts[:all_parts].nil? && code2.nil?
|
52
|
-
if %r[^ISO/IEC DIR].match? code
|
53
|
-
return RelatonIec::IecBibliography.get(code, year, opts)
|
54
|
-
end
|
29
|
+
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
30
|
+
code = ref.gsub(/\u2013/, "-")
|
31
|
+
# %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
|
32
|
+
_, _part, year1, = ref_components ref
|
33
|
+
year ||= year1
|
34
|
+
code.sub! " (all parts)", ""
|
35
|
+
opts[:all_parts] ||= $~ && opts[:all_parts].nil?
|
36
|
+
# opts[:keep_year] ||= opts[:keep_year].nil?
|
37
|
+
# code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
|
38
|
+
# if %r[^ISO/IEC DIR].match? code
|
39
|
+
# return RelatonIec::IecBibliography.get(code, year, opts)
|
40
|
+
# end
|
55
41
|
|
56
42
|
ret = isobib_get1(code, year, opts)
|
57
43
|
return nil if ret.nil?
|
58
44
|
|
59
|
-
if year || opts[:keep_year] || opts[:all_parts]
|
45
|
+
if year && opts[:keep_year].nil? || opts[:keep_year] || opts[:all_parts]
|
60
46
|
ret
|
61
47
|
else
|
62
48
|
ret.to_most_recent_reference
|
63
49
|
end
|
64
50
|
end
|
65
51
|
|
52
|
+
def ref_components(ref)
|
53
|
+
%r{
|
54
|
+
^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
|
55
|
+
(?:-(?<part>[\w-]+))?
|
56
|
+
(?::(?<year>\d{4}))?
|
57
|
+
(?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
|
58
|
+
}x =~ ref
|
59
|
+
[code&.strip, part, year, corr, coryear]
|
60
|
+
end
|
61
|
+
|
66
62
|
private
|
67
63
|
|
68
64
|
# rubocop:disable Metrics/MethodLength
|
@@ -70,19 +66,19 @@ module RelatonIso
|
|
70
66
|
def fetch_ref_err(code, year, missed_years)
|
71
67
|
id = year ? "#{code}:#{year}" : code
|
72
68
|
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
73
|
-
|
69
|
+
"The code must be exactly like it is on the standards website."
|
74
70
|
unless missed_years.empty?
|
75
71
|
warn "[relaton-iso] (There was no match for #{year}, though there "\
|
76
|
-
|
72
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
77
73
|
end
|
78
74
|
if /\d-\d/.match? code
|
79
75
|
warn "[relaton-iso] The provided document part may not exist, "\
|
80
|
-
|
76
|
+
"or the document may no longer be published in parts."
|
81
77
|
else
|
82
78
|
warn "[relaton-iso] If you wanted to cite all document parts for "\
|
83
|
-
|
84
|
-
|
85
|
-
|
79
|
+
"the reference, use \"#{code} (all parts)\".\nIf the document "\
|
80
|
+
"is not a standard, use its document type abbreviation "\
|
81
|
+
"(TS, TR, PAS, Guide)."
|
86
82
|
end
|
87
83
|
nil
|
88
84
|
end
|
@@ -95,18 +91,20 @@ module RelatonIso
|
|
95
91
|
# @param opts [Hash]
|
96
92
|
# @return [Array<RelatonIso::Hit>]
|
97
93
|
def isobib_search_filter(code, opts)
|
98
|
-
|
99
|
-
|
94
|
+
ref = remove_part code, opts[:all_parts]
|
95
|
+
warn "[relaton-iso] (\"#{code}\") fetching..."
|
96
|
+
result = search(ref)
|
100
97
|
res = search_code result, code, opts
|
101
98
|
return res unless res.empty?
|
102
99
|
|
103
100
|
# try stages
|
104
|
-
|
101
|
+
case code
|
102
|
+
when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
|
105
103
|
res = try_stages(result, opts) do |st|
|
106
104
|
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
107
105
|
end
|
108
106
|
return res unless res.empty?
|
109
|
-
|
107
|
+
when %r{^\w+\s\d+} # code like ISO 123
|
110
108
|
res = try_stages(result, opts) do |st|
|
111
109
|
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
112
110
|
end
|
@@ -122,6 +120,12 @@ module RelatonIso
|
|
122
120
|
end
|
123
121
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
124
122
|
|
123
|
+
def remove_part(ref, all_parts)
|
124
|
+
return ref unless all_parts
|
125
|
+
|
126
|
+
ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
|
127
|
+
end
|
128
|
+
|
125
129
|
# @param result [RelatonIso::HitCollection]
|
126
130
|
# @param opts [Hash]
|
127
131
|
# @return [RelatonIso::HitCollection]
|
@@ -139,15 +143,12 @@ module RelatonIso
|
|
139
143
|
# @param code [String]
|
140
144
|
# @param opts [Hash]
|
141
145
|
# @return [RelatonIso::HitCollection]
|
142
|
-
def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
143
|
-
|
144
|
-
corr_regex = %r{^#{code}[\w-]*(:\d{4})?/#{opts[:corr]}}
|
145
|
-
no_corr_regex = %r{^#{code}[\w-]*(:\d{4})?/}
|
146
|
+
def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,PerceivedComplexity
|
147
|
+
code1, part1, _, corr1, coryear1 = ref_components code
|
146
148
|
result.select do |i|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
)
|
149
|
+
code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
|
150
|
+
code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
|
151
|
+
corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
|
151
152
|
end
|
152
153
|
end
|
153
154
|
|
@@ -163,7 +164,7 @@ module RelatonIso
|
|
163
164
|
def isobib_results_filter(result, year, opts)
|
164
165
|
missed_years = []
|
165
166
|
hits = result.reduce!([]) do |hts, h|
|
166
|
-
if !year || %r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[
|
167
|
+
if !year || (%r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year)
|
167
168
|
hts << h
|
168
169
|
else
|
169
170
|
missed_years << iyear
|
@@ -188,7 +189,7 @@ module RelatonIso
|
|
188
189
|
result = isobib_search_filter(code, opts) || return
|
189
190
|
ret = isobib_results_filter(result, year, opts)
|
190
191
|
if ret[:ret]
|
191
|
-
warn "[relaton-iso] (\"#{
|
192
|
+
warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
192
193
|
ret[:ret]
|
193
194
|
else
|
194
195
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -7,8 +7,7 @@ require "net/http"
|
|
7
7
|
|
8
8
|
module RelatonIso
|
9
9
|
# Scrapper.
|
10
|
-
# rubocop:disable Metrics/ModuleLength
|
11
|
-
module Scrapper
|
10
|
+
module Scrapper # rubocop:disable Metrics/ModuleLength
|
12
11
|
DOMAIN = "https://www.iso.org"
|
13
12
|
|
14
13
|
TYPES = {
|
@@ -55,9 +54,9 @@ module RelatonIso
|
|
55
54
|
# @param lang [String, NilClass]
|
56
55
|
# @return [Hash]
|
57
56
|
def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
58
|
-
path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
59
|
-
"#{hit_data['csnumber']}.html"
|
60
|
-
doc, url = get_page path
|
57
|
+
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
|
+
# "#{hit_data['csnumber']}.html"
|
59
|
+
doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
|
61
60
|
|
62
61
|
# Fetch edition.
|
63
62
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
@@ -67,24 +66,24 @@ module RelatonIso
|
|
67
66
|
|
68
67
|
RelatonIsoBib::IsoBibliographicItem.new(
|
69
68
|
fetched: Date.today.to_s,
|
70
|
-
docid: fetch_docid(
|
69
|
+
docid: fetch_docid(doc, edition, langs),
|
71
70
|
docnumber: fetch_docnumber(doc),
|
72
71
|
edition: edition,
|
73
72
|
language: langs.map { |l| l[:lang] },
|
74
73
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
75
74
|
title: titles,
|
76
|
-
doctype: fetch_type(hit_data[
|
75
|
+
doctype: fetch_type(hit_data[:title]),
|
77
76
|
docstatus: fetch_status(doc),
|
78
77
|
ics: fetch_ics(doc),
|
79
|
-
date: fetch_dates(doc, hit_data[
|
80
|
-
contributor: fetch_contributors(hit_data[
|
78
|
+
date: fetch_dates(doc, hit_data[:title]),
|
79
|
+
contributor: fetch_contributors(hit_data[:title]),
|
81
80
|
editorialgroup: fetch_workgroup(doc),
|
82
81
|
abstract: abstract,
|
83
|
-
copyright: fetch_copyright(
|
82
|
+
copyright: fetch_copyright(doc),
|
84
83
|
link: fetch_link(doc, url),
|
85
84
|
relation: fetch_relations(doc),
|
86
85
|
place: ["Geneva"],
|
87
|
-
structuredidentifier: fetch_structuredidentifier(doc)
|
86
|
+
structuredidentifier: fetch_structuredidentifier(doc),
|
88
87
|
)
|
89
88
|
end
|
90
89
|
|
@@ -94,8 +93,7 @@ module RelatonIso
|
|
94
93
|
# @param doc [Nokogiri::HTML::Document]
|
95
94
|
# @param lang [String, NilClass]
|
96
95
|
# @return [Array<Array>]
|
97
|
-
# rubocop:disable Metrics/AbcSize,
|
98
|
-
def fetch_titles_abstract(doc, lang)
|
96
|
+
def fetch_titles_abstract(doc, lang) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
99
97
|
titles = RelatonBib::TypedTitleStringCollection.new
|
100
98
|
abstract = []
|
101
99
|
langs = languages(doc, lang).reduce([]) do |s, l|
|
@@ -107,7 +105,11 @@ module RelatonIso
|
|
107
105
|
titles += fetch_title(d, l[:lang])
|
108
106
|
|
109
107
|
# Fetch abstracts.
|
110
|
-
abstract_content = d.
|
108
|
+
abstract_content = d.xpath(
|
109
|
+
"//div[@itemprop='description']/p|//div[@itemprop='description']/ul/li",
|
110
|
+
).map do |a|
|
111
|
+
a.name == "li" ? "- #{a.text}" : a.text
|
112
|
+
end.reject(&:empty?).join("\n")
|
111
113
|
unless abstract_content.empty?
|
112
114
|
abstract << {
|
113
115
|
content: abstract_content,
|
@@ -121,7 +123,6 @@ module RelatonIso
|
|
121
123
|
end
|
122
124
|
[titles, abstract, langs]
|
123
125
|
end
|
124
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
125
126
|
|
126
127
|
# Returns available languages.
|
127
128
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -131,7 +132,7 @@ module RelatonIso
|
|
131
132
|
lgs = [{ lang: "en" }]
|
132
133
|
doc.css("li#lang-switcher ul li a").each do |lang_link|
|
133
134
|
lang_path = lang_link.attr("href")
|
134
|
-
l = lang_path.match(%r{
|
135
|
+
l = lang_path.match(%r{^/(fr)/})
|
135
136
|
lgs << { lang: l[1], path: lang_path } if l && (!lang || l[1] == lang)
|
136
137
|
end
|
137
138
|
lgs
|
@@ -168,67 +169,80 @@ module RelatonIso
|
|
168
169
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
169
170
|
|
170
171
|
# Fetch docid.
|
171
|
-
# @param
|
172
|
+
# @param doc [Nokogiri::HTML::Document]
|
173
|
+
# @param edition [String]
|
172
174
|
# @param langs [Array<Hash>]
|
173
175
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
174
|
-
def fetch_docid(
|
176
|
+
def fetch_docid(doc, edition, langs)
|
177
|
+
pubid = item_ref doc
|
175
178
|
[
|
176
|
-
RelatonBib::DocumentIdentifier.new(id:
|
177
|
-
RelatonBib::DocumentIdentifier.new(
|
178
|
-
|
179
|
+
RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO"),
|
180
|
+
RelatonBib::DocumentIdentifier.new(
|
181
|
+
id: fetch_urn(doc, pubid, edition, langs), type: "URN",
|
182
|
+
),
|
179
183
|
]
|
180
184
|
end
|
181
185
|
|
182
|
-
# @param
|
186
|
+
# @param doc [Nokogiri::HTML::Document]
|
187
|
+
# @param pubid [String]
|
188
|
+
# @param edition [String]
|
183
189
|
# @param langs [Array<Hash>]
|
184
190
|
# @return [String]
|
185
|
-
def fetch_urn(
|
186
|
-
orig =
|
187
|
-
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~
|
191
|
+
def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
192
|
+
orig = pubid.split.first.downcase.split("/").join "-"
|
193
|
+
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
|
194
|
+
_, part, _year, corr, = IsoBibliography.ref_components pubid
|
188
195
|
urn = "urn:iso:std:#{orig}"
|
189
196
|
urn += ":#{type.downcase}" if type
|
190
|
-
urn += ":#{
|
191
|
-
urn += ":-#{
|
192
|
-
urn += ":stage-#{
|
193
|
-
urn += ":ed-#{
|
194
|
-
if
|
195
|
-
|
197
|
+
urn += ":#{fetch_docnumber(doc)}"
|
198
|
+
urn += ":-#{part}" if part
|
199
|
+
urn += ":stage-#{stage_code(doc)}"
|
200
|
+
urn += ":ed-#{edition}" if edition
|
201
|
+
if corr
|
202
|
+
corrparts = corr.split
|
203
|
+
urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
|
196
204
|
end
|
197
|
-
urn += "
|
205
|
+
urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
|
198
206
|
urn
|
199
207
|
end
|
200
208
|
|
201
209
|
def fetch_docnumber(doc)
|
202
|
-
|
203
|
-
id&.match(/\d+/)&.to_s
|
210
|
+
item_ref(doc)&.match(/\d+/)&.to_s
|
204
211
|
end
|
205
212
|
|
206
213
|
# @param doc [Nokogiri::HTML::Document]
|
207
214
|
def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
|
208
|
-
|
209
|
-
unless
|
215
|
+
ref = item_ref doc
|
216
|
+
unless ref
|
210
217
|
return RelatonIsoBib::StructuredIdentifier.new(
|
211
|
-
project_number: "?", part_number: "", prefix: nil, id: "?"
|
218
|
+
project_number: "?", part_number: "", prefix: nil, id: "?",
|
212
219
|
)
|
213
220
|
end
|
214
221
|
|
215
|
-
m =
|
222
|
+
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
216
223
|
RelatonIsoBib::StructuredIdentifier.new(
|
217
|
-
project_number: m[1],
|
218
|
-
id: item_ref.text, type: "ISO"
|
224
|
+
project_number: m[1], part: m[2], type: "ISO",
|
219
225
|
)
|
220
226
|
end
|
221
227
|
|
228
|
+
def item_ref(doc)
|
229
|
+
doc.at("//nav[contains(@class, 'heading-condensed')]/h1")&.text
|
230
|
+
end
|
231
|
+
|
222
232
|
# Fetch status.
|
223
233
|
# @param doc [Nokogiri::HTML::Document]
|
224
234
|
# @param status [String]
|
225
235
|
# @return [Hash]
|
226
236
|
def fetch_status(doc)
|
227
|
-
stg, substg = doc.
|
228
|
-
.text.split "."
|
237
|
+
stg, substg = stage_code(doc).split "."
|
229
238
|
RelatonBib::DocumentStatus.new(stage: stg, substage: substg)
|
230
239
|
end
|
231
240
|
|
241
|
+
def stage_code(doc)
|
242
|
+
doc.at("//ul[@class='dropdown-menu']/li[@class='active']"\
|
243
|
+
"/a/span[@class='stage-code']").text
|
244
|
+
end
|
245
|
+
|
232
246
|
# def stage(stg, substg)
|
233
247
|
# abbr = STGABBR[stg].is_a?(Hash) ? STGABBR[stg][substg] : STGABBR[stg]
|
234
248
|
# RelatonBib::DocumentStatus::Stage.new value: stg, abbreviation: abbr
|
@@ -241,13 +255,15 @@ module RelatonIso
|
|
241
255
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
242
256
|
# wg_url = DOMAIN + wg_link['href']
|
243
257
|
workgroup = wg_link.text.split "/"
|
258
|
+
type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
|
244
259
|
{
|
245
260
|
name: "International Organization for Standardization",
|
246
261
|
abbreviation: "ISO",
|
247
262
|
url: "www.iso.org",
|
248
263
|
technical_committee: [{
|
249
|
-
name:
|
250
|
-
|
264
|
+
name: doc.css("div.entry-title")[0].text,
|
265
|
+
identifier: wg_link.text,
|
266
|
+
type: type,
|
251
267
|
number: workgroup[1]&.match(/\d+/)&.to_s&.to_i,
|
252
268
|
}],
|
253
269
|
}
|
@@ -274,10 +290,10 @@ module RelatonIso
|
|
274
290
|
else
|
275
291
|
a + r.css("a").map do |id|
|
276
292
|
fref = RelatonBib::FormattedRef.new(
|
277
|
-
content: id.text, format: "text/plain"
|
293
|
+
content: id.text, format: "text/plain",
|
278
294
|
)
|
279
295
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
280
|
-
formattedref: fref, date: date
|
296
|
+
formattedref: fref, date: date,
|
281
297
|
)
|
282
298
|
{ type: type, bibitem: bibitem }
|
283
299
|
end
|
@@ -311,7 +327,7 @@ module RelatonIso
|
|
311
327
|
def fetch_title(doc, lang)
|
312
328
|
content = doc.at(
|
313
329
|
"//nav[contains(@class,'heading-condensed')]/h2 | "\
|
314
|
-
"//nav[contains(@class,'heading-condensed')]/h3"
|
330
|
+
"//nav[contains(@class,'heading-condensed')]/h3",
|
315
331
|
)&.text&.gsub(/\u2014/, "-")
|
316
332
|
return RelatonBib::TypedTitleStringCollection.new unless content
|
317
333
|
|
@@ -373,7 +389,7 @@ module RelatonIso
|
|
373
389
|
def fetch_ics(doc)
|
374
390
|
doc.xpath("//strong[contains(text(), "\
|
375
391
|
"'ICS')]/../following-sibling::dd/div/a").map do |i|
|
376
|
-
code = i.text.match(/[\d
|
392
|
+
code = i.text.match(/[\d.]+/).to_s.split "."
|
377
393
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
378
394
|
end
|
379
395
|
end
|
@@ -395,10 +411,10 @@ module RelatonIso
|
|
395
411
|
end
|
396
412
|
|
397
413
|
# Fetch copyright.
|
398
|
-
# @param ref [String]
|
399
414
|
# @param doc [Nokogiri::HTML::Document]
|
400
415
|
# @return [Array<Hash>]
|
401
|
-
def fetch_copyright(
|
416
|
+
def fetch_copyright(doc)
|
417
|
+
ref = item_ref doc
|
402
418
|
owner_name = ref.match(/.*?(?=\s)/).to_s
|
403
419
|
from = ref.match(/(?<=:)\d{4}/).to_s
|
404
420
|
if from.empty?
|
@@ -408,5 +424,4 @@ module RelatonIso
|
|
408
424
|
end
|
409
425
|
end
|
410
426
|
end
|
411
|
-
# rubocop:enable Metrics/ModuleLength
|
412
427
|
end
|
data/lib/relaton_iso/version.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -30,13 +30,17 @@ Gem::Specification.new do |spec|
|
|
30
30
|
# spec.add_development_dependency "debase"
|
31
31
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
32
32
|
spec.add_development_dependency "pry-byebug"
|
33
|
-
spec.add_development_dependency "rake", "~>
|
33
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
34
34
|
spec.add_development_dependency "rspec", "~> 3.0"
|
35
|
+
spec.add_development_dependency "rubocop"
|
36
|
+
spec.add_development_dependency "rubocop-performance"
|
37
|
+
spec.add_development_dependency "rubocop-rails"
|
35
38
|
# spec.add_development_dependency "ruby-debug-ide"
|
36
39
|
spec.add_development_dependency "simplecov"
|
37
40
|
spec.add_development_dependency "vcr"
|
38
41
|
spec.add_development_dependency "webmock"
|
39
42
|
|
40
|
-
spec.add_dependency "relaton-iec", "~> 1.
|
41
|
-
spec.add_dependency "
|
43
|
+
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
44
|
+
spec.add_dependency "algolia"
|
45
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.9.0"
|
42
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '13.0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '13.0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,48 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '3.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubocop
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rubocop-performance
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rubocop-rails
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
83
125
|
- !ruby/object:Gem::Dependency
|
84
126
|
name: simplecov
|
85
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -123,33 +165,33 @@ dependencies:
|
|
123
165
|
- !ruby/object:Gem::Version
|
124
166
|
version: '0'
|
125
167
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
168
|
+
name: algolia
|
127
169
|
requirement: !ruby/object:Gem::Requirement
|
128
170
|
requirements:
|
129
|
-
- - "
|
171
|
+
- - ">="
|
130
172
|
- !ruby/object:Gem::Version
|
131
|
-
version:
|
173
|
+
version: '0'
|
132
174
|
type: :runtime
|
133
175
|
prerelease: false
|
134
176
|
version_requirements: !ruby/object:Gem::Requirement
|
135
177
|
requirements:
|
136
|
-
- - "
|
178
|
+
- - ">="
|
137
179
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
180
|
+
version: '0'
|
139
181
|
- !ruby/object:Gem::Dependency
|
140
182
|
name: relaton-iso-bib
|
141
183
|
requirement: !ruby/object:Gem::Requirement
|
142
184
|
requirements:
|
143
185
|
- - "~>"
|
144
186
|
- !ruby/object:Gem::Version
|
145
|
-
version: 1.
|
187
|
+
version: 1.9.0
|
146
188
|
type: :runtime
|
147
189
|
prerelease: false
|
148
190
|
version_requirements: !ruby/object:Gem::Requirement
|
149
191
|
requirements:
|
150
192
|
- - "~>"
|
151
193
|
- !ruby/object:Gem::Version
|
152
|
-
version: 1.
|
194
|
+
version: 1.9.0
|
153
195
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
154
196
|
model'
|
155
197
|
email:
|
@@ -179,9 +221,13 @@ files:
|
|
179
221
|
- bin/nokogiri
|
180
222
|
- bin/pry
|
181
223
|
- bin/racc
|
224
|
+
- bin/rackup
|
182
225
|
- bin/rake
|
183
226
|
- bin/rdebug-ide
|
184
227
|
- bin/rspec
|
228
|
+
- bin/rubocop
|
229
|
+
- bin/ruby-parse
|
230
|
+
- bin/ruby-rewrite
|
185
231
|
- bin/safe_yaml
|
186
232
|
- bin/setup
|
187
233
|
- lib/relaton_iso.rb
|