relaton-iso 1.7.4 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +1 -1
- data/README.adoc +108 -1
- data/bin/rackup +29 -0
- data/bin/rubocop +29 -0
- data/bin/ruby-parse +29 -0
- data/bin/ruby-rewrite +29 -0
- data/lib/relaton_iso/hit.rb +9 -5
- data/lib/relaton_iso/hit_collection.rb +75 -35
- data/lib/relaton_iso/iso_bibliography.rb +52 -51
- data/lib/relaton_iso/scrapper.rb +66 -51
- data/lib/relaton_iso/version.rb +1 -1
- data/relaton_iso.gemspec +7 -3
- metadata +57 -11
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 29eb84a194e2b30d8c1c96245b684aa095db3e318bad16f2b79b5a5ac82d3b94
|
|
4
|
+
data.tar.gz: 9ebd46ccc359db4e25d963569d489f2e8f9ae50e6c76fc3fb77e09ce947525cb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a7f9ed212717dbf26fd10f8c041d4a182afa4212f9d961c1a7d8dfde3b41d6ad28123feddf2efb405223ae5aac3ec5e07ec12563c597ed2f03c5288270608fd5
|
|
7
|
+
data.tar.gz: 117fb353efb5922529713e747a4ec6a1de144693ba5af0063b46a2b1ab7814631211a00659705ebc106a8d12b0a2fe338a5c0911ab6fee3b7b48bb41b4e6f727
|
data/.github/workflows/rake.yml
CHANGED
|
@@ -16,19 +16,9 @@ jobs:
|
|
|
16
16
|
strategy:
|
|
17
17
|
fail-fast: false
|
|
18
18
|
matrix:
|
|
19
|
-
ruby: [ '
|
|
19
|
+
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
|
20
20
|
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
|
21
21
|
experimental: [ false ]
|
|
22
|
-
include:
|
|
23
|
-
- ruby: '3.0'
|
|
24
|
-
os: 'ubuntu-latest'
|
|
25
|
-
experimental: true
|
|
26
|
-
- ruby: '3.0'
|
|
27
|
-
os: 'windows-latest'
|
|
28
|
-
experimental: true
|
|
29
|
-
- ruby: '3.0'
|
|
30
|
-
os: 'macos-latest'
|
|
31
|
-
experimental: true
|
|
32
22
|
steps:
|
|
33
23
|
- uses: actions/checkout@v2
|
|
34
24
|
with:
|
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
|
@@ -31,7 +31,7 @@ Or install it yourself as:
|
|
|
31
31
|
|
|
32
32
|
== Usage
|
|
33
33
|
|
|
34
|
-
=== Search for
|
|
34
|
+
=== Search for standards using keywords
|
|
35
35
|
|
|
36
36
|
[source,ruby]
|
|
37
37
|
----
|
|
@@ -50,6 +50,7 @@ item = hit_collection[2].fetch
|
|
|
50
50
|
[#<RelatonBib::FormattedString:0x007fa5dca88458
|
|
51
51
|
@content=
|
|
52
52
|
"ISO/TS 19115-3:2016 defines an integrated XML implementation of ISO 19115‑1, ..."
|
|
53
|
+
...
|
|
53
54
|
|
|
54
55
|
item.docidentifier
|
|
55
56
|
=> [#<RelatonBib::DocumentIdentifier:0x007fd9ce9c6878 @id="ISO/TS 19115-3:2016", @scope=nil, @type="ISO">,
|
|
@@ -59,6 +60,100 @@ item.docidentifier
|
|
|
59
60
|
=> "urn:iso:std:iso-ts:ts:19115:-3:stage-90.92:ed-1:en,fr"
|
|
60
61
|
----
|
|
61
62
|
|
|
63
|
+
=== Fetch document by reference and year
|
|
64
|
+
|
|
65
|
+
[source,ruby]
|
|
66
|
+
----
|
|
67
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
|
68
|
+
[relaton-iso] ("ISO 19115:2003") fetching...
|
|
69
|
+
[relaton-iso] ("ISO 19115:2003") found ISO 19115:2003
|
|
70
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83429e30
|
|
71
|
+
...
|
|
72
|
+
|
|
73
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
|
|
74
|
+
[relaton-iso] ("ISO 19115") fetching...
|
|
75
|
+
[relaton-iso] ("ISO 19115") found ISO 19115:2003
|
|
76
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c828d3180
|
|
77
|
+
...
|
|
78
|
+
|
|
79
|
+
item.docidentifier[0].id
|
|
80
|
+
=> "ISO 19115:2003"
|
|
81
|
+
----
|
|
82
|
+
|
|
83
|
+
=== Fetch non-part document
|
|
84
|
+
|
|
85
|
+
[source,ruby]
|
|
86
|
+
----
|
|
87
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115"
|
|
88
|
+
[relaton-iso] ("ISO 19115") fetching...
|
|
89
|
+
[relaton-iso] ("ISO 19115") found ISO 19115:2003
|
|
90
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830275a8
|
|
91
|
+
...
|
|
92
|
+
|
|
93
|
+
item.docidentifier[0].id
|
|
94
|
+
=> "ISO 19115:2003"
|
|
95
|
+
----
|
|
96
|
+
|
|
97
|
+
=== Fetch part document
|
|
98
|
+
|
|
99
|
+
[source,ruby]
|
|
100
|
+
----
|
|
101
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115-1"
|
|
102
|
+
[relaton-iso] ("ISO 19115-1") fetching...
|
|
103
|
+
[relaton-iso] ("ISO 19115-1") found ISO 19115-1:2014
|
|
104
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83408af0
|
|
105
|
+
...
|
|
106
|
+
|
|
107
|
+
item.docidentifier[0].id
|
|
108
|
+
=> "ISO 19115-1:2014"
|
|
109
|
+
----
|
|
110
|
+
|
|
111
|
+
=== Fetch all-parts document
|
|
112
|
+
|
|
113
|
+
[source,ruby]
|
|
114
|
+
----
|
|
115
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
|
|
116
|
+
[relaton-iso] ("ISO 19115") fetching...
|
|
117
|
+
[relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
|
|
118
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
|
|
119
|
+
...
|
|
120
|
+
|
|
121
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
|
|
122
|
+
[relaton-iso] ("ISO 19115") fetching...
|
|
123
|
+
[relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
|
|
124
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
|
|
125
|
+
...
|
|
126
|
+
|
|
127
|
+
item.docidentifier[0].id
|
|
128
|
+
=> "ISO 19115 (all parts)"
|
|
129
|
+
|
|
130
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
|
|
131
|
+
[relaton-iso] ("ISO 19115-1") fetching...
|
|
132
|
+
[relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
|
|
133
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
|
|
134
|
+
|
|
135
|
+
item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
|
|
136
|
+
[relaton-iso] ("ISO 19115-1") fetching...
|
|
137
|
+
[relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
|
|
138
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
|
|
139
|
+
...
|
|
140
|
+
|
|
141
|
+
item.docidentifier[0].id
|
|
142
|
+
=> "ISO 19115 (all parts)"
|
|
143
|
+
----
|
|
144
|
+
|
|
145
|
+
=== Search for ISO/IEC Directives
|
|
146
|
+
|
|
147
|
+
The ISO/IEC Directives are stored in a static cache in the relaton gem, so the relaton gem must be used to fetch them. The following references can be fetched:
|
|
148
|
+
|
|
149
|
+
- ISO/IEC DIR 1 - Procedures for the technical work
|
|
150
|
+
- ISO/IEC DIR 1 IEC SUP - Procedures for the technical work – Procedures specific to IEC
|
|
151
|
+
- ISO/IEC DIR 1 ISO SUP - Consolidated ISO Supplement -- Procedures specific to ISO
|
|
152
|
+
- ISO/IEC DIR 2 IEC - Principles and rules for the structure and drafting of ISO and IEC documents
|
|
153
|
+
- ISO/IEC DIR 2 ISO - Principles and rules for the structure and drafting of ISO and IEC documents
|
|
154
|
+
- ISO/IEC DIR IEC SUP - Procedures specific to IEC
|
|
155
|
+
- ISO/IEC DIR JTC 1 SUP - Procedures specific to JTC 1
|
|
156
|
+
|
|
62
157
|
=== XML serialization
|
|
63
158
|
|
|
64
159
|
Possible options:
|
|
@@ -164,6 +259,18 @@ item.title lang: 'fr'
|
|
|
164
259
|
@script=["Latn"]>
|
|
165
260
|
----
|
|
166
261
|
|
|
262
|
+
=== Typed links
|
|
263
|
+
|
|
264
|
+
Each ISO document has a `src` type link and optional `obp`, `rss`, and `pub` type links.
|
|
265
|
+
|
|
266
|
+
[source,ruby]
|
|
267
|
+
----
|
|
268
|
+
item.link
|
|
269
|
+
=> [#<RelatonBib::TypedUri:0x00007ffdf001eb90 @content=#<Addressable::URI:0xaa0 URI:https://www.iso.org/standard/53798.html>, @type="src">,
|
|
270
|
+
#<RelatonBib::TypedUri:0x00007ffdf001e960 @content=#<Addressable::URI:0xab4 URI:https://www.iso.org/obp/ui/#!iso:std:53798:en>, @type="obp">,
|
|
271
|
+
#<RelatonBib::TypedUri:0x00007ffdf001e7a8 @content=#<Addressable::URI:0xac8 URI:https://www.iso.org/contents/data/standard/05/37/53798.detail.rss>, @type="rss">]
|
|
272
|
+
----
|
|
273
|
+
|
|
167
274
|
== Development
|
|
168
275
|
|
|
169
276
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/bin/rackup
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# This file was generated by Bundler.
|
|
6
|
+
#
|
|
7
|
+
# The application 'rackup' is installed as part of a gem, and
|
|
8
|
+
# this file is here to facilitate running it.
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require "pathname"
|
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
|
13
|
+
Pathname.new(__FILE__).realpath)
|
|
14
|
+
|
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
|
16
|
+
|
|
17
|
+
if File.file?(bundle_binstub)
|
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
|
19
|
+
load(bundle_binstub)
|
|
20
|
+
else
|
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
require "rubygems"
|
|
27
|
+
require "bundler/setup"
|
|
28
|
+
|
|
29
|
+
load Gem.bin_path("rack", "rackup")
|
data/bin/rubocop
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# This file was generated by Bundler.
|
|
6
|
+
#
|
|
7
|
+
# The application 'rubocop' is installed as part of a gem, and
|
|
8
|
+
# this file is here to facilitate running it.
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require "pathname"
|
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
|
13
|
+
Pathname.new(__FILE__).realpath)
|
|
14
|
+
|
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
|
16
|
+
|
|
17
|
+
if File.file?(bundle_binstub)
|
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
|
19
|
+
load(bundle_binstub)
|
|
20
|
+
else
|
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
require "rubygems"
|
|
27
|
+
require "bundler/setup"
|
|
28
|
+
|
|
29
|
+
load Gem.bin_path("rubocop", "rubocop")
|
data/bin/ruby-parse
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# This file was generated by Bundler.
|
|
6
|
+
#
|
|
7
|
+
# The application 'ruby-parse' is installed as part of a gem, and
|
|
8
|
+
# this file is here to facilitate running it.
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require "pathname"
|
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
|
13
|
+
Pathname.new(__FILE__).realpath)
|
|
14
|
+
|
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
|
16
|
+
|
|
17
|
+
if File.file?(bundle_binstub)
|
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
|
19
|
+
load(bundle_binstub)
|
|
20
|
+
else
|
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
require "rubygems"
|
|
27
|
+
require "bundler/setup"
|
|
28
|
+
|
|
29
|
+
load Gem.bin_path("parser", "ruby-parse")
|
data/bin/ruby-rewrite
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# This file was generated by Bundler.
|
|
6
|
+
#
|
|
7
|
+
# The application 'ruby-rewrite' is installed as part of a gem, and
|
|
8
|
+
# this file is here to facilitate running it.
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require "pathname"
|
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
|
13
|
+
Pathname.new(__FILE__).realpath)
|
|
14
|
+
|
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
|
16
|
+
|
|
17
|
+
if File.file?(bundle_binstub)
|
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
|
19
|
+
load(bundle_binstub)
|
|
20
|
+
else
|
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
require "rubygems"
|
|
27
|
+
require "bundler/setup"
|
|
28
|
+
|
|
29
|
+
load Gem.bin_path("parser", "ruby-rewrite")
|
data/lib/relaton_iso/hit.rb
CHANGED
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
module RelatonIso
|
|
4
4
|
# Hit.
|
|
5
5
|
class Hit < RelatonBib::Hit
|
|
6
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
|
7
|
+
attr_writer :fetch
|
|
8
|
+
|
|
6
9
|
# Parse page.
|
|
7
10
|
# @param lang [String, NilClass]
|
|
8
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
|
@@ -12,11 +15,12 @@ module RelatonIso
|
|
|
12
15
|
|
|
13
16
|
# @return [Integer]
|
|
14
17
|
def sort_weight
|
|
15
|
-
case hit[
|
|
16
|
-
when "
|
|
17
|
-
when "
|
|
18
|
-
when "
|
|
19
|
-
|
|
18
|
+
case hit[:status] # && hit["publicationStatus"]["key"]
|
|
19
|
+
when "Published" then 0
|
|
20
|
+
when "Under development" then 1
|
|
21
|
+
when "Withdrawn" then 2
|
|
22
|
+
when "Deleted" then 3
|
|
23
|
+
else 4
|
|
20
24
|
end
|
|
21
25
|
end
|
|
22
26
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "algolia"
|
|
3
4
|
require "relaton_iso/hit"
|
|
4
5
|
|
|
5
6
|
module RelatonIso
|
|
@@ -10,42 +11,28 @@ module RelatonIso
|
|
|
10
11
|
# @param text [String] reference to search
|
|
11
12
|
def initialize(text)
|
|
12
13
|
super
|
|
13
|
-
|
|
14
|
-
http = Net::HTTP.new "www.iso.org", 443
|
|
15
|
-
http.use_ssl = true
|
|
16
|
-
search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
|
17
|
-
search << "docNumber=#{num}"
|
|
18
|
-
search << "docPartNo=#{part}" if part
|
|
19
|
-
q = search.join "&"
|
|
20
|
-
resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
|
21
|
-
"Accept" => "application/json, text/plain, */*")
|
|
22
|
-
return if resp.body.empty?
|
|
23
|
-
|
|
24
|
-
json = JSON.parse resp.body
|
|
25
|
-
@array = json["standards"].map { |h| Hit.new h, self }.sort! do |a, b|
|
|
26
|
-
if a.sort_weight == b.sort_weight
|
|
27
|
-
(parse_date(b.hit) - parse_date(a.hit)).to_i
|
|
28
|
-
else
|
|
29
|
-
a.sort_weight - b.sort_weight
|
|
30
|
-
end
|
|
31
|
-
end
|
|
14
|
+
@array = text.match?(/^ISO\sTC\s184\/SC\s?4/) ? fetch_github : fetch_iso
|
|
32
15
|
end
|
|
33
16
|
|
|
34
17
|
# @param lang [String, NilClass]
|
|
35
18
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
|
36
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
|
37
|
-
parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
|
38
|
-
hit =
|
|
20
|
+
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
|
21
|
+
hit = @array.min_by do |h|
|
|
22
|
+
IsoBibliography.ref_components(h.hit[:title])[1].to_i
|
|
23
|
+
end
|
|
39
24
|
return @array.first.fetch lang unless hit
|
|
40
25
|
|
|
41
26
|
bibitem = hit.fetch lang
|
|
42
27
|
all_parts_item = bibitem.to_all_parts
|
|
43
|
-
|
|
28
|
+
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
|
29
|
+
%r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
|
|
30
|
+
(?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
|
|
44
31
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
|
45
|
-
formattedref: RelatonBib::FormattedRef.new(content:
|
|
32
|
+
formattedref: RelatonBib::FormattedRef.new(content: fr),
|
|
46
33
|
)
|
|
47
34
|
all_parts_item.relation << RelatonBib::DocumentRelation.new(
|
|
48
|
-
type: "instance", bibitem: isobib
|
|
35
|
+
type: "instance", bibitem: isobib,
|
|
49
36
|
)
|
|
50
37
|
end
|
|
51
38
|
all_parts_item
|
|
@@ -54,18 +41,71 @@ module RelatonIso
|
|
|
54
41
|
|
|
55
42
|
private
|
|
56
43
|
|
|
57
|
-
#
|
|
58
|
-
#
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
44
|
+
#
|
|
45
|
+
# Fetch document from GitHub repository
|
|
46
|
+
#
|
|
47
|
+
# @return [Array<RelatonIso::Hit>]
|
|
48
|
+
#
|
|
49
|
+
def fetch_github # rubocop:disable Metrics/AbcSize
|
|
50
|
+
ref = text.gsub(/[\s\/]/, "_").upcase
|
|
51
|
+
url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/main/data/#{ref}.yaml"
|
|
52
|
+
resp = Net::HTTP.get_response URI(url)
|
|
53
|
+
return [] unless resp.code == "200"
|
|
54
|
+
|
|
55
|
+
hash = YAML.safe_load resp.body
|
|
56
|
+
bib_hash = RelatonIsoBib::HashConverter.hash_to_bib hash
|
|
57
|
+
bib = RelatonIsoBib::IsoBibliographicItem.new(**bib_hash)
|
|
58
|
+
hit = Hit.new({ title: text }, self)
|
|
59
|
+
hit.fetch = bib
|
|
60
|
+
[hit]
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
#
|
|
64
|
+
# Fetch hits from iso.org
|
|
65
|
+
#
|
|
66
|
+
# @return [Array<RelatonIso::Hit>]
|
|
67
|
+
#
|
|
68
|
+
def fetch_iso # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
69
|
+
# %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
|
|
70
|
+
# http = Net::HTTP.new "www.iso.org", 443
|
|
71
|
+
# http.use_ssl = true
|
|
72
|
+
# search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
|
73
|
+
# search << "docNumber=#{num}"
|
|
74
|
+
# search << "docPartNo=#{part}" if part
|
|
75
|
+
# q = search.join "&"
|
|
76
|
+
# resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
|
77
|
+
# "Accept" => "application/json, text/plain, */*")
|
|
78
|
+
config = Algolia::Search::Config.new(application_id: "JCL49WV5AR", api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0")
|
|
79
|
+
client = Algolia::Search::Client.new config, logger: ::Logger.new($stderr)
|
|
80
|
+
index = client.init_index "all_en"
|
|
81
|
+
resp = index.search text, hitsPerPage: 100, filters: "category:standard"
|
|
82
|
+
# return [] if resp.body.empty?
|
|
83
|
+
|
|
84
|
+
# json = JSON.parse resp.body
|
|
85
|
+
# json["standards"]
|
|
86
|
+
resp[:hits].map { |h| Hit.new h, self }.sort! do |a, b|
|
|
87
|
+
if a.sort_weight == b.sort_weight && b.hit[:year] = a.hit[:year]
|
|
88
|
+
a.hit[:title] <=> b.hit[:title]
|
|
89
|
+
elsif a.sort_weight == b.sort_weight
|
|
90
|
+
b.hit[:year] - a.hit[:year]
|
|
91
|
+
else
|
|
92
|
+
a.sort_weight - b.sort_weight
|
|
93
|
+
end
|
|
68
94
|
end
|
|
69
95
|
end
|
|
96
|
+
|
|
97
|
+
# @param hit [Hash]
|
|
98
|
+
# @return [Date]
|
|
99
|
+
# def parse_date(hit)
|
|
100
|
+
# if hit["publicationDate"]
|
|
101
|
+
# Date.strptime(hit["publicationDate"], "%Y-%m")
|
|
102
|
+
# elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
|
|
103
|
+
# Date.strptime(year, "%Y")
|
|
104
|
+
# elsif hit["newProjectDate"]
|
|
105
|
+
# Date.parse hit["newProjectDate"]
|
|
106
|
+
# else
|
|
107
|
+
# Date.new 0
|
|
108
|
+
# end
|
|
109
|
+
# end
|
|
70
110
|
end
|
|
71
111
|
end
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# require 'relaton_iso/iso_bibliographic_item'
|
|
4
4
|
require "relaton_iso/scrapper"
|
|
5
5
|
require "relaton_iso/hit_collection"
|
|
6
|
-
require "relaton_iec"
|
|
6
|
+
# require "relaton_iec"
|
|
7
7
|
|
|
8
8
|
module RelatonIso
|
|
9
9
|
# Class methods for search ISO standards.
|
|
@@ -15,8 +15,9 @@ module RelatonIso
|
|
|
15
15
|
HitCollection.new text.gsub(/\u2013/, "-")
|
|
16
16
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
|
17
17
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
|
18
|
-
Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
|
19
|
-
|
|
18
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
|
|
19
|
+
Algolia::AlgoliaUnreachableHostError => e
|
|
20
|
+
raise RelatonBib::RequestError, e.message
|
|
20
21
|
end
|
|
21
22
|
|
|
22
23
|
# @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
|
|
@@ -25,44 +26,39 @@ module RelatonIso
|
|
|
25
26
|
# reference is required, :keep_year if undated reference should
|
|
26
27
|
# return actual reference with year
|
|
27
28
|
# @return [String] Relaton XML serialisation of reference
|
|
28
|
-
def get(ref, year = nil, opts = {})
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
/^(?<code1>[^\s]+(\s\w+)?\s[\d-]+)(:(?<year1>\d{4}))?(?<code2>\s\w+)?/ =~ code
|
|
41
|
-
/:(?<year2>\d{4})/ =~ corr
|
|
42
|
-
unless code1.nil?
|
|
43
|
-
code = code1 + code2.to_s
|
|
44
|
-
year = year2 || year1
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
%r{\s(?<num>\d+)(-(?<part>[\d-]+))?} =~ code
|
|
48
|
-
opts[:part] = part
|
|
49
|
-
opts[:num] = num
|
|
50
|
-
opts[:corr] = corr
|
|
51
|
-
opts[:all_parts] ||= !part && opts[:all_parts].nil? && code2.nil?
|
|
52
|
-
if %r[^ISO/IEC DIR].match? code
|
|
53
|
-
return RelatonIec::IecBibliography.get(code, year, opts)
|
|
54
|
-
end
|
|
29
|
+
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
|
30
|
+
code = ref.gsub(/\u2013/, "-")
|
|
31
|
+
# %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
|
|
32
|
+
_, _part, year1, = ref_components ref
|
|
33
|
+
year ||= year1
|
|
34
|
+
code.sub! " (all parts)", ""
|
|
35
|
+
opts[:all_parts] ||= $~ && opts[:all_parts].nil?
|
|
36
|
+
# opts[:keep_year] ||= opts[:keep_year].nil?
|
|
37
|
+
# code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
|
|
38
|
+
# if %r[^ISO/IEC DIR].match? code
|
|
39
|
+
# return RelatonIec::IecBibliography.get(code, year, opts)
|
|
40
|
+
# end
|
|
55
41
|
|
|
56
42
|
ret = isobib_get1(code, year, opts)
|
|
57
43
|
return nil if ret.nil?
|
|
58
44
|
|
|
59
|
-
if year || opts[:keep_year] || opts[:all_parts]
|
|
45
|
+
if year && opts[:keep_year].nil? || opts[:keep_year] || opts[:all_parts]
|
|
60
46
|
ret
|
|
61
47
|
else
|
|
62
48
|
ret.to_most_recent_reference
|
|
63
49
|
end
|
|
64
50
|
end
|
|
65
51
|
|
|
52
|
+
def ref_components(ref)
|
|
53
|
+
%r{
|
|
54
|
+
^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
|
|
55
|
+
(?:-(?<part>[\w-]+))?
|
|
56
|
+
(?::(?<year>\d{4}))?
|
|
57
|
+
(?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
|
|
58
|
+
}x =~ ref
|
|
59
|
+
[code&.strip, part, year, corr, coryear]
|
|
60
|
+
end
|
|
61
|
+
|
|
66
62
|
private
|
|
67
63
|
|
|
68
64
|
# rubocop:disable Metrics/MethodLength
|
|
@@ -70,19 +66,19 @@ module RelatonIso
|
|
|
70
66
|
def fetch_ref_err(code, year, missed_years)
|
|
71
67
|
id = year ? "#{code}:#{year}" : code
|
|
72
68
|
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
|
73
|
-
|
|
69
|
+
"The code must be exactly like it is on the standards website."
|
|
74
70
|
unless missed_years.empty?
|
|
75
71
|
warn "[relaton-iso] (There was no match for #{year}, though there "\
|
|
76
|
-
|
|
72
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
|
77
73
|
end
|
|
78
74
|
if /\d-\d/.match? code
|
|
79
75
|
warn "[relaton-iso] The provided document part may not exist, "\
|
|
80
|
-
|
|
76
|
+
"or the document may no longer be published in parts."
|
|
81
77
|
else
|
|
82
78
|
warn "[relaton-iso] If you wanted to cite all document parts for "\
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
79
|
+
"the reference, use \"#{code} (all parts)\".\nIf the document "\
|
|
80
|
+
"is not a standard, use its document type abbreviation "\
|
|
81
|
+
"(TS, TR, PAS, Guide)."
|
|
86
82
|
end
|
|
87
83
|
nil
|
|
88
84
|
end
|
|
@@ -95,18 +91,20 @@ module RelatonIso
|
|
|
95
91
|
# @param opts [Hash]
|
|
96
92
|
# @return [Array<RelatonIso::Hit>]
|
|
97
93
|
def isobib_search_filter(code, opts)
|
|
98
|
-
|
|
99
|
-
|
|
94
|
+
ref = remove_part code, opts[:all_parts]
|
|
95
|
+
warn "[relaton-iso] (\"#{code}\") fetching..."
|
|
96
|
+
result = search(ref)
|
|
100
97
|
res = search_code result, code, opts
|
|
101
98
|
return res unless res.empty?
|
|
102
99
|
|
|
103
100
|
# try stages
|
|
104
|
-
|
|
101
|
+
case code
|
|
102
|
+
when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
|
|
105
103
|
res = try_stages(result, opts) do |st|
|
|
106
104
|
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
|
107
105
|
end
|
|
108
106
|
return res unless res.empty?
|
|
109
|
-
|
|
107
|
+
when %r{^\w+\s\d+} # code like ISO 123
|
|
110
108
|
res = try_stages(result, opts) do |st|
|
|
111
109
|
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
|
112
110
|
end
|
|
@@ -122,6 +120,12 @@ module RelatonIso
|
|
|
122
120
|
end
|
|
123
121
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
|
124
122
|
|
|
123
|
+
def remove_part(ref, all_parts)
|
|
124
|
+
return ref unless all_parts
|
|
125
|
+
|
|
126
|
+
ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
|
|
127
|
+
end
|
|
128
|
+
|
|
125
129
|
# @param result [RelatonIso::HitCollection]
|
|
126
130
|
# @param opts [Hash]
|
|
127
131
|
# @return [RelatonIso::HitCollection]
|
|
@@ -139,15 +143,12 @@ module RelatonIso
|
|
|
139
143
|
# @param code [String]
|
|
140
144
|
# @param opts [Hash]
|
|
141
145
|
# @return [RelatonIso::HitCollection]
|
|
142
|
-
def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
|
143
|
-
|
|
144
|
-
corr_regex = %r{^#{code}[\w-]*(:\d{4})?/#{opts[:corr]}}
|
|
145
|
-
no_corr_regex = %r{^#{code}[\w-]*(:\d{4})?/}
|
|
146
|
+
def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,PerceivedComplexity
|
|
147
|
+
code1, part1, _, corr1, coryear1 = ref_components code
|
|
146
148
|
result.select do |i|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
)
|
|
149
|
+
code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
|
|
150
|
+
code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
|
|
151
|
+
corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
|
|
151
152
|
end
|
|
152
153
|
end
|
|
153
154
|
|
|
@@ -163,7 +164,7 @@ module RelatonIso
|
|
|
163
164
|
def isobib_results_filter(result, year, opts)
|
|
164
165
|
missed_years = []
|
|
165
166
|
hits = result.reduce!([]) do |hts, h|
|
|
166
|
-
if !year || %r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[
|
|
167
|
+
if !year || (%r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year)
|
|
167
168
|
hts << h
|
|
168
169
|
else
|
|
169
170
|
missed_years << iyear
|
|
@@ -188,7 +189,7 @@ module RelatonIso
|
|
|
188
189
|
result = isobib_search_filter(code, opts) || return
|
|
189
190
|
ret = isobib_results_filter(result, year, opts)
|
|
190
191
|
if ret[:ret]
|
|
191
|
-
warn "[relaton-iso] (\"#{
|
|
192
|
+
warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
|
192
193
|
ret[:ret]
|
|
193
194
|
else
|
|
194
195
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iso/scrapper.rb
CHANGED
|
@@ -7,8 +7,7 @@ require "net/http"
|
|
|
7
7
|
|
|
8
8
|
module RelatonIso
|
|
9
9
|
# Scrapper.
|
|
10
|
-
# rubocop:disable Metrics/ModuleLength
|
|
11
|
-
module Scrapper
|
|
10
|
+
module Scrapper # rubocop:disable Metrics/ModuleLength
|
|
12
11
|
DOMAIN = "https://www.iso.org"
|
|
13
12
|
|
|
14
13
|
TYPES = {
|
|
@@ -55,9 +54,9 @@ module RelatonIso
|
|
|
55
54
|
# @param lang [String, NilClass]
|
|
56
55
|
# @return [Hash]
|
|
57
56
|
def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
58
|
-
path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
|
59
|
-
"#{hit_data['csnumber']}.html"
|
|
60
|
-
doc, url = get_page path
|
|
57
|
+
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
|
58
|
+
# "#{hit_data['csnumber']}.html"
|
|
59
|
+
doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
|
|
61
60
|
|
|
62
61
|
# Fetch edition.
|
|
63
62
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
|
@@ -67,24 +66,24 @@ module RelatonIso
|
|
|
67
66
|
|
|
68
67
|
RelatonIsoBib::IsoBibliographicItem.new(
|
|
69
68
|
fetched: Date.today.to_s,
|
|
70
|
-
docid: fetch_docid(
|
|
69
|
+
docid: fetch_docid(doc, edition, langs),
|
|
71
70
|
docnumber: fetch_docnumber(doc),
|
|
72
71
|
edition: edition,
|
|
73
72
|
language: langs.map { |l| l[:lang] },
|
|
74
73
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
|
75
74
|
title: titles,
|
|
76
|
-
doctype: fetch_type(hit_data[
|
|
75
|
+
doctype: fetch_type(hit_data[:title]),
|
|
77
76
|
docstatus: fetch_status(doc),
|
|
78
77
|
ics: fetch_ics(doc),
|
|
79
|
-
date: fetch_dates(doc, hit_data[
|
|
80
|
-
contributor: fetch_contributors(hit_data[
|
|
78
|
+
date: fetch_dates(doc, hit_data[:title]),
|
|
79
|
+
contributor: fetch_contributors(hit_data[:title]),
|
|
81
80
|
editorialgroup: fetch_workgroup(doc),
|
|
82
81
|
abstract: abstract,
|
|
83
|
-
copyright: fetch_copyright(
|
|
82
|
+
copyright: fetch_copyright(doc),
|
|
84
83
|
link: fetch_link(doc, url),
|
|
85
84
|
relation: fetch_relations(doc),
|
|
86
85
|
place: ["Geneva"],
|
|
87
|
-
structuredidentifier: fetch_structuredidentifier(doc)
|
|
86
|
+
structuredidentifier: fetch_structuredidentifier(doc),
|
|
88
87
|
)
|
|
89
88
|
end
|
|
90
89
|
|
|
@@ -94,8 +93,7 @@ module RelatonIso
|
|
|
94
93
|
# @param doc [Nokogiri::HTML::Document]
|
|
95
94
|
# @param lang [String, NilClass]
|
|
96
95
|
# @return [Array<Array>]
|
|
97
|
-
# rubocop:disable Metrics/AbcSize,
|
|
98
|
-
def fetch_titles_abstract(doc, lang)
|
|
96
|
+
def fetch_titles_abstract(doc, lang) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
|
99
97
|
titles = RelatonBib::TypedTitleStringCollection.new
|
|
100
98
|
abstract = []
|
|
101
99
|
langs = languages(doc, lang).reduce([]) do |s, l|
|
|
@@ -107,7 +105,11 @@ module RelatonIso
|
|
|
107
105
|
titles += fetch_title(d, l[:lang])
|
|
108
106
|
|
|
109
107
|
# Fetch abstracts.
|
|
110
|
-
abstract_content = d.
|
|
108
|
+
abstract_content = d.xpath(
|
|
109
|
+
"//div[@itemprop='description']/p|//div[@itemprop='description']/ul/li",
|
|
110
|
+
).map do |a|
|
|
111
|
+
a.name == "li" ? "- #{a.text}" : a.text
|
|
112
|
+
end.reject(&:empty?).join("\n")
|
|
111
113
|
unless abstract_content.empty?
|
|
112
114
|
abstract << {
|
|
113
115
|
content: abstract_content,
|
|
@@ -121,7 +123,6 @@ module RelatonIso
|
|
|
121
123
|
end
|
|
122
124
|
[titles, abstract, langs]
|
|
123
125
|
end
|
|
124
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
125
126
|
|
|
126
127
|
# Returns available languages.
|
|
127
128
|
# @param doc [Nokogiri::HTML::Document]
|
|
@@ -131,7 +132,7 @@ module RelatonIso
|
|
|
131
132
|
lgs = [{ lang: "en" }]
|
|
132
133
|
doc.css("li#lang-switcher ul li a").each do |lang_link|
|
|
133
134
|
lang_path = lang_link.attr("href")
|
|
134
|
-
l = lang_path.match(%r{
|
|
135
|
+
l = lang_path.match(%r{^/(fr)/})
|
|
135
136
|
lgs << { lang: l[1], path: lang_path } if l && (!lang || l[1] == lang)
|
|
136
137
|
end
|
|
137
138
|
lgs
|
|
@@ -168,67 +169,80 @@ module RelatonIso
|
|
|
168
169
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
169
170
|
|
|
170
171
|
# Fetch docid.
|
|
171
|
-
# @param
|
|
172
|
+
# @param doc [Nokogiri::HTML::Document]
|
|
173
|
+
# @param edition [String]
|
|
172
174
|
# @param langs [Array<Hash>]
|
|
173
175
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
|
174
|
-
def fetch_docid(
|
|
176
|
+
def fetch_docid(doc, edition, langs)
|
|
177
|
+
pubid = item_ref doc
|
|
175
178
|
[
|
|
176
|
-
RelatonBib::DocumentIdentifier.new(id:
|
|
177
|
-
RelatonBib::DocumentIdentifier.new(
|
|
178
|
-
|
|
179
|
+
RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO"),
|
|
180
|
+
RelatonBib::DocumentIdentifier.new(
|
|
181
|
+
id: fetch_urn(doc, pubid, edition, langs), type: "URN",
|
|
182
|
+
),
|
|
179
183
|
]
|
|
180
184
|
end
|
|
181
185
|
|
|
182
|
-
# @param
|
|
186
|
+
# @param doc [Nokogiri::HTML::Document]
|
|
187
|
+
# @param pubid [String]
|
|
188
|
+
# @param edition [String]
|
|
183
189
|
# @param langs [Array<Hash>]
|
|
184
190
|
# @return [String]
|
|
185
|
-
def fetch_urn(
|
|
186
|
-
orig =
|
|
187
|
-
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~
|
|
191
|
+
def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
|
192
|
+
orig = pubid.split.first.downcase.split("/").join "-"
|
|
193
|
+
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
|
|
194
|
+
_, part, _year, corr, = IsoBibliography.ref_components pubid
|
|
188
195
|
urn = "urn:iso:std:#{orig}"
|
|
189
196
|
urn += ":#{type.downcase}" if type
|
|
190
|
-
urn += ":#{
|
|
191
|
-
urn += ":-#{
|
|
192
|
-
urn += ":stage-#{
|
|
193
|
-
urn += ":ed-#{
|
|
194
|
-
if
|
|
195
|
-
|
|
197
|
+
urn += ":#{fetch_docnumber(doc)}"
|
|
198
|
+
urn += ":-#{part}" if part
|
|
199
|
+
urn += ":stage-#{stage_code(doc)}"
|
|
200
|
+
urn += ":ed-#{edition}" if edition
|
|
201
|
+
if corr
|
|
202
|
+
corrparts = corr.split
|
|
203
|
+
urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
|
|
196
204
|
end
|
|
197
|
-
urn += "
|
|
205
|
+
urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
|
|
198
206
|
urn
|
|
199
207
|
end
|
|
200
208
|
|
|
201
209
|
def fetch_docnumber(doc)
|
|
202
|
-
|
|
203
|
-
id&.match(/\d+/)&.to_s
|
|
210
|
+
item_ref(doc)&.match(/\d+/)&.to_s
|
|
204
211
|
end
|
|
205
212
|
|
|
206
213
|
# @param doc [Nokogiri::HTML::Document]
|
|
207
214
|
def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
|
|
208
|
-
|
|
209
|
-
unless
|
|
215
|
+
ref = item_ref doc
|
|
216
|
+
unless ref
|
|
210
217
|
return RelatonIsoBib::StructuredIdentifier.new(
|
|
211
|
-
project_number: "?", part_number: "", prefix: nil, id: "?"
|
|
218
|
+
project_number: "?", part_number: "", prefix: nil, id: "?",
|
|
212
219
|
)
|
|
213
220
|
end
|
|
214
221
|
|
|
215
|
-
m =
|
|
222
|
+
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
|
216
223
|
RelatonIsoBib::StructuredIdentifier.new(
|
|
217
|
-
project_number: m[1],
|
|
218
|
-
id: item_ref.text, type: "ISO"
|
|
224
|
+
project_number: m[1], part: m[2], type: "ISO",
|
|
219
225
|
)
|
|
220
226
|
end
|
|
221
227
|
|
|
228
|
+
def item_ref(doc)
|
|
229
|
+
doc.at("//nav[contains(@class, 'heading-condensed')]/h1")&.text
|
|
230
|
+
end
|
|
231
|
+
|
|
222
232
|
# Fetch status.
|
|
223
233
|
# @param doc [Nokogiri::HTML::Document]
|
|
224
234
|
# @param status [String]
|
|
225
235
|
# @return [Hash]
|
|
226
236
|
def fetch_status(doc)
|
|
227
|
-
stg, substg = doc.
|
|
228
|
-
.text.split "."
|
|
237
|
+
stg, substg = stage_code(doc).split "."
|
|
229
238
|
RelatonBib::DocumentStatus.new(stage: stg, substage: substg)
|
|
230
239
|
end
|
|
231
240
|
|
|
241
|
+
def stage_code(doc)
|
|
242
|
+
doc.at("//ul[@class='dropdown-menu']/li[@class='active']"\
|
|
243
|
+
"/a/span[@class='stage-code']").text
|
|
244
|
+
end
|
|
245
|
+
|
|
232
246
|
# def stage(stg, substg)
|
|
233
247
|
# abbr = STGABBR[stg].is_a?(Hash) ? STGABBR[stg][substg] : STGABBR[stg]
|
|
234
248
|
# RelatonBib::DocumentStatus::Stage.new value: stg, abbreviation: abbr
|
|
@@ -241,13 +255,15 @@ module RelatonIso
|
|
|
241
255
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
|
242
256
|
# wg_url = DOMAIN + wg_link['href']
|
|
243
257
|
workgroup = wg_link.text.split "/"
|
|
258
|
+
type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
|
|
244
259
|
{
|
|
245
260
|
name: "International Organization for Standardization",
|
|
246
261
|
abbreviation: "ISO",
|
|
247
262
|
url: "www.iso.org",
|
|
248
263
|
technical_committee: [{
|
|
249
|
-
name:
|
|
250
|
-
|
|
264
|
+
name: doc.css("div.entry-title")[0].text,
|
|
265
|
+
identifier: wg_link.text,
|
|
266
|
+
type: type,
|
|
251
267
|
number: workgroup[1]&.match(/\d+/)&.to_s&.to_i,
|
|
252
268
|
}],
|
|
253
269
|
}
|
|
@@ -274,10 +290,10 @@ module RelatonIso
|
|
|
274
290
|
else
|
|
275
291
|
a + r.css("a").map do |id|
|
|
276
292
|
fref = RelatonBib::FormattedRef.new(
|
|
277
|
-
content: id.text, format: "text/plain"
|
|
293
|
+
content: id.text, format: "text/plain",
|
|
278
294
|
)
|
|
279
295
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
|
280
|
-
formattedref: fref, date: date
|
|
296
|
+
formattedref: fref, date: date,
|
|
281
297
|
)
|
|
282
298
|
{ type: type, bibitem: bibitem }
|
|
283
299
|
end
|
|
@@ -311,7 +327,7 @@ module RelatonIso
|
|
|
311
327
|
def fetch_title(doc, lang)
|
|
312
328
|
content = doc.at(
|
|
313
329
|
"//nav[contains(@class,'heading-condensed')]/h2 | "\
|
|
314
|
-
"//nav[contains(@class,'heading-condensed')]/h3"
|
|
330
|
+
"//nav[contains(@class,'heading-condensed')]/h3",
|
|
315
331
|
)&.text&.gsub(/\u2014/, "-")
|
|
316
332
|
return RelatonBib::TypedTitleStringCollection.new unless content
|
|
317
333
|
|
|
@@ -373,7 +389,7 @@ module RelatonIso
|
|
|
373
389
|
def fetch_ics(doc)
|
|
374
390
|
doc.xpath("//strong[contains(text(), "\
|
|
375
391
|
"'ICS')]/../following-sibling::dd/div/a").map do |i|
|
|
376
|
-
code = i.text.match(/[\d
|
|
392
|
+
code = i.text.match(/[\d.]+/).to_s.split "."
|
|
377
393
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
|
378
394
|
end
|
|
379
395
|
end
|
|
@@ -395,10 +411,10 @@ module RelatonIso
|
|
|
395
411
|
end
|
|
396
412
|
|
|
397
413
|
# Fetch copyright.
|
|
398
|
-
# @param ref [String]
|
|
399
414
|
# @param doc [Nokogiri::HTML::Document]
|
|
400
415
|
# @return [Array<Hash>]
|
|
401
|
-
def fetch_copyright(
|
|
416
|
+
def fetch_copyright(doc)
|
|
417
|
+
ref = item_ref doc
|
|
402
418
|
owner_name = ref.match(/.*?(?=\s)/).to_s
|
|
403
419
|
from = ref.match(/(?<=:)\d{4}/).to_s
|
|
404
420
|
if from.empty?
|
|
@@ -408,5 +424,4 @@ module RelatonIso
|
|
|
408
424
|
end
|
|
409
425
|
end
|
|
410
426
|
end
|
|
411
|
-
# rubocop:enable Metrics/ModuleLength
|
|
412
427
|
end
|
data/lib/relaton_iso/version.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
|
@@ -30,13 +30,17 @@ Gem::Specification.new do |spec|
|
|
|
30
30
|
# spec.add_development_dependency "debase"
|
|
31
31
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
|
32
32
|
spec.add_development_dependency "pry-byebug"
|
|
33
|
-
spec.add_development_dependency "rake", "~>
|
|
33
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
|
34
34
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
35
|
+
spec.add_development_dependency "rubocop"
|
|
36
|
+
spec.add_development_dependency "rubocop-performance"
|
|
37
|
+
spec.add_development_dependency "rubocop-rails"
|
|
35
38
|
# spec.add_development_dependency "ruby-debug-ide"
|
|
36
39
|
spec.add_development_dependency "simplecov"
|
|
37
40
|
spec.add_development_dependency "vcr"
|
|
38
41
|
spec.add_development_dependency "webmock"
|
|
39
42
|
|
|
40
|
-
spec.add_dependency "relaton-iec", "~> 1.
|
|
41
|
-
spec.add_dependency "
|
|
43
|
+
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
|
44
|
+
spec.add_dependency "algolia"
|
|
45
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.9.0"
|
|
42
46
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-iso
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.9.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-10-28 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: byebug
|
|
@@ -58,14 +58,14 @@ dependencies:
|
|
|
58
58
|
requirements:
|
|
59
59
|
- - "~>"
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '
|
|
61
|
+
version: '13.0'
|
|
62
62
|
type: :development
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '
|
|
68
|
+
version: '13.0'
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: rspec
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -80,6 +80,48 @@ dependencies:
|
|
|
80
80
|
- - "~>"
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
82
|
version: '3.0'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: rubocop
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - ">="
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '0'
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - ">="
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '0'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: rubocop-performance
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - ">="
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '0'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - ">="
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '0'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: rubocop-rails
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '0'
|
|
118
|
+
type: :development
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: '0'
|
|
83
125
|
- !ruby/object:Gem::Dependency
|
|
84
126
|
name: simplecov
|
|
85
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -123,33 +165,33 @@ dependencies:
|
|
|
123
165
|
- !ruby/object:Gem::Version
|
|
124
166
|
version: '0'
|
|
125
167
|
- !ruby/object:Gem::Dependency
|
|
126
|
-
name:
|
|
168
|
+
name: algolia
|
|
127
169
|
requirement: !ruby/object:Gem::Requirement
|
|
128
170
|
requirements:
|
|
129
|
-
- - "
|
|
171
|
+
- - ">="
|
|
130
172
|
- !ruby/object:Gem::Version
|
|
131
|
-
version:
|
|
173
|
+
version: '0'
|
|
132
174
|
type: :runtime
|
|
133
175
|
prerelease: false
|
|
134
176
|
version_requirements: !ruby/object:Gem::Requirement
|
|
135
177
|
requirements:
|
|
136
|
-
- - "
|
|
178
|
+
- - ">="
|
|
137
179
|
- !ruby/object:Gem::Version
|
|
138
|
-
version:
|
|
180
|
+
version: '0'
|
|
139
181
|
- !ruby/object:Gem::Dependency
|
|
140
182
|
name: relaton-iso-bib
|
|
141
183
|
requirement: !ruby/object:Gem::Requirement
|
|
142
184
|
requirements:
|
|
143
185
|
- - "~>"
|
|
144
186
|
- !ruby/object:Gem::Version
|
|
145
|
-
version: 1.
|
|
187
|
+
version: 1.9.0
|
|
146
188
|
type: :runtime
|
|
147
189
|
prerelease: false
|
|
148
190
|
version_requirements: !ruby/object:Gem::Requirement
|
|
149
191
|
requirements:
|
|
150
192
|
- - "~>"
|
|
151
193
|
- !ruby/object:Gem::Version
|
|
152
|
-
version: 1.
|
|
194
|
+
version: 1.9.0
|
|
153
195
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
|
154
196
|
model'
|
|
155
197
|
email:
|
|
@@ -179,9 +221,13 @@ files:
|
|
|
179
221
|
- bin/nokogiri
|
|
180
222
|
- bin/pry
|
|
181
223
|
- bin/racc
|
|
224
|
+
- bin/rackup
|
|
182
225
|
- bin/rake
|
|
183
226
|
- bin/rdebug-ide
|
|
184
227
|
- bin/rspec
|
|
228
|
+
- bin/rubocop
|
|
229
|
+
- bin/ruby-parse
|
|
230
|
+
- bin/ruby-rewrite
|
|
185
231
|
- bin/safe_yaml
|
|
186
232
|
- bin/setup
|
|
187
233
|
- lib/relaton_iso.rb
|