relaton-iso 1.11.0 → 1.12.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -24
- data/Gemfile +0 -3
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +27 -0
- data/lib/relaton_iso/hit.rb +8 -3
- data/lib/relaton_iso/hit_collection.rb +4 -8
- data/lib/relaton_iso/iso_bibliography.rb +102 -125
- data/lib/relaton_iso/scrapper.rb +55 -68
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +2 -0
- data/relaton_iso.gemspec +2 -1
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4479e38048aa0dfae8bcc85f1e9de03b5fe0561048b658ec47b3df8ca64794eb
|
4
|
+
data.tar.gz: c297ddc7b15d8186b85fbb7d4d3f84863d7df6e20ad243f0740364262fe43807
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fd11d9fe01bd36052cf2762df6e8f728d361fbe23410dcb0229e95436a9f7909e51e51a8be0abc9f7bd269ffd10d642cb0aa7792dba76c0c174b606e319bf58
|
7
|
+
data.tar.gz: 060edbed6bb5b11033911db2200a4e19e98585c4133a1b4a09eaa79b582f2cf23b39f424b84d3110429ff1800247cec4f2b6522a82bbbdc45cb093e63a339bda
|
data/.github/workflows/rake.yml
CHANGED
@@ -10,27 +10,4 @@ on:
|
|
10
10
|
|
11
11
|
jobs:
|
12
12
|
rake:
|
13
|
-
|
14
|
-
runs-on: ${{ matrix.os }}
|
15
|
-
continue-on-error: ${{ matrix.experimental }}
|
16
|
-
strategy:
|
17
|
-
fail-fast: false
|
18
|
-
matrix:
|
19
|
-
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
|
-
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
-
experimental: [ false ]
|
22
|
-
steps:
|
23
|
-
- uses: actions/checkout@v2
|
24
|
-
with:
|
25
|
-
submodules: true
|
26
|
-
|
27
|
-
# https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
|
28
|
-
- if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
|
29
|
-
run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
|
30
|
-
|
31
|
-
- uses: ruby/setup-ruby@v1
|
32
|
-
with:
|
33
|
-
ruby-version: ${{ matrix.ruby }}
|
34
|
-
bundler-cache: true
|
35
|
-
|
36
|
-
- run: bundle exec rake
|
13
|
+
uses: relaton/support/.github/workflows/rake.yml@master
|
data/Gemfile
CHANGED
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module RelatonIso
|
2
|
+
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
+
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
5
|
+
if @all_parts
|
6
|
+
if type == "URN"
|
7
|
+
return "#{@id.urn}:ser"
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
13
|
+
end
|
14
|
+
|
15
|
+
def remove_part
|
16
|
+
@id.part = nil
|
17
|
+
end
|
18
|
+
|
19
|
+
def remove_date
|
20
|
+
@id.year = nil
|
21
|
+
end
|
22
|
+
|
23
|
+
def all_parts
|
24
|
+
@all_parts = true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -4,13 +4,13 @@ module RelatonIso
|
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
6
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
7
|
-
attr_writer :fetch
|
7
|
+
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
12
|
def fetch(lang = nil)
|
13
|
-
@fetch ||= Scrapper.parse_page
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -23,5 +23,10 @@ module RelatonIso
|
|
23
23
|
else 4
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# @return [Pubid::Iso::Identifier]
|
28
|
+
def pubid
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
|
+
end
|
26
31
|
end
|
27
32
|
end
|
@@ -11,25 +11,21 @@ module RelatonIso
|
|
11
11
|
# @param text [String] reference to search
|
12
12
|
def initialize(text)
|
13
13
|
super
|
14
|
-
@array = text.match?(/^ISO\
|
14
|
+
@array = text.match?(/^ISO\s(?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
15
15
|
end
|
16
16
|
|
17
17
|
# @param lang [String, NilClass]
|
18
18
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
19
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
20
20
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
|
-
hit = @array.min_by
|
22
|
-
IsoBibliography.ref_components(h.hit[:title])[1].to_i
|
23
|
-
end
|
21
|
+
hit = @array.min_by { |h| h.pubid.part }
|
24
22
|
return @array.first.fetch lang unless hit
|
25
23
|
|
26
|
-
bibitem = hit.fetch
|
24
|
+
bibitem = hit.fetch(lang)
|
27
25
|
all_parts_item = bibitem.to_all_parts
|
28
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
29
|
-
%r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
|
30
|
-
(?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
|
31
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
32
|
-
formattedref: RelatonBib::FormattedRef.new(content:
|
28
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
|
33
29
|
)
|
34
30
|
all_parts_item.relation << RelatonBib::DocumentRelation.new(
|
35
31
|
type: "instance", bibitem: isobib,
|
@@ -27,175 +27,152 @@ module RelatonIso
|
|
27
27
|
# @option opts [Boolean] :keep_year if undated reference should return
|
28
28
|
# actual reference with year
|
29
29
|
#
|
30
|
-
# @return [
|
30
|
+
# @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item for the reference, or nil when no match is found
|
31
31
|
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
32
32
|
code = ref.gsub(/\u2013/, "-")
|
33
|
-
|
34
|
-
|
35
|
-
year ||= year1
|
33
|
+
|
34
|
+
# parse "all parts" request
|
36
35
|
code.sub! " (all parts)", ""
|
37
36
|
opts[:all_parts] ||= $~ && opts[:all_parts].nil?
|
38
|
-
# opts[:keep_year] ||= opts[:keep_year].nil?
|
39
|
-
# code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
|
40
|
-
# if %r[^ISO/IEC DIR].match? code
|
41
|
-
# return RelatonIec::IecBibliography.get(code, year, opts)
|
42
|
-
# end
|
43
37
|
|
44
|
-
|
45
|
-
|
38
|
+
query_pubid = Pubid::Iso::Identifier.parse(code)
|
39
|
+
query_pubid.year = year if year
|
40
|
+
|
41
|
+
hits = isobib_search_filter(query_pubid, opts)
|
46
42
|
|
47
|
-
|
43
|
+
# return only first one if not all_parts
|
44
|
+
ret = if !opts[:all_parts] || hits.size == 1
|
45
|
+
hits.any? && hits.first.fetch(opts[:lang])
|
46
|
+
else
|
47
|
+
hits.to_all_parts(opts[:lang])
|
48
|
+
end
|
49
|
+
|
50
|
+
if ret
|
51
|
+
warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
|
52
|
+
else
|
53
|
+
return fetch_ref_err(query_pubid, query_pubid.year)
|
54
|
+
end
|
55
|
+
|
56
|
+
if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
|
48
57
|
ret
|
49
58
|
else
|
50
59
|
ret.to_most_recent_reference
|
51
60
|
end
|
52
61
|
end
|
53
62
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
63
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
64
|
+
# @param pubid [Pubid::Iso::Identifier]
|
65
|
+
# @param all_parts [Boolean] match with any parts when true
|
66
|
+
# @return [Boolean]
|
67
|
+
def matches_parts?(query_pubid, pubid, all_parts: false)
|
68
|
+
if all_parts
|
69
|
+
# match only with documents with part number
|
70
|
+
!pubid.part.nil?
|
71
|
+
else
|
72
|
+
query_pubid.part == pubid.part
|
73
|
+
end
|
62
74
|
end
|
63
75
|
|
64
|
-
|
76
|
+
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
77
|
+
query_pubid.publisher == pubid.publisher &&
|
78
|
+
query_pubid.number == pubid.number &&
|
79
|
+
query_pubid.copublisher == pubid.copublisher &&
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
|
+
end
|
65
83
|
|
66
|
-
#
|
84
|
+
# @param hit_collection [RelatonIso::HitCollection]
|
85
|
+
# @param year [String]
|
86
|
+
# @return [RelatonIso::HitCollection]
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
|
+
missed_years = []
|
67
89
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
90
|
+
# filter by year
|
91
|
+
hits = hit_collection.select do |hit|
|
92
|
+
if hit.pubid.year == year
|
93
|
+
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
97
|
+
else
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
102
|
+
false
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
if hits.empty? && !missed_years.empty?
|
73
107
|
warn "[relaton-iso] (There was no match for #{year}, though there "\
|
74
108
|
"were matches found for #{missed_years.join(', ')}.)"
|
75
109
|
end
|
76
|
-
|
110
|
+
hits
|
111
|
+
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
116
|
+
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
117
|
+
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
118
|
+
"The code must be exactly like it is on the standards website."
|
119
|
+
if /\d-\d/.match? query_pubid.to_s
|
77
120
|
warn "[relaton-iso] The provided document part may not exist, "\
|
78
121
|
"or the document may no longer be published in parts."
|
79
122
|
else
|
80
123
|
warn "[relaton-iso] If you wanted to cite all document parts for "\
|
81
|
-
"the reference, use \"#{
|
124
|
+
"the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
|
82
125
|
"is not a standard, use its document type abbreviation "\
|
83
126
|
"(TS, TR, PAS, Guide)."
|
84
127
|
end
|
85
128
|
nil
|
86
129
|
end
|
87
130
|
|
88
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
89
|
-
|
90
131
|
# Search for hits. If none are found, retry allowing any stage or type, and then try ISO/IEC.
|
91
132
|
#
|
92
|
-
# @param
|
133
|
+
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
93
134
|
# @param opts [Hash]
|
94
135
|
# @return [Array<RelatonIso::Hit>]
|
95
|
-
def isobib_search_filter(
|
96
|
-
|
97
|
-
warn "[relaton-iso] (\"#{
|
98
|
-
|
99
|
-
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
137
|
+
query_pubid.part = nil if opts[:all_parts]
|
138
|
+
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
139
|
+
# fetch hits collection
|
140
|
+
hit_collection = search(query_pubid.to_s(with_date: false))
|
141
|
+
# filter only matching hits
|
142
|
+
res = filter_hits hit_collection, query_pubid,
|
143
|
+
all_parts: opts[:all_parts]
|
100
144
|
return res unless res.empty?
|
101
145
|
|
102
|
-
#
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
107
|
-
end
|
108
|
-
return res unless res.empty?
|
109
|
-
when %r{^\w+\s\d+} # code like ISO 123
|
110
|
-
res = try_stages(result, opts) do |st|
|
111
|
-
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
112
|
-
end
|
113
|
-
return res unless res.empty?
|
114
|
-
end
|
146
|
+
# look up documents with any stage or type when nothing matches the query as given
|
147
|
+
res = filter_hits hit_collection, query_pubid,
|
148
|
+
all_parts: opts[:all_parts], any_types_stages: true
|
149
|
+
return res unless res.empty?
|
115
150
|
|
116
|
-
|
151
|
+
# TODO: do this at pubid-iso
|
152
|
+
if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
|
117
153
|
warn "[relaton-iso] Attempting ISO/IEC retrieval"
|
118
|
-
|
119
|
-
res =
|
154
|
+
query_pubid.copublisher = "IEC"
|
155
|
+
res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
|
120
156
|
end
|
121
157
|
res
|
122
158
|
end
|
123
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
124
159
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
end
|
130
|
-
|
131
|
-
# @param result [RelatonIso::HitCollection]
|
132
|
-
# @param opts [Hash]
|
160
|
+
# @param hits [RelatonIso::HitCollection]
|
161
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
162
|
+
# @param all_parts [Boolean]
|
163
|
+
# @param any_types_stages [Boolean]
|
133
164
|
# @return [RelatonIso::HitCollection]
|
134
|
-
def
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
end
|
143
|
-
|
144
|
-
# @param result [RelatonIso::HitCollection]
|
145
|
-
# @param code [String]
|
146
|
-
# @param opts [Hash]
|
147
|
-
# @return [RelatonIso::HitCollection]
|
148
|
-
def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
149
|
-
code1, part1, _, corr1, coryear1 = ref_components code
|
150
|
-
result.select do |i|
|
151
|
-
code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
|
152
|
-
code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
|
153
|
-
corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
158
|
-
|
159
|
-
# Sort through the results from RelatonIso, fetching them three at a time,
|
160
|
-
# and return the first result that matches the code, matches the year
|
161
|
-
# (if provided), and which # has a title (amendments do not).
|
162
|
-
# Only expects the first page of results to be populated.
|
163
|
-
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
164
|
-
# If no match, returns any years which caused mismatch, for error
|
165
|
-
# reporting
|
166
|
-
def isobib_results_filter(result, year, opts)
|
167
|
-
missed_years = []
|
168
|
-
hits = result.reduce!([]) do |hts, h|
|
169
|
-
if !year || (%r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year)
|
170
|
-
hts << h
|
171
|
-
else
|
172
|
-
missed_years << iyear
|
173
|
-
hts
|
174
|
-
end
|
165
|
+
def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
166
|
+
# keep only the hits whose identifier matches the query
|
167
|
+
result = hit_collection.select do |i|
|
168
|
+
hit_pubid = i.pubid
|
169
|
+
matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
|
170
|
+
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
171
|
+
query_pubid.corrigendum == hit_pubid.corrigendum &&
|
172
|
+
query_pubid.amendment == hit_pubid.amendment
|
175
173
|
end
|
176
|
-
return { years: missed_years } unless hits.any?
|
177
174
|
|
178
|
-
|
179
|
-
return { ret: hits.first.fetch(opts[:lang]) }
|
180
|
-
end
|
181
|
-
|
182
|
-
{ ret: hits.to_all_parts(opts[:lang]) }
|
183
|
-
end
|
184
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
185
|
-
|
186
|
-
# @param code [String]
|
187
|
-
# @param year [String, NilClass]
|
188
|
-
# @param opts [Hash]
|
189
|
-
def isobib_get1(code, year, opts)
|
190
|
-
# return iev(code) if /^IEC 60050-/.match code
|
191
|
-
result = isobib_search_filter(code, opts) || return
|
192
|
-
ret = isobib_results_filter(result, year, opts)
|
193
|
-
if ret[:ret]
|
194
|
-
warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
195
|
-
ret[:ret]
|
196
|
-
else
|
197
|
-
fetch_ref_err(code, year, ret[:years])
|
198
|
-
end
|
175
|
+
query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
|
199
176
|
end
|
200
177
|
end
|
201
178
|
end
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,43 +50,61 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
|
-
# @return [
|
56
|
-
def parse_page(
|
55
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
|
-
|
59
|
+
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
60
61
|
|
61
62
|
# Fetch edition.
|
62
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
63
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
64
66
|
|
65
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
66
68
|
|
67
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
68
70
|
fetched: Date.today.to_s,
|
69
|
-
docid:
|
70
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
71
73
|
edition: edition,
|
72
74
|
language: langs.map { |l| l[:lang] },
|
73
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
74
76
|
title: titles,
|
75
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
76
78
|
docstatus: fetch_status(doc),
|
77
79
|
ics: fetch_ics(doc),
|
78
|
-
date: fetch_dates(doc,
|
79
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
80
82
|
editorialgroup: fetch_workgroup(doc),
|
81
83
|
abstract: abstract,
|
82
84
|
copyright: fetch_copyright(doc),
|
83
85
|
link: fetch_link(doc, url),
|
84
86
|
relation: fetch_relations(doc),
|
85
87
|
place: ["Geneva"],
|
86
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
87
89
|
)
|
88
90
|
end
|
89
91
|
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
98
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
102
|
+
[
|
103
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
104
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
105
|
+
]
|
106
|
+
end
|
107
|
+
|
90
108
|
private
|
91
109
|
|
92
110
|
# Fetch titles and abstracts.
|
@@ -168,60 +186,29 @@ module RelatonIso
|
|
168
186
|
end
|
169
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
170
188
|
|
171
|
-
#
|
172
|
-
#
|
173
|
-
#
|
174
|
-
# @param
|
175
|
-
#
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
RelatonBib::DocumentIdentifier.new(
|
181
|
-
id: fetch_urn(doc, pubid, edition, langs), type: "URN",
|
182
|
-
),
|
183
|
-
]
|
184
|
-
end
|
185
|
-
|
186
|
-
# @param doc [Nokogiri:HTML::Document]
|
187
|
-
# @param pubid [String]
|
188
|
-
# @param edition [String]
|
189
|
-
# @param langs [Array<Hash>]
|
190
|
-
# @returnt [String]
|
191
|
-
def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
192
|
-
orig = pubid.split.first.downcase.split("/").join "-"
|
193
|
-
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
|
194
|
-
_, part, _year, corr, = IsoBibliography.ref_components pubid
|
195
|
-
urn = "urn:iso:std:#{orig}"
|
196
|
-
urn += ":#{type.downcase}" if type
|
197
|
-
urn += ":#{fetch_docnumber(doc)}"
|
198
|
-
urn += ":-#{part}" if part
|
199
|
-
urn += ":stage-#{stage_code(doc)}"
|
200
|
-
urn += ":ed-#{edition}" if edition
|
201
|
-
if corr
|
202
|
-
corrparts = corr.split
|
203
|
-
urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
|
204
|
-
end
|
205
|
-
urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
|
206
|
-
urn
|
207
|
-
end
|
208
|
-
|
209
|
-
def fetch_docnumber(doc)
|
210
|
-
item_ref(doc)&.match(/\d+/)&.to_s
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
# @param pubid [Pubid::Iso::Identifier]
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
211
198
|
end
|
212
199
|
|
213
|
-
#
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
223
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
224
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
225
212
|
)
|
226
213
|
end
|
227
214
|
|
@@ -251,7 +238,7 @@ module RelatonIso
|
|
251
238
|
# Fetch workgroup.
|
252
239
|
# @param doc [Nokogiri::HTML::Document]
|
253
240
|
# @return [Hash]
|
254
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
255
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
256
243
|
# wg_url = DOMAIN + wg_link['href']
|
257
244
|
workgroup = wg_link.text.split "/"
|
@@ -275,6 +262,7 @@ module RelatonIso
|
|
275
262
|
# @param doc [Nokogiri::HTML::Document]
|
276
263
|
# @return [Array<Hash>]
|
277
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
278
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
279
267
|
r_type = r.at("h4", "h5").text
|
280
268
|
date = []
|
@@ -286,14 +274,13 @@ module RelatonIso
|
|
286
274
|
"updates"
|
287
275
|
else r_type
|
288
276
|
end
|
289
|
-
if
|
277
|
+
if types.include?(type) then a
|
290
278
|
else
|
291
279
|
a + r.css("a").map do |id|
|
292
|
-
|
293
|
-
|
294
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
295
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
296
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
297
284
|
)
|
298
285
|
{ type: type, bibitem: bibitem }
|
299
286
|
end
|
@@ -308,7 +295,7 @@ module RelatonIso
|
|
308
295
|
def fetch_type(ref)
|
309
296
|
%r{
|
310
297
|
^(?<prefix>ISO|IWA|IEC)
|
311
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
312
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
313
300
|
}x =~ ref
|
314
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -42,5 +42,6 @@ Gem::Specification.new do |spec|
|
|
42
42
|
|
43
43
|
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
44
44
|
spec.add_dependency "algolia"
|
45
|
-
spec.add_dependency "relaton-iso-bib", "~> 1.
|
45
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
|
46
|
+
spec.add_dependency "pubid-iso", "~> 0.1.7"
|
46
47
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.12.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -184,14 +184,28 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
187
|
+
version: 1.12.0
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
194
|
+
version: 1.12.0
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: pubid-iso
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: 0.1.7
|
202
|
+
type: :runtime
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: 0.1.7
|
195
209
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
196
210
|
model'
|
197
211
|
email:
|
@@ -230,7 +244,9 @@ files:
|
|
230
244
|
- bin/ruby-rewrite
|
231
245
|
- bin/safe_yaml
|
232
246
|
- bin/setup
|
247
|
+
- bin/thor
|
233
248
|
- lib/relaton_iso.rb
|
249
|
+
- lib/relaton_iso/document_identifier.rb
|
234
250
|
- lib/relaton_iso/hit.rb
|
235
251
|
- lib/relaton_iso/hit_collection.rb
|
236
252
|
- lib/relaton_iso/iso_bibliography.rb
|