relaton-iso 1.11.0 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -24
- data/Gemfile +0 -3
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +27 -0
- data/lib/relaton_iso/hit.rb +8 -3
- data/lib/relaton_iso/hit_collection.rb +4 -8
- data/lib/relaton_iso/iso_bibliography.rb +102 -125
- data/lib/relaton_iso/scrapper.rb +55 -68
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +2 -0
- data/relaton_iso.gemspec +2 -1
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4479e38048aa0dfae8bcc85f1e9de03b5fe0561048b658ec47b3df8ca64794eb
|
4
|
+
data.tar.gz: c297ddc7b15d8186b85fbb7d4d3f84863d7df6e20ad243f0740364262fe43807
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fd11d9fe01bd36052cf2762df6e8f728d361fbe23410dcb0229e95436a9f7909e51e51a8be0abc9f7bd269ffd10d642cb0aa7792dba76c0c174b606e319bf58
|
7
|
+
data.tar.gz: 060edbed6bb5b11033911db2200a4e19e98585c4133a1b4a09eaa79b582f2cf23b39f424b84d3110429ff1800247cec4f2b6522a82bbbdc45cb093e63a339bda
|
data/.github/workflows/rake.yml
CHANGED
@@ -10,27 +10,4 @@ on:
|
|
10
10
|
|
11
11
|
jobs:
|
12
12
|
rake:
|
13
|
-
|
14
|
-
runs-on: ${{ matrix.os }}
|
15
|
-
continue-on-error: ${{ matrix.experimental }}
|
16
|
-
strategy:
|
17
|
-
fail-fast: false
|
18
|
-
matrix:
|
19
|
-
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
|
-
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
-
experimental: [ false ]
|
22
|
-
steps:
|
23
|
-
- uses: actions/checkout@v2
|
24
|
-
with:
|
25
|
-
submodules: true
|
26
|
-
|
27
|
-
# https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
|
28
|
-
- if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
|
29
|
-
run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
|
30
|
-
|
31
|
-
- uses: ruby/setup-ruby@v1
|
32
|
-
with:
|
33
|
-
ruby-version: ${{ matrix.ruby }}
|
34
|
-
bundler-cache: true
|
35
|
-
|
36
|
-
- run: bundle exec rake
|
13
|
+
uses: relaton/support/.github/workflows/rake.yml@master
|
data/Gemfile
CHANGED
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module RelatonIso
|
2
|
+
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
+
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
5
|
+
if @all_parts
|
6
|
+
if type == "URN"
|
7
|
+
return "#{@id.urn}:ser"
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
13
|
+
end
|
14
|
+
|
15
|
+
def remove_part
|
16
|
+
@id.part = nil
|
17
|
+
end
|
18
|
+
|
19
|
+
def remove_date
|
20
|
+
@id.year = nil
|
21
|
+
end
|
22
|
+
|
23
|
+
def all_parts
|
24
|
+
@all_parts = true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -4,13 +4,13 @@ module RelatonIso
|
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
6
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
7
|
-
attr_writer :fetch
|
7
|
+
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
12
|
def fetch(lang = nil)
|
13
|
-
@fetch ||= Scrapper.parse_page
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -23,5 +23,10 @@ module RelatonIso
|
|
23
23
|
else 4
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# @return [Pubid::Iso::Identifier]
|
28
|
+
def pubid
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
|
+
end
|
26
31
|
end
|
27
32
|
end
|
@@ -11,25 +11,21 @@ module RelatonIso
|
|
11
11
|
# @param text [String] reference to search
|
12
12
|
def initialize(text)
|
13
13
|
super
|
14
|
-
@array = text.match?(/^ISO\
|
14
|
+
@array = text.match?(/^ISO\s(?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
15
15
|
end
|
16
16
|
|
17
17
|
# @param lang [String, NilClass]
|
18
18
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
19
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
20
20
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
|
-
hit = @array.min_by
|
22
|
-
IsoBibliography.ref_components(h.hit[:title])[1].to_i
|
23
|
-
end
|
21
|
+
hit = @array.min_by { |h| h.pubid.part }
|
24
22
|
return @array.first.fetch lang unless hit
|
25
23
|
|
26
|
-
bibitem = hit.fetch
|
24
|
+
bibitem = hit.fetch(lang)
|
27
25
|
all_parts_item = bibitem.to_all_parts
|
28
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
29
|
-
%r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
|
30
|
-
(?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
|
31
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
32
|
-
formattedref: RelatonBib::FormattedRef.new(content:
|
28
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
|
33
29
|
)
|
34
30
|
all_parts_item.relation << RelatonBib::DocumentRelation.new(
|
35
31
|
type: "instance", bibitem: isobib,
|
@@ -27,175 +27,152 @@ module RelatonIso
|
|
27
27
|
# @option opts [Boolean] :keep_year if undated reference should return
|
28
28
|
# actual reference with year
|
29
29
|
#
|
30
|
-
# @return [
|
30
|
+
# @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference
|
31
31
|
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
32
32
|
code = ref.gsub(/\u2013/, "-")
|
33
|
-
|
34
|
-
|
35
|
-
year ||= year1
|
33
|
+
|
34
|
+
# parse "all parts" request
|
36
35
|
code.sub! " (all parts)", ""
|
37
36
|
opts[:all_parts] ||= $~ && opts[:all_parts].nil?
|
38
|
-
# opts[:keep_year] ||= opts[:keep_year].nil?
|
39
|
-
# code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
|
40
|
-
# if %r[^ISO/IEC DIR].match? code
|
41
|
-
# return RelatonIec::IecBibliography.get(code, year, opts)
|
42
|
-
# end
|
43
37
|
|
44
|
-
|
45
|
-
|
38
|
+
query_pubid = Pubid::Iso::Identifier.parse(code)
|
39
|
+
query_pubid.year = year if year
|
40
|
+
|
41
|
+
hits = isobib_search_filter(query_pubid, opts)
|
46
42
|
|
47
|
-
|
43
|
+
# return only first one if not all_parts
|
44
|
+
ret = if !opts[:all_parts] || hits.size == 1
|
45
|
+
hits.any? && hits.first.fetch(opts[:lang])
|
46
|
+
else
|
47
|
+
hits.to_all_parts(opts[:lang])
|
48
|
+
end
|
49
|
+
|
50
|
+
if ret
|
51
|
+
warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
|
52
|
+
else
|
53
|
+
return fetch_ref_err(query_pubid, query_pubid.year)
|
54
|
+
end
|
55
|
+
|
56
|
+
if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
|
48
57
|
ret
|
49
58
|
else
|
50
59
|
ret.to_most_recent_reference
|
51
60
|
end
|
52
61
|
end
|
53
62
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
63
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
64
|
+
# @param pubid [Pubid::Iso::Identifier]
|
65
|
+
# @param all_parts [Boolean] match with any parts when true
|
66
|
+
# @return [Boolean]
|
67
|
+
def matches_parts?(query_pubid, pubid, all_parts: false)
|
68
|
+
if all_parts
|
69
|
+
# match only with documents with part number
|
70
|
+
!pubid.part.nil?
|
71
|
+
else
|
72
|
+
query_pubid.part == pubid.part
|
73
|
+
end
|
62
74
|
end
|
63
75
|
|
64
|
-
|
76
|
+
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
|
77
|
+
query_pubid.publisher == pubid.publisher &&
|
78
|
+
query_pubid.number == pubid.number &&
|
79
|
+
query_pubid.copublisher == pubid.copublisher &&
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
|
+
end
|
65
83
|
|
66
|
-
#
|
84
|
+
# @param hit_collection [RelatonIso::HitCollection]
|
85
|
+
# @param year [String]
|
86
|
+
# @return [RelatonIso::HitCollection]
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
|
+
missed_years = []
|
67
89
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
90
|
+
# filter by year
|
91
|
+
hits = hit_collection.select do |hit|
|
92
|
+
if hit.pubid.year == year
|
93
|
+
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
97
|
+
else
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
102
|
+
false
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
if hits.empty? && !missed_years.empty?
|
73
107
|
warn "[relaton-iso] (There was no match for #{year}, though there "\
|
74
108
|
"were matches found for #{missed_years.join(', ')}.)"
|
75
109
|
end
|
76
|
-
|
110
|
+
hits
|
111
|
+
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
116
|
+
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
117
|
+
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
118
|
+
"The code must be exactly like it is on the standards website."
|
119
|
+
if /\d-\d/.match? query_pubid.to_s
|
77
120
|
warn "[relaton-iso] The provided document part may not exist, "\
|
78
121
|
"or the document may no longer be published in parts."
|
79
122
|
else
|
80
123
|
warn "[relaton-iso] If you wanted to cite all document parts for "\
|
81
|
-
"the reference, use \"#{
|
124
|
+
"the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
|
82
125
|
"is not a standard, use its document type abbreviation "\
|
83
126
|
"(TS, TR, PAS, Guide)."
|
84
127
|
end
|
85
128
|
nil
|
86
129
|
end
|
87
130
|
|
88
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
89
|
-
|
90
131
|
# Search for hits. If no found then trying missed stages and ISO/IEC.
|
91
132
|
#
|
92
|
-
# @param
|
133
|
+
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
93
134
|
# @param opts [Hash]
|
94
135
|
# @return [Array<RelatonIso::Hit>]
|
95
|
-
def isobib_search_filter(
|
96
|
-
|
97
|
-
warn "[relaton-iso] (\"#{
|
98
|
-
|
99
|
-
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
137
|
+
query_pubid.part = nil if opts[:all_parts]
|
138
|
+
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
139
|
+
# fetch hits collection
|
140
|
+
hit_collection = search(query_pubid.to_s(with_date: false))
|
141
|
+
# filter only matching hits
|
142
|
+
res = filter_hits hit_collection, query_pubid,
|
143
|
+
all_parts: opts[:all_parts]
|
100
144
|
return res unless res.empty?
|
101
145
|
|
102
|
-
#
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
107
|
-
end
|
108
|
-
return res unless res.empty?
|
109
|
-
when %r{^\w+\s\d+} # code like ISO 123
|
110
|
-
res = try_stages(result, opts) do |st|
|
111
|
-
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
112
|
-
end
|
113
|
-
return res unless res.empty?
|
114
|
-
end
|
146
|
+
# lookup for documents with stages when no match without stage
|
147
|
+
res = filter_hits hit_collection, query_pubid,
|
148
|
+
all_parts: opts[:all_parts], any_types_stages: true
|
149
|
+
return res unless res.empty?
|
115
150
|
|
116
|
-
|
151
|
+
# TODO: do this at pubid-iso
|
152
|
+
if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
|
117
153
|
warn "[relaton-iso] Attempting ISO/IEC retrieval"
|
118
|
-
|
119
|
-
res =
|
154
|
+
query_pubid.copublisher = "IEC"
|
155
|
+
res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
|
120
156
|
end
|
121
157
|
res
|
122
158
|
end
|
123
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
124
159
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
end
|
130
|
-
|
131
|
-
# @param result [RelatonIso::HitCollection]
|
132
|
-
# @param opts [Hash]
|
160
|
+
# @param hits [RelatonIso::HitCollection]
|
161
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
162
|
+
# @param all_parts [Boolean]
|
163
|
+
# @param any_stages [Boolean]
|
133
164
|
# @return [RelatonIso::HitCollection]
|
134
|
-
def
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
end
|
143
|
-
|
144
|
-
# @param result [RelatonIso::HitCollection]
|
145
|
-
# @param code [String]
|
146
|
-
# @param opts [Hash]
|
147
|
-
# @return [RelatonIso::HitCollection]
|
148
|
-
def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
149
|
-
code1, part1, _, corr1, coryear1 = ref_components code
|
150
|
-
result.select do |i|
|
151
|
-
code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
|
152
|
-
code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
|
153
|
-
corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
158
|
-
|
159
|
-
# Sort through the results from RelatonIso, fetching them three at a time,
|
160
|
-
# and return the first result that matches the code, matches the year
|
161
|
-
# (if provided), and which # has a title (amendments do not).
|
162
|
-
# Only expects the first page of results to be populated.
|
163
|
-
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
164
|
-
# If no match, returns any years which caused mismatch, for error
|
165
|
-
# reporting
|
166
|
-
def isobib_results_filter(result, year, opts)
|
167
|
-
missed_years = []
|
168
|
-
hits = result.reduce!([]) do |hts, h|
|
169
|
-
if !year || (%r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year)
|
170
|
-
hts << h
|
171
|
-
else
|
172
|
-
missed_years << iyear
|
173
|
-
hts
|
174
|
-
end
|
165
|
+
def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
166
|
+
# filter out
|
167
|
+
result = hit_collection.select do |i|
|
168
|
+
hit_pubid = i.pubid
|
169
|
+
matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
|
170
|
+
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
171
|
+
query_pubid.corrigendum == hit_pubid.corrigendum &&
|
172
|
+
query_pubid.amendment == hit_pubid.amendment
|
175
173
|
end
|
176
|
-
return { years: missed_years } unless hits.any?
|
177
174
|
|
178
|
-
|
179
|
-
return { ret: hits.first.fetch(opts[:lang]) }
|
180
|
-
end
|
181
|
-
|
182
|
-
{ ret: hits.to_all_parts(opts[:lang]) }
|
183
|
-
end
|
184
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
185
|
-
|
186
|
-
# @param code [String]
|
187
|
-
# @param year [String, NilClass]
|
188
|
-
# @param opts [Hash]
|
189
|
-
def isobib_get1(code, year, opts)
|
190
|
-
# return iev(code) if /^IEC 60050-/.match code
|
191
|
-
result = isobib_search_filter(code, opts) || return
|
192
|
-
ret = isobib_results_filter(result, year, opts)
|
193
|
-
if ret[:ret]
|
194
|
-
warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
195
|
-
ret[:ret]
|
196
|
-
else
|
197
|
-
fetch_ref_err(code, year, ret[:years])
|
198
|
-
end
|
175
|
+
query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
|
199
176
|
end
|
200
177
|
end
|
201
178
|
end
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,43 +50,61 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
|
-
# @return [
|
56
|
-
def parse_page(
|
55
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
|
-
|
59
|
+
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
60
61
|
|
61
62
|
# Fetch edition.
|
62
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
63
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
64
66
|
|
65
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
66
68
|
|
67
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
68
70
|
fetched: Date.today.to_s,
|
69
|
-
docid:
|
70
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
71
73
|
edition: edition,
|
72
74
|
language: langs.map { |l| l[:lang] },
|
73
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
74
76
|
title: titles,
|
75
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
76
78
|
docstatus: fetch_status(doc),
|
77
79
|
ics: fetch_ics(doc),
|
78
|
-
date: fetch_dates(doc,
|
79
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
80
82
|
editorialgroup: fetch_workgroup(doc),
|
81
83
|
abstract: abstract,
|
82
84
|
copyright: fetch_copyright(doc),
|
83
85
|
link: fetch_link(doc, url),
|
84
86
|
relation: fetch_relations(doc),
|
85
87
|
place: ["Geneva"],
|
86
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
87
89
|
)
|
88
90
|
end
|
89
91
|
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
98
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
102
|
+
[
|
103
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
104
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
105
|
+
]
|
106
|
+
end
|
107
|
+
|
90
108
|
private
|
91
109
|
|
92
110
|
# Fetch titles and abstracts.
|
@@ -168,60 +186,29 @@ module RelatonIso
|
|
168
186
|
end
|
169
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
170
188
|
|
171
|
-
#
|
172
|
-
#
|
173
|
-
#
|
174
|
-
# @param
|
175
|
-
#
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
RelatonBib::DocumentIdentifier.new(
|
181
|
-
id: fetch_urn(doc, pubid, edition, langs), type: "URN",
|
182
|
-
),
|
183
|
-
]
|
184
|
-
end
|
185
|
-
|
186
|
-
# @param doc [Nokogiri:HTML::Document]
|
187
|
-
# @param pubid [String]
|
188
|
-
# @param edition [String]
|
189
|
-
# @param langs [Array<Hash>]
|
190
|
-
# @returnt [String]
|
191
|
-
def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
192
|
-
orig = pubid.split.first.downcase.split("/").join "-"
|
193
|
-
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
|
194
|
-
_, part, _year, corr, = IsoBibliography.ref_components pubid
|
195
|
-
urn = "urn:iso:std:#{orig}"
|
196
|
-
urn += ":#{type.downcase}" if type
|
197
|
-
urn += ":#{fetch_docnumber(doc)}"
|
198
|
-
urn += ":-#{part}" if part
|
199
|
-
urn += ":stage-#{stage_code(doc)}"
|
200
|
-
urn += ":ed-#{edition}" if edition
|
201
|
-
if corr
|
202
|
-
corrparts = corr.split
|
203
|
-
urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
|
204
|
-
end
|
205
|
-
urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
|
206
|
-
urn
|
207
|
-
end
|
208
|
-
|
209
|
-
def fetch_docnumber(doc)
|
210
|
-
item_ref(doc)&.match(/\d+/)&.to_s
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
# @param [Pubid::Iso] pubid
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
211
198
|
end
|
212
199
|
|
213
|
-
#
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
223
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
224
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
225
212
|
)
|
226
213
|
end
|
227
214
|
|
@@ -251,7 +238,7 @@ module RelatonIso
|
|
251
238
|
# Fetch workgroup.
|
252
239
|
# @param doc [Nokogiri::HTML::Document]
|
253
240
|
# @return [Hash]
|
254
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
255
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
256
243
|
# wg_url = DOMAIN + wg_link['href']
|
257
244
|
workgroup = wg_link.text.split "/"
|
@@ -275,6 +262,7 @@ module RelatonIso
|
|
275
262
|
# @param doc [Nokogiri::HTML::Document]
|
276
263
|
# @return [Array<Hash>]
|
277
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
278
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
279
267
|
r_type = r.at("h4", "h5").text
|
280
268
|
date = []
|
@@ -286,14 +274,13 @@ module RelatonIso
|
|
286
274
|
"updates"
|
287
275
|
else r_type
|
288
276
|
end
|
289
|
-
if
|
277
|
+
if types.include?(type) then a
|
290
278
|
else
|
291
279
|
a + r.css("a").map do |id|
|
292
|
-
|
293
|
-
|
294
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
295
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
296
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
297
284
|
)
|
298
285
|
{ type: type, bibitem: bibitem }
|
299
286
|
end
|
@@ -308,7 +295,7 @@ module RelatonIso
|
|
308
295
|
def fetch_type(ref)
|
309
296
|
%r{
|
310
297
|
^(?<prefix>ISO|IWA|IEC)
|
311
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
312
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
313
300
|
}x =~ ref
|
314
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -42,5 +42,6 @@ Gem::Specification.new do |spec|
|
|
42
42
|
|
43
43
|
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
44
44
|
spec.add_dependency "algolia"
|
45
|
-
spec.add_dependency "relaton-iso-bib", "~> 1.
|
45
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
|
46
|
+
spec.add_dependency "pubid-iso", "~> 0.1.7"
|
46
47
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.12.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -184,14 +184,28 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
187
|
+
version: 1.12.0
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
194
|
+
version: 1.12.0
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: pubid-iso
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: 0.1.7
|
202
|
+
type: :runtime
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: 0.1.7
|
195
209
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
196
210
|
model'
|
197
211
|
email:
|
@@ -230,7 +244,9 @@ files:
|
|
230
244
|
- bin/ruby-rewrite
|
231
245
|
- bin/safe_yaml
|
232
246
|
- bin/setup
|
247
|
+
- bin/thor
|
233
248
|
- lib/relaton_iso.rb
|
249
|
+
- lib/relaton_iso/document_identifier.rb
|
234
250
|
- lib/relaton_iso/hit.rb
|
235
251
|
- lib/relaton_iso/hit_collection.rb
|
236
252
|
- lib/relaton_iso/iso_bibliography.rb
|