relaton-iso 1.11.1 → 1.12.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -24
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +27 -0
- data/lib/relaton_iso/hit.rb +8 -3
- data/lib/relaton_iso/hit_collection.rb +4 -8
- data/lib/relaton_iso/iso_bibliography.rb +102 -130
- data/lib/relaton_iso/scrapper.rb +55 -68
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +2 -0
- data/relaton_iso.gemspec +2 -4
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f7c1d269d268e8bb7f9bc7b19b6e5466c9fb88f8a9db41f3944207535ca2edd
|
4
|
+
data.tar.gz: 4a6545be437af6c6326fed4e231c387a0a5cc7c04168fa1098c9fffc78701b29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3be88b28acc84c9877db94ef46e38488ea63b15ca2c19cd694c69d17f0e1b5c0b003a95929673d2e8a70cb69e91faef575506dab409d0c4b5e61f4dd046eb858
|
7
|
+
data.tar.gz: 209c65c8d600a34566999600cc0fbb0b06564603821c00045a09e0e00ecd2f6abf79c59774142e4dc1474dcb08b2604aeb5f1874a25b9f29e0d0dd9b420fbd4b
|
data/.github/workflows/rake.yml
CHANGED
@@ -10,27 +10,4 @@ on:
|
|
10
10
|
|
11
11
|
jobs:
|
12
12
|
rake:
|
13
|
-
|
14
|
-
runs-on: ${{ matrix.os }}
|
15
|
-
continue-on-error: ${{ matrix.experimental }}
|
16
|
-
strategy:
|
17
|
-
fail-fast: false
|
18
|
-
matrix:
|
19
|
-
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
|
-
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
-
experimental: [ false ]
|
22
|
-
steps:
|
23
|
-
- uses: actions/checkout@v2
|
24
|
-
with:
|
25
|
-
submodules: true
|
26
|
-
|
27
|
-
# https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
|
28
|
-
- if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
|
29
|
-
run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
|
30
|
-
|
31
|
-
- uses: ruby/setup-ruby@v1
|
32
|
-
with:
|
33
|
-
ruby-version: ${{ matrix.ruby }}
|
34
|
-
bundler-cache: true
|
35
|
-
|
36
|
-
- run: bundle exec rake
|
13
|
+
uses: relaton/support/.github/workflows/rake.yml@master
|
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module RelatonIso
|
2
|
+
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
+
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
5
|
+
if @all_parts
|
6
|
+
if type == "URN"
|
7
|
+
return "#{@id.urn}:ser"
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
13
|
+
end
|
14
|
+
|
15
|
+
def remove_part
|
16
|
+
@id.part = nil
|
17
|
+
end
|
18
|
+
|
19
|
+
def remove_date
|
20
|
+
@id.year = nil
|
21
|
+
end
|
22
|
+
|
23
|
+
def all_parts
|
24
|
+
@all_parts = true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -4,13 +4,13 @@ module RelatonIso
|
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
6
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
7
|
-
attr_writer :fetch
|
7
|
+
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
12
|
def fetch(lang = nil)
|
13
|
-
@fetch ||= Scrapper.parse_page
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -23,5 +23,10 @@ module RelatonIso
|
|
23
23
|
else 4
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# @return [Pubid::Iso::Identifier]
|
28
|
+
def pubid
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
|
+
end
|
26
31
|
end
|
27
32
|
end
|
@@ -11,25 +11,21 @@ module RelatonIso
|
|
11
11
|
# @param text [String] reference to search
|
12
12
|
def initialize(text)
|
13
13
|
super
|
14
|
-
@array = text.match?(/^ISO\
|
14
|
+
@array = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
15
15
|
end
|
16
16
|
|
17
17
|
# @param lang [String, NilClass]
|
18
18
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
19
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
20
20
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
|
-
hit = @array.min_by
|
22
|
-
IsoBibliography.ref_components(h.hit[:title])[1].to_i
|
23
|
-
end
|
21
|
+
hit = @array.min_by { |h| h.pubid.part }
|
24
22
|
return @array.first.fetch lang unless hit
|
25
23
|
|
26
|
-
bibitem = hit.fetch
|
24
|
+
bibitem = hit.fetch(lang)
|
27
25
|
all_parts_item = bibitem.to_all_parts
|
28
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
29
|
-
%r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
|
30
|
-
(?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
|
31
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
32
|
-
formattedref: RelatonBib::FormattedRef.new(content:
|
28
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
|
33
29
|
)
|
34
30
|
all_parts_item.relation << RelatonBib::DocumentRelation.new(
|
35
31
|
type: "instance", bibitem: isobib,
|
@@ -27,180 +27,152 @@ module RelatonIso
|
|
27
27
|
# @option opts [Boolean] :keep_year if undated reference should return
|
28
28
|
# actual reference with year
|
29
29
|
#
|
30
|
-
# @return [
|
30
|
+
# @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference
|
31
31
|
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
32
32
|
code = ref.gsub(/\u2013/, "-")
|
33
|
-
|
34
|
-
|
33
|
+
|
34
|
+
# parse "all parts" request
|
35
35
|
code.sub! " (all parts)", ""
|
36
36
|
opts[:all_parts] ||= $~ && opts[:all_parts].nil?
|
37
|
-
# opts[:keep_year] ||= opts[:keep_year].nil?
|
38
|
-
# code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
|
39
|
-
# if %r[^ISO/IEC DIR].match? code
|
40
|
-
# return RelatonIec::IecBibliography.get(code, year, opts)
|
41
|
-
# end
|
42
37
|
|
43
|
-
|
44
|
-
|
38
|
+
query_pubid = Pubid::Iso::Identifier.parse(code)
|
39
|
+
query_pubid.year = year if year
|
40
|
+
|
41
|
+
hits = isobib_search_filter(query_pubid, opts)
|
45
42
|
|
46
|
-
|
43
|
+
# return only first one if not all_parts
|
44
|
+
ret = if !opts[:all_parts] || hits.size == 1
|
45
|
+
hits.any? && hits.first.fetch(opts[:lang])
|
46
|
+
else
|
47
|
+
hits.to_all_parts(opts[:lang])
|
48
|
+
end
|
49
|
+
|
50
|
+
if ret
|
51
|
+
warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
|
52
|
+
else
|
53
|
+
return fetch_ref_err(query_pubid, query_pubid.year)
|
54
|
+
end
|
55
|
+
|
56
|
+
if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
|
47
57
|
ret
|
48
58
|
else
|
49
59
|
ret.to_most_recent_reference
|
50
60
|
end
|
51
61
|
end
|
52
62
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
63
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
64
|
+
# @param pubid [Pubid::Iso::Identifier]
|
65
|
+
# @param all_parts [Boolean] match with any parts when true
|
66
|
+
# @return [Boolean]
|
67
|
+
def matches_parts?(query_pubid, pubid, all_parts: false)
|
68
|
+
if all_parts
|
69
|
+
# match only with documents with part number
|
70
|
+
!pubid.part.nil?
|
71
|
+
else
|
72
|
+
query_pubid.part == pubid.part
|
73
|
+
end
|
61
74
|
end
|
62
75
|
|
63
|
-
|
76
|
+
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
|
77
|
+
query_pubid.publisher == pubid.publisher &&
|
78
|
+
query_pubid.number == pubid.number &&
|
79
|
+
query_pubid.copublisher == pubid.copublisher &&
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
|
+
end
|
64
83
|
|
65
|
-
#
|
84
|
+
# @param hit_collection [RelatonIso::HitCollection]
|
85
|
+
# @param year [String]
|
86
|
+
# @return [RelatonIso::HitCollection]
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
|
+
missed_years = []
|
66
89
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
90
|
+
# filter by year
|
91
|
+
hits = hit_collection.select do |hit|
|
92
|
+
if hit.pubid.year == year
|
93
|
+
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
97
|
+
else
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
102
|
+
false
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
if hits.empty? && !missed_years.empty?
|
72
107
|
warn "[relaton-iso] (There was no match for #{year}, though there "\
|
73
108
|
"were matches found for #{missed_years.join(', ')}.)"
|
74
109
|
end
|
75
|
-
|
110
|
+
hits
|
111
|
+
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
116
|
+
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
117
|
+
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
118
|
+
"The code must be exactly like it is on the standards website."
|
119
|
+
if /\d-\d/.match? query_pubid.to_s
|
76
120
|
warn "[relaton-iso] The provided document part may not exist, "\
|
77
121
|
"or the document may no longer be published in parts."
|
78
122
|
else
|
79
123
|
warn "[relaton-iso] If you wanted to cite all document parts for "\
|
80
|
-
"the reference, use \"#{
|
124
|
+
"the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
|
81
125
|
"is not a standard, use its document type abbreviation "\
|
82
126
|
"(TS, TR, PAS, Guide)."
|
83
127
|
end
|
84
128
|
nil
|
85
129
|
end
|
86
130
|
|
87
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
88
|
-
|
89
131
|
# Search for hits. If no found then trying missed stages and ISO/IEC.
|
90
132
|
#
|
91
|
-
# @param
|
133
|
+
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
92
134
|
# @param opts [Hash]
|
93
135
|
# @return [Array<RelatonIso::Hit>]
|
94
|
-
def isobib_search_filter(
|
95
|
-
|
96
|
-
warn "[relaton-iso] (\"#{
|
97
|
-
|
98
|
-
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
137
|
+
query_pubid.part = nil if opts[:all_parts]
|
138
|
+
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
139
|
+
# fetch hits collection
|
140
|
+
hit_collection = search(query_pubid.to_s(with_date: false))
|
141
|
+
# filter only matching hits
|
142
|
+
res = filter_hits hit_collection, query_pubid,
|
143
|
+
all_parts: opts[:all_parts]
|
99
144
|
return res unless res.empty?
|
100
145
|
|
101
|
-
#
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
106
|
-
end
|
107
|
-
return res unless res.empty?
|
108
|
-
when %r{^\w+\s\d+} # code like ISO 123
|
109
|
-
res = try_stages(result, opts) do |st|
|
110
|
-
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
111
|
-
end
|
112
|
-
return res unless res.empty?
|
113
|
-
end
|
146
|
+
# lookup for documents with stages when no match without stage
|
147
|
+
res = filter_hits hit_collection, query_pubid,
|
148
|
+
all_parts: opts[:all_parts], any_types_stages: true
|
149
|
+
return res unless res.empty?
|
114
150
|
|
115
|
-
|
151
|
+
# TODO: do this at pubid-iso
|
152
|
+
if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
|
116
153
|
warn "[relaton-iso] Attempting ISO/IEC retrieval"
|
117
|
-
|
118
|
-
res =
|
119
|
-
end
|
120
|
-
res
|
121
|
-
end
|
122
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
123
|
-
|
124
|
-
def remove_part(ref, all_parts)
|
125
|
-
return ref unless all_parts
|
126
|
-
|
127
|
-
ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
|
128
|
-
end
|
129
|
-
|
130
|
-
# @param result [RelatonIso::HitCollection]
|
131
|
-
# @param opts [Hash]
|
132
|
-
# @return [RelatonIso::HitCollection]
|
133
|
-
def try_stages(result, opts)
|
134
|
-
res = nil
|
135
|
-
%w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
|
136
|
-
c = yield st
|
137
|
-
res = search_code result, c, opts
|
138
|
-
return res unless res.empty?
|
154
|
+
query_pubid.copublisher = "IEC"
|
155
|
+
res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
|
139
156
|
end
|
140
157
|
res
|
141
158
|
end
|
142
159
|
|
143
|
-
# @param
|
144
|
-
# @param
|
145
|
-
# @param
|
160
|
+
# @param hits [RelatonIso::HitCollection]
|
161
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
162
|
+
# @param all_parts [Boolean]
|
163
|
+
# @param any_stages [Boolean]
|
146
164
|
# @return [RelatonIso::HitCollection]
|
147
|
-
def
|
148
|
-
|
149
|
-
result.select do |i|
|
150
|
-
|
151
|
-
|
152
|
-
|
165
|
+
def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
166
|
+
# filter out
|
167
|
+
result = hit_collection.select do |i|
|
168
|
+
hit_pubid = i.pubid
|
169
|
+
matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
|
170
|
+
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
171
|
+
query_pubid.corrigendums == hit_pubid.corrigendums &&
|
172
|
+
query_pubid.amendments == hit_pubid.amendments
|
153
173
|
end
|
154
|
-
end
|
155
|
-
|
156
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
157
174
|
|
158
|
-
|
159
|
-
# and return the first result that matches the code, matches the year
|
160
|
-
# (if provided), and which # has a title (amendments do not).
|
161
|
-
# Only expects the first page of results to be populated.
|
162
|
-
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
163
|
-
# If no match, returns any years which caused mismatch, for error
|
164
|
-
# reporting
|
165
|
-
def isobib_results_filter(result, year, opts)
|
166
|
-
missed_years = []
|
167
|
-
hits = result.reduce!([]) do |hts, h|
|
168
|
-
iyear = publish_year h.hit[:title]
|
169
|
-
if !year || iyear == year
|
170
|
-
hts << h
|
171
|
-
else
|
172
|
-
missed_years << iyear
|
173
|
-
hts
|
174
|
-
end
|
175
|
-
end
|
176
|
-
return { years: missed_years } unless hits.any?
|
177
|
-
|
178
|
-
if !opts[:all_parts] || hits.size == 1
|
179
|
-
return { ret: hits.first.fetch(opts[:lang]) }
|
180
|
-
end
|
181
|
-
|
182
|
-
{ ret: hits.to_all_parts(opts[:lang]) }
|
183
|
-
end
|
184
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
185
|
-
|
186
|
-
def publish_year(ref)
|
187
|
-
%r{:(?<year>\d{4})(?!.*:\d{4})} =~ ref
|
188
|
-
year
|
189
|
-
end
|
190
|
-
|
191
|
-
# @param code [String]
|
192
|
-
# @param year [String, NilClass]
|
193
|
-
# @param opts [Hash]
|
194
|
-
def isobib_get(code, year, opts)
|
195
|
-
# return iev(code) if /^IEC 60050-/.match code
|
196
|
-
result = isobib_search_filter(code, opts) || return
|
197
|
-
ret = isobib_results_filter(result, year, opts)
|
198
|
-
if ret[:ret]
|
199
|
-
warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
200
|
-
ret[:ret]
|
201
|
-
else
|
202
|
-
fetch_ref_err(code, year, ret[:years])
|
203
|
-
end
|
175
|
+
query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
|
204
176
|
end
|
205
177
|
end
|
206
178
|
end
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,43 +50,61 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
|
-
# @return [
|
56
|
-
def parse_page(
|
55
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
|
-
|
59
|
+
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
60
61
|
|
61
62
|
# Fetch edition.
|
62
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
63
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
64
66
|
|
65
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
66
68
|
|
67
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
68
70
|
fetched: Date.today.to_s,
|
69
|
-
docid:
|
70
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
71
73
|
edition: edition,
|
72
74
|
language: langs.map { |l| l[:lang] },
|
73
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
74
76
|
title: titles,
|
75
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
76
78
|
docstatus: fetch_status(doc),
|
77
79
|
ics: fetch_ics(doc),
|
78
|
-
date: fetch_dates(doc,
|
79
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
80
82
|
editorialgroup: fetch_workgroup(doc),
|
81
83
|
abstract: abstract,
|
82
84
|
copyright: fetch_copyright(doc),
|
83
85
|
link: fetch_link(doc, url),
|
84
86
|
relation: fetch_relations(doc),
|
85
87
|
place: ["Geneva"],
|
86
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
87
89
|
)
|
88
90
|
end
|
89
91
|
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
98
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
102
|
+
[
|
103
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
104
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
105
|
+
]
|
106
|
+
end
|
107
|
+
|
90
108
|
private
|
91
109
|
|
92
110
|
# Fetch titles and abstracts.
|
@@ -168,60 +186,29 @@ module RelatonIso
|
|
168
186
|
end
|
169
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
170
188
|
|
171
|
-
#
|
172
|
-
#
|
173
|
-
#
|
174
|
-
# @param
|
175
|
-
#
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
RelatonBib::DocumentIdentifier.new(
|
181
|
-
id: fetch_urn(doc, pubid, edition, langs), type: "URN",
|
182
|
-
),
|
183
|
-
]
|
184
|
-
end
|
185
|
-
|
186
|
-
# @param doc [Nokogiri:HTML::Document]
|
187
|
-
# @param pubid [String]
|
188
|
-
# @param edition [String]
|
189
|
-
# @param langs [Array<Hash>]
|
190
|
-
# @returnt [String]
|
191
|
-
def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
192
|
-
orig = pubid.split.first.downcase.split("/").join "-"
|
193
|
-
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
|
194
|
-
_, part, _year, corr, = IsoBibliography.ref_components pubid
|
195
|
-
urn = "urn:iso:std:#{orig}"
|
196
|
-
urn += ":#{type.downcase}" if type
|
197
|
-
urn += ":#{fetch_docnumber(doc)}"
|
198
|
-
urn += ":-#{part}" if part
|
199
|
-
urn += ":stage-#{stage_code(doc)}"
|
200
|
-
urn += ":ed-#{edition}" if edition
|
201
|
-
if corr
|
202
|
-
corrparts = corr.split
|
203
|
-
urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
|
204
|
-
end
|
205
|
-
urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
|
206
|
-
urn
|
207
|
-
end
|
208
|
-
|
209
|
-
def fetch_docnumber(doc)
|
210
|
-
item_ref(doc)&.match(/\d+/)&.to_s
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
# @param [Pubid::Iso] pubid
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
211
198
|
end
|
212
199
|
|
213
|
-
#
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
223
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
224
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
225
212
|
)
|
226
213
|
end
|
227
214
|
|
@@ -251,7 +238,7 @@ module RelatonIso
|
|
251
238
|
# Fetch workgroup.
|
252
239
|
# @param doc [Nokogiri::HTML::Document]
|
253
240
|
# @return [Hash]
|
254
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
255
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
256
243
|
# wg_url = DOMAIN + wg_link['href']
|
257
244
|
workgroup = wg_link.text.split "/"
|
@@ -275,6 +262,7 @@ module RelatonIso
|
|
275
262
|
# @param doc [Nokogiri::HTML::Document]
|
276
263
|
# @return [Array<Hash>]
|
277
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
278
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
279
267
|
r_type = r.at("h4", "h5").text
|
280
268
|
date = []
|
@@ -286,14 +274,13 @@ module RelatonIso
|
|
286
274
|
"updates"
|
287
275
|
else r_type
|
288
276
|
end
|
289
|
-
if
|
277
|
+
if types.include?(type) then a
|
290
278
|
else
|
291
279
|
a + r.css("a").map do |id|
|
292
|
-
|
293
|
-
|
294
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
295
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
296
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
297
284
|
)
|
298
285
|
{ type: type, bibitem: bibitem }
|
299
286
|
end
|
@@ -308,7 +295,7 @@ module RelatonIso
|
|
308
295
|
def fetch_type(ref)
|
309
296
|
%r{
|
310
297
|
^(?<prefix>ISO|IWA|IEC)
|
311
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
312
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
313
300
|
}x =~ ref
|
314
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -27,7 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
28
28
|
|
29
29
|
spec.add_development_dependency "byebug"
|
30
|
-
# spec.add_development_dependency "debase"
|
31
30
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
32
31
|
spec.add_development_dependency "pry-byebug"
|
33
32
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -35,12 +34,11 @@ Gem::Specification.new do |spec|
|
|
35
34
|
spec.add_development_dependency "rubocop"
|
36
35
|
spec.add_development_dependency "rubocop-performance"
|
37
36
|
spec.add_development_dependency "rubocop-rails"
|
38
|
-
# spec.add_development_dependency "ruby-debug-ide"
|
39
37
|
spec.add_development_dependency "simplecov"
|
40
38
|
spec.add_development_dependency "vcr"
|
41
39
|
spec.add_development_dependency "webmock"
|
42
40
|
|
43
|
-
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
44
41
|
spec.add_dependency "algolia"
|
45
|
-
spec.add_dependency "
|
42
|
+
spec.add_dependency "pubid-iso", "~> 0.1.8"
|
43
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
|
46
44
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.12.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -178,20 +178,34 @@ dependencies:
|
|
178
178
|
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: pubid-iso
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: 0.1.8
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: 0.1.8
|
181
195
|
- !ruby/object:Gem::Dependency
|
182
196
|
name: relaton-iso-bib
|
183
197
|
requirement: !ruby/object:Gem::Requirement
|
184
198
|
requirements:
|
185
199
|
- - "~>"
|
186
200
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
201
|
+
version: 1.12.0
|
188
202
|
type: :runtime
|
189
203
|
prerelease: false
|
190
204
|
version_requirements: !ruby/object:Gem::Requirement
|
191
205
|
requirements:
|
192
206
|
- - "~>"
|
193
207
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
208
|
+
version: 1.12.0
|
195
209
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
196
210
|
model'
|
197
211
|
email:
|
@@ -230,7 +244,9 @@ files:
|
|
230
244
|
- bin/ruby-rewrite
|
231
245
|
- bin/safe_yaml
|
232
246
|
- bin/setup
|
247
|
+
- bin/thor
|
233
248
|
- lib/relaton_iso.rb
|
249
|
+
- lib/relaton_iso/document_identifier.rb
|
234
250
|
- lib/relaton_iso/hit.rb
|
235
251
|
- lib/relaton_iso/hit_collection.rb
|
236
252
|
- lib/relaton_iso/iso_bibliography.rb
|