relaton-iso 1.11.1 → 1.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -24
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +27 -0
- data/lib/relaton_iso/hit.rb +8 -3
- data/lib/relaton_iso/hit_collection.rb +4 -8
- data/lib/relaton_iso/iso_bibliography.rb +102 -130
- data/lib/relaton_iso/scrapper.rb +55 -68
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +2 -0
- data/relaton_iso.gemspec +2 -4
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f7c1d269d268e8bb7f9bc7b19b6e5466c9fb88f8a9db41f3944207535ca2edd
|
4
|
+
data.tar.gz: 4a6545be437af6c6326fed4e231c387a0a5cc7c04168fa1098c9fffc78701b29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3be88b28acc84c9877db94ef46e38488ea63b15ca2c19cd694c69d17f0e1b5c0b003a95929673d2e8a70cb69e91faef575506dab409d0c4b5e61f4dd046eb858
|
7
|
+
data.tar.gz: 209c65c8d600a34566999600cc0fbb0b06564603821c00045a09e0e00ecd2f6abf79c59774142e4dc1474dcb08b2604aeb5f1874a25b9f29e0d0dd9b420fbd4b
|
data/.github/workflows/rake.yml
CHANGED
@@ -10,27 +10,4 @@ on:
|
|
10
10
|
|
11
11
|
jobs:
|
12
12
|
rake:
|
13
|
-
|
14
|
-
runs-on: ${{ matrix.os }}
|
15
|
-
continue-on-error: ${{ matrix.experimental }}
|
16
|
-
strategy:
|
17
|
-
fail-fast: false
|
18
|
-
matrix:
|
19
|
-
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
|
-
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
-
experimental: [ false ]
|
22
|
-
steps:
|
23
|
-
- uses: actions/checkout@v2
|
24
|
-
with:
|
25
|
-
submodules: true
|
26
|
-
|
27
|
-
# https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
|
28
|
-
- if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
|
29
|
-
run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
|
30
|
-
|
31
|
-
- uses: ruby/setup-ruby@v1
|
32
|
-
with:
|
33
|
-
ruby-version: ${{ matrix.ruby }}
|
34
|
-
bundler-cache: true
|
35
|
-
|
36
|
-
- run: bundle exec rake
|
13
|
+
uses: relaton/support/.github/workflows/rake.yml@master
|
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module RelatonIso
|
2
|
+
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
+
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
5
|
+
if @all_parts
|
6
|
+
if type == "URN"
|
7
|
+
return "#{@id.urn}:ser"
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
13
|
+
end
|
14
|
+
|
15
|
+
def remove_part
|
16
|
+
@id.part = nil
|
17
|
+
end
|
18
|
+
|
19
|
+
def remove_date
|
20
|
+
@id.year = nil
|
21
|
+
end
|
22
|
+
|
23
|
+
def all_parts
|
24
|
+
@all_parts = true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -4,13 +4,13 @@ module RelatonIso
|
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
6
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
7
|
-
attr_writer :fetch
|
7
|
+
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
12
|
def fetch(lang = nil)
|
13
|
-
@fetch ||= Scrapper.parse_page
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -23,5 +23,10 @@ module RelatonIso
|
|
23
23
|
else 4
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# @return [Pubid::Iso::Identifier]
|
28
|
+
def pubid
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
|
+
end
|
26
31
|
end
|
27
32
|
end
|
@@ -11,25 +11,21 @@ module RelatonIso
|
|
11
11
|
# @param text [String] reference to search
|
12
12
|
def initialize(text)
|
13
13
|
super
|
14
|
-
@array = text.match?(/^ISO\
|
14
|
+
@array = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
15
15
|
end
|
16
16
|
|
17
17
|
# @param lang [String, NilClass]
|
18
18
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
19
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
20
20
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
|
-
hit = @array.min_by
|
22
|
-
IsoBibliography.ref_components(h.hit[:title])[1].to_i
|
23
|
-
end
|
21
|
+
hit = @array.min_by { |h| h.pubid.part }
|
24
22
|
return @array.first.fetch lang unless hit
|
25
23
|
|
26
|
-
bibitem = hit.fetch
|
24
|
+
bibitem = hit.fetch(lang)
|
27
25
|
all_parts_item = bibitem.to_all_parts
|
28
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
29
|
-
%r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
|
30
|
-
(?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
|
31
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
32
|
-
formattedref: RelatonBib::FormattedRef.new(content:
|
28
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
|
33
29
|
)
|
34
30
|
all_parts_item.relation << RelatonBib::DocumentRelation.new(
|
35
31
|
type: "instance", bibitem: isobib,
|
@@ -27,180 +27,152 @@ module RelatonIso
|
|
27
27
|
# @option opts [Boolean] :keep_year if undated reference should return
|
28
28
|
# actual reference with year
|
29
29
|
#
|
30
|
-
# @return [
|
30
|
+
# @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item for the reference, or nil if no match was found
|
31
31
|
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
32
32
|
code = ref.gsub(/\u2013/, "-")
|
33
|
-
|
34
|
-
|
33
|
+
|
34
|
+
# parse "all parts" request
|
35
35
|
code.sub! " (all parts)", ""
|
36
36
|
opts[:all_parts] ||= $~ && opts[:all_parts].nil?
|
37
|
-
# opts[:keep_year] ||= opts[:keep_year].nil?
|
38
|
-
# code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
|
39
|
-
# if %r[^ISO/IEC DIR].match? code
|
40
|
-
# return RelatonIec::IecBibliography.get(code, year, opts)
|
41
|
-
# end
|
42
37
|
|
43
|
-
|
44
|
-
|
38
|
+
query_pubid = Pubid::Iso::Identifier.parse(code)
|
39
|
+
query_pubid.year = year if year
|
40
|
+
|
41
|
+
hits = isobib_search_filter(query_pubid, opts)
|
45
42
|
|
46
|
-
|
43
|
+
# return only first one if not all_parts
|
44
|
+
ret = if !opts[:all_parts] || hits.size == 1
|
45
|
+
hits.any? && hits.first.fetch(opts[:lang])
|
46
|
+
else
|
47
|
+
hits.to_all_parts(opts[:lang])
|
48
|
+
end
|
49
|
+
|
50
|
+
if ret
|
51
|
+
warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
|
52
|
+
else
|
53
|
+
return fetch_ref_err(query_pubid, query_pubid.year)
|
54
|
+
end
|
55
|
+
|
56
|
+
if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
|
47
57
|
ret
|
48
58
|
else
|
49
59
|
ret.to_most_recent_reference
|
50
60
|
end
|
51
61
|
end
|
52
62
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
63
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
64
|
+
# @param pubid [Pubid::Iso::Identifier]
|
65
|
+
# @param all_parts [Boolean] match with any parts when true
|
66
|
+
# @return [Boolean]
|
67
|
+
def matches_parts?(query_pubid, pubid, all_parts: false)
|
68
|
+
if all_parts
|
69
|
+
# match only with documents with part number
|
70
|
+
!pubid.part.nil?
|
71
|
+
else
|
72
|
+
query_pubid.part == pubid.part
|
73
|
+
end
|
61
74
|
end
|
62
75
|
|
63
|
-
|
76
|
+
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
77
|
+
query_pubid.publisher == pubid.publisher &&
|
78
|
+
query_pubid.number == pubid.number &&
|
79
|
+
query_pubid.copublisher == pubid.copublisher &&
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
|
+
end
|
64
83
|
|
65
|
-
#
|
84
|
+
# @param hit_collection [RelatonIso::HitCollection]
|
85
|
+
# @param year [String]
|
86
|
+
# @return [RelatonIso::HitCollection]
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
|
+
missed_years = []
|
66
89
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
90
|
+
# filter by year
|
91
|
+
hits = hit_collection.select do |hit|
|
92
|
+
if hit.pubid.year == year
|
93
|
+
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
97
|
+
else
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
102
|
+
false
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
if hits.empty? && !missed_years.empty?
|
72
107
|
warn "[relaton-iso] (There was no match for #{year}, though there "\
|
73
108
|
"were matches found for #{missed_years.join(', ')}.)"
|
74
109
|
end
|
75
|
-
|
110
|
+
hits
|
111
|
+
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
116
|
+
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
117
|
+
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
118
|
+
"The code must be exactly like it is on the standards website."
|
119
|
+
if /\d-\d/.match? query_pubid.to_s
|
76
120
|
warn "[relaton-iso] The provided document part may not exist, "\
|
77
121
|
"or the document may no longer be published in parts."
|
78
122
|
else
|
79
123
|
warn "[relaton-iso] If you wanted to cite all document parts for "\
|
80
|
-
"the reference, use \"#{
|
124
|
+
"the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
|
81
125
|
"is not a standard, use its document type abbreviation "\
|
82
126
|
"(TS, TR, PAS, Guide)."
|
83
127
|
end
|
84
128
|
nil
|
85
129
|
end
|
86
130
|
|
87
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
88
|
-
|
89
131
|
# Search for hits. If none are found, try missing stages and then ISO/IEC.
|
90
132
|
#
|
91
|
-
# @param
|
133
|
+
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
92
134
|
# @param opts [Hash]
|
93
135
|
# @return [Array<RelatonIso::Hit>]
|
94
|
-
def isobib_search_filter(
|
95
|
-
|
96
|
-
warn "[relaton-iso] (\"#{
|
97
|
-
|
98
|
-
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
137
|
+
query_pubid.part = nil if opts[:all_parts]
|
138
|
+
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
139
|
+
# fetch hits collection
|
140
|
+
hit_collection = search(query_pubid.to_s(with_date: false))
|
141
|
+
# filter only matching hits
|
142
|
+
res = filter_hits hit_collection, query_pubid,
|
143
|
+
all_parts: opts[:all_parts]
|
99
144
|
return res unless res.empty?
|
100
145
|
|
101
|
-
#
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
106
|
-
end
|
107
|
-
return res unless res.empty?
|
108
|
-
when %r{^\w+\s\d+} # code like ISO 123
|
109
|
-
res = try_stages(result, opts) do |st|
|
110
|
-
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
111
|
-
end
|
112
|
-
return res unless res.empty?
|
113
|
-
end
|
146
|
+
# lookup for documents with stages when no match without stage
|
147
|
+
res = filter_hits hit_collection, query_pubid,
|
148
|
+
all_parts: opts[:all_parts], any_types_stages: true
|
149
|
+
return res unless res.empty?
|
114
150
|
|
115
|
-
|
151
|
+
# TODO: do this at pubid-iso
|
152
|
+
if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
|
116
153
|
warn "[relaton-iso] Attempting ISO/IEC retrieval"
|
117
|
-
|
118
|
-
res =
|
119
|
-
end
|
120
|
-
res
|
121
|
-
end
|
122
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
123
|
-
|
124
|
-
def remove_part(ref, all_parts)
|
125
|
-
return ref unless all_parts
|
126
|
-
|
127
|
-
ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
|
128
|
-
end
|
129
|
-
|
130
|
-
# @param result [RelatonIso::HitCollection]
|
131
|
-
# @param opts [Hash]
|
132
|
-
# @return [RelatonIso::HitCollection]
|
133
|
-
def try_stages(result, opts)
|
134
|
-
res = nil
|
135
|
-
%w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
|
136
|
-
c = yield st
|
137
|
-
res = search_code result, c, opts
|
138
|
-
return res unless res.empty?
|
154
|
+
query_pubid.copublisher = "IEC"
|
155
|
+
res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
|
139
156
|
end
|
140
157
|
res
|
141
158
|
end
|
142
159
|
|
143
|
-
# @param
|
144
|
-
# @param
|
145
|
-
# @param
|
160
|
+
# @param hit_collection [RelatonIso::HitCollection]
|
161
|
+
# @param query_pubid [Pubid::Iso::Identifier]
|
162
|
+
# @param all_parts [Boolean]
|
163
|
+
# @param any_types_stages [Boolean]
|
146
164
|
# @return [RelatonIso::HitCollection]
|
147
|
-
def
|
148
|
-
|
149
|
-
result.select do |i|
|
150
|
-
|
151
|
-
|
152
|
-
|
165
|
+
def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
166
|
+
# filter out
|
167
|
+
result = hit_collection.select do |i|
|
168
|
+
hit_pubid = i.pubid
|
169
|
+
matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
|
170
|
+
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
171
|
+
query_pubid.corrigendums == hit_pubid.corrigendums &&
|
172
|
+
query_pubid.amendments == hit_pubid.amendments
|
153
173
|
end
|
154
|
-
end
|
155
|
-
|
156
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
157
174
|
|
158
|
-
|
159
|
-
# and return the first result that matches the code, matches the year
|
160
|
-
# (if provided), and which # has a title (amendments do not).
|
161
|
-
# Only expects the first page of results to be populated.
|
162
|
-
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
163
|
-
# If no match, returns any years which caused mismatch, for error
|
164
|
-
# reporting
|
165
|
-
def isobib_results_filter(result, year, opts)
|
166
|
-
missed_years = []
|
167
|
-
hits = result.reduce!([]) do |hts, h|
|
168
|
-
iyear = publish_year h.hit[:title]
|
169
|
-
if !year || iyear == year
|
170
|
-
hts << h
|
171
|
-
else
|
172
|
-
missed_years << iyear
|
173
|
-
hts
|
174
|
-
end
|
175
|
-
end
|
176
|
-
return { years: missed_years } unless hits.any?
|
177
|
-
|
178
|
-
if !opts[:all_parts] || hits.size == 1
|
179
|
-
return { ret: hits.first.fetch(opts[:lang]) }
|
180
|
-
end
|
181
|
-
|
182
|
-
{ ret: hits.to_all_parts(opts[:lang]) }
|
183
|
-
end
|
184
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
185
|
-
|
186
|
-
def publish_year(ref)
|
187
|
-
%r{:(?<year>\d{4})(?!.*:\d{4})} =~ ref
|
188
|
-
year
|
189
|
-
end
|
190
|
-
|
191
|
-
# @param code [String]
|
192
|
-
# @param year [String, NilClass]
|
193
|
-
# @param opts [Hash]
|
194
|
-
def isobib_get(code, year, opts)
|
195
|
-
# return iev(code) if /^IEC 60050-/.match code
|
196
|
-
result = isobib_search_filter(code, opts) || return
|
197
|
-
ret = isobib_results_filter(result, year, opts)
|
198
|
-
if ret[:ret]
|
199
|
-
warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
200
|
-
ret[:ret]
|
201
|
-
else
|
202
|
-
fetch_ref_err(code, year, ret[:years])
|
203
|
-
end
|
175
|
+
query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
|
204
176
|
end
|
205
177
|
end
|
206
178
|
end
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,43 +50,61 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
|
-
# @return [
|
56
|
-
def parse_page(
|
55
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
|
-
|
59
|
+
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
60
61
|
|
61
62
|
# Fetch edition.
|
62
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
63
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
64
66
|
|
65
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
66
68
|
|
67
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
68
70
|
fetched: Date.today.to_s,
|
69
|
-
docid:
|
70
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
71
73
|
edition: edition,
|
72
74
|
language: langs.map { |l| l[:lang] },
|
73
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
74
76
|
title: titles,
|
75
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
76
78
|
docstatus: fetch_status(doc),
|
77
79
|
ics: fetch_ics(doc),
|
78
|
-
date: fetch_dates(doc,
|
79
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
80
82
|
editorialgroup: fetch_workgroup(doc),
|
81
83
|
abstract: abstract,
|
82
84
|
copyright: fetch_copyright(doc),
|
83
85
|
link: fetch_link(doc, url),
|
84
86
|
relation: fetch_relations(doc),
|
85
87
|
place: ["Geneva"],
|
86
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
87
89
|
)
|
88
90
|
end
|
89
91
|
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
98
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
102
|
+
[
|
103
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
104
|
+
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
105
|
+
]
|
106
|
+
end
|
107
|
+
|
90
108
|
private
|
91
109
|
|
92
110
|
# Fetch titles and abstracts.
|
@@ -168,60 +186,29 @@ module RelatonIso
|
|
168
186
|
end
|
169
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
170
188
|
|
171
|
-
#
|
172
|
-
#
|
173
|
-
#
|
174
|
-
# @param
|
175
|
-
#
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
RelatonBib::DocumentIdentifier.new(
|
181
|
-
id: fetch_urn(doc, pubid, edition, langs), type: "URN",
|
182
|
-
),
|
183
|
-
]
|
184
|
-
end
|
185
|
-
|
186
|
-
# @param doc [Nokogiri:HTML::Document]
|
187
|
-
# @param pubid [String]
|
188
|
-
# @param edition [String]
|
189
|
-
# @param langs [Array<Hash>]
|
190
|
-
# @returnt [String]
|
191
|
-
def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
192
|
-
orig = pubid.split.first.downcase.split("/").join "-"
|
193
|
-
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
|
194
|
-
_, part, _year, corr, = IsoBibliography.ref_components pubid
|
195
|
-
urn = "urn:iso:std:#{orig}"
|
196
|
-
urn += ":#{type.downcase}" if type
|
197
|
-
urn += ":#{fetch_docnumber(doc)}"
|
198
|
-
urn += ":-#{part}" if part
|
199
|
-
urn += ":stage-#{stage_code(doc)}"
|
200
|
-
urn += ":ed-#{edition}" if edition
|
201
|
-
if corr
|
202
|
-
corrparts = corr.split
|
203
|
-
urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
|
204
|
-
end
|
205
|
-
urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
|
206
|
-
urn
|
207
|
-
end
|
208
|
-
|
209
|
-
def fetch_docnumber(doc)
|
210
|
-
item_ref(doc)&.match(/\d+/)&.to_s
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
# @param pubid [Pubid::Iso::Identifier]
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
211
198
|
end
|
212
199
|
|
213
|
-
#
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
223
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
224
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
225
212
|
)
|
226
213
|
end
|
227
214
|
|
@@ -251,7 +238,7 @@ module RelatonIso
|
|
251
238
|
# Fetch workgroup.
|
252
239
|
# @param doc [Nokogiri::HTML::Document]
|
253
240
|
# @return [Hash]
|
254
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
255
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
256
243
|
# wg_url = DOMAIN + wg_link['href']
|
257
244
|
workgroup = wg_link.text.split "/"
|
@@ -275,6 +262,7 @@ module RelatonIso
|
|
275
262
|
# @param doc [Nokogiri::HTML::Document]
|
276
263
|
# @return [Array<Hash>]
|
277
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
278
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
279
267
|
r_type = r.at("h4", "h5").text
|
280
268
|
date = []
|
@@ -286,14 +274,13 @@ module RelatonIso
|
|
286
274
|
"updates"
|
287
275
|
else r_type
|
288
276
|
end
|
289
|
-
if
|
277
|
+
if types.include?(type) then a
|
290
278
|
else
|
291
279
|
a + r.css("a").map do |id|
|
292
|
-
|
293
|
-
|
294
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
295
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
296
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
297
284
|
)
|
298
285
|
{ type: type, bibitem: bibitem }
|
299
286
|
end
|
@@ -308,7 +295,7 @@ module RelatonIso
|
|
308
295
|
def fetch_type(ref)
|
309
296
|
%r{
|
310
297
|
^(?<prefix>ISO|IWA|IEC)
|
311
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
312
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
313
300
|
}x =~ ref
|
314
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -27,7 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
28
28
|
|
29
29
|
spec.add_development_dependency "byebug"
|
30
|
-
# spec.add_development_dependency "debase"
|
31
30
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
32
31
|
spec.add_development_dependency "pry-byebug"
|
33
32
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -35,12 +34,11 @@ Gem::Specification.new do |spec|
|
|
35
34
|
spec.add_development_dependency "rubocop"
|
36
35
|
spec.add_development_dependency "rubocop-performance"
|
37
36
|
spec.add_development_dependency "rubocop-rails"
|
38
|
-
# spec.add_development_dependency "ruby-debug-ide"
|
39
37
|
spec.add_development_dependency "simplecov"
|
40
38
|
spec.add_development_dependency "vcr"
|
41
39
|
spec.add_development_dependency "webmock"
|
42
40
|
|
43
|
-
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
44
41
|
spec.add_dependency "algolia"
|
45
|
-
spec.add_dependency "
|
42
|
+
spec.add_dependency "pubid-iso", "~> 0.1.8"
|
43
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
|
46
44
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.12.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -178,20 +178,34 @@ dependencies:
|
|
178
178
|
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: pubid-iso
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: 0.1.8
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: 0.1.8
|
181
195
|
- !ruby/object:Gem::Dependency
|
182
196
|
name: relaton-iso-bib
|
183
197
|
requirement: !ruby/object:Gem::Requirement
|
184
198
|
requirements:
|
185
199
|
- - "~>"
|
186
200
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
201
|
+
version: 1.12.0
|
188
202
|
type: :runtime
|
189
203
|
prerelease: false
|
190
204
|
version_requirements: !ruby/object:Gem::Requirement
|
191
205
|
requirements:
|
192
206
|
- - "~>"
|
193
207
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
208
|
+
version: 1.12.0
|
195
209
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
196
210
|
model'
|
197
211
|
email:
|
@@ -230,7 +244,9 @@ files:
|
|
230
244
|
- bin/ruby-rewrite
|
231
245
|
- bin/safe_yaml
|
232
246
|
- bin/setup
|
247
|
+
- bin/thor
|
233
248
|
- lib/relaton_iso.rb
|
249
|
+
- lib/relaton_iso/document_identifier.rb
|
234
250
|
- lib/relaton_iso/hit.rb
|
235
251
|
- lib/relaton_iso/hit_collection.rb
|
236
252
|
- lib/relaton_iso/iso_bibliography.rb
|