relaton-iso 1.11.0 → 1.12.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f7fa7c91589b331ac4748d70c790b814bd294b73c8c35c12a1e7a6a2fa54ca38
4
- data.tar.gz: 64ea8882a8c4ec26278f4bea0b8af25879b7ead65cc156117dadbeab8b606400
3
+ metadata.gz: 4479e38048aa0dfae8bcc85f1e9de03b5fe0561048b658ec47b3df8ca64794eb
4
+ data.tar.gz: c297ddc7b15d8186b85fbb7d4d3f84863d7df6e20ad243f0740364262fe43807
5
5
  SHA512:
6
- metadata.gz: 1c8716c7c2ddb6fb8644b528cbddccf2c900897326b1f3897946247886e21c25452f71783cc696ba44cba7a2247b11577085c71f780c91fb060f4d72748c1af4
7
- data.tar.gz: 1a91a1433ddf1312edb01e15348207573ed30f5d022b376757b2fda69ad8ae4be62090e726a8baede35e1ff3cdf95dcf5146bd863c6758e5dea4ccc047309dd8
6
+ metadata.gz: 6fd11d9fe01bd36052cf2762df6e8f728d361fbe23410dcb0229e95436a9f7909e51e51a8be0abc9f7bd269ffd10d642cb0aa7792dba76c0c174b606e319bf58
7
+ data.tar.gz: 060edbed6bb5b11033911db2200a4e19e98585c4133a1b4a09eaa79b582f2cf23b39f424b84d3110429ff1800247cec4f2b6522a82bbbdc45cb093e63a339bda
@@ -10,27 +10,4 @@ on:
10
10
 
11
11
  jobs:
12
12
  rake:
13
- name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
- runs-on: ${{ matrix.os }}
15
- continue-on-error: ${{ matrix.experimental }}
16
- strategy:
17
- fail-fast: false
18
- matrix:
19
- ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
- os: [ ubuntu-latest, windows-latest, macos-latest ]
21
- experimental: [ false ]
22
- steps:
23
- - uses: actions/checkout@v2
24
- with:
25
- submodules: true
26
-
27
- # https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
28
- - if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
29
- run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
30
-
31
- - uses: ruby/setup-ruby@v1
32
- with:
33
- ruby-version: ${{ matrix.ruby }}
34
- bundler-cache: true
35
-
36
- - run: bundle exec rake
13
+ uses: relaton/support/.github/workflows/rake.yml@master
data/Gemfile CHANGED
@@ -1,6 +1,3 @@
1
- Encoding.default_external = Encoding::UTF_8
2
- Encoding.default_internal = Encoding::UTF_8
3
-
4
1
  source "https://rubygems.org"
5
2
 
6
3
  git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
data/bin/thor ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'thor' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("thor", "thor")
@@ -0,0 +1,27 @@
1
+ module RelatonIso
2
+ class DocumentIdentifier < RelatonBib::DocumentIdentifier
3
+ def id
4
+ id_str = @id.to_s.sub(/\sED\d+/, "")
5
+ if @all_parts
6
+ if type == "URN"
7
+ return "#{@id.urn}:ser"
8
+ else
9
+ return "#{id_str} (all parts)"
10
+ end
11
+ end
12
+ type == "URN" ? @id.urn.to_s : id_str
13
+ end
14
+
15
+ def remove_part
16
+ @id.part = nil
17
+ end
18
+
19
+ def remove_date
20
+ @id.year = nil
21
+ end
22
+
23
+ def all_parts
24
+ @all_parts = true
25
+ end
26
+ end
27
+ end
@@ -4,13 +4,13 @@ module RelatonIso
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
6
  # @return [RelatonIsoBib::IsoBibliographicItem]
7
- attr_writer :fetch
7
+ attr_writer :fetch, :pubid
8
8
 
9
9
  # Parse page.
10
- # @param lang [String, NilClass]
10
+ # @param lang [String, nil]
11
11
  # @return [RelatonIso::IsoBibliographicItem]
12
12
  def fetch(lang = nil)
13
- @fetch ||= Scrapper.parse_page @hit, lang
13
+ @fetch ||= Scrapper.parse_page self, lang
14
14
  end
15
15
 
16
16
  # @return [Integer]
@@ -23,5 +23,10 @@ module RelatonIso
23
23
  else 4
24
24
  end
25
25
  end
26
+
27
+ # @return [Pubid::Iso::Identifier]
28
+ def pubid
29
+ @pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
30
+ end
26
31
  end
27
32
  end
@@ -11,25 +11,21 @@ module RelatonIso
11
11
  # @param text [String] reference to search
12
12
  def initialize(text)
13
13
  super
14
- @array = text.match?(/^ISO\sTC\s184\/SC\s?4/) ? fetch_github : fetch_iso
14
+ @array = text.match?(/^ISO\s(?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
15
15
  end
16
16
 
17
17
  # @param lang [String, NilClass]
18
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
19
19
  def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
20
20
  # parts = @array.reject { |h| h.hit["docPart"]&.empty? }
21
- hit = @array.min_by do |h|
22
- IsoBibliography.ref_components(h.hit[:title])[1].to_i
23
- end
21
+ hit = @array.min_by { |h| h.pubid.part }
24
22
  return @array.first.fetch lang unless hit
25
23
 
26
- bibitem = hit.fetch lang
24
+ bibitem = hit.fetch(lang)
27
25
  all_parts_item = bibitem.to_all_parts
28
26
  @array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
29
- %r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
30
- (?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
31
27
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
32
- formattedref: RelatonBib::FormattedRef.new(content: fr),
28
+ formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
33
29
  )
34
30
  all_parts_item.relation << RelatonBib::DocumentRelation.new(
35
31
  type: "instance", bibitem: isobib,
@@ -27,175 +27,152 @@ module RelatonIso
27
27
  # @option opts [Boolean] :keep_year if undated reference should return
28
28
  # actual reference with year
29
29
  #
30
- # @return [String] Relaton XML serialisation of reference
30
+ # @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item for the reference, or nil when no match is found
31
31
  def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
32
32
  code = ref.gsub(/\u2013/, "-")
33
- # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
34
- _, _part, year1, = ref_components ref
35
- year ||= year1
33
+
34
+ # parse "all parts" request
36
35
  code.sub! " (all parts)", ""
37
36
  opts[:all_parts] ||= $~ && opts[:all_parts].nil?
38
- # opts[:keep_year] ||= opts[:keep_year].nil?
39
- # code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
40
- # if %r[^ISO/IEC DIR].match? code
41
- # return RelatonIec::IecBibliography.get(code, year, opts)
42
- # end
43
37
 
44
- ret = isobib_get1(code, year, opts)
45
- return nil if ret.nil?
38
+ query_pubid = Pubid::Iso::Identifier.parse(code)
39
+ query_pubid.year = year if year
40
+
41
+ hits = isobib_search_filter(query_pubid, opts)
46
42
 
47
- if (year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
43
+ # return only first one if not all_parts
44
+ ret = if !opts[:all_parts] || hits.size == 1
45
+ hits.any? && hits.first.fetch(opts[:lang])
46
+ else
47
+ hits.to_all_parts(opts[:lang])
48
+ end
49
+
50
+ if ret
51
+ warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
52
+ else
53
+ return fetch_ref_err(query_pubid, query_pubid.year)
54
+ end
55
+
56
+ if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
48
57
  ret
49
58
  else
50
59
  ret.to_most_recent_reference
51
60
  end
52
61
  end
53
62
 
54
- def ref_components(ref)
55
- %r{
56
- ^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
57
- (?:-(?<part>[\w-]+))?
58
- (?::(?<year>\d{4}))?
59
- (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
60
- }x =~ ref
61
- [code&.strip, part, year, corr, coryear]
63
+ # @param query_pubid [Pubid::Iso::Identifier]
64
+ # @param pubid [Pubid::Iso::Identifier]
65
+ # @param all_parts [Boolean] match with any parts when true
66
+ # @return [Boolean]
67
+ def matches_parts?(query_pubid, pubid, all_parts: false)
68
+ if all_parts
69
+ # match only with documents with part number
70
+ !pubid.part.nil?
71
+ else
72
+ query_pubid.part == pubid.part
73
+ end
62
74
  end
63
75
 
64
- private
76
+ def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
77
+ query_pubid.publisher == pubid.publisher &&
78
+ query_pubid.number == pubid.number &&
79
+ query_pubid.copublisher == pubid.copublisher &&
80
+ ((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
81
+ ((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
82
+ end
65
83
 
66
- # rubocop:disable Metrics/MethodLength
84
+ # @param hit_collection [RelatonIso::HitCollection]
85
+ # @param year [String]
86
+ # @return [RelatonIso::HitCollection]
87
+ def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
88
+ missed_years = []
67
89
 
68
- def fetch_ref_err(code, year, missed_years)
69
- id = year ? "#{code}:#{year}" : code
70
- warn "[relaton-iso] WARNING: no match found online for #{id}. "\
71
- "The code must be exactly like it is on the standards website."
72
- unless missed_years.empty?
90
+ # filter by year
91
+ hits = hit_collection.select do |hit|
92
+ if hit.pubid.year == year
93
+ true
94
+ elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
95
+ hit.pubid.year = year
96
+ true
97
+ else
98
+ missed_year = hit.pubid.year || hit.hit[:year].to_s
99
+ if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
100
+ missed_years << missed_year
101
+ end
102
+ false
103
+ end
104
+ end
105
+
106
+ if hits.empty? && !missed_years.empty?
73
107
  warn "[relaton-iso] (There was no match for #{year}, though there "\
74
108
  "were matches found for #{missed_years.join(', ')}.)"
75
109
  end
76
- if /\d-\d/.match? code
110
+ hits
111
+ end
112
+
113
+ private
114
+
115
+ def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
116
+ id = year ? "#{query_pubid}:#{year}" : query_pubid
117
+ warn "[relaton-iso] WARNING: no match found online for #{id}. "\
118
+ "The code must be exactly like it is on the standards website."
119
+ if /\d-\d/.match? query_pubid.to_s
77
120
  warn "[relaton-iso] The provided document part may not exist, "\
78
121
  "or the document may no longer be published in parts."
79
122
  else
80
123
  warn "[relaton-iso] If you wanted to cite all document parts for "\
81
- "the reference, use \"#{code} (all parts)\".\nIf the document "\
124
+ "the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
82
125
  "is not a standard, use its document type abbreviation "\
83
126
  "(TS, TR, PAS, Guide)."
84
127
  end
85
128
  nil
86
129
  end
87
130
 
88
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
89
-
90
131
  # Search for hits. If no found then trying missed stages and ISO/IEC.
91
132
  #
92
- # @param code [String] reference without correction
133
+ # @param query_pubid [Pubid::Iso::Identifier] reference without correction
93
134
  # @param opts [Hash]
94
135
  # @return [Array<RelatonIso::Hit>]
95
- def isobib_search_filter(code, opts)
96
- ref = remove_part code, opts[:all_parts]
97
- warn "[relaton-iso] (\"#{code}\") fetching..."
98
- result = search(ref)
99
- res = search_code result, code, opts
136
+ def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
137
+ query_pubid.part = nil if opts[:all_parts]
138
+ warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
139
+ # fetch hits collection
140
+ hit_collection = search(query_pubid.to_s(with_date: false))
141
+ # filter only matching hits
142
+ res = filter_hits hit_collection, query_pubid,
143
+ all_parts: opts[:all_parts]
100
144
  return res unless res.empty?
101
145
 
102
- # try stages
103
- case code
104
- when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
105
- res = try_stages(result, opts) do |st|
106
- code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
107
- end
108
- return res unless res.empty?
109
- when %r{^\w+\s\d+} # code like ISO 123
110
- res = try_stages(result, opts) do |st|
111
- code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
112
- end
113
- return res unless res.empty?
114
- end
146
+ # look up documents of any type or stage when nothing matches the query without them
147
+ res = filter_hits hit_collection, query_pubid,
148
+ all_parts: opts[:all_parts], any_types_stages: true
149
+ return res unless res.empty?
115
150
 
116
- if %r{^ISO\s}.match? code # try ISO/IEC if ISO not found
151
+ # TODO: do this at pubid-iso
152
+ if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
117
153
  warn "[relaton-iso] Attempting ISO/IEC retrieval"
118
- c = code.sub "ISO", "ISO/IEC"
119
- res = search_code result, c, opts
154
+ query_pubid.copublisher = "IEC"
155
+ res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
120
156
  end
121
157
  res
122
158
  end
123
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
124
159
 
125
- def remove_part(ref, all_parts)
126
- return ref unless all_parts
127
-
128
- ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
129
- end
130
-
131
- # @param result [RelatonIso::HitCollection]
132
- # @param opts [Hash]
160
+ # @param hits [RelatonIso::HitCollection]
161
+ # @param query_pubid [Pubid::Iso::Identifier]
162
+ # @param all_parts [Boolean]
163
+ # @param any_types_stages [Boolean]
133
164
  # @return [RelatonIso::HitCollection]
134
- def try_stages(result, opts)
135
- res = nil
136
- %w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
137
- c = yield st
138
- res = search_code result, c, opts
139
- return res unless res.empty?
140
- end
141
- res
142
- end
143
-
144
- # @param result [RelatonIso::HitCollection]
145
- # @param code [String]
146
- # @param opts [Hash]
147
- # @return [RelatonIso::HitCollection]
148
- def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
149
- code1, part1, _, corr1, coryear1 = ref_components code
150
- result.select do |i|
151
- code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
152
- code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
153
- corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
154
- end
155
- end
156
-
157
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
158
-
159
- # Sort through the results from RelatonIso, fetching them three at a time,
160
- # and return the first result that matches the code, matches the year
161
- # (if provided), and which # has a title (amendments do not).
162
- # Only expects the first page of results to be populated.
163
- # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
164
- # If no match, returns any years which caused mismatch, for error
165
- # reporting
166
- def isobib_results_filter(result, year, opts)
167
- missed_years = []
168
- hits = result.reduce!([]) do |hts, h|
169
- if !year || (%r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year)
170
- hts << h
171
- else
172
- missed_years << iyear
173
- hts
174
- end
165
+ def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
166
+ # filter out
167
+ result = hit_collection.select do |i|
168
+ hit_pubid = i.pubid
169
+ matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
170
+ matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
171
+ query_pubid.corrigendum == hit_pubid.corrigendum &&
172
+ query_pubid.amendment == hit_pubid.amendment
175
173
  end
176
- return { years: missed_years } unless hits.any?
177
174
 
178
- if !opts[:all_parts] || hits.size == 1
179
- return { ret: hits.first.fetch(opts[:lang]) }
180
- end
181
-
182
- { ret: hits.to_all_parts(opts[:lang]) }
183
- end
184
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
185
-
186
- # @param code [String]
187
- # @param year [String, NilClass]
188
- # @param opts [Hash]
189
- def isobib_get1(code, year, opts)
190
- # return iev(code) if /^IEC 60050-/.match code
191
- result = isobib_search_filter(code, opts) || return
192
- ret = isobib_results_filter(result, year, opts)
193
- if ret[:ret]
194
- warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
195
- ret[:ret]
196
- else
197
- fetch_ref_err(code, year, ret[:years])
198
- end
175
+ query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
199
176
  end
200
177
  end
201
178
  end
@@ -50,43 +50,61 @@ module RelatonIso
50
50
 
51
51
  class << self
52
52
  # Parse page.
53
- # @param hit_data [Hash]
53
+ # @param hit [RelatonIso::Hit]
54
54
  # @param lang [String, NilClass]
55
- # @return [Hash]
56
- def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
55
+ # @return [RelatonIsoBib::IsoBibliographicItem]
56
+ def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
57
57
  # path = "/contents/data/standard#{hit_data['splitPath']}/"\
58
58
  # "#{hit_data['csnumber']}.html"
59
- doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
59
+
60
+ doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
60
61
 
61
62
  # Fetch edition.
62
63
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
63
64
  &.children&.last&.text&.match(/\d+/)&.to_s
65
+ hit.pubid.edition = edition if edition
64
66
 
65
67
  titles, abstract, langs = fetch_titles_abstract(doc, lang)
66
68
 
67
69
  RelatonIsoBib::IsoBibliographicItem.new(
68
70
  fetched: Date.today.to_s,
69
- docid: fetch_docid(doc, edition, langs),
70
- docnumber: fetch_docnumber(doc),
71
+ docid: fetch_relaton_docids(doc, hit.pubid),
72
+ docnumber: fetch_docnumber(hit.pubid),
71
73
  edition: edition,
72
74
  language: langs.map { |l| l[:lang] },
73
75
  script: langs.map { |l| script(l[:lang]) }.uniq,
74
76
  title: titles,
75
- doctype: fetch_type(hit_data[:title]),
77
+ doctype: fetch_type(hit.hit[:title]),
76
78
  docstatus: fetch_status(doc),
77
79
  ics: fetch_ics(doc),
78
- date: fetch_dates(doc, hit_data[:title]),
79
- contributor: fetch_contributors(hit_data[:title]),
80
+ date: fetch_dates(doc, hit.hit[:title]),
81
+ contributor: fetch_contributors(hit.hit[:title]),
80
82
  editorialgroup: fetch_workgroup(doc),
81
83
  abstract: abstract,
82
84
  copyright: fetch_copyright(doc),
83
85
  link: fetch_link(doc, url),
84
86
  relation: fetch_relations(doc),
85
87
  place: ["Geneva"],
86
- structuredidentifier: fetch_structuredidentifier(doc),
88
+ structuredidentifier: fetch_structuredidentifier(hit.pubid),
87
89
  )
88
90
  end
89
91
 
92
+ #
93
+ # Create document ids.
94
+ #
95
+ # @param doc [Nokogiri::HTML::Document] document
96
+ # @param pubid [Pubid::Iso::Identifier] pubid
97
+ #
98
+ # @return [Array<RelatonBib::DocumentIdentifier>]
99
+ #
100
+ def fetch_relaton_docids(doc, pubid)
101
+ pubid.urn_stage = stage_code(doc).to_f
102
+ [
103
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
104
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
105
+ ]
106
+ end
107
+
90
108
  private
91
109
 
92
110
  # Fetch titles and abstracts.
@@ -168,60 +186,29 @@ module RelatonIso
168
186
  end
169
187
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
170
188
 
171
- # Fetch docid.
172
- # @param doc [Nokogiri:HTML::Document]
173
- # @param edition [String]
174
- # @param langs [Array<Hash>]
175
- # @return [Array<RelatonBib::DocumentIdentifier>]
176
- def fetch_docid(doc, edition, langs)
177
- pubid = item_ref doc
178
- [
179
- RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
180
- RelatonBib::DocumentIdentifier.new(
181
- id: fetch_urn(doc, pubid, edition, langs), type: "URN",
182
- ),
183
- ]
184
- end
185
-
186
- # @param doc [Nokogiri:HTML::Document]
187
- # @param pubid [String]
188
- # @param edition [String]
189
- # @param langs [Array<Hash>]
190
- # @returnt [String]
191
- def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
192
- orig = pubid.split.first.downcase.split("/").join "-"
193
- %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
194
- _, part, _year, corr, = IsoBibliography.ref_components pubid
195
- urn = "urn:iso:std:#{orig}"
196
- urn += ":#{type.downcase}" if type
197
- urn += ":#{fetch_docnumber(doc)}"
198
- urn += ":-#{part}" if part
199
- urn += ":stage-#{stage_code(doc)}"
200
- urn += ":ed-#{edition}" if edition
201
- if corr
202
- corrparts = corr.split
203
- urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
204
- end
205
- urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
206
- urn
207
- end
208
-
209
- def fetch_docnumber(doc)
210
- item_ref(doc)&.match(/\d+/)&.to_s
189
+ #
190
+ # Generate docnumber.
191
+ #
192
+ # @param pubid [Pubid::Iso::Identifier] pubid
193
+ #
194
+ # @return [String] docnumber
195
+ #
196
+ def fetch_docnumber(pubid)
197
+ pubid.to_s.match(/\d+/)&.to_s
211
198
  end
212
199
 
213
- # @param doc [Nokogiri::HTML::Document]
214
- def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
215
- ref = item_ref doc
216
- unless ref
217
- return RelatonIsoBib::StructuredIdentifier.new(
218
- project_number: "?", part_number: "", prefix: nil, id: "?",
219
- )
220
- end
221
-
222
- m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
200
+ #
201
+ # Parse structuredidentifier.
202
+ #
203
+ # @param pubid [Pubid::Iso::Identifier] pubid
204
+ #
205
+ # @return [RelatonBib::StructuredIdentifier] structured identifier
206
+ #
207
+ def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
223
208
  RelatonIsoBib::StructuredIdentifier.new(
224
- project_number: m[1], part: m[2], type: "ISO",
209
+ project_number: "#{pubid.publisher} #{pubid.number}",
210
+ part: pubid&.part&.sub(/^-/, ""),
211
+ type: pubid.publisher,
225
212
  )
226
213
  end
227
214
 
@@ -251,7 +238,7 @@ module RelatonIso
251
238
  # Fetch workgroup.
252
239
  # @param doc [Nokogiri::HTML::Document]
253
240
  # @return [Hash]
254
- def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
241
+ def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
255
242
  wg_link = doc.css("div.entry-name.entry-block a")[0]
256
243
  # wg_url = DOMAIN + wg_link['href']
257
244
  workgroup = wg_link.text.split "/"
@@ -275,6 +262,7 @@ module RelatonIso
275
262
  # @param doc [Nokogiri::HTML::Document]
276
263
  # @return [Array<Hash>]
277
264
  def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
265
+ types = ["Now", "Now under review"]
278
266
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
279
267
  r_type = r.at("h4", "h5").text
280
268
  date = []
@@ -286,14 +274,13 @@ module RelatonIso
286
274
  "updates"
287
275
  else r_type
288
276
  end
289
- if ["Now", "Now under review"].include?(type) then a
277
+ if types.include?(type) then a
290
278
  else
291
279
  a + r.css("a").map do |id|
292
- fref = RelatonBib::FormattedRef.new(
293
- content: id.text, format: "text/plain",
294
- )
280
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
281
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
295
282
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
296
- formattedref: fref, date: date,
283
+ docid: [docid], formattedref: fref, date: date,
297
284
  )
298
285
  { type: type, bibitem: bibitem }
299
286
  end
@@ -308,7 +295,7 @@ module RelatonIso
308
295
  def fetch_type(ref)
309
296
  %r{
310
297
  ^(?<prefix>ISO|IWA|IEC)
311
- (?:(/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
298
+ (?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
312
299
  (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
313
300
  }x =~ ref
314
301
  # return "international-standard" if type_match.nil?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.11.0"
4
+ VERSION = "1.12.1"
5
5
  end
data/lib/relaton_iso.rb CHANGED
@@ -2,3 +2,5 @@
2
2
 
3
3
  require "relaton_iso/version"
4
4
  require "relaton_iso/iso_bibliography"
5
+ require "pubid-iso"
6
+ require "relaton_iso/document_identifier"
data/relaton_iso.gemspec CHANGED
@@ -42,5 +42,6 @@ Gem::Specification.new do |spec|
42
42
 
43
43
  # spec.add_dependency "relaton-iec", "~> 1.8.0"
44
44
  spec.add_dependency "algolia"
45
- spec.add_dependency "relaton-iso-bib", "~> 1.11.0"
45
+ spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
46
+ spec.add_dependency "pubid-iso", "~> 0.1.7"
46
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.0
4
+ version: 1.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-10 00:00:00.000000000 Z
11
+ date: 2022-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -184,14 +184,28 @@ dependencies:
184
184
  requirements:
185
185
  - - "~>"
186
186
  - !ruby/object:Gem::Version
187
- version: 1.11.0
187
+ version: 1.12.0
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
- version: 1.11.0
194
+ version: 1.12.0
195
+ - !ruby/object:Gem::Dependency
196
+ name: pubid-iso
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: 0.1.7
202
+ type: :runtime
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: 0.1.7
195
209
  description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
196
210
  model'
197
211
  email:
@@ -230,7 +244,9 @@ files:
230
244
  - bin/ruby-rewrite
231
245
  - bin/safe_yaml
232
246
  - bin/setup
247
+ - bin/thor
233
248
  - lib/relaton_iso.rb
249
+ - lib/relaton_iso/document_identifier.rb
234
250
  - lib/relaton_iso/hit.rb
235
251
  - lib/relaton_iso/hit_collection.rb
236
252
  - lib/relaton_iso/iso_bibliography.rb