relaton-iso 1.11.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f7fa7c91589b331ac4748d70c790b814bd294b73c8c35c12a1e7a6a2fa54ca38
4
- data.tar.gz: 64ea8882a8c4ec26278f4bea0b8af25879b7ead65cc156117dadbeab8b606400
3
+ metadata.gz: 4479e38048aa0dfae8bcc85f1e9de03b5fe0561048b658ec47b3df8ca64794eb
4
+ data.tar.gz: c297ddc7b15d8186b85fbb7d4d3f84863d7df6e20ad243f0740364262fe43807
5
5
  SHA512:
6
- metadata.gz: 1c8716c7c2ddb6fb8644b528cbddccf2c900897326b1f3897946247886e21c25452f71783cc696ba44cba7a2247b11577085c71f780c91fb060f4d72748c1af4
7
- data.tar.gz: 1a91a1433ddf1312edb01e15348207573ed30f5d022b376757b2fda69ad8ae4be62090e726a8baede35e1ff3cdf95dcf5146bd863c6758e5dea4ccc047309dd8
6
+ metadata.gz: 6fd11d9fe01bd36052cf2762df6e8f728d361fbe23410dcb0229e95436a9f7909e51e51a8be0abc9f7bd269ffd10d642cb0aa7792dba76c0c174b606e319bf58
7
+ data.tar.gz: 060edbed6bb5b11033911db2200a4e19e98585c4133a1b4a09eaa79b582f2cf23b39f424b84d3110429ff1800247cec4f2b6522a82bbbdc45cb093e63a339bda
@@ -10,27 +10,4 @@ on:
10
10
 
11
11
  jobs:
12
12
  rake:
13
- name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
- runs-on: ${{ matrix.os }}
15
- continue-on-error: ${{ matrix.experimental }}
16
- strategy:
17
- fail-fast: false
18
- matrix:
19
- ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
- os: [ ubuntu-latest, windows-latest, macos-latest ]
21
- experimental: [ false ]
22
- steps:
23
- - uses: actions/checkout@v2
24
- with:
25
- submodules: true
26
-
27
- # https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
28
- - if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
29
- run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
30
-
31
- - uses: ruby/setup-ruby@v1
32
- with:
33
- ruby-version: ${{ matrix.ruby }}
34
- bundler-cache: true
35
-
36
- - run: bundle exec rake
13
+ uses: relaton/support/.github/workflows/rake.yml@master
data/Gemfile CHANGED
@@ -1,6 +1,3 @@
1
- Encoding.default_external = Encoding::UTF_8
2
- Encoding.default_internal = Encoding::UTF_8
3
-
4
1
  source "https://rubygems.org"
5
2
 
6
3
  git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
data/bin/thor ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'thor' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("thor", "thor")
@@ -0,0 +1,27 @@
1
+ module RelatonIso
2
+ class DocumentIdentifier < RelatonBib::DocumentIdentifier
3
+ def id
4
+ id_str = @id.to_s.sub(/\sED\d+/, "")
5
+ if @all_parts
6
+ if type == "URN"
7
+ return "#{@id.urn}:ser"
8
+ else
9
+ return "#{id_str} (all parts)"
10
+ end
11
+ end
12
+ type == "URN" ? @id.urn.to_s : id_str
13
+ end
14
+
15
+ def remove_part
16
+ @id.part = nil
17
+ end
18
+
19
+ def remove_date
20
+ @id.year = nil
21
+ end
22
+
23
+ def all_parts
24
+ @all_parts = true
25
+ end
26
+ end
27
+ end
@@ -4,13 +4,13 @@ module RelatonIso
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
6
  # @return [RelatonIsoBib::IsoBibliographicItem]
7
- attr_writer :fetch
7
+ attr_writer :fetch, :pubid
8
8
 
9
9
  # Parse page.
10
- # @param lang [String, NilClass]
10
+ # @param lang [String, nil]
11
11
  # @return [RelatonIso::IsoBibliographicItem]
12
12
  def fetch(lang = nil)
13
- @fetch ||= Scrapper.parse_page @hit, lang
13
+ @fetch ||= Scrapper.parse_page self, lang
14
14
  end
15
15
 
16
16
  # @return [Integer]
@@ -23,5 +23,10 @@ module RelatonIso
23
23
  else 4
24
24
  end
25
25
  end
26
+
27
+ # @return [Pubid::Iso::Identifier]
28
+ def pubid
29
+ @pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
30
+ end
26
31
  end
27
32
  end
@@ -11,25 +11,21 @@ module RelatonIso
11
11
  # @param text [String] reference to search
12
12
  def initialize(text)
13
13
  super
14
- @array = text.match?(/^ISO\sTC\s184\/SC\s?4/) ? fetch_github : fetch_iso
14
+ @array = text.match?(/^ISO\s(?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
15
15
  end
16
16
 
17
17
  # @param lang [String, NilClass]
18
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
19
19
  def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
20
20
  # parts = @array.reject { |h| h.hit["docPart"]&.empty? }
21
- hit = @array.min_by do |h|
22
- IsoBibliography.ref_components(h.hit[:title])[1].to_i
23
- end
21
+ hit = @array.min_by { |h| h.pubid.part }
24
22
  return @array.first.fetch lang unless hit
25
23
 
26
- bibitem = hit.fetch lang
24
+ bibitem = hit.fetch(lang)
27
25
  all_parts_item = bibitem.to_all_parts
28
26
  @array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
29
- %r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
30
- (?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
31
27
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
32
- formattedref: RelatonBib::FormattedRef.new(content: fr),
28
+ formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
33
29
  )
34
30
  all_parts_item.relation << RelatonBib::DocumentRelation.new(
35
31
  type: "instance", bibitem: isobib,
@@ -27,175 +27,152 @@ module RelatonIso
27
27
  # @option opts [Boolean] :keep_year if undated reference should return
28
28
  # actual reference with year
29
29
  #
30
- # @return [String] Relaton XML serialisation of reference
30
+ # @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item for the reference, or nil when no match is found
31
31
  def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
32
32
  code = ref.gsub(/\u2013/, "-")
33
- # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
34
- _, _part, year1, = ref_components ref
35
- year ||= year1
33
+
34
+ # parse "all parts" request
36
35
  code.sub! " (all parts)", ""
37
36
  opts[:all_parts] ||= $~ && opts[:all_parts].nil?
38
- # opts[:keep_year] ||= opts[:keep_year].nil?
39
- # code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
40
- # if %r[^ISO/IEC DIR].match? code
41
- # return RelatonIec::IecBibliography.get(code, year, opts)
42
- # end
43
37
 
44
- ret = isobib_get1(code, year, opts)
45
- return nil if ret.nil?
38
+ query_pubid = Pubid::Iso::Identifier.parse(code)
39
+ query_pubid.year = year if year
40
+
41
+ hits = isobib_search_filter(query_pubid, opts)
46
42
 
47
- if (year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
43
+ # return only first one if not all_parts
44
+ ret = if !opts[:all_parts] || hits.size == 1
45
+ hits.any? && hits.first.fetch(opts[:lang])
46
+ else
47
+ hits.to_all_parts(opts[:lang])
48
+ end
49
+
50
+ if ret
51
+ warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
52
+ else
53
+ return fetch_ref_err(query_pubid, query_pubid.year)
54
+ end
55
+
56
+ if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
48
57
  ret
49
58
  else
50
59
  ret.to_most_recent_reference
51
60
  end
52
61
  end
53
62
 
54
- def ref_components(ref)
55
- %r{
56
- ^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
57
- (?:-(?<part>[\w-]+))?
58
- (?::(?<year>\d{4}))?
59
- (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
60
- }x =~ ref
61
- [code&.strip, part, year, corr, coryear]
63
+ # @param query_pubid [Pubid::Iso::Identifier]
64
+ # @param pubid [Pubid::Iso::Identifier]
65
+ # @param all_parts [Boolean] match with any parts when true
66
+ # @return [Boolean]
67
+ def matches_parts?(query_pubid, pubid, all_parts: false)
68
+ if all_parts
69
+ # match only with documents with part number
70
+ !pubid.part.nil?
71
+ else
72
+ query_pubid.part == pubid.part
73
+ end
62
74
  end
63
75
 
64
- private
76
+ def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
77
+ query_pubid.publisher == pubid.publisher &&
78
+ query_pubid.number == pubid.number &&
79
+ query_pubid.copublisher == pubid.copublisher &&
80
+ ((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
81
+ ((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
82
+ end
65
83
 
66
- # rubocop:disable Metrics/MethodLength
84
+ # @param hit_collection [RelatonIso::HitCollection]
85
+ # @param year [String]
86
+ # @return [RelatonIso::HitCollection]
87
+ def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
88
+ missed_years = []
67
89
 
68
- def fetch_ref_err(code, year, missed_years)
69
- id = year ? "#{code}:#{year}" : code
70
- warn "[relaton-iso] WARNING: no match found online for #{id}. "\
71
- "The code must be exactly like it is on the standards website."
72
- unless missed_years.empty?
90
+ # filter by year
91
+ hits = hit_collection.select do |hit|
92
+ if hit.pubid.year == year
93
+ true
94
+ elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
95
+ hit.pubid.year = year
96
+ true
97
+ else
98
+ missed_year = hit.pubid.year || hit.hit[:year].to_s
99
+ if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
100
+ missed_years << missed_year
101
+ end
102
+ false
103
+ end
104
+ end
105
+
106
+ if hits.empty? && !missed_years.empty?
73
107
  warn "[relaton-iso] (There was no match for #{year}, though there "\
74
108
  "were matches found for #{missed_years.join(', ')}.)"
75
109
  end
76
- if /\d-\d/.match? code
110
+ hits
111
+ end
112
+
113
+ private
114
+
115
+ def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
116
+ id = year ? "#{query_pubid}:#{year}" : query_pubid
117
+ warn "[relaton-iso] WARNING: no match found online for #{id}. "\
118
+ "The code must be exactly like it is on the standards website."
119
+ if /\d-\d/.match? query_pubid.to_s
77
120
  warn "[relaton-iso] The provided document part may not exist, "\
78
121
  "or the document may no longer be published in parts."
79
122
  else
80
123
  warn "[relaton-iso] If you wanted to cite all document parts for "\
81
- "the reference, use \"#{code} (all parts)\".\nIf the document "\
124
+ "the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
82
125
  "is not a standard, use its document type abbreviation "\
83
126
  "(TS, TR, PAS, Guide)."
84
127
  end
85
128
  nil
86
129
  end
87
130
 
88
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
89
-
90
131
  # Search for hits. If none are found, try missed stages and then ISO/IEC.
91
132
  #
92
- # @param code [String] reference without correction
133
+ # @param query_pubid [Pubid::Iso::Identifier] reference without correction
93
134
  # @param opts [Hash]
94
135
  # @return [Array<RelatonIso::Hit>]
95
- def isobib_search_filter(code, opts)
96
- ref = remove_part code, opts[:all_parts]
97
- warn "[relaton-iso] (\"#{code}\") fetching..."
98
- result = search(ref)
99
- res = search_code result, code, opts
136
+ def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
137
+ query_pubid.part = nil if opts[:all_parts]
138
+ warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
139
+ # fetch hits collection
140
+ hit_collection = search(query_pubid.to_s(with_date: false))
141
+ # filter only matching hits
142
+ res = filter_hits hit_collection, query_pubid,
143
+ all_parts: opts[:all_parts]
100
144
  return res unless res.empty?
101
145
 
102
- # try stages
103
- case code
104
- when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
105
- res = try_stages(result, opts) do |st|
106
- code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
107
- end
108
- return res unless res.empty?
109
- when %r{^\w+\s\d+} # code like ISO 123
110
- res = try_stages(result, opts) do |st|
111
- code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
112
- end
113
- return res unless res.empty?
114
- end
146
+ # look up documents with any stage or type when nothing matches without a stage
147
+ res = filter_hits hit_collection, query_pubid,
148
+ all_parts: opts[:all_parts], any_types_stages: true
149
+ return res unless res.empty?
115
150
 
116
- if %r{^ISO\s}.match? code # try ISO/IEC if ISO not found
151
+ # TODO: do this at pubid-iso
152
+ if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
117
153
  warn "[relaton-iso] Attempting ISO/IEC retrieval"
118
- c = code.sub "ISO", "ISO/IEC"
119
- res = search_code result, c, opts
154
+ query_pubid.copublisher = "IEC"
155
+ res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
120
156
  end
121
157
  res
122
158
  end
123
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
124
159
 
125
- def remove_part(ref, all_parts)
126
- return ref unless all_parts
127
-
128
- ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
129
- end
130
-
131
- # @param result [RelatonIso::HitCollection]
132
- # @param opts [Hash]
160
+ # @param hit_collection [RelatonIso::HitCollection]
161
+ # @param query_pubid [Pubid::Iso::Identifier]
162
+ # @param all_parts [Boolean]
163
+ # @param any_types_stages [Boolean]
133
164
  # @return [RelatonIso::HitCollection]
134
- def try_stages(result, opts)
135
- res = nil
136
- %w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
137
- c = yield st
138
- res = search_code result, c, opts
139
- return res unless res.empty?
140
- end
141
- res
142
- end
143
-
144
- # @param result [RelatonIso::HitCollection]
145
- # @param code [String]
146
- # @param opts [Hash]
147
- # @return [RelatonIso::HitCollection]
148
- def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
149
- code1, part1, _, corr1, coryear1 = ref_components code
150
- result.select do |i|
151
- code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
152
- code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
153
- corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
154
- end
155
- end
156
-
157
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
158
-
159
- # Sort through the results from RelatonIso, fetching them three at a time,
160
- # and return the first result that matches the code, matches the year
161
- # (if provided), and which # has a title (amendments do not).
162
- # Only expects the first page of results to be populated.
163
- # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
164
- # If no match, returns any years which caused mismatch, for error
165
- # reporting
166
- def isobib_results_filter(result, year, opts)
167
- missed_years = []
168
- hits = result.reduce!([]) do |hts, h|
169
- if !year || (%r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year)
170
- hts << h
171
- else
172
- missed_years << iyear
173
- hts
174
- end
165
+ def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
166
+ # filter out
167
+ result = hit_collection.select do |i|
168
+ hit_pubid = i.pubid
169
+ matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
170
+ matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
171
+ query_pubid.corrigendum == hit_pubid.corrigendum &&
172
+ query_pubid.amendment == hit_pubid.amendment
175
173
  end
176
- return { years: missed_years } unless hits.any?
177
174
 
178
- if !opts[:all_parts] || hits.size == 1
179
- return { ret: hits.first.fetch(opts[:lang]) }
180
- end
181
-
182
- { ret: hits.to_all_parts(opts[:lang]) }
183
- end
184
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
185
-
186
- # @param code [String]
187
- # @param year [String, NilClass]
188
- # @param opts [Hash]
189
- def isobib_get1(code, year, opts)
190
- # return iev(code) if /^IEC 60050-/.match code
191
- result = isobib_search_filter(code, opts) || return
192
- ret = isobib_results_filter(result, year, opts)
193
- if ret[:ret]
194
- warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
195
- ret[:ret]
196
- else
197
- fetch_ref_err(code, year, ret[:years])
198
- end
175
+ query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
199
176
  end
200
177
  end
201
178
  end
@@ -50,43 +50,61 @@ module RelatonIso
50
50
 
51
51
  class << self
52
52
  # Parse page.
53
- # @param hit_data [Hash]
53
+ # @param hit [RelatonIso::Hit]
54
54
  # @param lang [String, NilClass]
55
- # @return [Hash]
56
- def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
55
+ # @return [RelatonIsoBib::IsoBibliographicItem]
56
+ def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
57
57
  # path = "/contents/data/standard#{hit_data['splitPath']}/"\
58
58
  # "#{hit_data['csnumber']}.html"
59
- doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
59
+
60
+ doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
60
61
 
61
62
  # Fetch edition.
62
63
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
63
64
  &.children&.last&.text&.match(/\d+/)&.to_s
65
+ hit.pubid.edition = edition if edition
64
66
 
65
67
  titles, abstract, langs = fetch_titles_abstract(doc, lang)
66
68
 
67
69
  RelatonIsoBib::IsoBibliographicItem.new(
68
70
  fetched: Date.today.to_s,
69
- docid: fetch_docid(doc, edition, langs),
70
- docnumber: fetch_docnumber(doc),
71
+ docid: fetch_relaton_docids(doc, hit.pubid),
72
+ docnumber: fetch_docnumber(hit.pubid),
71
73
  edition: edition,
72
74
  language: langs.map { |l| l[:lang] },
73
75
  script: langs.map { |l| script(l[:lang]) }.uniq,
74
76
  title: titles,
75
- doctype: fetch_type(hit_data[:title]),
77
+ doctype: fetch_type(hit.hit[:title]),
76
78
  docstatus: fetch_status(doc),
77
79
  ics: fetch_ics(doc),
78
- date: fetch_dates(doc, hit_data[:title]),
79
- contributor: fetch_contributors(hit_data[:title]),
80
+ date: fetch_dates(doc, hit.hit[:title]),
81
+ contributor: fetch_contributors(hit.hit[:title]),
80
82
  editorialgroup: fetch_workgroup(doc),
81
83
  abstract: abstract,
82
84
  copyright: fetch_copyright(doc),
83
85
  link: fetch_link(doc, url),
84
86
  relation: fetch_relations(doc),
85
87
  place: ["Geneva"],
86
- structuredidentifier: fetch_structuredidentifier(doc),
88
+ structuredidentifier: fetch_structuredidentifier(hit.pubid),
87
89
  )
88
90
  end
89
91
 
92
+ #
93
+ # Create document ids.
94
+ #
95
+ # @param doc [Nokogiri::HTML::Document] document
96
+ # @param pubid [Pubid::Iso::Identifier] pubid
97
+ #
98
+ # @return [Array<RelatonBib::DocumentIdentifier>]
99
+ #
100
+ def fetch_relaton_docids(doc, pubid)
101
+ pubid.urn_stage = stage_code(doc).to_f
102
+ [
103
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
104
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
105
+ ]
106
+ end
107
+
90
108
  private
91
109
 
92
110
  # Fetch titles and abstracts.
@@ -168,60 +186,29 @@ module RelatonIso
168
186
  end
169
187
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
170
188
 
171
- # Fetch docid.
172
- # @param doc [Nokogiri:HTML::Document]
173
- # @param edition [String]
174
- # @param langs [Array<Hash>]
175
- # @return [Array<RelatonBib::DocumentIdentifier>]
176
- def fetch_docid(doc, edition, langs)
177
- pubid = item_ref doc
178
- [
179
- RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
180
- RelatonBib::DocumentIdentifier.new(
181
- id: fetch_urn(doc, pubid, edition, langs), type: "URN",
182
- ),
183
- ]
184
- end
185
-
186
- # @param doc [Nokogiri:HTML::Document]
187
- # @param pubid [String]
188
- # @param edition [String]
189
- # @param langs [Array<Hash>]
190
- # @returnt [String]
191
- def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
192
- orig = pubid.split.first.downcase.split("/").join "-"
193
- %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
194
- _, part, _year, corr, = IsoBibliography.ref_components pubid
195
- urn = "urn:iso:std:#{orig}"
196
- urn += ":#{type.downcase}" if type
197
- urn += ":#{fetch_docnumber(doc)}"
198
- urn += ":-#{part}" if part
199
- urn += ":stage-#{stage_code(doc)}"
200
- urn += ":ed-#{edition}" if edition
201
- if corr
202
- corrparts = corr.split
203
- urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
204
- end
205
- urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
206
- urn
207
- end
208
-
209
- def fetch_docnumber(doc)
210
- item_ref(doc)&.match(/\d+/)&.to_s
189
+ #
190
+ # Generate docnumber.
191
+ #
192
+ # @param pubid [Pubid::Iso::Identifier]
193
+ #
194
+ # @return [String] docnumber
195
+ #
196
+ def fetch_docnumber(pubid)
197
+ pubid.to_s.match(/\d+/)&.to_s
211
198
  end
212
199
 
213
- # @param doc [Nokogiri::HTML::Document]
214
- def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
215
- ref = item_ref doc
216
- unless ref
217
- return RelatonIsoBib::StructuredIdentifier.new(
218
- project_number: "?", part_number: "", prefix: nil, id: "?",
219
- )
220
- end
221
-
222
- m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
200
+ #
201
+ # Parse structuredidentifier.
202
+ #
203
+ # @param pubid [Pubid::Iso::Identifier] pubid
204
+ #
205
+ # @return [RelatonIsoBib::StructuredIdentifier] structured identifier
206
+ #
207
+ def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
223
208
  RelatonIsoBib::StructuredIdentifier.new(
224
- project_number: m[1], part: m[2], type: "ISO",
209
+ project_number: "#{pubid.publisher} #{pubid.number}",
210
+ part: pubid&.part&.sub(/^-/, ""),
211
+ type: pubid.publisher,
225
212
  )
226
213
  end
227
214
 
@@ -251,7 +238,7 @@ module RelatonIso
251
238
  # Fetch workgroup.
252
239
  # @param doc [Nokogiri::HTML::Document]
253
240
  # @return [Hash]
254
- def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
241
+ def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
255
242
  wg_link = doc.css("div.entry-name.entry-block a")[0]
256
243
  # wg_url = DOMAIN + wg_link['href']
257
244
  workgroup = wg_link.text.split "/"
@@ -275,6 +262,7 @@ module RelatonIso
275
262
  # @param doc [Nokogiri::HTML::Document]
276
263
  # @return [Array<Hash>]
277
264
  def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
265
+ types = ["Now", "Now under review"]
278
266
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
279
267
  r_type = r.at("h4", "h5").text
280
268
  date = []
@@ -286,14 +274,13 @@ module RelatonIso
286
274
  "updates"
287
275
  else r_type
288
276
  end
289
- if ["Now", "Now under review"].include?(type) then a
277
+ if types.include?(type) then a
290
278
  else
291
279
  a + r.css("a").map do |id|
292
- fref = RelatonBib::FormattedRef.new(
293
- content: id.text, format: "text/plain",
294
- )
280
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
281
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
295
282
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
296
- formattedref: fref, date: date,
283
+ docid: [docid], formattedref: fref, date: date,
297
284
  )
298
285
  { type: type, bibitem: bibitem }
299
286
  end
@@ -308,7 +295,7 @@ module RelatonIso
308
295
  def fetch_type(ref)
309
296
  %r{
310
297
  ^(?<prefix>ISO|IWA|IEC)
311
- (?:(/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
298
+ (?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
312
299
  (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
313
300
  }x =~ ref
314
301
  # return "international-standard" if type_match.nil?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.11.0"
4
+ VERSION = "1.12.1"
5
5
  end
data/lib/relaton_iso.rb CHANGED
@@ -2,3 +2,5 @@
2
2
 
3
3
  require "relaton_iso/version"
4
4
  require "relaton_iso/iso_bibliography"
5
+ require "pubid-iso"
6
+ require "relaton_iso/document_identifier"
data/relaton_iso.gemspec CHANGED
@@ -42,5 +42,6 @@ Gem::Specification.new do |spec|
42
42
 
43
43
  # spec.add_dependency "relaton-iec", "~> 1.8.0"
44
44
  spec.add_dependency "algolia"
45
- spec.add_dependency "relaton-iso-bib", "~> 1.11.0"
45
+ spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
46
+ spec.add_dependency "pubid-iso", "~> 0.1.7"
46
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.0
4
+ version: 1.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-10 00:00:00.000000000 Z
11
+ date: 2022-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -184,14 +184,28 @@ dependencies:
184
184
  requirements:
185
185
  - - "~>"
186
186
  - !ruby/object:Gem::Version
187
- version: 1.11.0
187
+ version: 1.12.0
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
- version: 1.11.0
194
+ version: 1.12.0
195
+ - !ruby/object:Gem::Dependency
196
+ name: pubid-iso
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: 0.1.7
202
+ type: :runtime
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: 0.1.7
195
209
  description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
196
210
  model'
197
211
  email:
@@ -230,7 +244,9 @@ files:
230
244
  - bin/ruby-rewrite
231
245
  - bin/safe_yaml
232
246
  - bin/setup
247
+ - bin/thor
233
248
  - lib/relaton_iso.rb
249
+ - lib/relaton_iso/document_identifier.rb
234
250
  - lib/relaton_iso/hit.rb
235
251
  - lib/relaton_iso/hit_collection.rb
236
252
  - lib/relaton_iso/iso_bibliography.rb