relaton-iso 1.11.1 → 1.12.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57aaa988aba37afa1e4cc8eced55b20d46eda761cad5fc8a68ac33c3793afb82
4
- data.tar.gz: 0ff660f7a7ffb558659c9d8577f075cfea9e225c2d9a7bf44d209745b0d7fc76
3
+ metadata.gz: 1f7c1d269d268e8bb7f9bc7b19b6e5466c9fb88f8a9db41f3944207535ca2edd
4
+ data.tar.gz: 4a6545be437af6c6326fed4e231c387a0a5cc7c04168fa1098c9fffc78701b29
5
5
  SHA512:
6
- metadata.gz: 8ef87dc1073df8b8653989494fda55f54cdcd780aaaa9d16e0af6e0ec6778021d73a2e81b23b18b751e95b2c607f6c27a49af50d8df1bbb95c7d9b7df76e3977
7
- data.tar.gz: 154c64258dfc69a6ae01822593e0e9759648cc2f773c126f4474cd0aad8483a16e208613ccd582e94f4ea38c4c996cee089c5d5b844782f93ae2ec33be17151c
6
+ metadata.gz: 3be88b28acc84c9877db94ef46e38488ea63b15ca2c19cd694c69d17f0e1b5c0b003a95929673d2e8a70cb69e91faef575506dab409d0c4b5e61f4dd046eb858
7
+ data.tar.gz: 209c65c8d600a34566999600cc0fbb0b06564603821c00045a09e0e00ecd2f6abf79c59774142e4dc1474dcb08b2604aeb5f1874a25b9f29e0d0dd9b420fbd4b
@@ -10,27 +10,4 @@ on:
10
10
 
11
11
  jobs:
12
12
  rake:
13
- name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
- runs-on: ${{ matrix.os }}
15
- continue-on-error: ${{ matrix.experimental }}
16
- strategy:
17
- fail-fast: false
18
- matrix:
19
- ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
- os: [ ubuntu-latest, windows-latest, macos-latest ]
21
- experimental: [ false ]
22
- steps:
23
- - uses: actions/checkout@v2
24
- with:
25
- submodules: true
26
-
27
- # https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
28
- - if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
29
- run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
30
-
31
- - uses: ruby/setup-ruby@v1
32
- with:
33
- ruby-version: ${{ matrix.ruby }}
34
- bundler-cache: true
35
-
36
- - run: bundle exec rake
13
+ uses: relaton/support/.github/workflows/rake.yml@master
data/bin/thor ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'thor' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("thor", "thor")
@@ -0,0 +1,27 @@
1
+ module RelatonIso
2
+ class DocumentIdentifier < RelatonBib::DocumentIdentifier
3
+ def id
4
+ id_str = @id.to_s.sub(/\sED\d+/, "")
5
+ if @all_parts
6
+ if type == "URN"
7
+ return "#{@id.urn}:ser"
8
+ else
9
+ return "#{id_str} (all parts)"
10
+ end
11
+ end
12
+ type == "URN" ? @id.urn.to_s : id_str
13
+ end
14
+
15
+ def remove_part
16
+ @id.part = nil
17
+ end
18
+
19
+ def remove_date
20
+ @id.year = nil
21
+ end
22
+
23
+ def all_parts
24
+ @all_parts = true
25
+ end
26
+ end
27
+ end
@@ -4,13 +4,13 @@ module RelatonIso
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
6
  # @return [RelatonIsoBib::IsoBibliographicItem]
7
- attr_writer :fetch
7
+ attr_writer :fetch, :pubid
8
8
 
9
9
  # Parse page.
10
- # @param lang [String, NilClass]
10
+ # @param lang [String, nil]
11
11
  # @return [RelatonIsoBib::IsoBibliographicItem]
12
12
  def fetch(lang = nil)
13
- @fetch ||= Scrapper.parse_page @hit, lang
13
+ @fetch ||= Scrapper.parse_page self, lang
14
14
  end
15
15
 
16
16
  # @return [Integer]
@@ -23,5 +23,10 @@ module RelatonIso
23
23
  else 4
24
24
  end
25
25
  end
26
+
27
+ # @return [Pubid::Iso::Identifier]
28
+ def pubid
29
+ @pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
30
+ end
26
31
  end
27
32
  end
@@ -11,25 +11,21 @@ module RelatonIso
11
11
  # @param text [String] reference to search
12
12
  def initialize(text)
13
13
  super
14
- @array = text.match?(/^ISO\sTC\s184\/SC\s?4/) ? fetch_github : fetch_iso
14
+ @array = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
15
15
  end
16
16
 
17
17
  # @param lang [String, NilClass]
18
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
19
19
  def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
20
20
  # parts = @array.reject { |h| h.hit["docPart"]&.empty? }
21
- hit = @array.min_by do |h|
22
- IsoBibliography.ref_components(h.hit[:title])[1].to_i
23
- end
21
+ hit = @array.min_by { |h| h.pubid.part }
24
22
  return @array.first.fetch lang unless hit
25
23
 
26
- bibitem = hit.fetch lang
24
+ bibitem = hit.fetch(lang)
27
25
  all_parts_item = bibitem.to_all_parts
28
26
  @array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
29
- %r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
30
- (?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
31
27
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
32
- formattedref: RelatonBib::FormattedRef.new(content: fr),
28
+ formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
33
29
  )
34
30
  all_parts_item.relation << RelatonBib::DocumentRelation.new(
35
31
  type: "instance", bibitem: isobib,
@@ -27,180 +27,152 @@ module RelatonIso
27
27
  # @option opts [Boolean] :keep_year if undated reference should return
28
28
  # actual reference with year
29
29
  #
30
- # @return [String] Relaton XML serialisation of reference
30
+ # @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item for the reference, or nil when no match is found
31
31
  def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
32
32
  code = ref.gsub(/\u2013/, "-")
33
- # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
34
- year ||= publish_year ref
33
+
34
+ # parse "all parts" request
35
35
  code.sub! " (all parts)", ""
36
36
  opts[:all_parts] ||= $~ && opts[:all_parts].nil?
37
- # opts[:keep_year] ||= opts[:keep_year].nil?
38
- # code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
39
- # if %r[^ISO/IEC DIR].match? code
40
- # return RelatonIec::IecBibliography.get(code, year, opts)
41
- # end
42
37
 
43
- ret = isobib_get(code, year, opts)
44
- return nil if ret.nil?
38
+ query_pubid = Pubid::Iso::Identifier.parse(code)
39
+ query_pubid.year = year if year
40
+
41
+ hits = isobib_search_filter(query_pubid, opts)
45
42
 
46
- if (year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
43
+ # return only first one if not all_parts
44
+ ret = if !opts[:all_parts] || hits.size == 1
45
+ hits.any? && hits.first.fetch(opts[:lang])
46
+ else
47
+ hits.to_all_parts(opts[:lang])
48
+ end
49
+
50
+ if ret
51
+ warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
52
+ else
53
+ return fetch_ref_err(query_pubid, query_pubid.year)
54
+ end
55
+
56
+ if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
47
57
  ret
48
58
  else
49
59
  ret.to_most_recent_reference
50
60
  end
51
61
  end
52
62
 
53
- def ref_components(ref)
54
- %r{
55
- ^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
56
- (?:-(?<part>[\w-]+))?
57
- (?::(?<year>\d{4}))?
58
- (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
59
- }x =~ ref
60
- [code&.strip, part, year, corr, coryear]
63
+ # @param query_pubid [Pubid::Iso::Identifier]
64
+ # @param pubid [Pubid::Iso::Identifier]
65
+ # @param all_parts [Boolean] match with any parts when true
66
+ # @return [Boolean]
67
+ def matches_parts?(query_pubid, pubid, all_parts: false)
68
+ if all_parts
69
+ # match only with documents with part number
70
+ !pubid.part.nil?
71
+ else
72
+ query_pubid.part == pubid.part
73
+ end
61
74
  end
62
75
 
63
- private
76
+ def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
77
+ query_pubid.publisher == pubid.publisher &&
78
+ query_pubid.number == pubid.number &&
79
+ query_pubid.copublisher == pubid.copublisher &&
80
+ ((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
81
+ ((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
82
+ end
64
83
 
65
- # rubocop:disable Metrics/MethodLength
84
+ # @param hit_collection [RelatonIso::HitCollection]
85
+ # @param year [String]
86
+ # @return [RelatonIso::HitCollection]
87
+ def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
88
+ missed_years = []
66
89
 
67
- def fetch_ref_err(code, year, missed_years)
68
- id = year ? "#{code}:#{year}" : code
69
- warn "[relaton-iso] WARNING: no match found online for #{id}. "\
70
- "The code must be exactly like it is on the standards website."
71
- unless missed_years.empty?
90
+ # filter by year
91
+ hits = hit_collection.select do |hit|
92
+ if hit.pubid.year == year
93
+ true
94
+ elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
95
+ hit.pubid.year = year
96
+ true
97
+ else
98
+ missed_year = hit.pubid.year || hit.hit[:year].to_s
99
+ if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
100
+ missed_years << missed_year
101
+ end
102
+ false
103
+ end
104
+ end
105
+
106
+ if hits.empty? && !missed_years.empty?
72
107
  warn "[relaton-iso] (There was no match for #{year}, though there "\
73
108
  "were matches found for #{missed_years.join(', ')}.)"
74
109
  end
75
- if /\d-\d/.match? code
110
+ hits
111
+ end
112
+
113
+ private
114
+
115
+ def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
116
+ id = year ? "#{query_pubid}:#{year}" : query_pubid
117
+ warn "[relaton-iso] WARNING: no match found online for #{id}. "\
118
+ "The code must be exactly like it is on the standards website."
119
+ if /\d-\d/.match? query_pubid.to_s
76
120
  warn "[relaton-iso] The provided document part may not exist, "\
77
121
  "or the document may no longer be published in parts."
78
122
  else
79
123
  warn "[relaton-iso] If you wanted to cite all document parts for "\
80
- "the reference, use \"#{code} (all parts)\".\nIf the document "\
124
+ "the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
81
125
  "is not a standard, use its document type abbreviation "\
82
126
  "(TS, TR, PAS, Guide)."
83
127
  end
84
128
  nil
85
129
  end
86
130
 
87
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
88
-
89
131
  # Search for hits. If none are found, retry allowing any stage or type, then retry as ISO/IEC.
90
132
  #
91
- # @param code [String] reference without correction
133
+ # @param query_pubid [Pubid::Iso::Identifier] reference without correction
92
134
  # @param opts [Hash]
93
135
  # @return [Array<RelatonIso::Hit>]
94
- def isobib_search_filter(code, opts)
95
- ref = remove_part code, opts[:all_parts]
96
- warn "[relaton-iso] (\"#{code}\") fetching..."
97
- result = search(ref)
98
- res = search_code result, code, opts
136
+ def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
137
+ query_pubid.part = nil if opts[:all_parts]
138
+ warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
139
+ # fetch hits collection
140
+ hit_collection = search(query_pubid.to_s(with_date: false))
141
+ # filter only matching hits
142
+ res = filter_hits hit_collection, query_pubid,
143
+ all_parts: opts[:all_parts]
99
144
  return res unless res.empty?
100
145
 
101
- # try stages
102
- case code
103
- when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
104
- res = try_stages(result, opts) do |st|
105
- code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
106
- end
107
- return res unless res.empty?
108
- when %r{^\w+\s\d+} # code like ISO 123
109
- res = try_stages(result, opts) do |st|
110
- code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
111
- end
112
- return res unless res.empty?
113
- end
146
+ # look up documents with any stage or type when there is no exact match
147
+ res = filter_hits hit_collection, query_pubid,
148
+ all_parts: opts[:all_parts], any_types_stages: true
149
+ return res unless res.empty?
114
150
 
115
- if %r{^ISO\s}.match? code # try ISO/IEC if ISO not found
151
+ # TODO: do this at pubid-iso
152
+ if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
116
153
  warn "[relaton-iso] Attempting ISO/IEC retrieval"
117
- c = code.sub "ISO", "ISO/IEC"
118
- res = search_code result, c, opts
119
- end
120
- res
121
- end
122
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
123
-
124
- def remove_part(ref, all_parts)
125
- return ref unless all_parts
126
-
127
- ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
128
- end
129
-
130
- # @param result [RelatonIso::HitCollection]
131
- # @param opts [Hash]
132
- # @return [RelatonIso::HitCollection]
133
- def try_stages(result, opts)
134
- res = nil
135
- %w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
136
- c = yield st
137
- res = search_code result, c, opts
138
- return res unless res.empty?
154
+ query_pubid.copublisher = "IEC"
155
+ res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
139
156
  end
140
157
  res
141
158
  end
142
159
 
143
- # @param result [RelatonIso::HitCollection]
144
- # @param code [String]
145
- # @param opts [Hash]
160
+ # @param hit_collection [RelatonIso::HitCollection]
161
+ # @param query_pubid [Pubid::Iso::Identifier]
162
+ # @param all_parts [Boolean]
163
+ # @param any_types_stages [Boolean] when true, a query without a stage or type matches hits with any stage or type
146
164
  # @return [RelatonIso::HitCollection]
147
- def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
148
- code1, part1, _, corr1, coryear1 = ref_components code
149
- result.select do |i|
150
- code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
151
- code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
152
- corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
165
+ def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
166
+ # filter out
167
+ result = hit_collection.select do |i|
168
+ hit_pubid = i.pubid
169
+ matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
170
+ matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
171
+ query_pubid.corrigendums == hit_pubid.corrigendums &&
172
+ query_pubid.amendments == hit_pubid.amendments
153
173
  end
154
- end
155
-
156
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
157
174
 
158
- # Sort through the results from RelatonIso, fetching them three at a time,
159
- # and return the first result that matches the code, matches the year
160
- # (if provided), and which # has a title (amendments do not).
161
- # Only expects the first page of results to be populated.
162
- # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
163
- # If no match, returns any years which caused mismatch, for error
164
- # reporting
165
- def isobib_results_filter(result, year, opts)
166
- missed_years = []
167
- hits = result.reduce!([]) do |hts, h|
168
- iyear = publish_year h.hit[:title]
169
- if !year || iyear == year
170
- hts << h
171
- else
172
- missed_years << iyear
173
- hts
174
- end
175
- end
176
- return { years: missed_years } unless hits.any?
177
-
178
- if !opts[:all_parts] || hits.size == 1
179
- return { ret: hits.first.fetch(opts[:lang]) }
180
- end
181
-
182
- { ret: hits.to_all_parts(opts[:lang]) }
183
- end
184
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
185
-
186
- def publish_year(ref)
187
- %r{:(?<year>\d{4})(?!.*:\d{4})} =~ ref
188
- year
189
- end
190
-
191
- # @param code [String]
192
- # @param year [String, NilClass]
193
- # @param opts [Hash]
194
- def isobib_get(code, year, opts)
195
- # return iev(code) if /^IEC 60050-/.match code
196
- result = isobib_search_filter(code, opts) || return
197
- ret = isobib_results_filter(result, year, opts)
198
- if ret[:ret]
199
- warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
200
- ret[:ret]
201
- else
202
- fetch_ref_err(code, year, ret[:years])
203
- end
175
+ query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
204
176
  end
205
177
  end
206
178
  end
@@ -50,43 +50,61 @@ module RelatonIso
50
50
 
51
51
  class << self
52
52
  # Parse page.
53
- # @param hit_data [Hash]
53
+ # @param hit [RelatonIso::Hit]
54
54
  # @param lang [String, NilClass]
55
- # @return [Hash]
56
- def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
55
+ # @return [RelatonIsoBib::IsoBibliographicItem]
56
+ def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
57
57
  # path = "/contents/data/standard#{hit_data['splitPath']}/"\
58
58
  # "#{hit_data['csnumber']}.html"
59
- doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
59
+
60
+ doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
60
61
 
61
62
  # Fetch edition.
62
63
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
63
64
  &.children&.last&.text&.match(/\d+/)&.to_s
65
+ hit.pubid.edition = edition if edition
64
66
 
65
67
  titles, abstract, langs = fetch_titles_abstract(doc, lang)
66
68
 
67
69
  RelatonIsoBib::IsoBibliographicItem.new(
68
70
  fetched: Date.today.to_s,
69
- docid: fetch_docid(doc, edition, langs),
70
- docnumber: fetch_docnumber(doc),
71
+ docid: fetch_relaton_docids(doc, hit.pubid),
72
+ docnumber: fetch_docnumber(hit.pubid),
71
73
  edition: edition,
72
74
  language: langs.map { |l| l[:lang] },
73
75
  script: langs.map { |l| script(l[:lang]) }.uniq,
74
76
  title: titles,
75
- doctype: fetch_type(hit_data[:title]),
77
+ doctype: fetch_type(hit.hit[:title]),
76
78
  docstatus: fetch_status(doc),
77
79
  ics: fetch_ics(doc),
78
- date: fetch_dates(doc, hit_data[:title]),
79
- contributor: fetch_contributors(hit_data[:title]),
80
+ date: fetch_dates(doc, hit.hit[:title]),
81
+ contributor: fetch_contributors(hit.hit[:title]),
80
82
  editorialgroup: fetch_workgroup(doc),
81
83
  abstract: abstract,
82
84
  copyright: fetch_copyright(doc),
83
85
  link: fetch_link(doc, url),
84
86
  relation: fetch_relations(doc),
85
87
  place: ["Geneva"],
86
- structuredidentifier: fetch_structuredidentifier(doc),
88
+ structuredidentifier: fetch_structuredidentifier(hit.pubid),
87
89
  )
88
90
  end
89
91
 
92
+ #
93
+ # Create document ids.
94
+ #
95
+ # @param doc [Nokogiri::HTML::Document] document
96
+ # @param pubid [Pubid::Iso::Identifier] pubid
97
+ #
98
+ # @return [Array<RelatonBib::DocumentIdentifier>]
99
+ #
100
+ def fetch_relaton_docids(doc, pubid)
101
+ pubid.urn_stage = stage_code(doc).to_f
102
+ [
103
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
104
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
105
+ ]
106
+ end
107
+
90
108
  private
91
109
 
92
110
  # Fetch titles and abstracts.
@@ -168,60 +186,29 @@ module RelatonIso
168
186
  end
169
187
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
170
188
 
171
- # Fetch docid.
172
- # @param doc [Nokogiri:HTML::Document]
173
- # @param edition [String]
174
- # @param langs [Array<Hash>]
175
- # @return [Array<RelatonBib::DocumentIdentifier>]
176
- def fetch_docid(doc, edition, langs)
177
- pubid = item_ref doc
178
- [
179
- RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
180
- RelatonBib::DocumentIdentifier.new(
181
- id: fetch_urn(doc, pubid, edition, langs), type: "URN",
182
- ),
183
- ]
184
- end
185
-
186
- # @param doc [Nokogiri:HTML::Document]
187
- # @param pubid [String]
188
- # @param edition [String]
189
- # @param langs [Array<Hash>]
190
- # @returnt [String]
191
- def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
192
- orig = pubid.split.first.downcase.split("/").join "-"
193
- %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
194
- _, part, _year, corr, = IsoBibliography.ref_components pubid
195
- urn = "urn:iso:std:#{orig}"
196
- urn += ":#{type.downcase}" if type
197
- urn += ":#{fetch_docnumber(doc)}"
198
- urn += ":-#{part}" if part
199
- urn += ":stage-#{stage_code(doc)}"
200
- urn += ":ed-#{edition}" if edition
201
- if corr
202
- corrparts = corr.split
203
- urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
204
- end
205
- urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
206
- urn
207
- end
208
-
209
- def fetch_docnumber(doc)
210
- item_ref(doc)&.match(/\d+/)&.to_s
189
+ #
190
+ # Generate docnumber.
191
+ #
192
+ # @param pubid [Pubid::Iso::Identifier] pubid
193
+ #
194
+ # @return [String] docnumber
195
+ #
196
+ def fetch_docnumber(pubid)
197
+ pubid.to_s.match(/\d+/)&.to_s
211
198
  end
212
199
 
213
- # @param doc [Nokogiri::HTML::Document]
214
- def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
215
- ref = item_ref doc
216
- unless ref
217
- return RelatonIsoBib::StructuredIdentifier.new(
218
- project_number: "?", part_number: "", prefix: nil, id: "?",
219
- )
220
- end
221
-
222
- m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
200
+ #
201
+ # Parse structuredidentifier.
202
+ #
203
+ # @param pubid [Pubid::Iso::Identifier] pubid
204
+ #
205
+ # @return [RelatonIsoBib::StructuredIdentifier] structured identifier
206
+ #
207
+ def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
223
208
  RelatonIsoBib::StructuredIdentifier.new(
224
- project_number: m[1], part: m[2], type: "ISO",
209
+ project_number: "#{pubid.publisher} #{pubid.number}",
210
+ part: pubid&.part&.sub(/^-/, ""),
211
+ type: pubid.publisher,
225
212
  )
226
213
  end
227
214
 
@@ -251,7 +238,7 @@ module RelatonIso
251
238
  # Fetch workgroup.
252
239
  # @param doc [Nokogiri::HTML::Document]
253
240
  # @return [Hash]
254
- def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
241
+ def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
255
242
  wg_link = doc.css("div.entry-name.entry-block a")[0]
256
243
  # wg_url = DOMAIN + wg_link['href']
257
244
  workgroup = wg_link.text.split "/"
@@ -275,6 +262,7 @@ module RelatonIso
275
262
  # @param doc [Nokogiri::HTML::Document]
276
263
  # @return [Array<Hash>]
277
264
  def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
265
+ types = ["Now", "Now under review"]
278
266
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
279
267
  r_type = r.at("h4", "h5").text
280
268
  date = []
@@ -286,14 +274,13 @@ module RelatonIso
286
274
  "updates"
287
275
  else r_type
288
276
  end
289
- if ["Now", "Now under review"].include?(type) then a
277
+ if types.include?(type) then a
290
278
  else
291
279
  a + r.css("a").map do |id|
292
- fref = RelatonBib::FormattedRef.new(
293
- content: id.text, format: "text/plain",
294
- )
280
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
281
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
295
282
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
296
- formattedref: fref, date: date,
283
+ docid: [docid], formattedref: fref, date: date,
297
284
  )
298
285
  { type: type, bibitem: bibitem }
299
286
  end
@@ -308,7 +295,7 @@ module RelatonIso
308
295
  def fetch_type(ref)
309
296
  %r{
310
297
  ^(?<prefix>ISO|IWA|IEC)
311
- (?:(/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
298
+ (?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
312
299
  (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
313
300
  }x =~ ref
314
301
  # return "international-standard" if type_match.nil?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.11.1"
4
+ VERSION = "1.12.2"
5
5
  end
data/lib/relaton_iso.rb CHANGED
@@ -2,3 +2,5 @@
2
2
 
3
3
  require "relaton_iso/version"
4
4
  require "relaton_iso/iso_bibliography"
5
+ require "pubid-iso"
6
+ require "relaton_iso/document_identifier"
data/relaton_iso.gemspec CHANGED
@@ -27,7 +27,6 @@ Gem::Specification.new do |spec|
27
27
  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
28
28
 
29
29
  spec.add_development_dependency "byebug"
30
- # spec.add_development_dependency "debase"
31
30
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
32
31
  spec.add_development_dependency "pry-byebug"
33
32
  spec.add_development_dependency "rake", "~> 13.0"
@@ -35,12 +34,11 @@ Gem::Specification.new do |spec|
35
34
  spec.add_development_dependency "rubocop"
36
35
  spec.add_development_dependency "rubocop-performance"
37
36
  spec.add_development_dependency "rubocop-rails"
38
- # spec.add_development_dependency "ruby-debug-ide"
39
37
  spec.add_development_dependency "simplecov"
40
38
  spec.add_development_dependency "vcr"
41
39
  spec.add_development_dependency "webmock"
42
40
 
43
- # spec.add_dependency "relaton-iec", "~> 1.8.0"
44
41
  spec.add_dependency "algolia"
45
- spec.add_dependency "relaton-iso-bib", "~> 1.11.0"
42
+ spec.add_dependency "pubid-iso", "~> 0.1.8"
43
+ spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
46
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.1
4
+ version: 1.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-03 00:00:00.000000000 Z
11
+ date: 2022-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -178,20 +178,34 @@ dependencies:
178
178
  - - ">="
179
179
  - !ruby/object:Gem::Version
180
180
  version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: pubid-iso
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 0.1.8
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 0.1.8
181
195
  - !ruby/object:Gem::Dependency
182
196
  name: relaton-iso-bib
183
197
  requirement: !ruby/object:Gem::Requirement
184
198
  requirements:
185
199
  - - "~>"
186
200
  - !ruby/object:Gem::Version
187
- version: 1.11.0
201
+ version: 1.12.0
188
202
  type: :runtime
189
203
  prerelease: false
190
204
  version_requirements: !ruby/object:Gem::Requirement
191
205
  requirements:
192
206
  - - "~>"
193
207
  - !ruby/object:Gem::Version
194
- version: 1.11.0
208
+ version: 1.12.0
195
209
  description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
196
210
  model'
197
211
  email:
@@ -230,7 +244,9 @@ files:
230
244
  - bin/ruby-rewrite
231
245
  - bin/safe_yaml
232
246
  - bin/setup
247
+ - bin/thor
233
248
  - lib/relaton_iso.rb
249
+ - lib/relaton_iso/document_identifier.rb
234
250
  - lib/relaton_iso/hit.rb
235
251
  - lib/relaton_iso/hit_collection.rb
236
252
  - lib/relaton_iso/iso_bibliography.rb