relaton-iso 1.11.1 → 1.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57aaa988aba37afa1e4cc8eced55b20d46eda761cad5fc8a68ac33c3793afb82
4
- data.tar.gz: 0ff660f7a7ffb558659c9d8577f075cfea9e225c2d9a7bf44d209745b0d7fc76
3
+ metadata.gz: 1f7c1d269d268e8bb7f9bc7b19b6e5466c9fb88f8a9db41f3944207535ca2edd
4
+ data.tar.gz: 4a6545be437af6c6326fed4e231c387a0a5cc7c04168fa1098c9fffc78701b29
5
5
  SHA512:
6
- metadata.gz: 8ef87dc1073df8b8653989494fda55f54cdcd780aaaa9d16e0af6e0ec6778021d73a2e81b23b18b751e95b2c607f6c27a49af50d8df1bbb95c7d9b7df76e3977
7
- data.tar.gz: 154c64258dfc69a6ae01822593e0e9759648cc2f773c126f4474cd0aad8483a16e208613ccd582e94f4ea38c4c996cee089c5d5b844782f93ae2ec33be17151c
6
+ metadata.gz: 3be88b28acc84c9877db94ef46e38488ea63b15ca2c19cd694c69d17f0e1b5c0b003a95929673d2e8a70cb69e91faef575506dab409d0c4b5e61f4dd046eb858
7
+ data.tar.gz: 209c65c8d600a34566999600cc0fbb0b06564603821c00045a09e0e00ecd2f6abf79c59774142e4dc1474dcb08b2604aeb5f1874a25b9f29e0d0dd9b420fbd4b
@@ -10,27 +10,4 @@ on:
10
10
 
11
11
  jobs:
12
12
  rake:
13
- name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
- runs-on: ${{ matrix.os }}
15
- continue-on-error: ${{ matrix.experimental }}
16
- strategy:
17
- fail-fast: false
18
- matrix:
19
- ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
- os: [ ubuntu-latest, windows-latest, macos-latest ]
21
- experimental: [ false ]
22
- steps:
23
- - uses: actions/checkout@v2
24
- with:
25
- submodules: true
26
-
27
- # https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
28
- - if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
29
- run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
30
-
31
- - uses: ruby/setup-ruby@v1
32
- with:
33
- ruby-version: ${{ matrix.ruby }}
34
- bundler-cache: true
35
-
36
- - run: bundle exec rake
13
+ uses: relaton/support/.github/workflows/rake.yml@master
data/bin/thor ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'thor' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("thor", "thor")
@@ -0,0 +1,27 @@
1
+ module RelatonIso
2
+ class DocumentIdentifier < RelatonBib::DocumentIdentifier
3
+ def id
4
+ id_str = @id.to_s.sub(/\sED\d+/, "")
5
+ if @all_parts
6
+ if type == "URN"
7
+ return "#{@id.urn}:ser"
8
+ else
9
+ return "#{id_str} (all parts)"
10
+ end
11
+ end
12
+ type == "URN" ? @id.urn.to_s : id_str
13
+ end
14
+
15
+ def remove_part
16
+ @id.part = nil
17
+ end
18
+
19
+ def remove_date
20
+ @id.year = nil
21
+ end
22
+
23
+ def all_parts
24
+ @all_parts = true
25
+ end
26
+ end
27
+ end
@@ -4,13 +4,13 @@ module RelatonIso
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
6
  # @return [RelatonIsoBib::IsoBibliographicItem]
7
- attr_writer :fetch
7
+ attr_writer :fetch, :pubid
8
8
 
9
9
  # Parse page.
10
- # @param lang [String, NilClass]
10
+ # @param lang [String, nil]
11
11
  # @return [RelatonIsoBib::IsoBibliographicItem]
12
12
  def fetch(lang = nil)
13
- @fetch ||= Scrapper.parse_page @hit, lang
13
+ @fetch ||= Scrapper.parse_page self, lang
14
14
  end
15
15
 
16
16
  # @return [Integer]
@@ -23,5 +23,10 @@ module RelatonIso
23
23
  else 4
24
24
  end
25
25
  end
26
+
27
+ # @return [Pubid::Iso::Identifier]
28
+ def pubid
29
+ @pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
30
+ end
26
31
  end
27
32
  end
@@ -11,25 +11,21 @@ module RelatonIso
11
11
  # @param text [String] reference to search
12
12
  def initialize(text)
13
13
  super
14
- @array = text.match?(/^ISO\sTC\s184\/SC\s?4/) ? fetch_github : fetch_iso
14
+ @array = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
15
15
  end
16
16
 
17
17
  # @param lang [String, NilClass]
18
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
19
19
  def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
20
20
  # parts = @array.reject { |h| h.hit["docPart"]&.empty? }
21
- hit = @array.min_by do |h|
22
- IsoBibliography.ref_components(h.hit[:title])[1].to_i
23
- end
21
+ hit = @array.min_by { |h| h.pubid.part }
24
22
  return @array.first.fetch lang unless hit
25
23
 
26
- bibitem = hit.fetch lang
24
+ bibitem = hit.fetch(lang)
27
25
  all_parts_item = bibitem.to_all_parts
28
26
  @array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
29
- %r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
30
- (?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
31
27
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
32
- formattedref: RelatonBib::FormattedRef.new(content: fr),
28
+ formattedref: RelatonBib::FormattedRef.new(content: hi.pubid.to_s),
33
29
  )
34
30
  all_parts_item.relation << RelatonBib::DocumentRelation.new(
35
31
  type: "instance", bibitem: isobib,
@@ -27,180 +27,152 @@ module RelatonIso
27
27
  # @option opts [Boolean] :keep_year if undated reference should return
28
28
  # actual reference with year
29
29
  #
30
- # @return [String] Relaton XML serialisation of reference
30
+ # @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item for the reference, or nil if no match is found
31
31
  def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
32
32
  code = ref.gsub(/\u2013/, "-")
33
- # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
34
- year ||= publish_year ref
33
+
34
+ # parse "all parts" request
35
35
  code.sub! " (all parts)", ""
36
36
  opts[:all_parts] ||= $~ && opts[:all_parts].nil?
37
- # opts[:keep_year] ||= opts[:keep_year].nil?
38
- # code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
39
- # if %r[^ISO/IEC DIR].match? code
40
- # return RelatonIec::IecBibliography.get(code, year, opts)
41
- # end
42
37
 
43
- ret = isobib_get(code, year, opts)
44
- return nil if ret.nil?
38
+ query_pubid = Pubid::Iso::Identifier.parse(code)
39
+ query_pubid.year = year if year
40
+
41
+ hits = isobib_search_filter(query_pubid, opts)
45
42
 
46
- if (year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
43
+ # return only first one if not all_parts
44
+ ret = if !opts[:all_parts] || hits.size == 1
45
+ hits.any? && hits.first.fetch(opts[:lang])
46
+ else
47
+ hits.to_all_parts(opts[:lang])
48
+ end
49
+
50
+ if ret
51
+ warn "[relaton-iso] (\"#{query_pubid}\") found #{ret.docidentifier.first.id}"
52
+ else
53
+ return fetch_ref_err(query_pubid, query_pubid.year)
54
+ end
55
+
56
+ if (query_pubid.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
47
57
  ret
48
58
  else
49
59
  ret.to_most_recent_reference
50
60
  end
51
61
  end
52
62
 
53
- def ref_components(ref)
54
- %r{
55
- ^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
56
- (?:-(?<part>[\w-]+))?
57
- (?::(?<year>\d{4}))?
58
- (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
59
- }x =~ ref
60
- [code&.strip, part, year, corr, coryear]
63
+ # @param query_pubid [Pubid::Iso::Identifier]
64
+ # @param pubid [Pubid::Iso::Identifier]
65
+ # @param all_parts [Boolean] match with any parts when true
66
+ # @return [Boolean]
67
+ def matches_parts?(query_pubid, pubid, all_parts: false)
68
+ if all_parts
69
+ # match only with documents with part number
70
+ !pubid.part.nil?
71
+ else
72
+ query_pubid.part == pubid.part
73
+ end
61
74
  end
62
75
 
63
- private
76
+ def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
77
+ query_pubid.publisher == pubid.publisher &&
78
+ query_pubid.number == pubid.number &&
79
+ query_pubid.copublisher == pubid.copublisher &&
80
+ ((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
81
+ ((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
82
+ end
64
83
 
65
- # rubocop:disable Metrics/MethodLength
84
+ # @param hit_collection [RelatonIso::HitCollection]
85
+ # @param year [String]
86
+ # @return [RelatonIso::HitCollection]
87
+ def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
88
+ missed_years = []
66
89
 
67
- def fetch_ref_err(code, year, missed_years)
68
- id = year ? "#{code}:#{year}" : code
69
- warn "[relaton-iso] WARNING: no match found online for #{id}. "\
70
- "The code must be exactly like it is on the standards website."
71
- unless missed_years.empty?
90
+ # filter by year
91
+ hits = hit_collection.select do |hit|
92
+ if hit.pubid.year == year
93
+ true
94
+ elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
95
+ hit.pubid.year = year
96
+ true
97
+ else
98
+ missed_year = hit.pubid.year || hit.hit[:year].to_s
99
+ if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
100
+ missed_years << missed_year
101
+ end
102
+ false
103
+ end
104
+ end
105
+
106
+ if hits.empty? && !missed_years.empty?
72
107
  warn "[relaton-iso] (There was no match for #{year}, though there "\
73
108
  "were matches found for #{missed_years.join(', ')}.)"
74
109
  end
75
- if /\d-\d/.match? code
110
+ hits
111
+ end
112
+
113
+ private
114
+
115
+ def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
116
+ id = year ? "#{query_pubid}:#{year}" : query_pubid
117
+ warn "[relaton-iso] WARNING: no match found online for #{id}. "\
118
+ "The code must be exactly like it is on the standards website."
119
+ if /\d-\d/.match? query_pubid.to_s
76
120
  warn "[relaton-iso] The provided document part may not exist, "\
77
121
  "or the document may no longer be published in parts."
78
122
  else
79
123
  warn "[relaton-iso] If you wanted to cite all document parts for "\
80
- "the reference, use \"#{code} (all parts)\".\nIf the document "\
124
+ "the reference, use \"#{query_pubid} (all parts)\".\nIf the document "\
81
125
  "is not a standard, use its document type abbreviation "\
82
126
  "(TS, TR, PAS, Guide)."
83
127
  end
84
128
  nil
85
129
  end
86
130
 
87
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
88
-
89
131
  # Search for hits. If none are found, try missing stages and then ISO/IEC.
90
132
  #
91
- # @param code [String] reference without correction
133
+ # @param query_pubid [Pubid::Iso::Identifier] reference without correction
92
134
  # @param opts [Hash]
93
135
  # @return [Array<RelatonIso::Hit>]
94
- def isobib_search_filter(code, opts)
95
- ref = remove_part code, opts[:all_parts]
96
- warn "[relaton-iso] (\"#{code}\") fetching..."
97
- result = search(ref)
98
- res = search_code result, code, opts
136
+ def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
137
+ query_pubid.part = nil if opts[:all_parts]
138
+ warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
139
+ # fetch hits collection
140
+ hit_collection = search(query_pubid.to_s(with_date: false))
141
+ # filter only matching hits
142
+ res = filter_hits hit_collection, query_pubid,
143
+ all_parts: opts[:all_parts]
99
144
  return res unless res.empty?
100
145
 
101
- # try stages
102
- case code
103
- when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
104
- res = try_stages(result, opts) do |st|
105
- code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
106
- end
107
- return res unless res.empty?
108
- when %r{^\w+\s\d+} # code like ISO 123
109
- res = try_stages(result, opts) do |st|
110
- code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
111
- end
112
- return res unless res.empty?
113
- end
146
+ # look for documents with any stage or type when nothing matches without them
147
+ res = filter_hits hit_collection, query_pubid,
148
+ all_parts: opts[:all_parts], any_types_stages: true
149
+ return res unless res.empty?
114
150
 
115
- if %r{^ISO\s}.match? code # try ISO/IEC if ISO not found
151
+ # TODO: do this at pubid-iso
152
+ if query_pubid.publisher == "ISO" && query_pubid.copublisher.nil? # try ISO/IEC if ISO not found
116
153
  warn "[relaton-iso] Attempting ISO/IEC retrieval"
117
- c = code.sub "ISO", "ISO/IEC"
118
- res = search_code result, c, opts
119
- end
120
- res
121
- end
122
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
123
-
124
- def remove_part(ref, all_parts)
125
- return ref unless all_parts
126
-
127
- ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
128
- end
129
-
130
- # @param result [RelatonIso::HitCollection]
131
- # @param opts [Hash]
132
- # @return [RelatonIso::HitCollection]
133
- def try_stages(result, opts)
134
- res = nil
135
- %w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
136
- c = yield st
137
- res = search_code result, c, opts
138
- return res unless res.empty?
154
+ query_pubid.copublisher = "IEC"
155
+ res = filter_hits hit_collection, query_pubid, all_parts: opts[:all_parts]
139
156
  end
140
157
  res
141
158
  end
142
159
 
143
- # @param result [RelatonIso::HitCollection]
144
- # @param code [String]
145
- # @param opts [Hash]
160
+ # @param hit_collection [RelatonIso::HitCollection]
161
+ # @param query_pubid [Pubid::Iso::Identifier]
162
+ # @param all_parts [Boolean]
163
+ # @param any_types_stages [Boolean]
146
164
  # @return [RelatonIso::HitCollection]
147
- def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
148
- code1, part1, _, corr1, coryear1 = ref_components code
149
- result.select do |i|
150
- code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
151
- code1 == code2 && ((opts[:all_parts] && part2) || (!opts[:all_parts] && part1 == part2)) &&
152
- corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
165
+ def filter_hits(hit_collection, query_pubid, all_parts: false, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
166
+ # filter out
167
+ result = hit_collection.select do |i|
168
+ hit_pubid = i.pubid
169
+ matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
170
+ matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
171
+ query_pubid.corrigendums == hit_pubid.corrigendums &&
172
+ query_pubid.amendments == hit_pubid.amendments
153
173
  end
154
- end
155
-
156
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
157
174
 
158
- # Sort through the results from RelatonIso, fetching them three at a time,
159
- # and return the first result that matches the code, matches the year
160
- # (if provided), and which # has a title (amendments do not).
161
- # Only expects the first page of results to be populated.
162
- # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
163
- # If no match, returns any years which caused mismatch, for error
164
- # reporting
165
- def isobib_results_filter(result, year, opts)
166
- missed_years = []
167
- hits = result.reduce!([]) do |hts, h|
168
- iyear = publish_year h.hit[:title]
169
- if !year || iyear == year
170
- hts << h
171
- else
172
- missed_years << iyear
173
- hts
174
- end
175
- end
176
- return { years: missed_years } unless hits.any?
177
-
178
- if !opts[:all_parts] || hits.size == 1
179
- return { ret: hits.first.fetch(opts[:lang]) }
180
- end
181
-
182
- { ret: hits.to_all_parts(opts[:lang]) }
183
- end
184
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
185
-
186
- def publish_year(ref)
187
- %r{:(?<year>\d{4})(?!.*:\d{4})} =~ ref
188
- year
189
- end
190
-
191
- # @param code [String]
192
- # @param year [String, NilClass]
193
- # @param opts [Hash]
194
- def isobib_get(code, year, opts)
195
- # return iev(code) if /^IEC 60050-/.match code
196
- result = isobib_search_filter(code, opts) || return
197
- ret = isobib_results_filter(result, year, opts)
198
- if ret[:ret]
199
- warn "[relaton-iso] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
200
- ret[:ret]
201
- else
202
- fetch_ref_err(code, year, ret[:years])
203
- end
175
+ query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
204
176
  end
205
177
  end
206
178
  end
@@ -50,43 +50,61 @@ module RelatonIso
50
50
 
51
51
  class << self
52
52
  # Parse page.
53
- # @param hit_data [Hash]
53
+ # @param hit [RelatonIso::Hit]
54
54
  # @param lang [String, NilClass]
55
- # @return [Hash]
56
- def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
55
+ # @return [RelatonIsoBib::IsoBibliographicItem]
56
+ def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
57
57
  # path = "/contents/data/standard#{hit_data['splitPath']}/"\
58
58
  # "#{hit_data['csnumber']}.html"
59
- doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
59
+
60
+ doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
60
61
 
61
62
  # Fetch edition.
62
63
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
63
64
  &.children&.last&.text&.match(/\d+/)&.to_s
65
+ hit.pubid.edition = edition if edition
64
66
 
65
67
  titles, abstract, langs = fetch_titles_abstract(doc, lang)
66
68
 
67
69
  RelatonIsoBib::IsoBibliographicItem.new(
68
70
  fetched: Date.today.to_s,
69
- docid: fetch_docid(doc, edition, langs),
70
- docnumber: fetch_docnumber(doc),
71
+ docid: fetch_relaton_docids(doc, hit.pubid),
72
+ docnumber: fetch_docnumber(hit.pubid),
71
73
  edition: edition,
72
74
  language: langs.map { |l| l[:lang] },
73
75
  script: langs.map { |l| script(l[:lang]) }.uniq,
74
76
  title: titles,
75
- doctype: fetch_type(hit_data[:title]),
77
+ doctype: fetch_type(hit.hit[:title]),
76
78
  docstatus: fetch_status(doc),
77
79
  ics: fetch_ics(doc),
78
- date: fetch_dates(doc, hit_data[:title]),
79
- contributor: fetch_contributors(hit_data[:title]),
80
+ date: fetch_dates(doc, hit.hit[:title]),
81
+ contributor: fetch_contributors(hit.hit[:title]),
80
82
  editorialgroup: fetch_workgroup(doc),
81
83
  abstract: abstract,
82
84
  copyright: fetch_copyright(doc),
83
85
  link: fetch_link(doc, url),
84
86
  relation: fetch_relations(doc),
85
87
  place: ["Geneva"],
86
- structuredidentifier: fetch_structuredidentifier(doc),
88
+ structuredidentifier: fetch_structuredidentifier(hit.pubid),
87
89
  )
88
90
  end
89
91
 
92
+ #
93
+ # Create document ids.
94
+ #
95
+ # @param doc [Nokogiri::HTML::Document] document
96
+ # @param pubid [Pubid::Iso::Identifier] pubid
97
+ #
98
+ # @return [Array<RelatonBib::DocumentIdentifier>]
99
+ #
100
+ def fetch_relaton_docids(doc, pubid)
101
+ pubid.urn_stage = stage_code(doc).to_f
102
+ [
103
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
104
+ RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
105
+ ]
106
+ end
107
+
90
108
  private
91
109
 
92
110
  # Fetch titles and abstracts.
@@ -168,60 +186,29 @@ module RelatonIso
168
186
  end
169
187
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
170
188
 
171
- # Fetch docid.
172
- # @param doc [Nokogiri:HTML::Document]
173
- # @param edition [String]
174
- # @param langs [Array<Hash>]
175
- # @return [Array<RelatonBib::DocumentIdentifier>]
176
- def fetch_docid(doc, edition, langs)
177
- pubid = item_ref doc
178
- [
179
- RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
180
- RelatonBib::DocumentIdentifier.new(
181
- id: fetch_urn(doc, pubid, edition, langs), type: "URN",
182
- ),
183
- ]
184
- end
185
-
186
- # @param doc [Nokogiri:HTML::Document]
187
- # @param pubid [String]
188
- # @param edition [String]
189
- # @param langs [Array<Hash>]
190
- # @returnt [String]
191
- def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
192
- orig = pubid.split.first.downcase.split("/").join "-"
193
- %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
194
- _, part, _year, corr, = IsoBibliography.ref_components pubid
195
- urn = "urn:iso:std:#{orig}"
196
- urn += ":#{type.downcase}" if type
197
- urn += ":#{fetch_docnumber(doc)}"
198
- urn += ":-#{part}" if part
199
- urn += ":stage-#{stage_code(doc)}"
200
- urn += ":ed-#{edition}" if edition
201
- if corr
202
- corrparts = corr.split
203
- urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
204
- end
205
- urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
206
- urn
207
- end
208
-
209
- def fetch_docnumber(doc)
210
- item_ref(doc)&.match(/\d+/)&.to_s
189
+ #
190
+ # Generate docnumber.
191
+ #
192
+ # @param pubid [Pubid::Iso::Identifier] pubid
193
+ #
194
+ # @return [String] docnumber
195
+ #
196
+ def fetch_docnumber(pubid)
197
+ pubid.to_s.match(/\d+/)&.to_s
211
198
  end
212
199
 
213
- # @param doc [Nokogiri::HTML::Document]
214
- def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
215
- ref = item_ref doc
216
- unless ref
217
- return RelatonIsoBib::StructuredIdentifier.new(
218
- project_number: "?", part_number: "", prefix: nil, id: "?",
219
- )
220
- end
221
-
222
- m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
200
+ #
201
+ # Parse structuredidentifier.
202
+ #
203
+ # @param pubid [Pubid::Iso::Identifier] pubid
204
+ #
205
+ # @return [RelatonIsoBib::StructuredIdentifier] structured identifier
206
+ #
207
+ def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
223
208
  RelatonIsoBib::StructuredIdentifier.new(
224
- project_number: m[1], part: m[2], type: "ISO",
209
+ project_number: "#{pubid.publisher} #{pubid.number}",
210
+ part: pubid&.part&.sub(/^-/, ""),
211
+ type: pubid.publisher,
225
212
  )
226
213
  end
227
214
 
@@ -251,7 +238,7 @@ module RelatonIso
251
238
  # Fetch workgroup.
252
239
  # @param doc [Nokogiri::HTML::Document]
253
240
  # @return [Hash]
254
- def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
241
+ def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
255
242
  wg_link = doc.css("div.entry-name.entry-block a")[0]
256
243
  # wg_url = DOMAIN + wg_link['href']
257
244
  workgroup = wg_link.text.split "/"
@@ -275,6 +262,7 @@ module RelatonIso
275
262
  # @param doc [Nokogiri::HTML::Document]
276
263
  # @return [Array<Hash>]
277
264
  def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
265
+ types = ["Now", "Now under review"]
278
266
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
279
267
  r_type = r.at("h4", "h5").text
280
268
  date = []
@@ -286,14 +274,13 @@ module RelatonIso
286
274
  "updates"
287
275
  else r_type
288
276
  end
289
- if ["Now", "Now under review"].include?(type) then a
277
+ if types.include?(type) then a
290
278
  else
291
279
  a + r.css("a").map do |id|
292
- fref = RelatonBib::FormattedRef.new(
293
- content: id.text, format: "text/plain",
294
- )
280
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
281
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
295
282
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
296
- formattedref: fref, date: date,
283
+ docid: [docid], formattedref: fref, date: date,
297
284
  )
298
285
  { type: type, bibitem: bibitem }
299
286
  end
@@ -308,7 +295,7 @@ module RelatonIso
308
295
  def fetch_type(ref)
309
296
  %r{
310
297
  ^(?<prefix>ISO|IWA|IEC)
311
- (?:(/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
298
+ (?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
312
299
  (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
313
300
  }x =~ ref
314
301
  # return "international-standard" if type_match.nil?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.11.1"
4
+ VERSION = "1.12.2"
5
5
  end
data/lib/relaton_iso.rb CHANGED
@@ -2,3 +2,5 @@
2
2
 
3
3
  require "relaton_iso/version"
4
4
  require "relaton_iso/iso_bibliography"
5
+ require "pubid-iso"
6
+ require "relaton_iso/document_identifier"
data/relaton_iso.gemspec CHANGED
@@ -27,7 +27,6 @@ Gem::Specification.new do |spec|
27
27
  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
28
28
 
29
29
  spec.add_development_dependency "byebug"
30
- # spec.add_development_dependency "debase"
31
30
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
32
31
  spec.add_development_dependency "pry-byebug"
33
32
  spec.add_development_dependency "rake", "~> 13.0"
@@ -35,12 +34,11 @@ Gem::Specification.new do |spec|
35
34
  spec.add_development_dependency "rubocop"
36
35
  spec.add_development_dependency "rubocop-performance"
37
36
  spec.add_development_dependency "rubocop-rails"
38
- # spec.add_development_dependency "ruby-debug-ide"
39
37
  spec.add_development_dependency "simplecov"
40
38
  spec.add_development_dependency "vcr"
41
39
  spec.add_development_dependency "webmock"
42
40
 
43
- # spec.add_dependency "relaton-iec", "~> 1.8.0"
44
41
  spec.add_dependency "algolia"
45
- spec.add_dependency "relaton-iso-bib", "~> 1.11.0"
42
+ spec.add_dependency "pubid-iso", "~> 0.1.8"
43
+ spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
46
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.1
4
+ version: 1.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-03 00:00:00.000000000 Z
11
+ date: 2022-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -178,20 +178,34 @@ dependencies:
178
178
  - - ">="
179
179
  - !ruby/object:Gem::Version
180
180
  version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: pubid-iso
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 0.1.8
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 0.1.8
181
195
  - !ruby/object:Gem::Dependency
182
196
  name: relaton-iso-bib
183
197
  requirement: !ruby/object:Gem::Requirement
184
198
  requirements:
185
199
  - - "~>"
186
200
  - !ruby/object:Gem::Version
187
- version: 1.11.0
201
+ version: 1.12.0
188
202
  type: :runtime
189
203
  prerelease: false
190
204
  version_requirements: !ruby/object:Gem::Requirement
191
205
  requirements:
192
206
  - - "~>"
193
207
  - !ruby/object:Gem::Version
194
- version: 1.11.0
208
+ version: 1.12.0
195
209
  description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
196
210
  model'
197
211
  email:
@@ -230,7 +244,9 @@ files:
230
244
  - bin/ruby-rewrite
231
245
  - bin/safe_yaml
232
246
  - bin/setup
247
+ - bin/thor
233
248
  - lib/relaton_iso.rb
249
+ - lib/relaton_iso/document_identifier.rb
234
250
  - lib/relaton_iso/hit.rb
235
251
  - lib/relaton_iso/hit_collection.rb
236
252
  - lib/relaton_iso/iso_bibliography.rb