relaton-iso 1.18.1 → 1.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,205 +6,197 @@ require "relaton_iso/hit_collection"
6
6
  # require "relaton_iec"
7
7
 
8
8
  module RelatonIso
9
- # Class methods for search ISO standards.
10
- class IsoBibliography
11
- class << self
12
- # @param text [String]
13
- # @return [RelatonIso::HitCollection]
14
- def search(text)
15
- HitCollection.new(text.gsub("\u2013", "-")).fetch
16
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
17
- EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
18
- Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
19
- Algolia::AlgoliaUnreachableHostError => e
20
- raise RelatonBib::RequestError, e.message
21
- end
9
+ # Methods for searching ISO standards.
10
+ module IsoBibliography
11
+ extend self
12
+
13
+ # @param pubid [Pubid::Iso::Identifier, String] identifier to search for; a String is parsed into an identifier first
14
+ # @return [RelatonIso::HitCollection]
15
+ def search(pubid, opts = {})
16
+ pubid = Pubid::Iso::Identifier.parse(pubid) if pubid.is_a? String
17
+ HitCollection.new(pubid, opts).fetch
18
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
19
+ EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
20
+ Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
21
+ Algolia::AlgoliaUnreachableHostError => e
22
+ raise RelatonBib::RequestError, e.message
23
+ end
22
24
 
23
- # @param ref [String] the ISO standard Code to look up (e..g "ISO 9000")
24
- # @param year [String, NilClass] the year the standard was published
25
- # @param opts [Hash] options; restricted to :all_parts if all-parts
26
- # @option opts [Boolean] :all_parts if all-parts reference is required
27
- # @option opts [Boolean] :keep_year if undated reference should return
28
- # actual reference with year
29
- #
30
- # @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference
31
- def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
32
- code = ref.gsub("\u2013", "-")
33
-
34
- # parse "all parts" request
35
- code.sub! " (all parts)", ""
36
- opts[:all_parts] ||= $~ && opts[:all_parts].nil?
37
-
38
- query_pubid = Pubid::Iso::Identifier.parse(code)
39
- query_pubid.year = year if year
40
- query_pubid.part = nil if opts[:all_parts]
41
- Util.warn "(#{query_pubid}) Fetching from iso.org ..."
42
-
43
- hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
44
- tip_ids = look_up_with_any_types_stages(hits, ref, opts)
45
-
46
- ret = if !opts[:all_parts] || hits.size == 1
47
- hits.any? && hits.first.fetch(opts[:lang])
48
- else
49
- hits.to_all_parts(opts[:lang])
50
- end
51
-
52
- return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
53
-
54
- response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
55
- response_pubid = Pubid::Iso::Identifier.parse(response_docid)
56
-
57
- Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
58
-
59
- get_all = (
60
- (query_pubid.year && opts[:keep_year].nil?) ||
61
- opts[:keep_year] ||
62
- opts[:all_parts]
63
- )
64
- return ret if get_all
65
-
66
- ret.to_most_recent_reference
67
- rescue Pubid::Core::Errors::ParseError
68
- Util.warn "(#{code}) Is not recognized as a standards identifier."
69
- nil
70
- end
25
+ # @param ref [String] the ISO standard code to look up (e.g. "ISO 9000")
26
+ # @param year [String, NilClass] the year the standard was published
27
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
28
+ # @option opts [Boolean] :all_parts if all-parts reference is required
29
+ # @option opts [Boolean] :keep_year if undated reference should return
30
+ # actual reference with year
31
+ #
32
+ # @return [RelatonIsoBib::IsoBibliographicItem] Bibliographic item
33
+ def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
34
+ code = ref.gsub("\u2013", "-")
35
+
36
+ # parse "all parts" request
37
+ code.sub! " (all parts)", ""
38
+ opts[:all_parts] ||= $~ && opts[:all_parts].nil?
39
+
40
+ query_pubid = Pubid::Iso::Identifier.parse(code)
41
+ query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
42
+ Util.warn "(#{query_pubid}) Fetching from Relaton repository ..."
43
+
44
+ hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
45
+ tip_ids = look_up_with_any_types_stages(hits, ref, opts)
46
+ ret = hits.fetch_doc
47
+ return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
48
+
49
+ response_pubid = ret.docidentifier.first.id # .sub(" (all parts)", "")
50
+ Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
51
+ get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
52
+ return ret if get_all
53
+
54
+ ret.to_most_recent_reference
55
+ rescue Pubid::Core::Errors::ParseError
56
+ Util.warn "(#{code}) Is not recognized as a standards identifier."
57
+ nil
58
+ end
71
59
 
72
- # @param query_pubid [Pubid::Iso::Identifier]
73
- # @param pubid [Pubid::Iso::Identifier]
74
- # @param all_parts [Boolean] match with any parts when true
75
- # @return [Boolean]
76
- def matches_parts?(query_pubid, pubid, all_parts: false)
77
- # match only with documents with part number
78
- return !pubid.part.nil? if all_parts
60
+ # @param query_pubid [Pubid::Iso::Identifier]
61
+ # @param pubid [Pubid::Iso::Identifier]
62
+ # @param all_parts [Boolean] match with any parts when true
63
+ # @return [Boolean]
64
+ def matches_parts?(query_pubid, pubid, all_parts: false)
65
+ # match only with documents with part number
66
+ return !pubid.part.nil? if all_parts
79
67
 
80
- query_pubid.part == pubid.part
81
- end
68
+ query_pubid.part == pubid.part
69
+ end
82
70
 
83
- #
84
- # Matches base of query_pubid and pubid.
85
- #
86
- # @param [Pubid::Iso::Identifier] query_pubid pubid to match
87
- # @param [Pubid::Iso::Identifier] pubid pubid to match
88
- # @param [Boolean] any_types_stages match with any types and stages
89
- #
90
- # @return [<Type>] <description>
91
- #
92
- def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
93
- return false unless pubid.respond_to?(:publisher)
94
-
95
- query_pubid.publisher == pubid.publisher &&
96
- query_pubid.number == pubid.number &&
97
- query_pubid.copublisher == pubid.copublisher &&
98
- (any_types_stages || query_pubid.stage == pubid.stage) &&
99
- (any_types_stages || query_pubid.is_a?(pubid.class))
71
+ #
72
+ # Matches base of query_pubid and pubid.
73
+ #
74
+ # @param [Pubid::Iso::Identifier] query_pubid pubid to match
75
+ # @param [Pubid::Iso::Identifier] pubid pubid to match
76
+ # @param [Boolean] any_types_stages match with any types and stages
77
+ #
78
+ # @return [Boolean] true when publisher, number and copublisher match (and stage and type too, unless any_types_stages is set)
79
+ #
80
+ def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics?PerceivedComplexity
81
+ return false unless pubid.respond_to?(:publisher)
82
+
83
+ query_pubid.publisher == pubid.publisher &&
84
+ query_pubid.number == pubid.number &&
85
+ query_pubid.copublisher == pubid.copublisher &&
86
+ (any_types_stages || query_pubid.stage == pubid.stage) &&
87
+ (any_types_stages || query_pubid.is_a?(pubid.class))
88
+ end
89
+
90
+ # @param hit_collection [RelatonIso::HitCollection]
91
+ # @param year [String]
92
+ # @return [Array<RelatonIso::HitCollection, Set<String>>] hits and missed year IDs
93
+ def filter_hits_by_year(hit_collection, year)
94
+ missed_year_ids = Set.new
95
+ return [hit_collection, missed_year_ids] if year.nil?
96
+
97
+ # filter by year
98
+ hits = hit_collection.select do |hit|
99
+ hit.pubid.year ||= hit.hit[:year]
100
+ next true if check_year(year, hit)
101
+
102
+ missed_year_ids << hit.pubid.to_s if hit.pubid.year
103
+ false
100
104
  end
101
105
 
102
- # @param hit_collection [RelatonIso::HitCollection]
103
- # @param year [String]
104
- # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
105
- def filter_hits_by_year(hit_collection, year)
106
- missed_year_ids = Set.new
107
- return [hit_collection, missed_year_ids] if year.nil?
106
+ [hits, missed_year_ids]
107
+ end
108
+
109
+ private
108
110
 
109
- # filter by year
110
- hits = hit_collection.select do |hit|
111
- hit.pubid.year ||= hit.hit[:year]
112
- next true if check_year(year, hit)
111
+ def check_year(year, hit) # rubocop:disable Metrics/AbcSize
112
+ (hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) ||
113
+ (!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) ||
114
+ (!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s)
115
+ end
113
116
 
114
- missed_year_ids << hit.pubid.to_s if hit.pubid.year
115
- false
116
- end
117
+ # @param pubid [Pubid::Iso::Identifier] PubID with no results
118
+ def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
119
+ Util.warn "(#{pubid}) Not found."
117
120
 
118
- [hits, missed_year_ids]
121
+ if missed_year_ids.any?
122
+ ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
123
+ Util.warn "(#{pubid}) TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}."
119
124
  end
120
125
 
121
- private
122
-
123
- def check_year(year, hit) # rubocop:disable Metrics/AbcSize
124
- (hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) ||
125
- (!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) ||
126
- (!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s)
126
+ if tip_ids.any?
127
+ ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
128
+ Util.warn "(#{pubid}) TIP: Matches exist for #{ids}."
127
129
  end
128
130
 
129
- # @param pubid [Pubid::Iso::Identifier] PubID with no results
130
- def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
131
- Util.warn "(#{pubid}) Not found."
132
-
133
- if missed_year_ids.any?
134
- ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
135
- Util.warn "(#{pubid}) TIP: No match for edition year " \
136
- "#{pubid.year}, but matches exist for #{ids}."
137
- end
138
-
139
- if tip_ids.any?
140
- ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
141
- Util.warn "(#{pubid}) TIP: Matches exist for #{ids}."
142
- end
143
-
144
- if pubid.part
145
- Util.warn "(#{pubid}) TIP: If it cannot be found, " \
146
- "the document may no longer be published in parts."
147
- else
148
- Util.warn "(#{pubid}) TIP: If you wish to cite " \
149
- "all document parts for the reference, use " \
150
- "`#{pubid.to_s(format: :ref_undated)} (all parts)`."
151
- end
152
-
153
- nil
131
+ if pubid.part
132
+ Util.warn "(#{pubid}) TIP: If it cannot be found, the document may no longer be published in parts."
133
+ else
134
+ Util.warn "(#{pubid}) TIP: If you wish to cite all document parts for the reference, " \
135
+ "use `#{pubid.to_s(format: :ref_undated)} (all parts)`."
154
136
  end
155
137
 
156
- def look_up_with_any_types_stages(hits, ref, opts) # rubocop:disable Metrics/MethodLength
157
- found_ids = []
158
- return found_ids if hits.from_gh || hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)
138
+ nil
139
+ end
140
+
141
+ def look_up_with_any_types_stages(hits, ref, opts)
142
+ return [] if hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)
159
143
 
160
- ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
161
- pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
162
- resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
163
- resp.map &:pubid
164
- end
144
+ ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
145
+ pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
146
+ resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
147
+ resp.map &:pubid
148
+ end
165
149
 
166
- #
167
- # Search for hits. If no found then trying missed stages.
168
- #
169
- # @param query_pubid [Pubid::Iso::Identifier] reference without correction
170
- # @param opts [Hash]
171
- # @param any_types_stages [Boolean] match with any stages
172
- #
173
- # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years
174
- #
175
- def isobib_search_filter(query_pubid, opts, any_types_stages: false) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
176
- query_pubid_without_year = query_pubid.dup
177
- # remove year for query
178
- query_pubid_without_year.year = nil
179
- hit_collection = search(query_pubid_without_year.to_s)
180
-
181
- # filter only matching hits
182
- filter_hits hit_collection, query_pubid, opts[:all_parts],
183
- any_types_stages
150
+ #
151
+ # Search for hits and keep only those that match the query identifier.
152
+ #
153
+ # @param query_pubid [Pubid::Iso::Identifier] reference without correction
154
+ # @param opts [Hash]
155
+ # @param any_types_stages [Boolean] match with any stages
156
+ #
157
+ # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years
158
+ #
159
+ def isobib_search_filter(query_pubid, opts, any_types_stages: false)
160
+ hit_collection = search(query_pubid, opts)
161
+
162
+ # filter only matching hits
163
+ filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
164
+ end
165
+
166
+ #
167
+ # Filter hits by query_pubid.
168
+ #
169
+ # @param hit_collection [RelatonIso::HitCollection]
170
+ # @param query_pubid [Pubid::Iso::Identifier]
171
+ # @param all_parts [Boolean]
172
+ # @param any_types_stages [Boolean]
173
+ #
174
+ # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
175
+ #
176
+ def filter_hits(hit_collection, query_pubid, all_parts, any_types_stages)
177
+ # keep only hits whose pubid matches the query once the excluded attributes are dropped
178
+ excludings = build_excludings(all_parts, any_types_stages)
179
+ no_year_ref = hit_collection.ref_pubid_no_year.exclude(*excludings)
180
+ result = hit_collection.select do |i|
181
+ pubid_match?(i.pubid, query_pubid, excludings, no_year_ref) && !(all_parts && i.pubid.part.nil?)
184
182
  end
185
183
 
186
- #
187
- # Filter hits by query_pubid.
188
- #
189
- # @param hit_collection [RelatonIso::HitCollection]
190
- # @param query_pubid [Pubid::Iso::Identifier]
191
- # @param all_parts [Boolean]
192
- # @param any_stypes_tages [Boolean]
193
- #
194
- # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
195
- #
196
- def filter_hits(hit_collection, query_pubid, all_parts, any_stypes_tages) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
197
- # filter out
198
- result = hit_collection.select do |i|
199
- hit_pubid = i.pubid
200
- matches_base?(query_pubid, hit_pubid,
201
- any_types_stages: any_stypes_tages) &&
202
- matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
203
- query_pubid.corrigendums == hit_pubid.corrigendums &&
204
- query_pubid.amendments == hit_pubid.amendments
205
- end
206
-
207
- filter_hits_by_year(result, query_pubid.year)
184
+ filter_hits_by_year(result, query_pubid.year)
185
+ end
186
+
187
+ def build_excludings(all_parts, any_types_stages)
188
+ excludings = %i[year edition]
189
+ excludings += %i[type stage iteration] if any_types_stages
190
+ excludings << :part if all_parts
191
+ excludings
192
+ end
193
+
194
+ def pubid_match?(pubid, query_pubid, excludings, no_year_ref)
195
+ if pubid.is_a? String then pubid == query_pubid.to_s
196
+ else
197
+ pubid = pubid.dup
198
+ pubid.base = pubid.base.exclude(:year, :edition) if pubid.base
199
+ pubid.exclude(*excludings) == no_year_ref
208
200
  end
209
201
  end
210
202
  end
@@ -9,16 +9,29 @@ module RelatonIso
9
9
  @prefix = "ISO"
10
10
  @defaultprefix = %r{^ISO(/IEC)?\s}
11
11
  @idtype = "ISO"
12
+ @datasets = %w[iso-ics]
12
13
  end
13
14
 
14
15
  # @param code [String]
15
- # @param date [String, NilClass] year
16
+ # @param date [String, nil] year
16
17
  # @param opts [Hash]
17
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
18
19
  def get(code, date, opts)
19
20
  ::RelatonIso::IsoBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from https://www.iso.org/standards-catalogue/browse-by-ics.html
25
+ #
26
+ # @param [String] _source dataset name (iso-ics); not used by this method
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format output format (xml, yaml, bibxml)
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonIsoBib::IsoBibliographicItem]
24
37
  def from_xml(xml)
@@ -28,7 +41,7 @@ module RelatonIso
28
41
  # @param hash [Hash]
29
42
  # @return [RelatonIsoBib::IsoBibliographicItem]
30
43
  def hash_to_bib(hash)
31
- item_hash = ::RelatonIsoBib::HashConverter.hash_to_bib(hash)
44
+ item_hash = HashConverter.hash_to_bib(hash)
32
45
  ::RelatonIsoBib::IsoBibliographicItem.new(**item_hash)
33
46
  end
34
47
 
@@ -43,5 +56,12 @@ module RelatonIso
43
56
  def threads
44
57
  3
45
58
  end
59
+
60
+ #
61
+ # Remove index file
62
+ #
63
+ def remove_index_file
64
+ Relaton::Index.find_or_create(:iso, url: true, file: HitCollection::INDEXFILE).remove_file
65
+ end
46
66
  end
47
67
  end
@@ -0,0 +1,61 @@
1
+ module RelatonIso
2
+ #
3
+ # Queue of links to fetch.
4
+ #
5
+ class Queue
6
+ extend Forwardable
7
+ def_delegator :queue, :[]
8
+
9
+ FILE = "iso-queue.txt".freeze
10
+
11
+ #
12
+ # Read the queue file if it exists; otherwise start with a new empty queue.
13
+ #
14
+ # @return [Array<String>] queue
15
+ #
16
+ def queue
17
+ @queue ||= File.exist?(FILE) ? File.read(FILE).split("\n") : []
18
+ end
19
+
20
+ #
21
+ # Add item to queue at first position if it is not already there.
22
+ #
23
+ # @param [String] item item to add
24
+ #
25
+ # @return [void]
26
+ #
27
+ def add_first(item)
28
+ queue.unshift item unless queue.include? item
29
+ end
30
+
31
+ #
32
+ # Move or add item to the end of the queue.
33
+ #
34
+ # @param [String] item item to move or add
35
+ #
36
+ # @return [void]
37
+ #
38
+ def move_last(item)
39
+ queue.delete item
40
+ queue << item
41
+ end
42
+
43
+ #
44
+ # Take first item from the queue.
45
+ #
46
+ # @return [String] an item
47
+ #
48
+ # def take_first
49
+ # queue.shift
50
+ # end
51
+
52
+ #
53
+ # Save queue to file.
54
+ #
55
+ # @return [void]
56
+ #
57
+ def save
58
+ File.write FILE, queue.to_a.join("\n")
59
+ end
60
+ end
61
+ end