relaton-iso 1.18.1 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,205 +6,196 @@ require "relaton_iso/hit_collection"
6
6
  # require "relaton_iec"
7
7
 
8
8
  module RelatonIso
9
- # Class methods for search ISO standards.
10
- class IsoBibliography
11
- class << self
12
- # @param text [String]
13
- # @return [RelatonIso::HitCollection]
14
- def search(text)
15
- HitCollection.new(text.gsub("\u2013", "-")).fetch
16
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
17
- EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
18
- Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
19
- Algolia::AlgoliaUnreachableHostError => e
20
- raise RelatonBib::RequestError, e.message
21
- end
9
# Methods for searching ISO standards.
#
# The module extends itself, so every method below is callable directly on
# the module (e.g. `IsoBibliography.get("ISO 9000")`).
module IsoBibliography
  extend self

  #
  # Build a hit collection for the identifier and fetch it.
  #
  # @param pubid [Pubid::Iso::Identifier, String] identifier to look for;
  #   a String is parsed with Pubid::Iso::Identifier.parse first
  # @param opts [Hash] options handed over to HitCollection.new unchanged
  #
  # @return [RelatonIso::HitCollection]
  #
  # @raise [RelatonBib::RequestError] when one of the listed network errors
  #   is raised while fetching
  #
  def search(pubid, opts = {})
    pubid = Pubid::Iso::Identifier.parse(pubid) if pubid.is_a? String
    HitCollection.new(pubid, opts).fetch
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
         EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
         Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT => e
    raise RelatonBib::RequestError, e.message
  end

  #
  # Look up a single bibliographic item by its reference.
  #
  # @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
  # @param year [String, NilClass] the year the standard was published
  # @param opts [Hash] options; restricted to :all_parts if all-parts
  # @option opts [Boolean] :all_parts if all-parts reference is required
  # @option opts [Boolean] :keep_year if undated reference should return
  #   actual reference with year
  #
  # @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item, or
  #   nil when nothing is found or the reference cannot be parsed
  #
  def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
    code = ref.gsub("\u2013", "-")

    # Parse an "all parts" request. `sub!` returns nil when the suffix was not
    # present, so its result doubles as the "suffix was given" flag.
    all_parts_suffix = code.sub!(" (all parts)", "")
    opts[:all_parts] ||= all_parts_suffix && opts[:all_parts].nil?

    query_pubid = Pubid::Iso::Identifier.parse(code)
    # `&.` skips nil explicitly, because nil itself responds to #to_i.
    query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
    Util.warn "(#{query_pubid}) Fetching from Relaton repository ..."

    hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
    tip_ids = look_up_with_any_types_stages(hits, ref, opts)
    ret = hits.fetch_doc
    return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret

    response_pubid = ret.docidentifier.first.id
    Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"

    # Return the dated item when a year was requested (and :keep_year was not
    # given), when :keep_year is set, or when all parts were requested.
    get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
    return ret if get_all

    ret.to_most_recent_reference
  rescue Pubid::Core::Errors::ParseError
    Util.warn "(#{code}) Is not recognized as a standards identifier."
    nil
  end

  #
  # Compare the part of two identifiers.
  #
  # @param query_pubid [Pubid::Iso::Identifier]
  # @param pubid [Pubid::Iso::Identifier]
  # @param all_parts [Boolean] match with any parts when true
  #
  # @return [Boolean]
  #
  def matches_parts?(query_pubid, pubid, all_parts: false)
    # match only with documents with part number
    return !pubid.part.nil? if all_parts

    query_pubid.part == pubid.part
  end

  #
  # Matches base of query_pubid and pubid: publisher, number and copublisher
  # must be equal; stage and class are compared unless any_types_stages is set.
  #
  # @param [Pubid::Iso::Identifier] query_pubid pubid to match
  # @param [Pubid::Iso::Identifier] pubid pubid to match
  # @param [Boolean] any_types_stages match with any types and stages
  #
  # @return [Boolean] false when pubid does not respond to #publisher,
  #   otherwise the result of the comparison
  #
  def matches_base?(query_pubid, pubid, any_types_stages: false)
    return false unless pubid.respond_to?(:publisher)

    query_pubid.publisher == pubid.publisher &&
      query_pubid.number == pubid.number &&
      query_pubid.copublisher == pubid.copublisher &&
      (any_types_stages || query_pubid.stage == pubid.stage) &&
      (any_types_stages || query_pubid.is_a?(pubid.class))
  end

  #
  # Keep only the hits whose year matches. A hit without a year in its pubid
  # gets the year from its hit data (`hit.hit[:year]`) before the comparison.
  #
  # @param hit_collection [RelatonIso::HitCollection]
  # @param year [String, Integer, nil] nil disables the filtering
  #
  # @return [Array<RelatonIso::HitCollection, Set<String>>] hits and the IDs
  #   of dated hits that were rejected because of the year
  #
  def filter_hits_by_year(hit_collection, year)
    missed_year_ids = Set.new
    return [hit_collection, missed_year_ids] if year.nil?

    hits = hit_collection.select do |hit|
      hit.pubid.year ||= hit.hit[:year]
      next true if check_year(year, hit)

      missed_year_ids << hit.pubid.to_s if hit.pubid.year
      false
    end

    [hits, missed_year_ids]
  end

  private

  #
  # A hit matches the year when its own year is equal to it or, for a hit
  # that has a base identifier, when the year of the base is equal to it.
  # Years are compared as strings.
  #
  # @param year [String, Integer]
  # @param hit [RelatonIso::Hit]
  #
  # @return [Boolean]
  #
  def check_year(year, hit)
    wanted = year.to_s
    pubid = hit.pubid
    return true if pubid.year.to_s == wanted

    base = pubid.base
    !base.nil? && base.year.to_s == wanted
  end

  #
  # Log "not found" and the tips that may help to correct the reference.
  #
  # @param pubid [Pubid::Iso::Identifier] PubID with no results
  # @param missed_year_ids [Enumerable<String>] IDs rejected because of year
  # @param tip_ids [Enumerable] IDs found with any type and stage
  #
  # @return [nil]
  #
  def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
    Util.warn "(#{pubid}) Not found."

    if missed_year_ids.any?
      ids = quote_ids(missed_year_ids)
      Util.warn "(#{pubid}) TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}."
    end

    if tip_ids.any?
      ids = quote_ids(tip_ids)
      Util.warn "(#{pubid}) TIP: Matches exist for #{ids}."
    end

    if pubid.part
      Util.warn "(#{pubid}) TIP: If it cannot be found, the document may no longer be published in parts."
    else
      Util.warn "(#{pubid}) TIP: If you wish to cite all document parts for the reference, " \
                "use `#{pubid.to_s(format: :ref_undated)} (all parts)`."
    end

    nil
  end

  #
  # Wrap every ID in backticks and join the IDs with ", ".
  #
  # @param ids [Enumerable]
  #
  # @return [String]
  #
  def quote_ids(ids)
    ids.map { |i| "`#{i}`" }.join(", ")
  end

  #
  # When nothing was found for a reference of the form "ISO/XX ..." or
  # "ISO XX ...", repeat the search without the letters that follow "ISO"
  # and with any type and stage allowed.
  #
  # @param hits [RelatonIso::HitCollection] hits found for the original query
  # @param ref [String] original reference
  # @param opts [Hash]
  #
  # @return [Array] pubids of the hits found by the relaxed search, or an
  #   empty array when the relaxed search is not performed
  #
  def look_up_with_any_types_stages(hits, ref, opts)
    return [] if hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)

    ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
    pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
    resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
    resp.map(&:pubid)
  end

  #
  # Search for hits and filter them against the query identifier.
  #
  # @param query_pubid [Pubid::Iso::Identifier] reference without correction
  # @param opts [Hash]
  # @param any_types_stages [Boolean] match with any stages
  #
  # @return [Array<RelatonIso::HitCollection, Set<String>>] hits and missed years
  #
  def isobib_search_filter(query_pubid, opts, any_types_stages: false)
    hit_collection = search(query_pubid, opts)

    # filter only matching hits
    filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
  end

  #
  # Filter hits by query_pubid, then by the year of query_pubid.
  #
  # @param hit_collection [RelatonIso::HitCollection]
  # @param query_pubid [Pubid::Iso::Identifier]
  # @param all_parts [Boolean]
  # @param any_types_stages [Boolean]
  #
  # @return [Array<RelatonIso::HitCollection, Set<String>>] hits and missed year IDs
  #
  def filter_hits(hit_collection, query_pubid, all_parts, any_types_stages)
    excludings = build_excludings(all_parts, any_types_stages)
    no_year_ref = hit_collection.ref_pubid_no_year.exclude(*excludings)
    result = hit_collection.select do |i|
      # for an all-parts request, hits without a part are dropped
      pubid_match?(i.pubid, query_pubid, excludings, no_year_ref) && !(all_parts && i.pubid.part.nil?)
    end

    filter_hits_by_year(result, query_pubid.year)
  end

  #
  # List the identifier attributes to leave out of the comparison.
  #
  # @param all_parts [Boolean] additionally leave out :part
  # @param any_types_stages [Boolean] additionally leave out :type, :stage
  #   and :iteration
  #
  # @return [Array<Symbol>]
  #
  def build_excludings(all_parts, any_types_stages)
    excludings = %i[year edition]
    excludings += %i[type stage iteration] if any_types_stages
    excludings << :part if all_parts
    excludings
  end

  #
  # Check whether the pubid of a hit matches the query.
  #
  # @param pubid [Pubid::Iso::Identifier, String] pubid of a hit; a String is
  #   compared with the string form of query_pubid
  # @param query_pubid [Pubid::Iso::Identifier]
  # @param excludings [Array<Symbol>] attributes to leave out
  # @param no_year_ref [Pubid::Iso::Identifier] reference to compare with
  #
  # @return [Boolean]
  #
  def pubid_match?(pubid, query_pubid, excludings, no_year_ref)
    return pubid == query_pubid.to_s if pubid.is_a? String

    # work on a copy so the pubid of the hit keeps its base untouched
    candidate = pubid.dup
    candidate.base = candidate.base.exclude(:year, :edition) if candidate.base
    candidate.exclude(*excludings) == no_year_ref
  end
end
@@ -9,16 +9,29 @@ module RelatonIso
9
9
  @prefix = "ISO"
10
10
  @defaultprefix = %r{^ISO(/IEC)?\s}
11
11
  @idtype = "ISO"
12
+ @datasets = %w[iso-ics]
12
13
  end
13
14
 
14
15
  # @param code [String]
15
- # @param date [String, NilClass] year
16
+ # @param date [String, nil] year
16
17
  # @param opts [Hash]
17
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
18
19
  def get(code, date, opts)
19
20
  ::RelatonIso::IsoBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from https://www.iso.org/standards-catalogue/browse-by-ics.html
25
+ #
26
+ # @param [String] _source source name (iso-ics); not used by this method
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format output format (xml, yaml, bibxml)
30
+ #
31
+ def fetch_data(_source, opts)
32
  # The source name is ignored; opts is splatted into keyword arguments for DataFetcher.fetch.
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonIsoBib::IsoBibliographicItem]
24
37
  def from_xml(xml)
@@ -28,7 +41,7 @@ module RelatonIso
28
41
  # @param hash [Hash]
29
42
  # @return [RelatonIsoBib::IsoBibliographicItem]
30
43
  def hash_to_bib(hash)
31
- item_hash = ::RelatonIsoBib::HashConverter.hash_to_bib(hash)
44
+ item_hash = HashConverter.hash_to_bib(hash)
32
45
  ::RelatonIsoBib::IsoBibliographicItem.new(**item_hash)
33
46
  end
34
47
 
@@ -43,5 +56,12 @@ module RelatonIso
43
56
  def threads
44
57
  3
45
58
  end
59
+
60
+ #
61
+ # Remove index file
62
+ #
63
+ def remove_index_file
64
  # Obtains the :iso index for HitCollection::INDEXFILE through Relaton::Index.find_or_create
  # and calls remove_file on it.
+ Relaton::Index.find_or_create(:iso, url: true, file: HitCollection::INDEXFILE).remove_file
65
+ end
46
66
  end
47
67
  end
@@ -0,0 +1,61 @@
1
# Forwardable is part of the standard library but is not loaded by default;
# requiring it here lets this file be loaded on its own.
require "forwardable"

module RelatonIso
  #
  # Queue of links to fetch. The items are kept in memory as an Array of
  # strings and are written to FILE only when #save is called.
  #
  class Queue
    extend Forwardable
    # Index access (`queue_object[0]`) is forwarded to the underlying Array.
    def_delegator :queue, :[]

    # Name of the queue file; it is a relative path, so it is resolved against
    # the current working directory.
    FILE = "iso-queue.txt".freeze

    #
    # Open queue file if exist. If not, create new empty queue.
    # The result is memoized, so the file is read at most once per instance.
    #
    # @return [Array<String>] queue, one item per line of the file
    #
    def queue
      @queue ||= File.exist?(FILE) ? File.read(FILE).split("\n") : []
    end

    #
    # Add item to queue at first position if it is not already there.
    #
    # @param [String] item item to add
    #
    # @return [void]
    #
    def add_first(item)
      queue.unshift(item) unless queue.include?(item)
    end

    #
    # Move or add item to the end of the queue.
    #
    # @param [String] item item to move or add
    #
    # @return [void]
    #
    def move_last(item)
      queue.delete(item)
      queue << item
    end

    #
    # Take first item from the queue.
    #
    # @return [String] an item
    #
    # def take_first
    #   queue.shift
    # end

    #
    # Save queue to file, one item per line (no trailing newline).
    #
    # @return [void]
    #
    def save
      File.write(FILE, queue.join("\n"))
    end
  end
end