relaton-iso 1.18.0 → 1.18.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,205 +6,197 @@ require "relaton_iso/hit_collection"
6
6
  # require "relaton_iec"
7
7
 
8
8
  module RelatonIso
9
- # Class methods for search ISO standards.
10
- class IsoBibliography
11
- class << self
12
- # @param text [String]
13
- # @return [RelatonIso::HitCollection]
14
- def search(text)
15
- HitCollection.new(text.gsub("\u2013", "-")).fetch
16
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
17
- EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
18
- Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
19
- Algolia::AlgoliaUnreachableHostError => e
20
- raise RelatonBib::RequestError, e.message
21
- end
9
+ # Methods for searching ISO standards.
10
+ module IsoBibliography
11
+ extend self
12
+
13
# Build a hit collection for an identifier and fetch it.
#
# @param pubid [Pubid::Iso::Identifier, String] identifier to search for; a
#   String is first parsed with Pubid::Iso::Identifier.parse
# @param opts [Hash] options passed unchanged to HitCollection.new
# @return [RelatonIso::HitCollection]
# @raise [RelatonBib::RequestError] when one of the socket, timeout, HTTP,
#   SSL or Algolia errors listed in the rescue clause is raised; the original
#   error message is preserved
def search(pubid, opts = {})
  pubid = Pubid::Iso::Identifier.parse(pubid) if pubid.is_a?(String)
  HitCollection.new(pubid, opts).fetch
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
       EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
       Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
       Algolia::AlgoliaUnreachableHostError => e
  raise RelatonBib::RequestError, e.message
end
22
24
 
23
- # @param ref [String] the ISO standard Code to look up (e..g "ISO 9000")
24
- # @param year [String, NilClass] the year the standard was published
25
- # @param opts [Hash] options; restricted to :all_parts if all-parts
26
- # @option opts [Boolean] :all_parts if all-parts reference is required
27
- # @option opts [Boolean] :keep_year if undated reference should return
28
- # actual reference with year
29
- #
30
- # @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference
31
- def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
32
- code = ref.gsub("\u2013", "-")
33
-
34
- # parse "all parts" request
35
- code.sub! " (all parts)", ""
36
- opts[:all_parts] ||= $~ && opts[:all_parts].nil?
37
-
38
- query_pubid = Pubid::Iso::Identifier.parse(code)
39
- query_pubid.year = year if year
40
- query_pubid.part = nil if opts[:all_parts]
41
- Util.warn "(#{query_pubid}) Fetching from iso.org ..."
42
-
43
- hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
44
- tip_ids = look_up_with_any_types_stages(hits, ref, opts)
45
-
46
- ret = if !opts[:all_parts] || hits.size == 1
47
- hits.any? && hits.first.fetch(opts[:lang])
48
- else
49
- hits.to_all_parts(opts[:lang])
50
- end
51
-
52
- return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
53
-
54
- response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
55
- response_pubid = Pubid::Iso::Identifier.parse(response_docid)
56
-
57
- Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
58
-
59
- get_all = (
60
- (query_pubid.year && opts[:keep_year].nil?) ||
61
- opts[:keep_year] ||
62
- opts[:all_parts]
63
- )
64
- return ret if get_all
65
-
66
- ret.to_most_recent_reference
67
- rescue Pubid::Core::Errors::ParseError
68
- Util.warn "(#{code}) Is not recognized as a standards identifier."
69
- nil
70
- end
25
# Look up a single bibliographic item for an ISO reference.
#
# @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
# @param year [String, Integer, nil] the year the standard was published
# @param opts [Hash] options; NOTE: this hash is modified in place
#   (:all_parts is assigned while the reference is analysed)
# @option opts [Boolean] :all_parts if all-parts reference is required
# @option opts [Boolean] :keep_year if undated reference should return
#   actual reference with year
#
# @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item;
#   nil when the reference cannot be parsed, otherwise whatever
#   fetch_ref_err returns when no document is found
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
  code = ref.gsub("\u2013", "-")

  # Parse "all parts" request. String#sub! returns nil when nothing was
  # replaced, so its result tells us whether the marker was present.
  all_parts_marker = code.sub!(" (all parts)", "")
  opts[:all_parts] ||= all_parts_marker && opts[:all_parts].nil?

  query_pubid = Pubid::Iso::Identifier.parse(code)
  # `&.` is required here: nil itself responds to :to_i, and a missing year
  # must not be stored as 0.
  query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
  Util.warn "(#{query_pubid}) Fetching from Relaton repository ..."

  hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
  tip_ids = look_up_with_any_types_stages(hits, ref, opts)
  ret = hits.fetch_doc
  return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret

  response_pubid = ret.docidentifier.first.id
  Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
  # Return the dated item as found when a year was requested (and :keep_year
  # was not given), when :keep_year is set, or for an all-parts request.
  get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
  return ret if get_all

  ret.to_most_recent_reference
rescue Pubid::Core::Errors::ParseError
  Util.warn "(#{code}) Is not recognized as a standards identifier."
  nil
end
71
59
 
72
- # @param query_pubid [Pubid::Iso::Identifier]
73
- # @param pubid [Pubid::Iso::Identifier]
74
- # @param all_parts [Boolean] match with any parts when true
75
- # @return [Boolean]
76
- def matches_parts?(query_pubid, pubid, all_parts: false)
77
- # match only with documents with part number
78
- return !pubid.part.nil? if all_parts
60
# Compare the part of a candidate identifier with the part of the query.
#
# @param query_pubid [Pubid::Iso::Identifier] identifier being searched for
# @param pubid [Pubid::Iso::Identifier] candidate identifier
# @param all_parts [Boolean] when true the query part is ignored and the
#   candidate only has to carry some part
# @return [Boolean]
def matches_parts?(query_pubid, pubid, all_parts: false)
  if all_parts
    # match only with documents with part number
    !pubid.part.nil?
  else
    query_pubid.part == pubid.part
  end
end
82
70
 
83
- #
84
- # Matches base of query_pubid and pubid.
85
- #
86
- # @param [Pubid::Iso::Identifier] query_pubid pubid to match
87
- # @param [Pubid::Iso::Identifier] pubid pubid to match
88
- # @param [Boolean] any_types_stages match with any types and stages
89
- #
90
- # @return [<Type>] <description>
91
- #
92
- def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
93
- return false unless pubid.respond_to?(:publisher)
94
-
95
- query_pubid.publisher == pubid.publisher &&
96
- query_pubid.number == pubid.number &&
97
- query_pubid.copublisher == pubid.copublisher &&
98
- (any_types_stages || query_pubid.stage == pubid.stage) &&
99
- (any_types_stages || query_pubid.is_a?(pubid.class))
71
#
# Matches base of query_pubid and pubid: publisher, number and copublisher
# must be equal; stage and class are compared unless any_types_stages is set.
#
# @param [Pubid::Iso::Identifier] query_pubid pubid to match
# @param [Pubid::Iso::Identifier] pubid pubid to match; anything that does
#   not respond to :publisher never matches
# @param [Boolean] any_types_stages match with any types and stages
#
# @return [Boolean] true when the identifiers share the same base
#
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/PerceivedComplexity
  return false unless pubid.respond_to?(:publisher)

  query_pubid.publisher == pubid.publisher &&
    query_pubid.number == pubid.number &&
    query_pubid.copublisher == pubid.copublisher &&
    (any_types_stages || query_pubid.stage == pubid.stage) &&
    (any_types_stages || query_pubid.is_a?(pubid.class))
end
89
+
90
# Keep only the hits whose year matches, and collect the IDs of dated hits
# that were rejected.
#
# A hit without a pubid year gets it filled in from hit.hit[:year] before
# the comparison, so the hits are modified in place.
#
# @param hit_collection [RelatonIso::HitCollection]
# @param year [String, Integer, nil] requested year; nil disables filtering
# @return [Array] two elements: the selected hits (the collection itself
#   when year is nil) and a Set of pubid strings that have a year but did
#   not match
def filter_hits_by_year(hit_collection, year)
  missed_year_ids = Set.new
  return [hit_collection, missed_year_ids] if year.nil?

  matching = hit_collection.select do |hit|
    hit.pubid.year ||= hit.hit[:year]
    matched = check_year(year, hit)
    # only hits that actually carry a year are reported as missed
    missed_year_ids << hit.pubid.to_s if !matched && hit.pubid.year
    matched
  end

  [matching, missed_year_ids]
end
108
+
109
+ private
108
110
 
109
- # filter by year
110
- hits = hit_collection.select do |hit|
111
- hit.pubid.year ||= hit.hit[:year]
112
- next true if check_year(year, hit)
111
# Check whether a hit belongs to the requested year.
#
# The hit matches when its own year equals the requested year, or when it
# has a base identifier whose year does. Years are compared as strings, so
# "2015" and 2015 are equal.
#
# @param year [String, Integer] requested year
# @param hit [RelatonIso::Hit] hit whose pubid is examined
# @return [Boolean]
def check_year(year, hit)
  wanted = year.to_s
  pubid = hit.pubid
  return true if pubid.year.to_s == wanted

  base = pubid.base
  !base.nil? && base.year.to_s == wanted
end
113
116
 
114
- missed_year_ids << hit.pubid.to_s if hit.pubid.year
115
- false
116
- end
117
# Log a "not found" warning followed by hints about what could be cited
# instead.
#
# @param pubid [Pubid::Iso::Identifier] PubID with no results
# @param missed_year_ids [Enumerable<String>] IDs that matched except for
#   the year
# @param tip_ids [Enumerable] IDs of alternative matches
# @return [nil]
def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  # renders a list of IDs as "`a`, `b`"
  quoted = ->(list) { list.map { |i| "`#{i}`" }.join(", ") }

  Util.warn "(#{pubid}) Not found."

  if missed_year_ids.any?
    Util.warn "(#{pubid}) TIP: No match for edition year #{pubid.year}, " \
              "but matches exist for #{quoted.call(missed_year_ids)}."
  end

  Util.warn "(#{pubid}) TIP: Matches exist for #{quoted.call(tip_ids)}." if tip_ids.any?

  tip = if pubid.part
          "(#{pubid}) TIP: If it cannot be found, the document may no longer be published in parts."
        else
          "(#{pubid}) TIP: If you wish to cite all document parts for the reference, " \
            "use `#{pubid.to_s(format: :ref_undated)} (all parts)`."
        end
  Util.warn tip

  nil
end
140
+
141
# When a typed reference (e.g. "ISO/TS ..." or "ISO TS ...") produced no
# hits, search again without the type prefix and with types and stages
# ignored, to collect IDs that can be offered as hints.
#
# @param hits [#any?] hits found for the original reference
# @param ref [String] the reference as given by the caller
# @param opts [Hash] options forwarded to isobib_search_filter
# @return [Array] pubids of the alternative matches; empty when hits were
#   found or the reference has no letters after the "ISO" prefix
def look_up_with_any_types_stages(hits, ref, opts)
  return [] if hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)

  ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
  pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
  resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
  resp.map(&:pubid)
end
165
149
 
166
- #
167
- # Search for hits. If no found then trying missed stages.
168
- #
169
- # @param query_pubid [Pubid::Iso::Identifier] reference without correction
170
- # @param opts [Hash]
171
- # @param any_types_stages [Boolean] match with any stages
172
- #
173
- # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years
174
- #
175
- def isobib_search_filter(query_pubid, opts, any_types_stages: false) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
176
- query_pubid_without_year = query_pubid.dup
177
- # remove year for query
178
- query_pubid_without_year.year = nil
179
- hit_collection = search(query_pubid_without_year.to_s)
180
-
181
- # filter only matching hits
182
- filter_hits hit_collection, query_pubid, opts[:all_parts],
183
- any_types_stages
150
#
# Run a search for query_pubid and narrow the result down with filter_hits.
#
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
# @param opts [Hash] passed to search; opts[:all_parts] is also handed to
#   filter_hits
# @param any_types_stages [Boolean] match with any stages
#
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years
#
def isobib_search_filter(query_pubid, opts, any_types_stages: false)
  found = search(query_pubid, opts)
  filter_hits(found, query_pubid, opts[:all_parts], any_types_stages)
end
165
+
166
#
# Filter hits by query_pubid, then by year.
#
# Hits are compared against the collection's year-less reference pubid with
# the attributes chosen by build_excludings left out. For an all-parts
# request, hits without a part are dropped as well.
#
# @param hit_collection [RelatonIso::HitCollection]
# @param query_pubid [Pubid::Iso::Identifier]
# @param all_parts [Boolean]
# @param any_types_stages [Boolean]
#
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
#
def filter_hits(hit_collection, query_pubid, all_parts, any_types_stages)
  ignored = build_excludings(all_parts, any_types_stages)
  reference = hit_collection.ref_pubid_no_year.exclude(*ignored)

  matching = hit_collection.select do |hit|
    next false unless pubid_match?(hit.pubid, query_pubid, ignored, reference)

    !(all_parts && hit.pubid.part.nil?)
  end

  filter_hits_by_year(matching, query_pubid.year)
end
186
+
187
# List the identifier attributes to leave out when comparing pubids.
#
# @param all_parts [Boolean] also leave out :part
# @param any_types_stages [Boolean] also leave out :type, :stage, :iteration
# @return [Array<Symbol>] always starts with :year and :edition
def build_excludings(all_parts, any_types_stages)
  ignored = %i[year edition]
  ignored.concat(%i[type stage iteration]) if any_types_stages
  ignored.push(:part) if all_parts
  ignored
end
193
+
194
# Decide whether a hit's pubid corresponds to the query.
#
# A String pubid is compared with the query's string form. Any other pubid
# is copied, has :year and :edition removed from its base (if it has one),
# has the given attributes excluded, and is then compared with no_year_ref.
#
# @param pubid [Pubid::Iso::Identifier, String] pubid of the hit
# @param query_pubid [Pubid::Iso::Identifier] used only for String pubids
# @param excludings [Array<Symbol>] attributes to leave out of the comparison
# @param no_year_ref [Pubid::Iso::Identifier] reference to compare with
# @return [Boolean]
def pubid_match?(pubid, query_pubid, excludings, no_year_ref)
  return pubid == query_pubid.to_s if pubid.is_a?(String)

  # work on a copy so the hit's own pubid keeps its base untouched
  candidate = pubid.dup
  candidate.base = candidate.base.exclude(:year, :edition) if candidate.base
  candidate.exclude(*excludings) == no_year_ref
end
210
202
  end
@@ -9,16 +9,29 @@ module RelatonIso
9
9
  @prefix = "ISO"
10
10
  @defaultprefix = %r{^ISO(/IEC)?\s}
11
11
  @idtype = "ISO"
12
+ @datasets = %w[iso-ics]
12
13
  end
13
14
 
14
15
# Delegate the lookup to ::RelatonIso::IsoBibliography.get.
#
# @param code [String] reference, passed on unchanged
# @param date [String, nil] year
# @param opts [Hash] options, passed on unchanged
# @return [RelatonIsoBib::IsoBibliographicItem]
def get(code, date, opts)
  ::RelatonIso::IsoBibliography.get(code, date, opts)
end
21
22
 
23
#
# Fetch all the documents from https://www.iso.org/standards-catalogue/browse-by-ics.html
#
# @param [String] _source source name; not used by this method (the
#   dataset declared by this processor is "iso-ics")
# @param [Hash] opts options, passed to DataFetcher.fetch as keyword
#   arguments, so the keys have to be Symbols
# @option opts [String] :output directory to output documents
# @option opts [String] :format output format (xml, yaml, bibxml)
#
def fetch_data(_source, opts)
  DataFetcher.fetch(**opts)
end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonIsoBib::IsoBibliographicItem]
24
37
  def from_xml(xml)
@@ -28,7 +41,7 @@ module RelatonIso
28
41
# Build a bibliographic item from a hash: the hash is run through
# HashConverter.hash_to_bib and the result is used as keyword arguments.
#
# @param hash [Hash]
# @return [RelatonIsoBib::IsoBibliographicItem]
def hash_to_bib(hash)
  ::RelatonIsoBib::IsoBibliographicItem.new(**HashConverter.hash_to_bib(hash))
end
34
47
 
@@ -43,5 +56,12 @@ module RelatonIso
43
56
  def threads
44
57
  3
45
58
  end
59
+
60
#
# Remove index file: looks up (or creates) the :iso index for
# HitCollection::INDEXFILE and calls remove_file on it.
#
def remove_index_file
  index = Relaton::Index.find_or_create(:iso, url: true, file: HitCollection::INDEXFILE)
  index.remove_file
end
46
66
  end
47
67
  end
@@ -0,0 +1,61 @@
1
require "forwardable"

module RelatonIso
  #
  # Queue of links to fetch.
  #
  # The queue is an Array of Strings kept in memory and persisted, one item
  # per line, in the file named by FILE (a relative path, so it is resolved
  # against the current working directory).
  #
  class Queue
    extend Forwardable
    # Index access (queue[i]) is forwarded to the underlying Array.
    def_delegator :queue, :[]

    FILE = "iso-queue.txt".freeze

    #
    # Open queue file if it exists. If not, create a new empty queue.
    # The result is memoized, so the file is read at most once per instance.
    #
    # @return [Array<String>] queue
    #
    def queue
      @queue ||= File.exist?(FILE) ? File.read(FILE).split("\n") : []
    end

    #
    # Add item to queue at first position if it is not already there.
    #
    # @param [String] item item to add
    #
    # @return [void]
    #
    def add_first(item)
      queue.unshift item unless queue.include? item
    end

    #
    # Move or add item to the end of the queue.
    #
    # @param [String] item item to move or add
    #
    # @return [void]
    #
    def move_last(item)
      queue.delete item
      queue << item
    end

    #
    # Take first item from the queue (currently disabled).
    #
    # @return [String] an item
    #
    # def take_first
    #   queue.shift
    # end

    #
    # Save queue to file, one item per line and without a trailing newline.
    #
    # @return [void]
    #
    def save
      File.write FILE, queue.join("\n")
    end
  end
end