relaton-iso 1.18.1 → 1.18.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,205 +6,196 @@ require "relaton_iso/hit_collection"
6
6
  # require "relaton_iec"
7
7
 
8
8
  module RelatonIso
9
- # Class methods for search ISO standards.
10
- class IsoBibliography
11
- class << self
12
- # @param text [String]
13
- # @return [RelatonIso::HitCollection]
14
- def search(text)
15
- HitCollection.new(text.gsub("\u2013", "-")).fetch
16
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
17
- EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
18
- Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
19
- Algolia::AlgoliaUnreachableHostError => e
20
- raise RelatonBib::RequestError, e.message
21
- end
9
+ # Methods for searching ISO standards.
10
+ module IsoBibliography
11
+ extend self
12
+
13
+ # @param pubid [Pubid::Iso::Identifier, String]
14
+ # @return [RelatonIso::HitCollection]
15
+ def search(pubid, opts = {})
16
+ pubid = Pubid::Iso::Identifier.parse(pubid) if pubid.is_a? String
17
+ HitCollection.new(pubid, opts).fetch
18
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
19
+ EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
20
+ Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT => e
21
+ raise RelatonBib::RequestError, e.message
22
+ end
22
23
 
23
- # @param ref [String] the ISO standard Code to look up (e..g "ISO 9000")
24
- # @param year [String, NilClass] the year the standard was published
25
- # @param opts [Hash] options; restricted to :all_parts if all-parts
26
- # @option opts [Boolean] :all_parts if all-parts reference is required
27
- # @option opts [Boolean] :keep_year if undated reference should return
28
- # actual reference with year
29
- #
30
- # @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference
31
- def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
32
- code = ref.gsub("\u2013", "-")
33
-
34
- # parse "all parts" request
35
- code.sub! " (all parts)", ""
36
- opts[:all_parts] ||= $~ && opts[:all_parts].nil?
37
-
38
- query_pubid = Pubid::Iso::Identifier.parse(code)
39
- query_pubid.year = year if year
40
- query_pubid.part = nil if opts[:all_parts]
41
- Util.warn "(#{query_pubid}) Fetching from iso.org ..."
42
-
43
- hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
44
- tip_ids = look_up_with_any_types_stages(hits, ref, opts)
45
-
46
- ret = if !opts[:all_parts] || hits.size == 1
47
- hits.any? && hits.first.fetch(opts[:lang])
48
- else
49
- hits.to_all_parts(opts[:lang])
50
- end
51
-
52
- return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
53
-
54
- response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
55
- response_pubid = Pubid::Iso::Identifier.parse(response_docid)
56
-
57
- Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
58
-
59
- get_all = (
60
- (query_pubid.year && opts[:keep_year].nil?) ||
61
- opts[:keep_year] ||
62
- opts[:all_parts]
63
- )
64
- return ret if get_all
65
-
66
- ret.to_most_recent_reference
67
- rescue Pubid::Core::Errors::ParseError
68
- Util.warn "(#{code}) Is not recognized as a standards identifier."
69
- nil
70
- end
24
+ # @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
25
+ # @param year [String, NilClass] the year the standard was published
26
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
27
+ # @option opts [Boolean] :all_parts if all-parts reference is required
28
+ # @option opts [Boolean] :keep_year if undated reference should return
29
+ # actual reference with year
30
+ #
31
+ # @return [RelatonIsoBib::IsoBibliographicItem] Bibliographic item
32
+ def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
33
+ code = ref.gsub("\u2013", "-")
34
+
35
+ # parse "all parts" request
36
+ code.sub! " (all parts)", ""
37
+ opts[:all_parts] ||= $~ && opts[:all_parts].nil?
38
+
39
+ query_pubid = Pubid::Iso::Identifier.parse(code)
40
+ query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
41
+ Util.warn "(#{query_pubid}) Fetching from Relaton repository ..."
42
+
43
+ hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
44
+ tip_ids = look_up_with_any_types_stages(hits, ref, opts)
45
+ ret = hits.fetch_doc
46
+ return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
47
+
48
+ response_pubid = ret.docidentifier.first.id # .sub(" (all parts)", "")
49
+ Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
50
+ get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
51
+ return ret if get_all
52
+
53
+ ret.to_most_recent_reference
54
+ rescue Pubid::Core::Errors::ParseError
55
+ Util.warn "(#{code}) Is not recognized as a standards identifier."
56
+ nil
57
+ end
71
58
 
72
- # @param query_pubid [Pubid::Iso::Identifier]
73
- # @param pubid [Pubid::Iso::Identifier]
74
- # @param all_parts [Boolean] match with any parts when true
75
- # @return [Boolean]
76
- def matches_parts?(query_pubid, pubid, all_parts: false)
77
- # match only with documents with part number
78
- return !pubid.part.nil? if all_parts
59
+ # @param query_pubid [Pubid::Iso::Identifier]
60
+ # @param pubid [Pubid::Iso::Identifier]
61
+ # @param all_parts [Boolean] match with any parts when true
62
+ # @return [Boolean]
63
+ def matches_parts?(query_pubid, pubid, all_parts: false)
64
+ # match only with documents with part number
65
+ return !pubid.part.nil? if all_parts
79
66
 
80
- query_pubid.part == pubid.part
81
- end
67
+ query_pubid.part == pubid.part
68
+ end
82
69
 
83
- #
84
- # Matches base of query_pubid and pubid.
85
- #
86
- # @param [Pubid::Iso::Identifier] query_pubid pubid to match
87
- # @param [Pubid::Iso::Identifier] pubid pubid to match
88
- # @param [Boolean] any_types_stages match with any types and stages
89
- #
90
- # @return [<Type>] <description>
91
- #
92
- def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
93
- return false unless pubid.respond_to?(:publisher)
94
-
95
- query_pubid.publisher == pubid.publisher &&
96
- query_pubid.number == pubid.number &&
97
- query_pubid.copublisher == pubid.copublisher &&
98
- (any_types_stages || query_pubid.stage == pubid.stage) &&
99
- (any_types_stages || query_pubid.is_a?(pubid.class))
70
+ #
71
+ # Matches base of query_pubid and pubid.
72
+ #
73
+ # @param [Pubid::Iso::Identifier] query_pubid pubid to match
74
+ # @param [Pubid::Iso::Identifier] pubid pubid to match
75
+ # @param [Boolean] any_types_stages match with any types and stages
76
+ #
77
+ # @return [Boolean] true when publisher, number and copublisher match (and stage and type too, unless any_types_stages)
78
+ #
79
+ def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics?PerceivedComplexity
80
+ return false unless pubid.respond_to?(:publisher)
81
+
82
+ query_pubid.publisher == pubid.publisher &&
83
+ query_pubid.number == pubid.number &&
84
+ query_pubid.copublisher == pubid.copublisher &&
85
+ (any_types_stages || query_pubid.stage == pubid.stage) &&
86
+ (any_types_stages || query_pubid.is_a?(pubid.class))
87
+ end
88
+
89
+ # @param hit_collection [RelatonIso::HitCollection]
90
+ # @param year [String]
91
+ # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
92
+ def filter_hits_by_year(hit_collection, year)
93
+ missed_year_ids = Set.new
94
+ return [hit_collection, missed_year_ids] if year.nil?
95
+
96
+ # filter by year
97
+ hits = hit_collection.select do |hit|
98
+ hit.pubid.year ||= hit.hit[:year]
99
+ next true if check_year(year, hit)
100
+
101
+ missed_year_ids << hit.pubid.to_s if hit.pubid.year
102
+ false
100
103
  end
101
104
 
102
- # @param hit_collection [RelatonIso::HitCollection]
103
- # @param year [String]
104
- # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
105
- def filter_hits_by_year(hit_collection, year)
106
- missed_year_ids = Set.new
107
- return [hit_collection, missed_year_ids] if year.nil?
105
+ [hits, missed_year_ids]
106
+ end
107
+
108
+ private
108
109
 
109
- # filter by year
110
- hits = hit_collection.select do |hit|
111
- hit.pubid.year ||= hit.hit[:year]
112
- next true if check_year(year, hit)
110
+ def check_year(year, hit) # rubocop:disable Metrics/AbcSize
111
+ (hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) ||
112
+ (!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) ||
113
+ (!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s)
114
+ end
113
115
 
114
- missed_year_ids << hit.pubid.to_s if hit.pubid.year
115
- false
116
- end
116
+ # @param pubid [Pubid::Iso::Identifier] PubID with no results
117
+ def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
118
+ Util.warn "(#{pubid}) Not found."
117
119
 
118
- [hits, missed_year_ids]
120
+ if missed_year_ids.any?
121
+ ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
122
+ Util.warn "(#{pubid}) TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}."
119
123
  end
120
124
 
121
- private
122
-
123
- def check_year(year, hit) # rubocop:disable Metrics/AbcSize
124
- (hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) ||
125
- (!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) ||
126
- (!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s)
125
+ if tip_ids.any?
126
+ ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
127
+ Util.warn "(#{pubid}) TIP: Matches exist for #{ids}."
127
128
  end
128
129
 
129
- # @param pubid [Pubid::Iso::Identifier] PubID with no results
130
- def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
131
- Util.warn "(#{pubid}) Not found."
132
-
133
- if missed_year_ids.any?
134
- ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
135
- Util.warn "(#{pubid}) TIP: No match for edition year " \
136
- "#{pubid.year}, but matches exist for #{ids}."
137
- end
138
-
139
- if tip_ids.any?
140
- ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
141
- Util.warn "(#{pubid}) TIP: Matches exist for #{ids}."
142
- end
143
-
144
- if pubid.part
145
- Util.warn "(#{pubid}) TIP: If it cannot be found, " \
146
- "the document may no longer be published in parts."
147
- else
148
- Util.warn "(#{pubid}) TIP: If you wish to cite " \
149
- "all document parts for the reference, use " \
150
- "`#{pubid.to_s(format: :ref_undated)} (all parts)`."
151
- end
152
-
153
- nil
130
+ if pubid.part
131
+ Util.warn "(#{pubid}) TIP: If it cannot be found, the document may no longer be published in parts."
132
+ else
133
+ Util.warn "(#{pubid}) TIP: If you wish to cite all document parts for the reference, " \
134
+ "use `#{pubid.to_s(format: :ref_undated)} (all parts)`."
154
135
  end
155
136
 
156
- def look_up_with_any_types_stages(hits, ref, opts) # rubocop:disable Metrics/MethodLength
157
- found_ids = []
158
- return found_ids if hits.from_gh || hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)
137
+ nil
138
+ end
139
+
140
+ def look_up_with_any_types_stages(hits, ref, opts)
141
+ return [] if hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)
159
142
 
160
- ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
161
- pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
162
- resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
163
- resp.map &:pubid
164
- end
143
+ ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
144
+ pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
145
+ resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
146
+ resp.map &:pubid
147
+ end
165
148
 
166
- #
167
- # Search for hits. If no found then trying missed stages.
168
- #
169
- # @param query_pubid [Pubid::Iso::Identifier] reference without correction
170
- # @param opts [Hash]
171
- # @param any_types_stages [Boolean] match with any stages
172
- #
173
- # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years
174
- #
175
- def isobib_search_filter(query_pubid, opts, any_types_stages: false) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
176
- query_pubid_without_year = query_pubid.dup
177
- # remove year for query
178
- query_pubid_without_year.year = nil
179
- hit_collection = search(query_pubid_without_year.to_s)
180
-
181
- # filter only matching hits
182
- filter_hits hit_collection, query_pubid, opts[:all_parts],
183
- any_types_stages
149
+ #
150
+ # Search for hits. If none are found, try the missed stages.
151
+ #
152
+ # @param query_pubid [Pubid::Iso::Identifier] reference without correction
153
+ # @param opts [Hash]
154
+ # @param any_types_stages [Boolean] match with any stages
155
+ #
156
+ # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years
157
+ #
158
+ def isobib_search_filter(query_pubid, opts, any_types_stages: false)
159
+ hit_collection = search(query_pubid, opts)
160
+
161
+ # filter only matching hits
162
+ filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
163
+ end
164
+
165
+ #
166
+ # Filter hits by query_pubid.
167
+ #
168
+ # @param hit_collection [RelatonIso::HitCollection]
169
+ # @param query_pubid [Pubid::Iso::Identifier]
170
+ # @param all_parts [Boolean]
171
+ # @param any_types_stages [Boolean]
172
+ #
173
+ # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
174
+ #
175
+ def filter_hits(hit_collection, query_pubid, all_parts, any_types_stages)
176
+ # filter out
177
+ excludings = build_excludings(all_parts, any_types_stages)
178
+ no_year_ref = hit_collection.ref_pubid_no_year.exclude(*excludings)
179
+ result = hit_collection.select do |i|
180
+ pubid_match?(i.pubid, query_pubid, excludings, no_year_ref) && !(all_parts && i.pubid.part.nil?)
184
181
  end
185
182
 
186
- #
187
- # Filter hits by query_pubid.
188
- #
189
- # @param hit_collection [RelatonIso::HitCollection]
190
- # @param query_pubid [Pubid::Iso::Identifier]
191
- # @param all_parts [Boolean]
192
- # @param any_stypes_tages [Boolean]
193
- #
194
- # @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
195
- #
196
- def filter_hits(hit_collection, query_pubid, all_parts, any_stypes_tages) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
197
- # filter out
198
- result = hit_collection.select do |i|
199
- hit_pubid = i.pubid
200
- matches_base?(query_pubid, hit_pubid,
201
- any_types_stages: any_stypes_tages) &&
202
- matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
203
- query_pubid.corrigendums == hit_pubid.corrigendums &&
204
- query_pubid.amendments == hit_pubid.amendments
205
- end
206
-
207
- filter_hits_by_year(result, query_pubid.year)
183
+ filter_hits_by_year(result, query_pubid.year)
184
+ end
185
+
186
+ def build_excludings(all_parts, any_types_stages)
187
+ excludings = %i[year edition]
188
+ excludings += %i[type stage iteration] if any_types_stages
189
+ excludings << :part if all_parts
190
+ excludings
191
+ end
192
+
193
+ def pubid_match?(pubid, query_pubid, excludings, no_year_ref)
194
+ if pubid.is_a? String then pubid == query_pubid.to_s
195
+ else
196
+ pubid = pubid.dup
197
+ pubid.base = pubid.base.exclude(:year, :edition) if pubid.base
198
+ pubid.exclude(*excludings) == no_year_ref
208
199
  end
209
200
  end
210
201
  end
@@ -9,16 +9,29 @@ module RelatonIso
9
9
  @prefix = "ISO"
10
10
  @defaultprefix = %r{^ISO(/IEC)?\s}
11
11
  @idtype = "ISO"
12
+ @datasets = %w[iso-ics]
12
13
  end
13
14
 
14
15
  # @param code [String]
15
- # @param date [String, NilClass] year
16
+ # @param date [String, nil] year
16
17
  # @param opts [Hash]
17
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
18
19
  def get(code, date, opts)
19
20
  ::RelatonIso::IsoBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from https://www.iso.org/standards-catalogue/browse-by-ics.html
25
+ #
26
+ # @param [String] _source source name (iso-ics)
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format output format (xml, yaml, bibxml)
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonIsoBib::IsoBibliographicItem]
24
37
  def from_xml(xml)
@@ -28,7 +41,7 @@ module RelatonIso
28
41
  # @param hash [Hash]
29
42
  # @return [RelatonIsoBib::IsoBibliographicItem]
30
43
  def hash_to_bib(hash)
31
- item_hash = ::RelatonIsoBib::HashConverter.hash_to_bib(hash)
44
+ item_hash = HashConverter.hash_to_bib(hash)
32
45
  ::RelatonIsoBib::IsoBibliographicItem.new(**item_hash)
33
46
  end
34
47
 
@@ -43,5 +56,12 @@ module RelatonIso
43
56
  def threads
44
57
  3
45
58
  end
59
+
60
+ #
61
+ # Remove index file
62
+ #
63
+ def remove_index_file
64
+ Relaton::Index.find_or_create(:iso, url: true, file: HitCollection::INDEXFILE).remove_file
65
+ end
46
66
  end
47
67
  end
@@ -0,0 +1,61 @@
1
+ module RelatonIso
2
+ #
3
+ # Queue of links to fetch.
4
+ #
5
+ class Queue
6
+ extend Forwardable
7
+ def_delegator :queue, :[]
8
+
9
+ FILE = "iso-queue.txt".freeze
10
+
11
+ #
12
+ # Open the queue file if it exists. If not, create a new empty queue.
13
+ #
14
+ # @return [Array<String>] queue
15
+ #
16
+ def queue
17
+ @queue ||= File.exist?(FILE) ? File.read(FILE).split("\n") : []
18
+ end
19
+
20
+ #
21
+ # Add item to queue at first position if it is not already there.
22
+ #
23
+ # @param [String] item item to add
24
+ #
25
+ # @return [void]
26
+ #
27
+ def add_first(item)
28
+ queue.unshift item unless queue.include? item
29
+ end
30
+
31
+ #
32
+ # Move or add item to the end of the queue.
33
+ #
34
+ # @param [String] item item to move or add
35
+ #
36
+ # @return [void]
37
+ #
38
+ def move_last(item)
39
+ queue.delete item
40
+ queue << item
41
+ end
42
+
43
+ #
44
+ # Take first item from the queue.
45
+ #
46
+ # @return [String] an item
47
+ #
48
+ # def take_first
49
+ # queue.shift
50
+ # end
51
+
52
+ #
53
+ # Save queue to file.
54
+ #
55
+ # @return [void]
56
+ #
57
+ def save
58
+ File.write FILE, queue.to_a.join("\n")
59
+ end
60
+ end
61
+ end