relaton-iso 1.20.0 → 2.0.0.pre.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile +1 -0
- data/README.adoc +134 -130
- data/bin/console +1 -1
- data/grammars/basicdoc.rng +2110 -0
- data/grammars/biblio-standoc.rng +287 -0
- data/grammars/biblio.rng +2097 -0
- data/grammars/relaton-iso-compile.rng +11 -0
- data/grammars/relaton-iso.rng +214 -0
- data/lib/relaton/iso/bibliography.rb +206 -0
- data/lib/relaton/iso/data_fetcher.rb +227 -0
- data/lib/relaton/iso/hash_parser_v1.rb +121 -0
- data/lib/relaton/iso/hit.rb +62 -0
- data/lib/relaton/iso/hit_collection.rb +117 -0
- data/lib/relaton/iso/item_data.rb +49 -0
- data/lib/relaton/iso/model/bibdata.rb +9 -0
- data/lib/relaton/iso/model/bibitem.rb +7 -0
- data/lib/relaton/iso/model/contributor.rb +7 -0
- data/lib/relaton/iso/model/contributor_info.rb +9 -0
- data/lib/relaton/iso/model/docidentifier.rb +128 -0
- data/lib/relaton/iso/model/doctype.rb +13 -0
- data/lib/relaton/iso/model/ext.rb +47 -0
- data/lib/relaton/iso/model/iso_project_group.rb +21 -0
- data/lib/relaton/iso/model/item.rb +17 -0
- data/lib/relaton/iso/model/item_base.rb +19 -0
- data/lib/relaton/iso/model/organization.rb +9 -0
- data/lib/relaton/iso/model/project_number.rb +22 -0
- data/lib/relaton/iso/model/relation.rb +9 -0
- data/lib/relaton/iso/model/stagename.rb +14 -0
- data/lib/relaton/iso/model/structured_identifier.rb +31 -0
- data/lib/relaton/iso/processor.rb +78 -0
- data/lib/relaton/iso/queue.rb +63 -0
- data/lib/relaton/iso/scraper.rb +591 -0
- data/lib/relaton/iso/util.rb +8 -0
- data/lib/relaton/iso/version.rb +7 -0
- data/lib/relaton/iso.rb +17 -0
- data/relaton_iso.gemspec +9 -7
- metadata +76 -46
- data/bin/bundle +0 -109
- data/bin/byebug +0 -27
- data/bin/coderay +0 -27
- data/bin/gdb_wrapper +0 -29
- data/bin/htmldiff +0 -27
- data/bin/httpclient +0 -29
- data/bin/ldiff +0 -27
- data/bin/nokogiri +0 -27
- data/bin/pry +0 -27
- data/bin/pubid-nist +0 -27
- data/bin/racc +0 -27
- data/bin/rackup +0 -29
- data/bin/rake +0 -27
- data/bin/rubocop +0 -27
- data/bin/ruby-parse +0 -27
- data/bin/ruby-rewrite +0 -27
- data/bin/safe_yaml +0 -29
- data/bin/thor +0 -27
- data/lib/relaton_iso/data_fetcher.rb +0 -246
- data/lib/relaton_iso/document_identifier.rb +0 -46
- data/lib/relaton_iso/hash_converter.rb +0 -15
- data/lib/relaton_iso/hit.rb +0 -59
- data/lib/relaton_iso/hit_collection.rb +0 -100
- data/lib/relaton_iso/iso_bibliography.rb +0 -202
- data/lib/relaton_iso/processor.rb +0 -67
- data/lib/relaton_iso/queue.rb +0 -61
- data/lib/relaton_iso/scrapper.rb +0 -553
- data/lib/relaton_iso/util.rb +0 -6
- data/lib/relaton_iso/version.rb +0 -5
- data/lib/relaton_iso.rb +0 -17
@@ -1,202 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# require 'relaton_iso/iso_bibliographic_item'
|
4
|
-
require "relaton_iso/scrapper"
|
5
|
-
require "relaton_iso/hit_collection"
|
6
|
-
# require "relaton_iec"
|
7
|
-
|
8
|
-
module RelatonIso
|
9
|
-
# Methods for search ISO standards.
|
10
|
-
module IsoBibliography
|
11
|
-
extend self
|
12
|
-
|
13
|
-
# Search for ISO documents matching an identifier.
#
# @param pubid [Pubid::Iso::Identifier, String] identifier to look up; a
#   String is first parsed with Pubid::Iso::Identifier.parse
# @param opts [Hash] options passed unchanged to HitCollection.new
# @return [RelatonIso::HitCollection] whatever HitCollection#fetch returns
# @raise [RelatonBib::RequestError] when one of the network/protocol errors
#   listed in the rescue clause is raised; the original message is kept
def search(pubid, opts = {})
  pubid = Pubid::Iso::Identifier.parse(pubid) if pubid.is_a? String
  HitCollection.new(pubid, opts).fetch
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
       EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
       Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT => e
  raise RelatonBib::RequestError, e.message
end
|
23
|
-
|
24
|
-
# Fetch the bibliographic item for an ISO reference.
#
# @param ref [String] the ISO standard code to look up (e.g. "ISO 9000")
# @param year [String, NilClass] the year the standard was published
# @param opts [Hash] options; the hash passed by the caller is not modified
# @option opts [Boolean] :all_parts if all-parts reference is required; when
#   unset it is switched on by a " (all parts)" suffix in +ref+
# @option opts [Boolean] :keep_year if undated reference should return
#   actual reference with year
#
# @return [RelatonIsoBib::IsoBibliographicItem, nil] bibliographic item, or
#   nil when nothing is found or +ref+ cannot be parsed as an identifier
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
  code = ref.gsub("\u2013", "-") # en dash -> ASCII hyphen

  # parse "all parts" request; sub! returns nil when the suffix is absent
  all_parts_suffix = !code.sub!(" (all parts)", "").nil?
  # Merge into a copy: writing into the caller's hash would leak :all_parts
  # into later lookups that reuse the same options hash.
  opts = opts.merge(all_parts: true) if all_parts_suffix && opts[:all_parts].nil?

  query_pubid = Pubid::Iso::Identifier.parse(code)
  query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
  Util.info "Fetching from Relaton repository ...", key: query_pubid.to_s

  hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
  tip_ids = look_up_with_any_types_stages(hits, ref, opts)
  ret = hits.fetch_doc
  return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret

  response_pubid = ret.docidentifier.first.id
  Util.info "Found: `#{response_pubid}`", key: query_pubid.to_s

  # Return the document as found when the query was dated (and :keep_year
  # was not given), when :keep_year is set, or for an all-parts request.
  get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
  return ret if get_all

  ret.to_most_recent_reference
rescue Pubid::Core::Errors::ParseError
  Util.warn "Is not recognized as a standards identifier.", key: code
  nil
end
|
58
|
-
|
59
|
-
# Compare the part of a found identifier with the part of the query.
#
# @param query_pubid [Pubid::Iso::Identifier]
# @param pubid [Pubid::Iso::Identifier]
# @param all_parts [Boolean] match with any parts when true
# @return [Boolean]
def matches_parts?(query_pubid, pubid, all_parts: false)
  # match only with documents with part number
  return !pubid.part.nil? if all_parts

  query_pubid.part == pubid.part
end
|
69
|
-
|
70
|
-
#
# Matches base of query_pubid and pubid: publisher, number and copublisher
# must be equal; stage and class must also agree unless any_types_stages
# is set.
#
# @param [Pubid::Iso::Identifier] query_pubid pubid to match
# @param [Pubid::Iso::Identifier] pubid pubid to match
# @param [Boolean] any_types_stages match with any types and stages
#
# @return [Boolean] false when pubid does not respond to #publisher
#
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/PerceivedComplexity
  return false unless pubid.respond_to?(:publisher)

  query_pubid.publisher == pubid.publisher &&
    query_pubid.number == pubid.number &&
    query_pubid.copublisher == pubid.copublisher &&
    (any_types_stages || query_pubid.stage == pubid.stage) &&
    (any_types_stages || query_pubid.is_a?(pubid.class))
end
|
88
|
-
|
89
|
-
# Keep only the hits whose year matches the requested year.
#
# As a side effect, a hit whose pubid has no year gets it filled in from
# hit.hit[:year] before the comparison.
#
# @param hit_collection [RelatonIso::HitCollection]
# @param year [String, nil] requested year; nil disables filtering
# @return [Array(RelatonIso::HitCollection, Set<String>)] matching hits and
#   the identifiers of hits that have a year but a different one
def filter_hits_by_year(hit_collection, year)
  missed_year_ids = Set.new
  return [hit_collection, missed_year_ids] if year.nil?

  # filter by year
  hits = hit_collection.select do |hit|
    hit.pubid.year ||= hit.hit[:year]
    next true if check_year(year, hit)

    # remember dated near-misses so the caller can suggest them
    missed_year_ids << hit.pubid.to_s if hit.pubid.year
    false
  end

  [hits, missed_year_ids]
end
|
107
|
-
|
108
|
-
private
|
109
|
-
|
110
|
-
# Tell whether a hit belongs to the requested year.
#
# The hit matches when its own year equals +year+, or when its pubid has a
# base identifier whose year equals +year+. Years are compared as strings.
#
# @param year [String, Integer] requested year
# @param hit [RelatonIso::Hit] hit whose pubid is inspected
# @return [Boolean]
def check_year(year, hit)
  wanted = year.to_s
  pubid = hit.pubid
  return true if pubid.year.to_s == wanted

  base = pubid.base
  !base.nil? && base.year.to_s == wanted
end
|
115
|
-
|
116
|
-
# Log a "Not found." message followed by hints for the failed lookup.
#
# @param pubid [Pubid::Iso::Identifier] PubID with no results
# @param missed_year_ids [Enumerable<String>] identifiers that matched
#   except for the year
# @param tip_ids [Enumerable] identifiers to suggest as alternatives
# @return [nil]
def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  key = pubid.to_s
  quote_ids = ->(ids) { ids.map { |i| "`#{i}`" }.join(", ") }

  Util.info "Not found.", key: key

  if missed_year_ids.any?
    ids = quote_ids.call(missed_year_ids)
    Util.info "TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}.", key: key
  end

  Util.info "TIP: Matches exist for #{quote_ids.call(tip_ids)}.", key: key if tip_ids.any?

  if pubid.part
    Util.info "TIP: If it cannot be found, the document may no longer be published in parts.", key: key
  else
    Util.info "TIP: If you wish to cite all document parts for the reference, " \
              "use `#{pubid.to_s(format: :ref_undated)} (all parts)`.", key: key
  end

  nil
end
|
139
|
-
|
140
|
-
# Collect alternative identifiers to suggest when a typed/staged reference
# gave no hits.
#
# Only runs when +hits+ is empty and +ref+ starts with "ISO" followed by
# "/" or whitespace and an uppercase letter (e.g. "ISO/TS 123"). The
# uppercase run after "ISO" is dropped and the search is repeated with any
# types and stages allowed.
#
# @param hits [#any?] hits found for the original reference
# @param ref [String] reference as given by the caller
# @param opts [Hash] options forwarded to isobib_search_filter
# @return [Array] pubids of the relaxed search, or [] when it is skipped
def look_up_with_any_types_stages(hits, ref, opts)
  return [] if hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)

  ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
  pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
  resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
  resp.map(&:pubid)
end
|
148
|
-
|
149
|
-
#
# Search for hits and keep only those matching the query identifier.
#
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
# @param opts [Hash] search options; :all_parts is also used for filtering
# @param any_types_stages [Boolean] match with any stages
#
# @return [Array] result of filter_hits: the matching hits and the missed
#   year IDs
#
def isobib_search_filter(query_pubid, opts, any_types_stages: false)
  hit_collection = search(query_pubid, opts)

  # filter only matching hits
  filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
end
|
164
|
-
|
165
|
-
#
# Filter hits by query_pubid, then by the year of query_pubid.
#
# @param hit_collection [RelatonIso::HitCollection]
# @param query_pubid [Pubid::Iso::Identifier]
# @param all_parts [Boolean] ignore the part when comparing, and drop hits
#   that have no part
# @param any_types_stages [Boolean] ignore type, stage and iteration when
#   comparing
#
# @return [Array] result of filter_hits_by_year: hits and missed year IDs
#
def filter_hits(hit_collection, query_pubid, all_parts, any_types_stages)
  # attributes left out of the comparison
  excludings = build_excludings(all_parts, any_types_stages)
  no_year_ref = hit_collection.ref_pubid_no_year.exclude(*excludings)
  result = hit_collection.select do |i|
    pubid_match?(i.pubid, query_pubid, excludings, no_year_ref) && !(all_parts && i.pubid.part.nil?)
  end

  filter_hits_by_year(result, query_pubid.year)
end
|
185
|
-
|
186
|
-
# List the identifier attributes to leave out when comparing pubids.
#
# @param all_parts [Boolean] also ignore :part
# @param any_types_stages [Boolean] also ignore :type, :stage and :iteration
# @return [Array<Symbol>] always starts with :year and :edition
def build_excludings(all_parts, any_types_stages)
  excludings = %i[year edition]
  excludings.concat(%i[type stage iteration]) if any_types_stages
  excludings << :part if all_parts
  excludings
end
|
192
|
-
|
193
|
-
# Compare a hit's pubid with the query.
#
# A String pubid is compared with query_pubid.to_s. Any other pubid is
# copied, its base (if any) is stripped of :year and :edition, and the copy
# minus +excludings+ is compared with +no_year_ref+. The pubid passed in is
# left untouched.
#
# @param pubid [Pubid::Iso::Identifier, String] pubid of the hit
# @param query_pubid [Pubid::Iso::Identifier] identifier being looked up
# @param excludings [Array<Symbol>] attributes to ignore
# @param no_year_ref [Pubid::Iso::Identifier] reference identifier already
#   stripped of the excluded attributes
# @return [Boolean]
def pubid_match?(pubid, query_pubid, excludings, no_year_ref)
  return pubid == query_pubid.to_s if pubid.is_a? String

  candidate = pubid.dup
  candidate.base = candidate.base.exclude(:year, :edition) if candidate.base
  candidate.exclude(*excludings) == no_year_ref
end
|
201
|
-
end
|
202
|
-
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
require "relaton/processor"
|
2
|
-
|
3
|
-
module RelatonIso
|
4
|
-
# Relaton processor for the ISO flavor: exposes lookup, bulk fetching and
# parsing entry points under the :relaton_iso short name.
class Processor < Relaton::Processor
  attr_reader :idtype

  def initialize # rubocop:disable Lint/MissingSuper
    @short = :relaton_iso
    @prefix = "ISO"
    @defaultprefix = %r{^ISO(/IEC)?\s}
    @idtype = "ISO"
    @datasets = %w[iso-ics]
  end

  # @param code [String]
  # @param date [String, nil] year
  # @param opts [Hash]
  # @return [RelatonIsoBib::IsoBibliographicItem]
  def get(code, date, opts)
    ::RelatonIso::IsoBibliography.get(code, date, opts)
  end

  #
  # Fetch all the documents from https://www.iso.org/standards-catalogue/browse-by-ics.html
  #
  # @param [String] _source source name; not used, the only dataset declared
  #   by this processor is "iso-ics"
  # @param [Hash] opts passed to DataFetcher.fetch as keyword arguments
  # @option opts [String] :output directory to output documents
  # @option opts [String] :format output format (xml, yaml, bibxml)
  #
  def fetch_data(_source, opts)
    DataFetcher.fetch(**opts)
  end

  # @param xml [String]
  # @return [RelatonIsoBib::IsoBibliographicItem]
  def from_xml(xml)
    ::RelatonIsoBib::XMLParser.from_xml xml
  end

  # @param hash [Hash]
  # @return [RelatonIsoBib::IsoBibliographicItem]
  def hash_to_bib(hash)
    item_hash = HashConverter.hash_to_bib(hash)
    ::RelatonIsoBib::IsoBibliographicItem.new(**item_hash)
  end

  # Returns hash of XML grammar (memoized)
  # @return [String]
  def grammar_hash
    @grammar_hash ||= ::RelatonIsoBib.grammar_hash
  end

  # Returns number of workers
  # @return [Integer]
  def threads
    3
  end

  #
  # Remove index file
  #
  def remove_index_file
    Relaton::Index.find_or_create(:iso, url: true, file: HitCollection::INDEXFILE).remove_file
  end
end
|
67
|
-
end
|
data/lib/relaton_iso/queue.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
module RelatonIso
|
2
|
-
# Forwardable is used below; require it here so this file does not depend
# on another file having loaded it first.
require "forwardable"

#
# Queue of links to fetch.
#
# Items are kept in memory as an Array of Strings and persisted, one item
# per line, in FILE (a path relative to the current working directory).
#
class Queue
  extend Forwardable
  def_delegator :queue, :[]

  FILE = "iso-queue.txt".freeze

  #
  # Open queue file if exist. If not, create new empty queue.
  # The result is memoized, so the file is read at most once per instance.
  #
  # @return [Array<String>] queue
  #
  def queue
    @queue ||= File.exist?(FILE) ? File.read(FILE).split("\n") : []
  end

  #
  # Add item to queue at first position if it is not already there.
  #
  # @param [String] item item to add
  #
  # @return [void]
  #
  def add_first(item)
    queue.unshift item unless queue.include? item
  end

  #
  # Move or add item to the end of the queue.
  #
  # @param [String] item item to move or add
  #
  # @return [void]
  #
  def move_last(item)
    queue.delete item
    queue << item
  end

  #
  # Take first item from the queue.
  #
  # @return [String] an item
  #
  # def take_first
  #   queue.shift
  # end

  #
  # Save queue to file, one item per line.
  #
  # @return [void]
  #
  def save
    File.write FILE, queue.join("\n")
  end
end
|
61
|
-
end
|