relaton-iso 1.20.0 → 2.0.0.pre.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/Gemfile +1 -0
  4. data/README.adoc +134 -130
  5. data/bin/console +1 -1
  6. data/grammars/basicdoc.rng +2110 -0
  7. data/grammars/biblio-standoc.rng +287 -0
  8. data/grammars/biblio.rng +2097 -0
  9. data/grammars/relaton-iso-compile.rng +11 -0
  10. data/grammars/relaton-iso.rng +214 -0
  11. data/lib/relaton/iso/bibliography.rb +206 -0
  12. data/lib/relaton/iso/data_fetcher.rb +227 -0
  13. data/lib/relaton/iso/hash_parser_v1.rb +121 -0
  14. data/lib/relaton/iso/hit.rb +62 -0
  15. data/lib/relaton/iso/hit_collection.rb +117 -0
  16. data/lib/relaton/iso/item_data.rb +49 -0
  17. data/lib/relaton/iso/model/bibdata.rb +9 -0
  18. data/lib/relaton/iso/model/bibitem.rb +7 -0
  19. data/lib/relaton/iso/model/contributor.rb +7 -0
  20. data/lib/relaton/iso/model/contributor_info.rb +9 -0
  21. data/lib/relaton/iso/model/docidentifier.rb +128 -0
  22. data/lib/relaton/iso/model/doctype.rb +13 -0
  23. data/lib/relaton/iso/model/ext.rb +47 -0
  24. data/lib/relaton/iso/model/iso_project_group.rb +21 -0
  25. data/lib/relaton/iso/model/item.rb +17 -0
  26. data/lib/relaton/iso/model/item_base.rb +19 -0
  27. data/lib/relaton/iso/model/organization.rb +9 -0
  28. data/lib/relaton/iso/model/project_number.rb +22 -0
  29. data/lib/relaton/iso/model/relation.rb +9 -0
  30. data/lib/relaton/iso/model/stagename.rb +14 -0
  31. data/lib/relaton/iso/model/structured_identifier.rb +31 -0
  32. data/lib/relaton/iso/processor.rb +78 -0
  33. data/lib/relaton/iso/queue.rb +63 -0
  34. data/lib/relaton/iso/scraper.rb +591 -0
  35. data/lib/relaton/iso/util.rb +8 -0
  36. data/lib/relaton/iso/version.rb +7 -0
  37. data/lib/relaton/iso.rb +17 -0
  38. data/relaton_iso.gemspec +9 -7
  39. metadata +76 -46
  40. data/bin/bundle +0 -109
  41. data/bin/byebug +0 -27
  42. data/bin/coderay +0 -27
  43. data/bin/gdb_wrapper +0 -29
  44. data/bin/htmldiff +0 -27
  45. data/bin/httpclient +0 -29
  46. data/bin/ldiff +0 -27
  47. data/bin/nokogiri +0 -27
  48. data/bin/pry +0 -27
  49. data/bin/pubid-nist +0 -27
  50. data/bin/racc +0 -27
  51. data/bin/rackup +0 -29
  52. data/bin/rake +0 -27
  53. data/bin/rubocop +0 -27
  54. data/bin/ruby-parse +0 -27
  55. data/bin/ruby-rewrite +0 -27
  56. data/bin/safe_yaml +0 -29
  57. data/bin/thor +0 -27
  58. data/lib/relaton_iso/data_fetcher.rb +0 -246
  59. data/lib/relaton_iso/document_identifier.rb +0 -46
  60. data/lib/relaton_iso/hash_converter.rb +0 -15
  61. data/lib/relaton_iso/hit.rb +0 -59
  62. data/lib/relaton_iso/hit_collection.rb +0 -100
  63. data/lib/relaton_iso/iso_bibliography.rb +0 -202
  64. data/lib/relaton_iso/processor.rb +0 -67
  65. data/lib/relaton_iso/queue.rb +0 -61
  66. data/lib/relaton_iso/scrapper.rb +0 -553
  67. data/lib/relaton_iso/util.rb +0 -6
  68. data/lib/relaton_iso/version.rb +0 -5
  69. data/lib/relaton_iso.rb +0 -17
@@ -1,202 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # require 'relaton_iso/iso_bibliographic_item'
4
- require "relaton_iso/scrapper"
5
- require "relaton_iso/hit_collection"
6
- # require "relaton_iec"
7
-
8
module RelatonIso
  # Methods for searching ISO standards.
  module IsoBibliography
    extend self

    # Search for hits matching an identifier.
    #
    # @param pubid [Pubid::Iso::Identifier, String] identifier; a String is
    #   parsed with Pubid::Iso::Identifier.parse first
    # @param opts [Hash] options handed to HitCollection.new unchanged
    # @return [RelatonIso::HitCollection]
    # @raise [RelatonBib::RequestError] when one of the listed network/protocol
    #   errors is raised while fetching
    def search(pubid, opts = {})
      pubid = Pubid::Iso::Identifier.parse(pubid) if pubid.is_a? String
      HitCollection.new(pubid, opts).fetch
    rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
           EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
           Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT => e
      raise RelatonBib::RequestError, e.message
    end

    # @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
    # @param year [String, NilClass] the year the standard was published
    # @param opts [Hash] options; NOTE: `opts[:all_parts]` is written back into
    #   the hash passed by the caller
    # @option opts [Boolean] :all_parts if all-parts reference is required
    # @option opts [Boolean] :keep_year if undated reference should return
    #   actual reference with year
    #
    # @return [RelatonIsoBib::IsoBibliographicItem, nil] Bibliographic item, or
    #   nil when nothing is found or `ref` cannot be parsed
    def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
      # normalise EN DASH to a plain hyphen (gsub returns a fresh string, so
      # the in-place sub! below never touches the caller's `ref`)
      code = ref.gsub("\u2013", "-")

      # parse "all parts" request; sub! returns nil when the suffix is absent,
      # so its return value tells us whether the suffix was present without
      # consulting the implicit `$~` global
      all_parts_suffix = code.sub!(" (all parts)", "")
      opts[:all_parts] ||= all_parts_suffix && opts[:all_parts].nil?

      query_pubid = Pubid::Iso::Identifier.parse(code)
      # `&.` deliberately skips nil: nil.to_i would set the year to 0
      query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
      Util.info "Fetching from Relaton repository ...", key: query_pubid.to_s

      hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
      tip_ids = look_up_with_any_types_stages(hits, ref, opts)
      ret = hits.fetch_doc
      return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret

      response_pubid = ret.docidentifier.first.id # .sub(" (all parts)", "")
      Util.info "Found: `#{response_pubid}`", key: query_pubid.to_s
      # return the fetched item untouched for dated queries (unless :keep_year
      # was given explicitly), and whenever :keep_year or :all_parts is truthy
      get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
      return ret if get_all

      ret.to_most_recent_reference
    rescue Pubid::Core::Errors::ParseError
      Util.warn "Is not recognized as a standards identifier.", key: code
      nil
    end

    # @param query_pubid [Pubid::Iso::Identifier]
    # @param pubid [Pubid::Iso::Identifier]
    # @param all_parts [Boolean] match with any parts when true
    # @return [Boolean]
    def matches_parts?(query_pubid, pubid, all_parts: false)
      # match only with documents with part number
      return !pubid.part.nil? if all_parts

      query_pubid.part == pubid.part
    end

    #
    # Matches base of query_pubid and pubid.
    #
    # @param [Pubid::Iso::Identifier] query_pubid pubid to match
    # @param [Pubid::Iso::Identifier] pubid pubid to match
    # @param [Boolean] any_types_stages match with any types and stages
    #
    # @return [Boolean] false when `pubid` does not respond to `publisher`
    #
    def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/PerceivedComplexity
      return false unless pubid.respond_to?(:publisher)

      query_pubid.publisher == pubid.publisher &&
        query_pubid.number == pubid.number &&
        query_pubid.copublisher == pubid.copublisher &&
        (any_types_stages || query_pubid.stage == pubid.stage) &&
        (any_types_stages || query_pubid.is_a?(pubid.class))
    end

    # Keep only the hits whose year (or whose base identifier's year) equals
    # `year`. A hit without a pubid year gets it filled in from `hit.hit[:year]`.
    #
    # @param hit_collection [RelatonIso::HitCollection]
    # @param year [String, Integer, nil] compared via `to_s`; nil disables filtering
    # @return [Array(Enumerable, Set<String>)] hits and missed year IDs
    def filter_hits_by_year(hit_collection, year)
      missed_year_ids = Set.new
      return [hit_collection, missed_year_ids] if year.nil?

      # filter by year
      hits = hit_collection.select do |hit|
        hit.pubid.year ||= hit.hit[:year]
        next true if check_year(year, hit)

        # remember dated identifiers that were rejected, for the TIP message
        missed_year_ids << hit.pubid.to_s if hit.pubid.year
        false
      end

      [hits, missed_year_ids]
    end

    private

    # True when the hit's own year, or the year of its base identifier (if it
    # has one), equals `year`. Both sides are compared as strings.
    #
    # @param year [String, Integer]
    # @param hit [RelatonIso::Hit]
    # @return [Boolean]
    def check_year(year, hit)
      wanted = year.to_s
      pubid = hit.pubid
      pubid.year.to_s == wanted ||
        (!pubid.base.nil? && pubid.base.year.to_s == wanted)
    end

    # Log "not found" plus hints about near matches.
    #
    # @param pubid [Pubid::Iso::Identifier] PubID with no results
    # @param missed_year_ids [Enumerable<String>] IDs that matched except for the year
    # @param tip_ids [Enumerable] IDs found when types/stages were ignored
    # @return [nil]
    def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
      Util.info "Not found.", key: pubid.to_s

      if missed_year_ids.any?
        ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
        Util.info "TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}.", key: pubid.to_s
      end

      if tip_ids.any?
        ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
        Util.info "TIP: Matches exist for #{ids}.", key: pubid.to_s
      end

      if pubid.part
        Util.info "TIP: If it cannot be found, the document may no longer be published in parts.", key: pubid.to_s
      else
        Util.info "TIP: If you wish to cite all document parts for the reference, " \
                  "use `#{pubid.to_s(format: :ref_undated)} (all parts)`.", key: pubid.to_s
      end

      nil
    end

    # When nothing was found and `ref` starts with "ISO" followed by "/" or
    # whitespace and an upper-case letter, retry the search with that
    # upper-case run removed and with types and stages ignored.
    #
    # @param hits [Enumerable] hits already found for the original query
    # @param ref [String] reference as given by the caller
    # @param opts [Hash]
    # @return [Array] pubids of the relaxed matches; empty when no retry is made
    def look_up_with_any_types_stages(hits, ref, opts)
      # \A anchors at the start of the string (^ would also match after a newline)
      return [] if hits.any? || !ref.match?(%r{\AISO[/\s][A-Z]})

      ref_no_type_stage = ref.sub(%r{\AISO[/\s][A-Z]+}, "ISO")
      pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
      resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
      resp.map(&:pubid)
    end

    #
    # Search for hits and filter them by the query identifier.
    #
    # @param query_pubid [Pubid::Iso::Identifier] reference without correction
    # @param opts [Hash]
    # @param any_types_stages [Boolean] match with any stages
    #
    # @return [Array(Enumerable, Set<String>)] hits and missed year IDs
    #
    def isobib_search_filter(query_pubid, opts, any_types_stages: false)
      hit_collection = search(query_pubid, opts)

      # filter only matching hits
      filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
    end

    #
    # Filter hits by query_pubid.
    #
    # @param hit_collection [RelatonIso::HitCollection]
    # @param query_pubid [Pubid::Iso::Identifier]
    # @param all_parts [Boolean]
    # @param any_types_stages [Boolean]
    #
    # @return [Array(Enumerable, Set<String>)] hits and missed year IDs
    #
    def filter_hits(hit_collection, query_pubid, all_parts, any_types_stages)
      # attributes ignored while comparing identifiers
      excludings = build_excludings(all_parts, any_types_stages)
      no_year_ref = hit_collection.ref_pubid_no_year.exclude(*excludings)
      result = hit_collection.select do |i|
        # an all-parts request never matches a document without a part number
        pubid_match?(i.pubid, query_pubid, excludings, no_year_ref) && !(all_parts && i.pubid.part.nil?)
      end

      filter_hits_by_year(result, query_pubid.year)
    end

    # Build the list of identifier attributes to ignore when matching.
    #
    # @param all_parts [Boolean] also ignore :part
    # @param any_types_stages [Boolean] also ignore :type, :stage and :iteration
    # @return [Array<Symbol>]
    def build_excludings(all_parts, any_types_stages)
      excludings = %i[year edition]
      excludings += %i[type stage iteration] if any_types_stages
      excludings << :part if all_parts
      excludings
    end

    # Compare a hit's identifier with the query.
    #
    # @param pubid [Pubid::Iso::Identifier, String] String identifiers are
    #   compared with `query_pubid.to_s`
    # @param query_pubid [Pubid::Iso::Identifier]
    # @param excludings [Array<Symbol>] attributes to ignore
    # @param no_year_ref [Pubid::Iso::Identifier] reference identifier with the
    #   excluded attributes already removed
    # @return [Boolean]
    def pubid_match?(pubid, query_pubid, excludings, no_year_ref)
      return pubid == query_pubid.to_s if pubid.is_a? String

      # work on a copy so the hit's own identifier is not modified
      pubid = pubid.dup
      pubid.base = pubid.base.exclude(:year, :edition) if pubid.base
      pubid.exclude(*excludings) == no_year_ref
    end
  end
end
@@ -1,67 +0,0 @@
1
- require "relaton/processor"
2
-
3
module RelatonIso
  # Relaton processor plugin for ISO: wires the generic Relaton::Processor
  # interface to the ISO bibliography, data fetcher and parsers.
  class Processor < Relaton::Processor
    attr_reader :idtype

    # Set the identifying attributes of this processor. The superclass
    # initializer is intentionally not called.
    def initialize # rubocop:disable Lint/MissingSuper
      @short = :relaton_iso
      @idtype = "ISO"
      @prefix = "ISO"
      @defaultprefix = %r{^ISO(/IEC)?\s}
      @datasets = ["iso-ics"]
    end

    # Look up a bibliographic item; delegates to IsoBibliography.get.
    #
    # @param code [String]
    # @param date [String, nil] year
    # @param opts [Hash]
    # @return [RelatonIsoBib::IsoBibliographicItem]
    def get(code, date, opts)
      ::RelatonIso::IsoBibliography.get(code, date, opts)
    end

    #
    # Fetch all the documents from https://www.iso.org/standards-catalogue/browse-by-ics.html
    #
    # @param [String] _source dataset name; not used by this method
    # @param [Hash] opts forwarded to DataFetcher.fetch as keyword arguments
    # @option opts [String] :output directory to output documents
    # @option opts [String] :format output format (xml, yaml, bibxml)
    #
    def fetch_data(_source, opts)
      DataFetcher.fetch(**opts)
    end

    # Parse an XML document into a bibliographic item.
    #
    # @param xml [String]
    # @return [RelatonIsoBib::IsoBibliographicItem]
    def from_xml(xml)
      ::RelatonIsoBib::XMLParser.from_xml(xml)
    end

    # Build a bibliographic item from a hash, converting it with
    # HashConverter.hash_to_bib first.
    #
    # @param hash [Hash]
    # @return [RelatonIsoBib::IsoBibliographicItem]
    def hash_to_bib(hash)
      ::RelatonIsoBib::IsoBibliographicItem.new(**HashConverter.hash_to_bib(hash))
    end

    # Returns hash of XML grammar (memoized after the first call).
    #
    # @return [String]
    def grammar_hash
      return @grammar_hash if @grammar_hash

      @grammar_hash = ::RelatonIsoBib.grammar_hash
    end

    # Returns number of workers
    #
    # @return [Integer]
    def threads
      3
    end

    #
    # Remove index file
    #
    def remove_index_file
      index = Relaton::Index.find_or_create(:iso, url: true, file: HitCollection::INDEXFILE)
      index.remove_file
    end
  end
end
@@ -1,61 +0,0 @@
1
module RelatonIso
  #
  # Queue of links to fetch, persisted as one item per line in FILE.
  #
  class Queue
    extend Forwardable

    # Name of the file the queue is loaded from and saved to.
    FILE = "iso-queue.txt".freeze

    # Index access (`queue_obj[i]`) is forwarded to the underlying array.
    def_delegator :queue, :[]

    #
    # Lazily load the queue: lines of FILE when the file exists, otherwise a
    # new empty array. The result is memoized.
    #
    # @return [Array<String>] queue
    #
    def queue
      return @queue if @queue

      @queue = if File.exist?(FILE)
                 File.read(FILE).split("\n")
               else
                 []
               end
    end

    #
    # Put the item at the front of the queue unless it is already queued.
    #
    # @param [String] item item to add
    #
    # @return [void]
    #
    def add_first(item)
      return if queue.include?(item)

      queue.unshift(item)
    end

    #
    # Remove any existing occurrence of the item and append it at the end.
    #
    # @param [String] item item to move or add
    #
    # @return [void]
    #
    def move_last(item)
      queue.delete(item)
      queue.push(item)
    end

    #
    # Take first item from the queue (currently disabled).
    #
    # @return [String] an item
    #
    # def take_first
    #   queue.shift
    # end

    #
    # Write the queue to FILE, items separated by newlines.
    #
    # @return [void]
    #
    def save
      File.write(FILE, queue.join("\n"))
    end
  end
end