relaton-iso 1.8.0 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 61646e164a8a384cb744e3e50dafdea336f74cbf7418b59d1b5a30e8aabeea62
4
- data.tar.gz: f3c861c4fd11e792f9bd7d7edd0e83d95a5972c6e1590825ce0562360cc5f0d6
3
+ metadata.gz: c03256162a043a8d8842d3cd31d95dad25487cdde4992b34d44951c650421c3d
4
+ data.tar.gz: '0295b78c2fbfaaa848e07bb46fabfed6a4ea44c0af0f5e865bf1b25333a4e376'
5
5
  SHA512:
6
- metadata.gz: 073cf929ab4f17e00651c03443b4bc0d21526994ebd5532a5914f841066f4ec6522d58344ddabf09e69f409243c59c6c721db2a3c1c38c9784b6211cd48d9982
7
- data.tar.gz: 2e710926a48e59aba377f89cea8589f4dbb0abe0ddd53a906d9463faf7adf70681f8ef8d8b68f30d8398c9db4b9859a898b243ec53f5f2010dd7bd5a8e99b086
6
+ metadata.gz: f34e505b97d03f6e8ed481ee87036e51d8748e81e9ab77ea78fda8c7764eca21942a033a06c5e363b0ffacee648e3125cf93a1254897ab60512877836177ce3c
7
+ data.tar.gz: 9266cb080c0e888b9f6ffaa151d5bc118a6f3b73655cf3f9c8f9d1727dd98e779eef2047f977d492f54c668b58d5f23663374bf2adb8fe306b97f06558b3e973
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.rubocop.yml CHANGED
@@ -5,6 +5,6 @@
5
5
  inherit_from:
6
6
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
7
  AllCops:
8
- TargetRubyVersion: 2.4
8
+ TargetRubyVersion: 2.5
9
9
  Rails:
10
10
  Enabled: false
data/README.adoc CHANGED
@@ -31,7 +31,7 @@ Or install it yourself as:
31
31
 
32
32
  == Usage
33
33
 
34
- === Search for a standard using keywords
34
+ === Search for standards using keywords
35
35
 
36
36
  [source,ruby]
37
37
  ----
@@ -50,6 +50,7 @@ item = hit_collection[2].fetch
50
50
  [#<RelatonBib::FormattedString:0x007fa5dca88458
51
51
  @content=
52
52
  "ISO/TS 19115-3:2016 defines an integrated XML implementation of ISO 19115‑1, ..."
53
+ ...
53
54
 
54
55
  item.docidentifier
55
56
  => [#<RelatonBib::DocumentIdentifier:0x007fd9ce9c6878 @id="ISO/TS 19115-3:2016", @scope=nil, @type="ISO">,
@@ -59,6 +60,88 @@ item.docidentifier
59
60
  => "urn:iso:std:iso-ts:ts:19115:-3:stage-90.92:ed-1:en,fr"
60
61
  ----
61
62
 
63
+ === Fetch document by reference and year
64
+
65
+ [source,ruby]
66
+ ----
67
+ item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
68
+ [relaton-iso] ("ISO 19115:2003") fetching...
69
+ [relaton-iso] ("ISO 19115:2003") found ISO 19115:2003
70
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83429e30
71
+ ...
72
+
73
+ item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
74
+ [relaton-iso] ("ISO 19115") fetching...
75
+ [relaton-iso] ("ISO 19115") found ISO 19115:2003
76
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c828d3180
77
+ ...
78
+
79
+ item.docidentifier[0].id
80
+ => "ISO 19115:2003"
81
+ ----
82
+
83
+ === Fetch non-part document
84
+
85
+ [source,ruby]
86
+ ----
87
+ item = RelatonIso::IsoBibliography.get "ISO 19115"
88
+ [relaton-iso] ("ISO 19115") fetching...
89
+ [relaton-iso] ("ISO 19115") found ISO 19115:2003
90
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830275a8
91
+ ...
92
+
93
+ item.docidentifier[0].id
94
+ => "ISO 19115:2003"
95
+ ----
96
+
97
+ === Fetch part document
98
+
99
+ [source,ruby]
100
+ ----
101
+ item = RelatonIso::IsoBibliography.get "ISO 19115-1"
102
+ [relaton-iso] ("ISO 19115-1") fetching...
103
+ [relaton-iso] ("ISO 19115-1") found ISO 19115-1:2014
104
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83408af0
105
+ ...
106
+
107
+ item.docidentifier[0].id
108
+ => "ISO 19115-1:2014"
109
+ ----
110
+
111
+ === Fetch all-parts document
112
+
113
+ [source,ruby]
114
+ ----
115
+ item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
116
+ [relaton-iso] ("ISO 19115") fetching...
117
+ [relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
118
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
119
+ ...
120
+
121
+ item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
122
+ [relaton-iso] ("ISO 19115") fetching...
123
+ [relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
124
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
125
+ ...
126
+
127
+ item.docidentifier[0].id
128
+ => "ISO 19115 (all parts)"
129
+
130
+ item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
131
+ [relaton-iso] ("ISO 19115-1") fetching...
132
+ [relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
133
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
134
+
135
+ item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
136
+ [relaton-iso] ("ISO 19115-1") fetching...
137
+ [relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
138
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
139
+ ...
140
+
141
+ item.docidentifier[0].id
142
+ => "ISO 19115 (all parts)"
143
+ ----
144
+
62
145
  === Search for ISO/IEC Directives
63
146
 
64
147
  The ISO/IEC Directives are stored in a static cache in the relaton gem, so the relaton gem is needed to fetch them. The following references can be fetched:
data/bin/rackup ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rackup' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rack", "rackup")
data/bin/rubocop ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rubocop' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rubocop", "rubocop")
data/bin/ruby-parse ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'ruby-parse' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("parser", "ruby-parse")
data/bin/ruby-rewrite ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'ruby-rewrite' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("parser", "ruby-rewrite")
@@ -4,7 +4,7 @@ module RelatonIso
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
6
  # @return [RelatonIsoBib::IsoBibliographicItem]
7
- attr_accessor :fetch
7
+ attr_writer :fetch
8
8
 
9
9
  # Parse page.
10
10
  # @param lang [String, NilClass]
@@ -15,11 +15,12 @@ module RelatonIso
15
15
 
16
16
  # @return [Integer]
17
17
  def sort_weight
18
- case hit["publicationStatus"] && hit["publicationStatus"]["key"]
19
- when "ENT_ACTIVE" then 0
20
- when "ENT_PROGRESS" then 1
21
- when "ENT_INACTIVE" then 2
22
- else 3
18
+ case hit[:status] # && hit["publicationStatus"]["key"]
19
+ when "Published" then 0
20
+ when "Under development" then 1
21
+ when "Withdrawn" then 2
22
+ when "Deleted" then 3
23
+ else 4
23
24
  end
24
25
  end
25
26
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "algolia"
3
4
  require "relaton_iso/hit"
4
5
 
5
6
  module RelatonIso
@@ -16,18 +17,22 @@ module RelatonIso
16
17
  # @param lang [String, NilClass]
17
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
18
19
  def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
19
- parts = @array.reject { |h| h.hit["docPart"]&.empty? }
20
- hit = parts.min_by { |h| h.hit["docPart"].to_i }
20
+ # parts = @array.reject { |h| h.hit["docPart"]&.empty? }
21
+ hit = @array.min_by do |h|
22
+ IsoBibliography.ref_components(h.hit[:title])[1].to_i
23
+ end
21
24
  return @array.first.fetch lang unless hit
22
25
 
23
26
  bibitem = hit.fetch lang
24
27
  all_parts_item = bibitem.to_all_parts
25
- parts.reject { |h| h.hit["docRef"] == hit.hit["docRef"] }.each do |hi|
28
+ @array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
29
+ %r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
30
+ (?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
26
31
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
27
- formattedref: RelatonBib::FormattedRef.new(content: hi.hit["docRef"])
32
+ formattedref: RelatonBib::FormattedRef.new(content: fr),
28
33
  )
29
34
  all_parts_item.relation << RelatonBib::DocumentRelation.new(
30
- type: "instance", bibitem: isobib
35
+ type: "instance", bibitem: isobib,
31
36
  )
32
37
  end
33
38
  all_parts_item
@@ -49,8 +54,8 @@ module RelatonIso
49
54
 
50
55
  hash = YAML.safe_load resp.body
51
56
  bib_hash = RelatonIsoBib::HashConverter.hash_to_bib hash
52
- bib = RelatonIsoBib::IsoBibliographicItem.new **bib_hash
53
- hit = Hit.new({ "docRef" => text }, self)
57
+ bib = RelatonIsoBib::IsoBibliographicItem.new(**bib_hash)
58
+ hit = Hit.new({ title: text }, self)
54
59
  hit.fetch = bib
55
60
  [hit]
56
61
  end
@@ -61,21 +66,28 @@ module RelatonIso
61
66
  # @return [Array<RelatonIso::Hit>]
62
67
  #
63
68
  def fetch_iso # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
64
- %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
65
- http = Net::HTTP.new "www.iso.org", 443
66
- http.use_ssl = true
67
- search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
68
- search << "docNumber=#{num}"
69
- search << "docPartNo=#{part}" if part
70
- q = search.join "&"
71
- resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
72
- "Accept" => "application/json, text/plain, */*")
73
- return [] if resp.body.empty?
69
+ # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
70
+ # http = Net::HTTP.new "www.iso.org", 443
71
+ # http.use_ssl = true
72
+ # search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
73
+ # search << "docNumber=#{num}"
74
+ # search << "docPartNo=#{part}" if part
75
+ # q = search.join "&"
76
+ # resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
77
+ # "Accept" => "application/json, text/plain, */*")
78
+ config = Algolia::Search::Config.new(application_id: "JCL49WV5AR", api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0")
79
+ client = Algolia::Search::Client.new config, logger: ::Logger.new($stderr)
80
+ index = client.init_index "all_en"
81
+ resp = index.search text, hitsPerPage: 100, filters: "category:standard"
82
+ # return [] if resp.body.empty?
74
83
 
75
- json = JSON.parse resp.body
76
- json["standards"].map { |h| Hit.new h, self }.sort! do |a, b|
77
- if a.sort_weight == b.sort_weight
78
- (parse_date(b.hit) - parse_date(a.hit)).to_i
84
+ # json = JSON.parse resp.body
85
+ # json["standards"]
86
+ resp[:hits].map { |h| Hit.new h, self }.sort! do |a, b|
87
+ if a.sort_weight == b.sort_weight && b.hit[:year] = a.hit[:year]
88
+ a.hit[:title] <=> b.hit[:title]
89
+ elsif a.sort_weight == b.sort_weight
90
+ b.hit[:year] - a.hit[:year]
79
91
  else
80
92
  a.sort_weight - b.sort_weight
81
93
  end
@@ -84,16 +96,16 @@ module RelatonIso
84
96
 
85
97
  # @param hit [Hash]
86
98
  # @return [Date]
87
- def parse_date(hit)
88
- if hit["publicationDate"]
89
- Date.strptime(hit["publicationDate"], "%Y-%m")
90
- elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
91
- Date.strptime(year, "%Y")
92
- elsif hit["newProjectDate"]
93
- Date.parse hit["newProjectDate"]
94
- else
95
- Date.new 0
96
- end
97
- end
99
+ # def parse_date(hit)
100
+ # if hit["publicationDate"]
101
+ # Date.strptime(hit["publicationDate"], "%Y-%m")
102
+ # elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
103
+ # Date.strptime(year, "%Y")
104
+ # elsif hit["newProjectDate"]
105
+ # Date.parse hit["newProjectDate"]
106
+ # else
107
+ # Date.new 0
108
+ # end
109
+ # end
98
110
  end
99
111
  end
@@ -15,8 +15,9 @@ module RelatonIso
15
15
  HitCollection.new text.gsub(/\u2013/, "-")
16
16
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
17
17
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
18
- Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
19
- raise RelatonBib::RequestError, "Could not access http://www.iso.org"
18
+ Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
19
+ Algolia::AlgoliaUnreachableHostError => e
20
+ raise RelatonBib::RequestError, e.message
20
21
  end
21
22
 
22
23
  # @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
@@ -25,11 +26,15 @@ module RelatonIso
25
26
  # reference is required, :keep_year if undated reference should
26
27
  # return actual reference with year
27
28
  # @return [String] Relaton XML serialisation of reference
28
- def get(ref, year = nil, opts = {})
29
+ def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
29
30
  code = ref.gsub(/\u2013/, "-")
30
- %r{\s(?<num>\d+)(-(?<part>[\d-]+))?(:(?<year1>\d{4}))?} =~ code
31
+ # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
32
+ _, _part, year1, = ref_components ref
31
33
  year ||= year1
32
- opts[:all_parts] ||= !part && opts[:all_parts].nil? # && code2.nil?
34
+ code.sub! " (all parts)", ""
35
+ opts[:all_parts] ||= $~ && opts[:all_parts].nil?
36
+ opts[:keep_year] ||= opts[:keep_year].nil?
37
+ # code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
33
38
  # if %r[^ISO/IEC DIR].match? code
34
39
  # return RelatonIec::IecBibliography.get(code, year, opts)
35
40
  # end
@@ -44,6 +49,16 @@ module RelatonIso
44
49
  end
45
50
  end
46
51
 
52
+ def ref_components(ref)
53
+ %r{
54
+ ^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
55
+ (?:-(?<part>[\w-]+))?
56
+ (?::(?<year>\d{4}))?
57
+ (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
58
+ }x =~ ref
59
+ [code&.strip, part, year, corr, coryear]
60
+ end
61
+
47
62
  private
48
63
 
49
64
  # rubocop:disable Metrics/MethodLength
@@ -51,19 +66,19 @@ module RelatonIso
51
66
  def fetch_ref_err(code, year, missed_years)
52
67
  id = year ? "#{code}:#{year}" : code
53
68
  warn "[relaton-iso] WARNING: no match found online for #{id}. "\
54
- "The code must be exactly like it is on the standards website."
69
+ "The code must be exactly like it is on the standards website."
55
70
  unless missed_years.empty?
56
71
  warn "[relaton-iso] (There was no match for #{year}, though there "\
57
- "were matches found for #{missed_years.join(', ')}.)"
72
+ "were matches found for #{missed_years.join(', ')}.)"
58
73
  end
59
74
  if /\d-\d/.match? code
60
75
  warn "[relaton-iso] The provided document part may not exist, "\
61
- "or the document may no longer be published in parts."
76
+ "or the document may no longer be published in parts."
62
77
  else
63
78
  warn "[relaton-iso] If you wanted to cite all document parts for "\
64
- "the reference, use \"#{code} (all parts)\".\nIf the document is "\
65
- "not a standard, use its document type abbreviation (TS, TR, PAS, "\
66
- "Guide)."
79
+ "the reference, use \"#{code} (all parts)\".\nIf the document "\
80
+ "is not a standard, use its document type abbreviation "\
81
+ "(TS, TR, PAS, Guide)."
67
82
  end
68
83
  nil
69
84
  end
@@ -76,18 +91,20 @@ module RelatonIso
76
91
  # @param opts [Hash]
77
92
  # @return [Array<RelatonIso::Hit>]
78
93
  def isobib_search_filter(code, opts)
94
+ ref = remove_part code, opts[:all_parts]
79
95
  warn "[relaton-iso] (\"#{code}\") fetching..."
80
- result = search(code)
96
+ result = search(ref)
81
97
  res = search_code result, code, opts
82
98
  return res unless res.empty?
83
99
 
84
100
  # try stages
85
- if %r{^\w+/[^/]+\s\d+} =~ code # code like ISO/IEC 123, ISO/IEC/IEE 123
101
+ case code
102
+ when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
86
103
  res = try_stages(result, opts) do |st|
87
104
  code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
88
105
  end
89
106
  return res unless res.empty?
90
- elsif %r{^\w+\s\d+} =~ code # code like ISO 123
107
+ when %r{^\w+\s\d+} # code like ISO 123
91
108
  res = try_stages(result, opts) do |st|
92
109
  code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
93
110
  end
@@ -103,6 +120,12 @@ module RelatonIso
103
120
  end
104
121
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
105
122
 
123
+ def remove_part(ref, all_parts)
124
+ return ref unless all_parts
125
+
126
+ ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
127
+ end
128
+
106
129
  # @param result [RelatonIso::HitCollection]
107
130
  # @param opts [Hash]
108
131
  # @return [RelatonIso::HitCollection]
@@ -120,25 +143,15 @@ module RelatonIso
120
143
  # @param code [String]
121
144
  # @param opts [Hash]
122
145
  # @return [RelatonIso::HitCollection]
123
- def search_code(result, code, _opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
124
- code1, part1, corr1, coryear1 = ref_components code
146
+ def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
147
+ code1, part1, _, corr1, coryear1 = ref_components code
125
148
  result.select do |i|
126
- code2, part2, corr2, coryear2 = ref_components i.hit["docRef"]
127
- code1 == code2 && (!part1 || part1 == part2) &&
149
+ code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
150
+ code1 == code2 && (opts[:all_parts] && part2 || !opts[:all_parts] && part1 == part2) &&
128
151
  corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
129
152
  end
130
153
  end
131
154
 
132
- def ref_components(ref)
133
- %r{
134
- ^(?<code>ISO(?:\s|/)[^-/:()]+)
135
- (?:-(?<part>[^:/]+))?
136
- (?::\d{4})?
137
- (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
138
- }x =~ ref
139
- [code&.strip, part, corr, coryear]
140
- end
141
-
142
155
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
143
156
 
144
157
  # Sort through the results from RelatonIso, fetching them three at a time,
@@ -151,7 +164,7 @@ module RelatonIso
151
164
  def isobib_results_filter(result, year, opts)
152
165
  missed_years = []
153
166
  hits = result.reduce!([]) do |hts, h|
154
- if !year || %r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit["docRef"] && iyear == year
167
+ if !year || %r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year
155
168
  hts << h
156
169
  else
157
170
  missed_years << iyear
@@ -7,8 +7,7 @@ require "net/http"
7
7
 
8
8
  module RelatonIso
9
9
  # Scrapper.
10
- # rubocop:disable Metrics/ModuleLength
11
- module Scrapper
10
+ module Scrapper # rubocop:disable Metrics/ModuleLength
12
11
  DOMAIN = "https://www.iso.org"
13
12
 
14
13
  TYPES = {
@@ -55,9 +54,9 @@ module RelatonIso
55
54
  # @param lang [String, NilClass]
56
55
  # @return [Hash]
57
56
  def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
58
- path = "/contents/data/standard#{hit_data['splitPath']}/"\
59
- "#{hit_data['csnumber']}.html"
60
- doc, url = get_page path
57
+ # path = "/contents/data/standard#{hit_data['splitPath']}/"\
58
+ # "#{hit_data['csnumber']}.html"
59
+ doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
61
60
 
62
61
  # Fetch edition.
63
62
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
@@ -67,24 +66,24 @@ module RelatonIso
67
66
 
68
67
  RelatonIsoBib::IsoBibliographicItem.new(
69
68
  fetched: Date.today.to_s,
70
- docid: fetch_docid(hit_data, langs),
69
+ docid: fetch_docid(doc, edition, langs),
71
70
  docnumber: fetch_docnumber(doc),
72
71
  edition: edition,
73
72
  language: langs.map { |l| l[:lang] },
74
73
  script: langs.map { |l| script(l[:lang]) }.uniq,
75
74
  title: titles,
76
- doctype: fetch_type(hit_data["docRef"]),
75
+ doctype: fetch_type(hit_data[:title]),
77
76
  docstatus: fetch_status(doc),
78
77
  ics: fetch_ics(doc),
79
- date: fetch_dates(doc, hit_data["docRef"]),
80
- contributor: fetch_contributors(hit_data["docRef"]),
78
+ date: fetch_dates(doc, hit_data[:title]),
79
+ contributor: fetch_contributors(hit_data[:title]),
81
80
  editorialgroup: fetch_workgroup(doc),
82
81
  abstract: abstract,
83
- copyright: fetch_copyright(hit_data["docRef"], doc),
82
+ copyright: fetch_copyright(doc),
84
83
  link: fetch_link(doc, url),
85
84
  relation: fetch_relations(doc),
86
85
  place: ["Geneva"],
87
- structuredidentifier: fetch_structuredidentifier(doc)
86
+ structuredidentifier: fetch_structuredidentifier(doc),
88
87
  )
89
88
  end
90
89
 
@@ -94,8 +93,7 @@ module RelatonIso
94
93
  # @param doc [Nokigiri::HTML::Document]
95
94
  # @param lang [String, NilClass]
96
95
  # @return [Array<Array>]
97
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
98
- def fetch_titles_abstract(doc, lang)
96
+ def fetch_titles_abstract(doc, lang) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
99
97
  titles = RelatonBib::TypedTitleStringCollection.new
100
98
  abstract = []
101
99
  langs = languages(doc, lang).reduce([]) do |s, l|
@@ -107,7 +105,11 @@ module RelatonIso
107
105
  titles += fetch_title(d, l[:lang])
108
106
 
109
107
  # Fetch abstracts.
110
- abstract_content = d.css("div[itemprop='description'] p").text
108
+ abstract_content = d.xpath(
109
+ "//div[@itemprop='description']/p|//div[@itemprop='description']/ul/li",
110
+ ).map do |a|
111
+ a.name == "li" ? "- #{a.text}" : a.text
112
+ end.reject(&:empty?).join("\n")
111
113
  unless abstract_content.empty?
112
114
  abstract << {
113
115
  content: abstract_content,
@@ -121,7 +123,6 @@ module RelatonIso
121
123
  end
122
124
  [titles, abstract, langs]
123
125
  end
124
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
125
126
 
126
127
  # Returns available languages.
127
128
  # @param doc [Nokogiri::HTML::Document]
@@ -131,7 +132,7 @@ module RelatonIso
131
132
  lgs = [{ lang: "en" }]
132
133
  doc.css("li#lang-switcher ul li a").each do |lang_link|
133
134
  lang_path = lang_link.attr("href")
134
- l = lang_path.match(%r{^\/(fr)\/})
135
+ l = lang_path.match(%r{^/(fr)/})
135
136
  lgs << { lang: l[1], path: lang_path } if l && (!lang || l[1] == lang)
136
137
  end
137
138
  lgs
@@ -168,67 +169,80 @@ module RelatonIso
168
169
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
169
170
 
170
171
  # Fetch docid.
171
- # @param hit [Hash]
172
+ # @param doc [Nokogiri:HTML::Document]
173
+ # @param edition [String]
172
174
  # @param langs [Array<Hash>]
173
175
  # @return [Array<RelatonBib::DocumentIdentifier>]
174
- def fetch_docid(hit, langs)
176
+ def fetch_docid(doc, edition, langs)
177
+ pubid = item_ref doc
175
178
  [
176
- RelatonBib::DocumentIdentifier.new(id: hit["docRef"], type: "ISO"),
177
- RelatonBib::DocumentIdentifier.new(id: fetch_urn(hit, langs),
178
- type: "URN"),
179
+ RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO"),
180
+ RelatonBib::DocumentIdentifier.new(
181
+ id: fetch_urn(doc, pubid, edition, langs), type: "URN",
182
+ ),
179
183
  ]
180
184
  end
181
185
 
182
- # @param hit [Hash]
186
+ # @param doc [Nokogiri:HTML::Document]
187
+ # @param pubid [String]
188
+ # @param edition [String]
183
189
  # @param langs [Array<Hash>]
184
190
  # @return [String]
185
- def fetch_urn(hit, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
186
- orig = hit["docRef"].split(" ").first.downcase.split("/").join "-"
187
- %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ hit["docRef"]
191
+ def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
192
+ orig = pubid.split.first.downcase.split("/").join "-"
193
+ %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
194
+ _, part, _year, corr, = IsoBibliography.ref_components pubid
188
195
  urn = "urn:iso:std:#{orig}"
189
196
  urn += ":#{type.downcase}" if type
190
- urn += ":#{hit['docNumber']}"
191
- urn += ":-#{hit['docPart']}" if hit["docPart"] && !hit["docPart"].empty?
192
- urn += ":stage-#{hit['stageId']}"
193
- urn += ":ed-#{hit['docEdition']}" if hit["docEdition"]
194
- if hit["docElem"] && !hit["docElem"].empty? && hit["docElem"] != "0"
195
- urn += ":#{hit['docElem'].downcase}:#{hit['docElemSeq']}"
197
+ urn += ":#{fetch_docnumber(doc)}"
198
+ urn += ":-#{part}" if part
199
+ urn += ":stage-#{stage_code(doc)}"
200
+ urn += ":ed-#{edition}" if edition
201
+ if corr
202
+ corrparts = corr.split
203
+ urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
196
204
  end
197
- urn += ":" + langs.map { |l| l[:lang] }.join(",")
205
+ urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
198
206
  urn
199
207
  end
200
208
 
201
209
  def fetch_docnumber(doc)
202
- id = doc.at("//nav[contains(@class, 'heading-condensed')]/h1")&.text
203
- id&.match(/\d+/)&.to_s
210
+ item_ref(doc)&.match(/\d+/)&.to_s
204
211
  end
205
212
 
206
213
  # @param doc [Nokogiri::HTML::Document]
207
214
  def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
208
- item_ref = doc.at("//nav[contains(@class, 'heading-condensed')]/h1")
209
- unless item_ref
215
+ ref = item_ref doc
216
+ unless ref
210
217
  return RelatonIsoBib::StructuredIdentifier.new(
211
- project_number: "?", part_number: "", prefix: nil, id: "?"
218
+ project_number: "?", part_number: "", prefix: nil, id: "?",
212
219
  )
213
220
  end
214
221
 
215
- m = item_ref.text.match(/^(.*?\d+)-?((?<=-)\d+|)/)
222
+ m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
216
223
  RelatonIsoBib::StructuredIdentifier.new(
217
- project_number: m[1], part_number: m[2], prefix: nil,
218
- id: item_ref.text, type: "ISO"
224
+ project_number: m[1], part: m[2], type: "ISO",
219
225
  )
220
226
  end
221
227
 
228
+ def item_ref(doc)
229
+ doc.at("//nav[contains(@class, 'heading-condensed')]/h1")&.text
230
+ end
231
+
222
232
  # Fetch status.
223
233
  # @param doc [Nokogiri::HTML::Document]
224
234
  # @param status [String]
225
235
  # @return [Hash]
226
236
  def fetch_status(doc)
227
- stg, substg = doc.at("//ul[@class='dropdown-menu']/li[@class='active']/a/span[@class='stage-code']")
228
- .text.split "."
237
+ stg, substg = stage_code(doc).split "."
229
238
  RelatonBib::DocumentStatus.new(stage: stg, substage: substg)
230
239
  end
231
240
 
241
+ def stage_code(doc)
242
+ doc.at("//ul[@class='dropdown-menu']/li[@class='active']"\
243
+ "/a/span[@class='stage-code']").text
244
+ end
245
+
232
246
  # def stage(stg, substg)
233
247
  # abbr = STGABBR[stg].is_a?(Hash) ? STGABBR[stg][substg] : STGABBR[stg]
234
248
  # RelatonBib::DocumentStatus::Stage.new value: stg, abbreviation: abbr
@@ -241,13 +255,15 @@ module RelatonIso
241
255
  wg_link = doc.css("div.entry-name.entry-block a")[0]
242
256
  # wg_url = DOMAIN + wg_link['href']
243
257
  workgroup = wg_link.text.split "/"
258
+ type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
244
259
  {
245
260
  name: "International Organization for Standardization",
246
261
  abbreviation: "ISO",
247
262
  url: "www.iso.org",
248
263
  technical_committee: [{
249
- name: wg_link.text + doc.css("div.entry-title")[0].text,
250
- type: "TC",
264
+ name: doc.css("div.entry-title")[0].text,
265
+ identifier: wg_link.text,
266
+ type: type,
251
267
  number: workgroup[1]&.match(/\d+/)&.to_s&.to_i,
252
268
  }],
253
269
  }
@@ -274,10 +290,10 @@ module RelatonIso
274
290
  else
275
291
  a + r.css("a").map do |id|
276
292
  fref = RelatonBib::FormattedRef.new(
277
- content: id.text, format: "text/plain"
293
+ content: id.text, format: "text/plain",
278
294
  )
279
295
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
280
- formattedref: fref, date: date
296
+ formattedref: fref, date: date,
281
297
  )
282
298
  { type: type, bibitem: bibitem }
283
299
  end
@@ -311,7 +327,7 @@ module RelatonIso
311
327
  def fetch_title(doc, lang)
312
328
  content = doc.at(
313
329
  "//nav[contains(@class,'heading-condensed')]/h2 | "\
314
- "//nav[contains(@class,'heading-condensed')]/h3"
330
+ "//nav[contains(@class,'heading-condensed')]/h3",
315
331
  )&.text&.gsub(/\u2014/, "-")
316
332
  return RelatonBib::TypedTitleStringCollection.new unless content
317
333
 
@@ -373,7 +389,7 @@ module RelatonIso
373
389
  def fetch_ics(doc)
374
390
  doc.xpath("//strong[contains(text(), "\
375
391
  "'ICS')]/../following-sibling::dd/div/a").map do |i|
376
- code = i.text.match(/[\d\.]+/).to_s.split "."
392
+ code = i.text.match(/[\d.]+/).to_s.split "."
377
393
  { field: code[0], group: code[1], subgroup: code[2] }
378
394
  end
379
395
  end
@@ -395,10 +411,10 @@ module RelatonIso
395
411
  end
396
412
 
397
413
  # Fetch copyright.
398
- # @param ref [String]
399
414
  # @param doc [Nokogiri::HTML::Document]
400
415
  # @return [Array<Hash>]
401
- def fetch_copyright(ref, doc)
416
+ def fetch_copyright(doc)
417
+ ref = item_ref doc
402
418
  owner_name = ref.match(/.*?(?=\s)/).to_s
403
419
  from = ref.match(/(?<=:)\d{4}/).to_s
404
420
  if from.empty?
@@ -408,5 +424,4 @@ module RelatonIso
408
424
  end
409
425
  end
410
426
  end
411
- # rubocop:enable Metrics/ModuleLength
412
427
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.8.0"
4
+ VERSION = "1.9.0"
5
5
  end
data/relaton_iso.gemspec CHANGED
@@ -32,11 +32,15 @@ Gem::Specification.new do |spec|
32
32
  spec.add_development_dependency "pry-byebug"
33
33
  spec.add_development_dependency "rake", "~> 13.0"
34
34
  spec.add_development_dependency "rspec", "~> 3.0"
35
+ spec.add_development_dependency "rubocop"
36
+ spec.add_development_dependency "rubocop-performance"
37
+ spec.add_development_dependency "rubocop-rails"
35
38
  # spec.add_development_dependency "ruby-debug-ide"
36
39
  spec.add_development_dependency "simplecov"
37
40
  spec.add_development_dependency "vcr"
38
41
  spec.add_development_dependency "webmock"
39
42
 
40
43
  # spec.add_dependency "relaton-iec", "~> 1.8.0"
41
- spec.add_dependency "relaton-iso-bib", "~> 1.8.0"
44
+ spec.add_dependency "algolia"
45
+ spec.add_dependency "relaton-iso-bib", "~> 1.9.0"
42
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-17 00:00:00.000000000 Z
11
+ date: 2021-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -80,6 +80,48 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop-performance
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rubocop-rails
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
83
125
  - !ruby/object:Gem::Dependency
84
126
  name: simplecov
85
127
  requirement: !ruby/object:Gem::Requirement
@@ -122,20 +164,34 @@ dependencies:
122
164
  - - ">="
123
165
  - !ruby/object:Gem::Version
124
166
  version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: algolia
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
125
181
  - !ruby/object:Gem::Dependency
126
182
  name: relaton-iso-bib
127
183
  requirement: !ruby/object:Gem::Requirement
128
184
  requirements:
129
185
  - - "~>"
130
186
  - !ruby/object:Gem::Version
131
- version: 1.8.0
187
+ version: 1.9.0
132
188
  type: :runtime
133
189
  prerelease: false
134
190
  version_requirements: !ruby/object:Gem::Requirement
135
191
  requirements:
136
192
  - - "~>"
137
193
  - !ruby/object:Gem::Version
138
- version: 1.8.0
194
+ version: 1.9.0
139
195
  description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
140
196
  model'
141
197
  email:
@@ -165,9 +221,13 @@ files:
165
221
  - bin/nokogiri
166
222
  - bin/pry
167
223
  - bin/racc
224
+ - bin/rackup
168
225
  - bin/rake
169
226
  - bin/rdebug-ide
170
227
  - bin/rspec
228
+ - bin/rubocop
229
+ - bin/ruby-parse
230
+ - bin/ruby-rewrite
171
231
  - bin/safe_yaml
172
232
  - bin/setup
173
233
  - lib/relaton_iso.rb